[tor-commits] [nyx/master] Overhaul daybreak handling

atagar at torproject.org
Tue May 5 05:42:06 UTC 2015


commit 85f98793c90f13b5bdc9a8922ae40380614373f4
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Apr 26 11:38:36 2015 -0700

    Overhaul daybreak handling
    
    I never liked our old approach for grouping entries by day. Oh, and it was
    inefficient as hell.
    
    Replacing it with simpler and more performant grouping.
---
 nyx/log_panel.py                          |  159 +++++++++--------------------
 nyx/util/log.py                           |   33 +++++-
 test/util/log/data/daybreak_deduplication |   37 +++++++
 test/util/log/log_group.py                |   61 ++++++++++-
 4 files changed, 176 insertions(+), 114 deletions(-)
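
The heart of the change, as a standalone sketch (a loose illustration, not
lifted verbatim from the patch; the timestamps are contrived): instead of
splicing special DAYBREAK_EVENT marker entries into the listing and caching
the result, the renderer now compares the local-time day of adjacent entries
directly...

    import time

    # local timezone offset, mirroring nyx/util/log.py
    TIMEZONE_OFFSET = time.altzone if time.localtime()[8] else time.timezone

    def days_since(timestamp):
      # local-time days since the unix epoch, rounded down
      return int((timestamp - TIMEZONE_OFFSET) / 86400)

    # illustrative timestamps spanning a day boundary
    entries = [1430000000, 1430030000, 1430100000]
    last_day = days_since(entries[0])

    for timestamp in entries:
      if days_since(timestamp) != last_day:
        print('--- %s ---' % time.strftime('%B %d, %Y', time.localtime(timestamp)))
        last_day = days_since(timestamp)

      print('entry at %s' % time.strftime('%H:%M:%S', time.localtime(timestamp)))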

diff --git a/nyx/log_panel.py b/nyx/log_panel.py
index f61d9c6..2038a69 100644
--- a/nyx/log_panel.py
+++ b/nyx/log_panel.py
@@ -22,10 +22,7 @@ import nyx.popups
 
 from nyx import __version__
 from nyx.util import panel, tor_controller, ui_tools
-from nyx.util.log import LogGroup, LogEntry, read_tor_log
-
-DAYBREAK_EVENT = 'DAYBREAK'  # special event for marking when the date changes
-TIMEZONE_OFFSET = time.altzone if time.localtime()[8] else time.timezone
+from nyx.util.log import LogGroup, LogEntry, read_tor_log, days_since
 
 ENTRY_INDENT = 2  # spaces an entry's message is indented after the first line
 
@@ -65,32 +62,11 @@ DUPLICATE_MSG = ' [%i duplicate%s hidden]'
 
 CONTENT_HEIGHT_REDRAW_THRESHOLD = 3
 
-# cached values and the arguments that generated it for the get_daybreaks and
-# get_duplicates functions
-
-CACHED_DAYBREAKS_ARGUMENTS = (None, None)  # events, current day
-CACHED_DAYBREAKS_RESULT = None
-
 # maximum number of regex filters we'll remember
 
 MAX_REGEX_FILTERS = 5
 
 
-def days_since(timestamp = None):
-  """
-  Provides the number of days since the epoch converted to local time (rounded
-  down).
-
-  Arguments:
-    timestamp - unix timestamp to convert, current time if undefined
-  """
-
-  if timestamp is None:
-    timestamp = time.time()
-
-  return int((timestamp - TIMEZONE_OFFSET) / 86400)
-
-
 def log_file_path():
   for log_entry in tor_controller().get_conf('Log', [], True):
     entry_comp = log_entry.split()  # looking for an entry like: notice file /var/log/tor/notices.log
@@ -99,48 +75,6 @@ def log_file_path():
       return CONFIG['tor.chroot'] + entry_comp[2]
 
 
-def get_daybreaks(events, ignore_time_for_cache = False):
-  """
-  Provides the input events back with special 'DAYBREAK_EVENT' markers inserted
-  whenever the date changed between log entries (or since the most recent
-  event). The timestamp matches the beginning of the day for the following
-  entry.
-
-  Arguments:
-    events             - chronologically ordered listing of events
-    ignore_time_for_cache - skips taking the day into consideration for providing
-                         cached results if true
-  """
-
-  global CACHED_DAYBREAKS_ARGUMENTS, CACHED_DAYBREAKS_RESULT
-
-  if not events:
-    return []
-
-  new_listing = []
-  current_day = days_since()
-  last_day = current_day
-
-  if CACHED_DAYBREAKS_ARGUMENTS[0] == events and \
-    (ignore_time_for_cache or CACHED_DAYBREAKS_ARGUMENTS[1] == current_day):
-    return list(CACHED_DAYBREAKS_RESULT)
-
-  for entry in events:
-    event_day = days_since(entry.timestamp)
-
-    if event_day != last_day:
-      marker_timestamp = (event_day * 86400) + TIMEZONE_OFFSET
-      new_listing.append(LogEntry(marker_timestamp, DAYBREAK_EVENT, ''))
-
-    new_listing.append(entry)
-    last_day = event_day
-
-  CACHED_DAYBREAKS_ARGUMENTS = (list(events), current_day)
-  CACHED_DAYBREAKS_RESULT = list(new_listing)
-
-  return new_listing
-
-
 class LogPanel(panel.Panel, threading.Thread, logging.Handler):
   """
   Listens for and displays tor, nyx, and stem events. This can prepopulate
@@ -188,7 +122,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
     self.scroll = 0
 
     self.set_pause_attr('_msg_log')
-    self._msg_log = LogGroup(CONFIG['cache.log_panel.size'])
+    self._msg_log = LogGroup(CONFIG['cache.log_panel.size'], group_by_day = CONFIG['features.log.showDateDividers'])
 
     self._last_update = -1               # time the content was last revised
     self._halt = False                   # terminates thread if true
@@ -260,7 +194,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
     with self.vals_lock:
       # clears the event log
 
-      self._msg_log = LogGroup(CONFIG['cache.log_panel.size'])
+      self._msg_log = LogGroup(CONFIG['cache.log_panel.size'], group_by_day = CONFIG['features.log.showDateDividers'])
 
       # fetches past tor events from log file, if available
 
@@ -477,7 +411,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
     """
 
     with self.vals_lock:
-      self._msg_log = LogGroup(CONFIG['cache.log_panel.size'])
+      self._msg_log = LogGroup(CONFIG['cache.log_panel.size'], group_by_day = CONFIG['features.log.showDateDividers'])
       self.redraw(True)
 
   def save_snapshot(self, path):
@@ -635,6 +569,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
       # determines if we have the minimum width to show date dividers
 
       show_daybreaks = width - divider_indent >= 3
+      last_day = deduplicated_log[0][0].days_since()
 
       while deduplicated_log:
         entry, duplicate_count = deduplicated_log.pop(0)
@@ -644,7 +579,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
 
         # checks if we should be showing a divider with the date
 
-        if entry.type == DAYBREAK_EVENT:
+        if last_day != entry.days_since():
           # bottom of the divider
 
           if seen_first_date_divider:
@@ -669,57 +604,59 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
 
           seen_first_date_divider = True
           line_count += 1
-        else:
-          # entry contents to be displayed, tuples of the form:
-          # (msg, formatting, includeLinebreak)
 
-          display_queue = []
+        # entry contents to be displayed, tuples of the form:
+        # (msg, formatting, includeLinebreak)
 
-          msg_comp = entry.display_message.split('\n')
+        display_queue = []
 
-          for i in range(len(msg_comp)):
-            font = curses.A_BOLD if 'ERR' in entry.type else curses.A_NORMAL  # emphasizes ERR messages
-            display_queue.append((msg_comp[i].strip(), (font, CONFIG['attr.log_color'].get(entry.type, 'white')), i != len(msg_comp) - 1))
+        msg_comp = entry.display_message.split('\n')
 
-          if duplicate_count:
-            plural_label = 's' if duplicate_count > 1 else ''
-            duplicate_msg = DUPLICATE_MSG % (duplicate_count, plural_label)
-            display_queue.append((duplicate_msg, duplicate_attr, False))
+        for i in range(len(msg_comp)):
+          font = curses.A_BOLD if 'ERR' in entry.type else curses.A_NORMAL  # emphasizes ERR messages
+          display_queue.append((msg_comp[i].strip(), (font, CONFIG['attr.log_color'].get(entry.type, 'white')), i != len(msg_comp) - 1))
 
-          cursor_location, line_offset = msg_indent, 0
-          max_entries_per_line = CONFIG['features.log.max_lines_per_entry']
+        if duplicate_count:
+          plural_label = 's' if duplicate_count > 1 else ''
+          duplicate_msg = DUPLICATE_MSG % (duplicate_count, plural_label)
+          display_queue.append((duplicate_msg, duplicate_attr, False))
 
-          while display_queue:
-            msg, format, include_break = display_queue.pop(0)
-            draw_line = line_count + line_offset
+        # TODO: a fix made line_offset unused, and probably broke max_entries_per_line... not sure if we care
 
-            if line_offset == max_entries_per_line:
-              break
+        cursor_location, line_offset = msg_indent, 0
+        max_entries_per_line = CONFIG['features.log.max_lines_per_entry']
 
-            max_msg_size = width - cursor_location - 1
+        while display_queue:
+          msg, format, include_break = display_queue.pop(0)
+          draw_line = line_count + line_offset
 
-            if len(msg) > max_msg_size:
-              # message is too long - break it up
-              if line_offset == max_entries_per_line - 1:
-                msg = str_tools.crop(msg, max_msg_size)
-              else:
-                msg, remainder = str_tools.crop(msg, max_msg_size, 4, 4, str_tools.Ending.HYPHEN, True)
-                display_queue.insert(0, (remainder.strip(), format, include_break))
+          if line_offset == max_entries_per_line:
+            break
 
-              include_break = True
+          max_msg_size = width - cursor_location - 1
+
+          if len(msg) > max_msg_size:
+            # message is too long - break it up
+            if line_offset == max_entries_per_line - 1:
+              msg = str_tools.crop(msg, max_msg_size)
+            else:
+              msg, remainder = str_tools.crop(msg, max_msg_size, 4, 4, str_tools.Ending.HYPHEN, True)
+              display_queue.insert(0, (remainder.strip(), format, include_break))
 
-            if draw_line < height and draw_line >= 1:
-              if seen_first_date_divider and width - divider_indent >= 3 and show_daybreaks:
-                self.addch(draw_line, divider_indent, curses.ACS_VLINE, *divider_attr)
-                self.addch(draw_line, width - 1, curses.ACS_VLINE, *divider_attr)
+            include_break = True
 
-              self.addstr(draw_line, cursor_location, msg, *format)
+          if draw_line < height and draw_line >= 1:
+            if seen_first_date_divider and width - divider_indent >= 3 and show_daybreaks:
+              self.addch(draw_line, divider_indent, curses.ACS_VLINE, *divider_attr)
+              self.addch(draw_line, width - 1, curses.ACS_VLINE, *divider_attr)
 
-            cursor_location += len(msg)
+            self.addstr(draw_line, cursor_location, msg, *format)
 
-            if include_break or not display_queue:
-              line_offset += 1
-              cursor_location = msg_indent + ENTRY_INDENT
+          cursor_location += len(msg)
+
+          if include_break or not display_queue:
+            line_count += 1
+            cursor_location = msg_indent + ENTRY_INDENT
 
           line_count += line_offset
 
@@ -733,6 +670,8 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
 
           line_count += 1
 
+        last_day = entry.days_since()
+
       # redraw the display if...
       # - last_content_height was off by too much
       # - we're off the bottom of the page
@@ -769,10 +708,10 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
     responsive if additions are less frequent.
     """
 
-    last_day = days_since()  # used to determine if the date has changed
+    last_day = days_since(time.time())  # used to determine if the date has changed
 
     while not self._halt:
-      current_day = days_since()
+      current_day = days_since(time.time())
       time_since_reset = time.time() - self._last_update
       max_log_update_rate = CONFIG['features.log.maxRefreshRate'] / 1000.0
 
diff --git a/nyx/util/log.py b/nyx/util/log.py
index 545eb75..58126a7 100644
--- a/nyx/util/log.py
+++ b/nyx/util/log.py
@@ -19,6 +19,19 @@ except ImportError:
   from stem.util.lru_cache import lru_cache
 
 TOR_RUNLEVELS = ['DEBUG', 'INFO', 'NOTICE', 'WARN', 'ERR']
+TIMEZONE_OFFSET = time.altzone if time.localtime()[8] else time.timezone
+
+
+def days_since(timestamp):
+  """
+  Provides the number of days since the epoch for a timestamp, in local time.
+
+  :param int timestamp: unix timestamp to convert
+
+  :returns: **int** with the number of local-time days since the epoch
+  """
+
+  return int((timestamp - TIMEZONE_OFFSET) / 86400)
 
 
 @lru_cache()
@@ -47,19 +60,24 @@ class LogGroup(object):
   and supports deduplication.
   """
 
-  def __init__(self, max_size):
+  def __init__(self, max_size, group_by_day = False):
     self._max_size = max_size
+    self._group_by_day = group_by_day
     self._entries = []
     self._lock = threading.RLock()
 
   def add(self, timestamp, type, message):
-    entry = LogEntry(timestamp, type, message)
+    self.add_entry(LogEntry(timestamp, type, message))
 
+  def add_entry(self, entry):
     with self._lock:
       duplicate = None
+      our_day = entry.days_since()
 
       for existing_entry in self._entries:
-        if entry.is_duplicate_of(existing_entry):
+        if self._group_by_day and our_day != existing_entry.days_since():
+          break
+        elif entry.is_duplicate_of(existing_entry):
           duplicate = existing_entry
           break
 
@@ -150,6 +168,15 @@ class LogEntry(object):
 
     return False
 
+  def days_since(self):
+    """
+    Provides the number of days since the epoch for this event, in local time.
+
+    :returns: **int** with the number of local-time days since the epoch
+    """
+
+    return days_since(self.timestamp)
+
   def __eq__(self, other):
     if isinstance(other, LogEntry):
       return hash(self) == hash(other)
diff --git a/test/util/log/data/daybreak_deduplication b/test/util/log/data/daybreak_deduplication
new file mode 100644
index 0000000..1849ad3
--- /dev/null
+++ b/test/util/log/data/daybreak_deduplication
@@ -0,0 +1,37 @@
+Apr 24 19:50:42.000 [notice] Heartbeat: Tor's uptime is 12 days 0:00 hours, with 0 circuits open. I've sent 4.94 MB and received 130.17 MB.
+Apr 24 19:50:42.000 [notice] Average packaged cell fullness: 65.101%. TLS write overhead: 11%
+Apr 25 01:50:42.000 [notice] Heartbeat: Tor's uptime is 12 days 6:00 hours, with 0 circuits open. I've sent 5.00 MB and received 131.87 MB.
+Apr 25 01:50:42.000 [notice] Average packaged cell fullness: 64.927%. TLS write overhead: 11%
+Apr 25 07:50:42.000 [notice] Heartbeat: Tor's uptime is 12 days 12:00 hours, with 0 circuits open. I've sent 5.08 MB and received 134.19 MB.
+Apr 25 07:50:42.000 [notice] Average packaged cell fullness: 64.587%. TLS write overhead: 11%
+Apr 25 11:44:21.000 [notice] New control connection opened from 127.0.0.1.
+Apr 25 11:44:33.000 [notice] Interrupt: exiting cleanly.
+Apr 25 11:44:36.000 [notice] Tor 0.2.7.0-alpha-dev (git-63a90f2df4dcd7ff) opening log file.
+Apr 25 11:44:36.492 [notice] Tor v0.2.7.0-alpha-dev (git-63a90f2df4dcd7ff) running on Linux with Libevent 2.0.16-stable, OpenSSL 1.0.1 and Zlib 1.2.3.4.
+Apr 25 11:44:36.492 [notice] Tor can't help you if you use it wrong! Learn how to be safe at https://www.torproject.org/download/download#warning
+Apr 25 11:44:36.492 [notice] This version is not a stable Tor release. Expect more bugs than usual.
+Apr 25 11:44:36.525 [notice] Read configuration file "/home/atagar/.tor/torrc".
+Apr 25 11:44:36.530 [notice] Opening Socks listener on 127.0.0.1:9050
+Apr 25 11:44:36.530 [notice] Opening Control listener on 127.0.0.1:9051
+Apr 25 11:44:36.000 [notice] Bootstrapped 0%: Starting
+Apr 25 11:44:39.000 [notice] Bootstrapped 45%: Asking for relay descriptors
+Apr 25 11:44:40.000 [notice] Bootstrapped 50%: Loading relay descriptors
+Apr 25 11:44:47.000 [notice] Bootstrapped 55%: Loading relay descriptors
+Apr 25 11:44:47.000 [notice] Bootstrapped 62%: Loading relay descriptors
+Apr 25 11:44:48.000 [notice] Bootstrapped 72%: Loading relay descriptors
+Apr 25 11:44:48.000 [notice] Bootstrapped 80%: Connecting to the Tor network
+Apr 25 11:44:48.000 [notice] Bootstrapped 90%: Establishing a Tor circuit
+Apr 25 11:44:49.000 [notice] Tor has successfully opened a circuit. Looks like client functionality is working.
+Apr 25 11:44:49.000 [notice] Bootstrapped 100%: Done
+Apr 25 11:45:03.000 [notice] New control connection opened from 127.0.0.1.
+Apr 25 17:44:40.000 [notice] Heartbeat: Tor's uptime is 6:00 hours, with 0 circuits open. I've sent 539 kB and received 4.25 MB.
+Apr 25 18:11:56.000 [notice] New control connection opened from 127.0.0.1.
+Apr 25 18:52:47.000 [notice] New control connection opened from 127.0.0.1.
+Apr 25 19:02:44.000 [notice] New control connection opened from 127.0.0.1.
+Apr 25 23:44:40.000 [notice] Heartbeat: Tor's uptime is 12:00 hours, with 1 circuits open. I've sent 794 kB and received 7.32 MB.
+Apr 26 05:44:40.000 [notice] Heartbeat: Tor's uptime is 18:00 hours, with 0 circuits open. I've sent 862 kB and received 9.05 MB.
+Apr 26 10:16:38.000 [notice] New control connection opened from 127.0.0.1.
+Apr 26 10:19:24.000 [notice] New control connection opened from 127.0.0.1.
+Apr 26 10:21:31.000 [notice] New control connection opened from 127.0.0.1.
+Apr 26 10:24:27.000 [notice] New control connection opened from 127.0.0.1.
+Apr 26 10:24:46.000 [notice] New control connection opened from 127.0.0.1.
diff --git a/test/util/log/log_group.py b/test/util/log/log_group.py
index 732ee14..d1f1f46 100644
--- a/test/util/log/log_group.py
+++ b/test/util/log/log_group.py
@@ -1,6 +1,7 @@
+import os
 import unittest
 
-from nyx.util.log import LogGroup, LogEntry
+from nyx.util.log import LogGroup, LogEntry, read_tor_log
 
 
 class TestLogGroup(unittest.TestCase):
@@ -85,3 +86,61 @@ class TestLogGroup(unittest.TestCase):
     self.assertEqual(None, group_items[0].duplicates)
     self.assertEqual(bootstrap_messages, [e.message for e in group_items[1].duplicates])
     self.assertEqual([False, False, True, True, False], [e.is_duplicate for e in group_items])
+
+  def test_deduplication_with_daybreaks(self):
+    group = LogGroup(100, group_by_day = True)
+    test_log_path = os.path.join(os.path.dirname(__file__), 'data', 'daybreak_deduplication')
+
+    for entry in reversed(list(read_tor_log(test_log_path))):
+      group.add_entry(entry)
+
+    # Entries should consist of two days of results...
+    #
+    # Day 1:
+    # 10:24:46 [NOTICE] New control connection opened from 127.0.0.1.
+    # 10:24:27 [NOTICE] New control connection opened from 127.0.0.1.
+    # 10:21:31 [NOTICE] New control connection opened from 127.0.0.1.
+    # 10:19:24 [NOTICE] New control connection opened from 127.0.0.1.
+    # 10:16:38 [NOTICE] New control connection opened from 127.0.0.1.
+    # 05:44:40 [NOTICE] Heartbeat: Tor's uptime is 18:00 hours, with 0 circuits open. I've sent 862 kB and received 9.05 MB.
+    #
+    # Day 2:
+    # 23:44:40 [NOTICE] Heartbeat: Tor's uptime is 12:00 hours, with 1 circuits open. I've sent 794 kB and received 7.32 MB.
+    # 19:02:44 [NOTICE] New control connection opened from 127.0.0.1.
+    # 18:52:47 [NOTICE] New control connection opened from 127.0.0.1.
+    # 18:11:56 [NOTICE] New control connection opened from 127.0.0.1.
+    # 17:44:40 [NOTICE] Heartbeat: Tor's uptime is 6:00 hours, with 0 circuits open. I've sent 539 kB and received 4.25 MB.
+    # 11:45:03 [NOTICE] New control connection opened from 127.0.0.1.
+    # 11:44:49 [NOTICE] Bootstrapped 100%: Done
+    # ... etc...
+
+    group_items = list(group)
+
+    # First day
+
+    self.assertEqual('New control connection opened from 127.0.0.1.', group_items[0].message)
+    self.assertEqual(5, len(group_items[0].duplicates))
+    self.assertFalse(group_items[0].is_duplicate)
+
+    for entry in group_items[1:5]:
+      self.assertEqual('New control connection opened from 127.0.0.1.', entry.message)
+      self.assertEqual(5, len(entry.duplicates))
+      self.assertTrue(entry.is_duplicate)
+
+    self.assertEqual("Heartbeat: Tor's uptime is 18:00 hours, with 0 circuits open. I've sent 862 kB and received 9.05 MB.", group_items[5].message)
+    self.assertEqual(None, group_items[5].duplicates)
+    self.assertFalse(group_items[5].is_duplicate)
+
+    # Second day
+
+    self.assertEqual("Heartbeat: Tor's uptime is 12:00 hours, with 1 circuits open. I've sent 794 kB and received 7.32 MB.", group_items[6].message)
+    self.assertEqual(2, len(group_items[6].duplicates))
+    self.assertFalse(group_items[6].is_duplicate)
+
+    self.assertEqual('New control connection opened from 127.0.0.1.', group_items[8].message)
+    self.assertEqual(4, len(group_items[8].duplicates))
+    self.assertTrue(group_items[8].is_duplicate)
+
+    self.assertEqual("Heartbeat: Tor's uptime is 6:00 hours, with 0 circuits open. I've sent 539 kB and received 4.25 MB.", group_items[10].message)
+    self.assertEqual(2, len(group_items[10].duplicates))
+    self.assertTrue(group_items[10].is_duplicate)
