commit 85f98793c90f13b5bdc9a8922ae40380614373f4 Author: Damian Johnson atagar@torproject.org Date: Sun Apr 26 11:38:36 2015 -0700
Overhaul daybreak handling
I never liked our old approach for grouping entries by day. Oh, and it was inefficient as hell.
Replacing it with simpler and more performant grouping. --- nyx/log_panel.py | 159 +++++++++-------------------- nyx/util/log.py | 33 +++++- test/util/log/data/daybreak_deduplication | 37 +++++++ test/util/log/log_group.py | 61 ++++++++++- 4 files changed, 176 insertions(+), 114 deletions(-)
diff --git a/nyx/log_panel.py b/nyx/log_panel.py index f61d9c6..2038a69 100644 --- a/nyx/log_panel.py +++ b/nyx/log_panel.py @@ -22,10 +22,7 @@ import nyx.popups
from nyx import __version__ from nyx.util import panel, tor_controller, ui_tools -from nyx.util.log import LogGroup, LogEntry, read_tor_log - -DAYBREAK_EVENT = 'DAYBREAK' # special event for marking when the date changes -TIMEZONE_OFFSET = time.altzone if time.localtime()[8] else time.timezone +from nyx.util.log import LogGroup, LogEntry, read_tor_log, days_since
ENTRY_INDENT = 2 # spaces an entry's message is indented after the first line
@@ -65,32 +62,11 @@ DUPLICATE_MSG = ' [%i duplicate%s hidden]'
CONTENT_HEIGHT_REDRAW_THRESHOLD = 3
-# cached values and the arguments that generated it for the get_daybreaks and -# get_duplicates functions - -CACHED_DAYBREAKS_ARGUMENTS = (None, None) # events, current day -CACHED_DAYBREAKS_RESULT = None - # maximum number of regex filters we'll remember
MAX_REGEX_FILTERS = 5
-def days_since(timestamp = None): - """ - Provides the number of days since the epoch converted to local time (rounded - down). - - Arguments: - timestamp - unix timestamp to convert, current time if undefined - """ - - if timestamp is None: - timestamp = time.time() - - return int((timestamp - TIMEZONE_OFFSET) / 86400) - - def log_file_path(): for log_entry in tor_controller().get_conf('Log', [], True): entry_comp = log_entry.split() # looking for an entry like: notice file /var/log/tor/notices.log @@ -99,48 +75,6 @@ def log_file_path(): return CONFIG['tor.chroot'] + entry_comp[2]
-def get_daybreaks(events, ignore_time_for_cache = False): - """ - Provides the input events back with special 'DAYBREAK_EVENT' markers inserted - whenever the date changed between log entries (or since the most recent - event). The timestamp matches the beginning of the day for the following - entry. - - Arguments: - events - chronologically ordered listing of events - ignore_time_for_cache - skips taking the day into consideration for providing - cached results if true - """ - - global CACHED_DAYBREAKS_ARGUMENTS, CACHED_DAYBREAKS_RESULT - - if not events: - return [] - - new_listing = [] - current_day = days_since() - last_day = current_day - - if CACHED_DAYBREAKS_ARGUMENTS[0] == events and \ - (ignore_time_for_cache or CACHED_DAYBREAKS_ARGUMENTS[1] == current_day): - return list(CACHED_DAYBREAKS_RESULT) - - for entry in events: - event_day = days_since(entry.timestamp) - - if event_day != last_day: - marker_timestamp = (event_day * 86400) + TIMEZONE_OFFSET - new_listing.append(LogEntry(marker_timestamp, DAYBREAK_EVENT, '')) - - new_listing.append(entry) - last_day = event_day - - CACHED_DAYBREAKS_ARGUMENTS = (list(events), current_day) - CACHED_DAYBREAKS_RESULT = list(new_listing) - - return new_listing - - class LogPanel(panel.Panel, threading.Thread, logging.Handler): """ Listens for and displays tor, nyx, and stem events. This can prepopulate @@ -188,7 +122,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler): self.scroll = 0
self.set_pause_attr('_msg_log') - self._msg_log = LogGroup(CONFIG['cache.log_panel.size']) + self._msg_log = LogGroup(CONFIG['cache.log_panel.size'], group_by_day = CONFIG['features.log.showDateDividers'])
self._last_update = -1 # time the content was last revised self._halt = False # terminates thread if true @@ -260,7 +194,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler): with self.vals_lock: # clears the event log
- self._msg_log = LogGroup(CONFIG['cache.log_panel.size']) + self._msg_log = LogGroup(CONFIG['cache.log_panel.size'], group_by_day = CONFIG['features.log.showDateDividers'])
# fetches past tor events from log file, if available
@@ -477,7 +411,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler): """
with self.vals_lock: - self._msg_log = LogGroup(CONFIG['cache.log_panel.size']) + self._msg_log = LogGroup(CONFIG['cache.log_panel.size'], group_by_day = CONFIG['features.log.showDateDividers']) self.redraw(True)
def save_snapshot(self, path): @@ -635,6 +569,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler): # determines if we have the minimum width to show date dividers
show_daybreaks = width - divider_indent >= 3 + last_day = deduplicated_log[0][0].days_since()
while deduplicated_log: entry, duplicate_count = deduplicated_log.pop(0) @@ -644,7 +579,7 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
# checks if we should be showing a divider with the date
- if entry.type == DAYBREAK_EVENT: + if last_day != entry.days_since(): # bottom of the divider
if seen_first_date_divider: @@ -669,57 +604,59 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
seen_first_date_divider = True line_count += 1 - else: - # entry contents to be displayed, tuples of the form: - # (msg, formatting, includeLinebreak)
- display_queue = [] + # entry contents to be displayed, tuples of the form: + # (msg, formatting, includeLinebreak)
- msg_comp = entry.display_message.split('\n') + display_queue = []
- for i in range(len(msg_comp)): - font = curses.A_BOLD if 'ERR' in entry.type else curses.A_NORMAL # emphasizes ERR messages - display_queue.append((msg_comp[i].strip(), (font, CONFIG['attr.log_color'].get(entry.type, 'white')), i != len(msg_comp) - 1)) + msg_comp = entry.display_message.split('\n')
- if duplicate_count: - plural_label = 's' if duplicate_count > 1 else '' - duplicate_msg = DUPLICATE_MSG % (duplicate_count, plural_label) - display_queue.append((duplicate_msg, duplicate_attr, False)) + for i in range(len(msg_comp)): + font = curses.A_BOLD if 'ERR' in entry.type else curses.A_NORMAL # emphasizes ERR messages + display_queue.append((msg_comp[i].strip(), (font, CONFIG['attr.log_color'].get(entry.type, 'white')), i != len(msg_comp) - 1))
- cursor_location, line_offset = msg_indent, 0 - max_entries_per_line = CONFIG['features.log.max_lines_per_entry'] + if duplicate_count: + plural_label = 's' if duplicate_count > 1 else '' + duplicate_msg = DUPLICATE_MSG % (duplicate_count, plural_label) + display_queue.append((duplicate_msg, duplicate_attr, False))
- while display_queue: - msg, format, include_break = display_queue.pop(0) - draw_line = line_count + line_offset + # TODO: a fix made line_offset unused, and probably broke max_entries_per_line... not sure if we care
- if line_offset == max_entries_per_line: - break + cursor_location, line_offset = msg_indent, 0 + max_entries_per_line = CONFIG['features.log.max_lines_per_entry']
- max_msg_size = width - cursor_location - 1 + while display_queue: + msg, format, include_break = display_queue.pop(0) + draw_line = line_count + line_offset
- if len(msg) > max_msg_size: - # message is too long - break it up - if line_offset == max_entries_per_line - 1: - msg = str_tools.crop(msg, max_msg_size) - else: - msg, remainder = str_tools.crop(msg, max_msg_size, 4, 4, str_tools.Ending.HYPHEN, True) - display_queue.insert(0, (remainder.strip(), format, include_break)) + if line_offset == max_entries_per_line: + break
- include_break = True + max_msg_size = width - cursor_location - 1 + + if len(msg) > max_msg_size: + # message is too long - break it up + if line_offset == max_entries_per_line - 1: + msg = str_tools.crop(msg, max_msg_size) + else: + msg, remainder = str_tools.crop(msg, max_msg_size, 4, 4, str_tools.Ending.HYPHEN, True) + display_queue.insert(0, (remainder.strip(), format, include_break))
- if draw_line < height and draw_line >= 1: - if seen_first_date_divider and width - divider_indent >= 3 and show_daybreaks: - self.addch(draw_line, divider_indent, curses.ACS_VLINE, *divider_attr) - self.addch(draw_line, width - 1, curses.ACS_VLINE, *divider_attr) + include_break = True
- self.addstr(draw_line, cursor_location, msg, *format) + if draw_line < height and draw_line >= 1: + if seen_first_date_divider and width - divider_indent >= 3 and show_daybreaks: + self.addch(draw_line, divider_indent, curses.ACS_VLINE, *divider_attr) + self.addch(draw_line, width - 1, curses.ACS_VLINE, *divider_attr)
- cursor_location += len(msg) + self.addstr(draw_line, cursor_location, msg, *format)
- if include_break or not display_queue: - line_offset += 1 - cursor_location = msg_indent + ENTRY_INDENT + cursor_location += len(msg) + + if include_break or not display_queue: + line_count += 1 + cursor_location = msg_indent + ENTRY_INDENT
line_count += line_offset
@@ -733,6 +670,8 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler):
line_count += 1
+ last_day = entry.days_since() + # redraw the display if... # - last_content_height was off by too much # - we're off the bottom of the page @@ -769,10 +708,10 @@ class LogPanel(panel.Panel, threading.Thread, logging.Handler): responsive if additions are less frequent. """
- last_day = days_since() # used to determine if the date has changed + last_day = days_since(time.time()) # used to determine if the date has changed
while not self._halt: - current_day = days_since() + current_day = days_since(time.time()) time_since_reset = time.time() - self._last_update max_log_update_rate = CONFIG['features.log.maxRefreshRate'] / 1000.0
diff --git a/nyx/util/log.py b/nyx/util/log.py index 545eb75..58126a7 100644 --- a/nyx/util/log.py +++ b/nyx/util/log.py @@ -19,6 +19,19 @@ except ImportError: from stem.util.lru_cache import lru_cache
TOR_RUNLEVELS = ['DEBUG', 'INFO', 'NOTICE', 'WARN', 'ERR'] +TIMEZONE_OFFSET = time.altzone if time.localtime()[8] else time.timezone + + +def days_since(timestamp): + """ + Provides the number of days since a given unix timestamp, by local time. + + :param int timestamp: unix timestamp to provide time since + + :reutrns: **int** with the number of days since this event + """ + + return int((timestamp - TIMEZONE_OFFSET) / 86400)
@lru_cache() @@ -47,19 +60,24 @@ class LogGroup(object): and supports deduplication. """
- def __init__(self, max_size): + def __init__(self, max_size, group_by_day = False): self._max_size = max_size + self._group_by_day = group_by_day self._entries = [] self._lock = threading.RLock()
def add(self, timestamp, type, message): - entry = LogEntry(timestamp, type, message) + self.add_entry(LogEntry(timestamp, type, message))
+ def add_entry(self, entry): with self._lock: duplicate = None + our_day = entry.days_since()
for existing_entry in self._entries: - if entry.is_duplicate_of(existing_entry): + if self._group_by_day and our_day != existing_entry.days_since(): + break + elif entry.is_duplicate_of(existing_entry): duplicate = existing_entry break
@@ -150,6 +168,15 @@ class LogEntry(object):
return False
+ def days_since(self): + """ + Provides the number of days since this event, by local time. + + :reutrns: **int** with the number of days since this event + """ + + return days_since(self.timestamp) + def __eq__(self, other): if isinstance(other, LogEntry): return hash(self) == hash(other) diff --git a/test/util/log/data/daybreak_deduplication b/test/util/log/data/daybreak_deduplication new file mode 100644 index 0000000..1849ad3 --- /dev/null +++ b/test/util/log/data/daybreak_deduplication @@ -0,0 +1,37 @@ +Apr 24 19:50:42.000 [notice] Heartbeat: Tor's uptime is 12 days 0:00 hours, with 0 circuits open. I've sent 4.94 MB and received 130.17 MB. +Apr 24 19:50:42.000 [notice] Average packaged cell fullness: 65.101%. TLS write overhead: 11% +Apr 25 01:50:42.000 [notice] Heartbeat: Tor's uptime is 12 days 6:00 hours, with 0 circuits open. I've sent 5.00 MB and received 131.87 MB. +Apr 25 01:50:42.000 [notice] Average packaged cell fullness: 64.927%. TLS write overhead: 11% +Apr 25 07:50:42.000 [notice] Heartbeat: Tor's uptime is 12 days 12:00 hours, with 0 circuits open. I've sent 5.08 MB and received 134.19 MB. +Apr 25 07:50:42.000 [notice] Average packaged cell fullness: 64.587%. TLS write overhead: 11% +Apr 25 11:44:21.000 [notice] New control connection opened from 127.0.0.1. +Apr 25 11:44:33.000 [notice] Interrupt: exiting cleanly. +Apr 25 11:44:36.000 [notice] Tor 0.2.7.0-alpha-dev (git-63a90f2df4dcd7ff) opening log file. +Apr 25 11:44:36.492 [notice] Tor v0.2.7.0-alpha-dev (git-63a90f2df4dcd7ff) running on Linux with Libevent 2.0.16-stable, OpenSSL 1.0.1 and Zlib 1.2.3.4. +Apr 25 11:44:36.492 [notice] Tor can't help you if you use it wrong! Learn how to be safe at https://www.torproject.org/download/download#warning +Apr 25 11:44:36.492 [notice] This version is not a stable Tor release. Expect more bugs than usual. +Apr 25 11:44:36.525 [notice] Read configuration file "/home/atagar/.tor/torrc". +Apr 25 11:44:36.530 [notice] Opening Socks listener on 127.0.0.1:9050 +Apr 25 11:44:36.530 [notice] Opening Control listener on 127.0.0.1:9051 +Apr 25 11:44:36.000 [notice] Bootstrapped 0%: Starting +Apr 25 11:44:39.000 [notice] Bootstrapped 45%: Asking for relay descriptors +Apr 25 11:44:40.000 [notice] Bootstrapped 50%: Loading relay descriptors +Apr 25 11:44:47.000 [notice] Bootstrapped 55%: Loading relay descriptors +Apr 25 11:44:47.000 [notice] Bootstrapped 62%: Loading relay descriptors +Apr 25 11:44:48.000 [notice] Bootstrapped 72%: Loading relay descriptors +Apr 25 11:44:48.000 [notice] Bootstrapped 80%: Connecting to the Tor network +Apr 25 11:44:48.000 [notice] Bootstrapped 90%: Establishing a Tor circuit +Apr 25 11:44:49.000 [notice] Tor has successfully opened a circuit. Looks like client functionality is working. +Apr 25 11:44:49.000 [notice] Bootstrapped 100%: Done +Apr 25 11:45:03.000 [notice] New control connection opened from 127.0.0.1. +Apr 25 17:44:40.000 [notice] Heartbeat: Tor's uptime is 6:00 hours, with 0 circuits open. I've sent 539 kB and received 4.25 MB. +Apr 25 18:11:56.000 [notice] New control connection opened from 127.0.0.1. +Apr 25 18:52:47.000 [notice] New control connection opened from 127.0.0.1. +Apr 25 19:02:44.000 [notice] New control connection opened from 127.0.0.1. +Apr 25 23:44:40.000 [notice] Heartbeat: Tor's uptime is 12:00 hours, with 1 circuits open. I've sent 794 kB and received 7.32 MB. +Apr 26 05:44:40.000 [notice] Heartbeat: Tor's uptime is 18:00 hours, with 0 circuits open. I've sent 862 kB and received 9.05 MB. +Apr 26 10:16:38.000 [notice] New control connection opened from 127.0.0.1. +Apr 26 10:19:24.000 [notice] New control connection opened from 127.0.0.1. +Apr 26 10:21:31.000 [notice] New control connection opened from 127.0.0.1. +Apr 26 10:24:27.000 [notice] New control connection opened from 127.0.0.1. +Apr 26 10:24:46.000 [notice] New control connection opened from 127.0.0.1. diff --git a/test/util/log/log_group.py b/test/util/log/log_group.py index 732ee14..d1f1f46 100644 --- a/test/util/log/log_group.py +++ b/test/util/log/log_group.py @@ -1,6 +1,7 @@ +import os import unittest
-from nyx.util.log import LogGroup, LogEntry +from nyx.util.log import LogGroup, LogEntry, read_tor_log
class TestLogGroup(unittest.TestCase): @@ -85,3 +86,61 @@ class TestLogGroup(unittest.TestCase): self.assertEqual(None, group_items[0].duplicates) self.assertEqual(bootstrap_messages, [e.message for e in group_items[1].duplicates]) self.assertEqual([False, False, True, True, False], [e.is_duplicate for e in group_items]) + + def test_deduplication_with_daybreaks(self): + group = LogGroup(100, group_by_day = True) + test_log_path = os.path.join(os.path.dirname(__file__), 'data', 'daybreak_deduplication') + + for entry in reversed(list(read_tor_log(test_log_path))): + group.add_entry(entry) + + # Entries should consist of two days of results... + # + # Day 1: + # 10:24:27 [NOTICE] New control connection opened from 127.0.0.1. + # 10:21:31 [NOTICE] New control connection opened from 127.0.0.1. + # 10:19:24 [NOTICE] New control connection opened from 127.0.0.1. + # 10:16:38 [NOTICE] New control connection opened from 127.0.0.1. + # 10:16:38 [NOTICE] New control connection opened from 127.0.0.1. + # 05:44:40 [NOTICE] Heartbeat: Tor's uptime is 18:00 hours, with 0 circuits open. I've sent 862 kB and received 9.05 MB. + # + # Day 2: + # 23:44:40 [NOTICE] Heartbeat: Tor's uptime is 12:00 hours, with 1 circuits open. I've sent 794 kB and received 7.32 MB. + # 19:02:44 [NOTICE] New control connection opened from 127.0.0.1. + # 18:52:47 [NOTICE] New control connection opened from 127.0.0.1. + # 18:11:56 [NOTICE] New control connection opened from 127.0.0.1. + # 17:44:40 [NOTICE] Heartbeat: Tor's uptime is 6:00 hours, with 0 circuits open. I've sent 539 kB and received 4.25 MB. + # 11:45:03 [NOTICE] New control connection opened from 127.0.0.1. + # 11:44:49 [NOTICE] Bootstrapped 100%: Done + # ... etc... + + group_items = list(group) + + # First day + + self.assertEqual('New control connection opened from 127.0.0.1.', group_items[0].message) + self.assertEqual(5, len(group_items[0].duplicates)) + self.assertFalse(group_items[0].is_duplicate) + + for entry in group_items[1:5]: + self.assertEqual('New control connection opened from 127.0.0.1.', entry.message) + self.assertEqual(5, len(entry.duplicates)) + self.assertTrue(entry.is_duplicate) + + self.assertEqual("Heartbeat: Tor's uptime is 18:00 hours, with 0 circuits open. I've sent 862 kB and received 9.05 MB.", group_items[5].message) + self.assertEqual(None, group_items[5].duplicates) + self.assertFalse(group_items[5].is_duplicate) + + # Second day + + self.assertEqual("Heartbeat: Tor's uptime is 12:00 hours, with 1 circuits open. I've sent 794 kB and received 7.32 MB.", group_items[6].message) + self.assertEqual(2, len(group_items[6].duplicates)) + self.assertFalse(group_items[6].is_duplicate) + + self.assertEqual('New control connection opened from 127.0.0.1.', group_items[8].message) + self.assertEqual(4, len(group_items[8].duplicates)) + self.assertTrue(group_items[8].is_duplicate) + + self.assertEqual("Heartbeat: Tor's uptime is 6:00 hours, with 0 circuits open. I've sent 539 kB and received 4.25 MB.", group_items[10].message) + self.assertEqual(2, len(group_items[10].duplicates)) + self.assertTrue(group_items[10].is_duplicate)