[or-cvs] r23852: {arm} I'm not spotting a good solution for deduplicating large log (in arm/trunk: . src src/interface)

Damian Johnson atagar1 at gmail.com
Mon Nov 22 03:10:55 UTC 2010


Author: atagar
Date: 2010-11-22 03:10:55 +0000 (Mon, 22 Nov 2010)
New Revision: 23852

Modified:
   arm/trunk/armrc.sample
   arm/trunk/src/interface/configStatePanel.py
   arm/trunk/src/interface/logPanel.py
   arm/trunk/src/settings.cfg
Log:
I'm not spotting a good solution for deduplicating large logs, so sidestepping the issue for now by disabling the feature when it takes too long.



Modified: arm/trunk/armrc.sample
===================================================================
--- arm/trunk/armrc.sample	2010-11-21 21:21:25 UTC (rev 23851)
+++ arm/trunk/armrc.sample	2010-11-22 03:10:55 UTC (rev 23852)
@@ -69,7 +69,7 @@
 #   when editing config values the current value is prepopulated if true, and
 #   left blank otherwise
 # state.colWidth.*
-#   maximum column content width
+#   column content width
 # state.showPrivateOptions
 #   tor provides config options of the form "__<option>" that can be dangerous
 #   to set, if true arm provides these on the config panel

Modified: arm/trunk/src/interface/configStatePanel.py
===================================================================
--- arm/trunk/src/interface/configStatePanel.py	2010-11-21 21:21:25 UTC (rev 23851)
+++ arm/trunk/src/interface/configStatePanel.py	2010-11-22 03:10:55 UTC (rev 23852)
@@ -232,14 +232,17 @@
     optionColWidth, valueColWidth = 0, 0
     
     # constructs a mapping of entries to their current values
+    # TODO: just skip dynamic widths entirely?
     entryToValues = {}
     for entry in self.confContents:
       entryToValues[entry] = entry.get(FIELD_VALUE)
-      optionColWidth = max(optionColWidth, len(entry.get(FIELD_OPTION)))
-      valueColWidth = max(valueColWidth, len(entryToValues[entry]))
+      #optionColWidth = max(optionColWidth, len(entry.get(FIELD_OPTION)))
+      #valueColWidth = max(valueColWidth, len(entryToValues[entry]))
     
-    optionColWidth = min(self._config["features.config.state.colWidth.option"], optionColWidth)
-    valueColWidth = min(self._config["features.config.state.colWidth.value"], valueColWidth)
+    #optionColWidth = min(self._config["features.config.state.colWidth.option"], optionColWidth)
+    #valueColWidth = min(self._config["features.config.state.colWidth.value"], valueColWidth)
+    optionColWidth = self._config["features.config.state.colWidth.option"]
+    valueColWidth = self._config["features.config.state.colWidth.value"]
     descriptionColWidth = max(0, width - scrollOffset - optionColWidth - valueColWidth - 2)
     
     for lineNum in range(scrollLoc, len(self.confContents)):

Modified: arm/trunk/src/interface/logPanel.py
===================================================================
--- arm/trunk/src/interface/logPanel.py	2010-11-21 21:21:25 UTC (rev 23851)
+++ arm/trunk/src/interface/logPanel.py	2010-11-22 03:10:55 UTC (rev 23852)
@@ -72,6 +72,9 @@
 CACHED_DUPLICATES_ARGUMENTS = None # events
 CACHED_DUPLICATES_RESULT = None
 
+# duration we'll wait for the deduplication function before giving up (in ms)
+DEDUPLICATION_TIMEOUT = 100
+
 def daysSince(timestamp=None):
   """
   Provides the number of days since the epoch converted to local time (rounded
@@ -326,7 +329,8 @@
   """
   Deduplicates a list of log entries, providing back a tuple listing with the
   log entry and count of duplicates following it. Entries in different days are
-  not considered to be duplicates.
+  not considered to be duplicates. This times out, returning None if it takes
+  longer than DEDUPLICATION_TIMEOUT.
   
   Arguments:
     events - chronologically ordered listing of events
@@ -339,35 +343,17 @@
   # loads common log entries from the config if they haven't been
   if COMMON_LOG_MESSAGES == None: loadLogMessages()
   
+  startTime = time.time()
   eventsRemaining = list(events)
   returnEvents = []
   
   while eventsRemaining:
     entry = eventsRemaining.pop(0)
-    duplicateIndices = []
+    duplicateIndices = isDuplicate(entry, eventsRemaining, True)
     
-    for i in range(len(eventsRemaining)):
-      forwardEntry = eventsRemaining[i]
-      
-      # if showing dates then do duplicate detection for each day, rather
-      # than globally
-      if forwardEntry.type == DAYBREAK_EVENT: break
-      
-      if entry.type == forwardEntry.type:
-        isDuplicate = False
-        if entry.msg == forwardEntry.msg: isDuplicate = True
-        elif entry.type in COMMON_LOG_MESSAGES:
-          for commonMsg in COMMON_LOG_MESSAGES[entry.type]:
-            # if it starts with an asterisk then check the whole message rather
-            # than just the start
-            if commonMsg[0] == "*":
-              isDuplicate = commonMsg[1:] in entry.msg and commonMsg[1:] in forwardEntry.msg
-            else:
-              isDuplicate = entry.msg.startswith(commonMsg) and forwardEntry.msg.startswith(commonMsg)
-            
-            if isDuplicate: break
-        
-        if isDuplicate: duplicateIndices.append(i)
+    # checks if the call timeout has been reached
+    if (time.time() - startTime) > DEDUPLICATION_TIMEOUT / 1000.0:
+      return None
     
     # drops duplicate entries
     duplicateIndices.reverse()
@@ -380,6 +366,48 @@
   
   return returnEvents
 
+def isDuplicate(event, eventSet, getDuplicates = False):
+  """
+  True if the event is a duplicate for something in the eventSet, false
+  otherwise. If the getDuplicates flag is set this provides the indices of
+  the duplicates instead.
+  
+  Arguments:
+    event         - event to search for duplicates of
+    eventSet      - set to look for the event in
+    getDuplicates - instead of providing back a boolean this gives a list of
+                    the duplicate indices in the eventSet
+  """
+  
+  duplicateIndices = []
+  for i in range(len(eventSet)):
+    forwardEntry = eventSet[i]
+    
+    # if showing dates then do duplicate detection for each day, rather
+    # than globally
+    if forwardEntry.type == DAYBREAK_EVENT: break
+    
+    if event.type == forwardEntry.type:
+      isDuplicate = False
+      if event.msg == forwardEntry.msg: isDuplicate = True
+      elif event.type in COMMON_LOG_MESSAGES:
+        for commonMsg in COMMON_LOG_MESSAGES[event.type]:
+          # if it starts with an asterisk then check the whole message rather
+          # than just the start
+          if commonMsg[0] == "*":
+            isDuplicate = commonMsg[1:] in event.msg and commonMsg[1:] in forwardEntry.msg
+          else:
+            isDuplicate = event.msg.startswith(commonMsg) and forwardEntry.msg.startswith(commonMsg)
+          
+          if isDuplicate: break
+      
+      if isDuplicate:
+        if getDuplicates: duplicateIndices.append(i)
+        else: return True
+  
+  if getDuplicates: return duplicateIndices
+  else: return False
+
 class LogEntry():
   """
   Individual log file entry, having the following attributes:
@@ -779,7 +807,14 @@
     
     isDatesShown = self.regexFilter == None and self._config["features.log.showDateDividers"]
     eventLog = getDaybreaks(self.msgLog, self._isPaused) if isDatesShown else list(self.msgLog)
-    if not self.showDuplicates: deduplicatedLog = getDuplicates(eventLog)
+    if not self.showDuplicates:
+      deduplicatedLog = getDuplicates(eventLog)
+      
+      if deduplicatedLog == None:
+        msg = "Deduplication took too long. Its current implementation has difficulty handling large logs so disabling it to keep the interface responsive."
+        log.log(log.WARN, msg)
+        self.showDuplicates = True
+        deduplicatedLog = [(entry, 0) for entry in eventLog]
     else: deduplicatedLog = [(entry, 0) for entry in eventLog]
     
     # determines if we have the minimum width to show date dividers

Modified: arm/trunk/src/settings.cfg
===================================================================
--- arm/trunk/src/settings.cfg	2010-11-21 21:21:25 UTC (rev 23851)
+++ arm/trunk/src/settings.cfg	2010-11-22 03:10:55 UTC (rev 23852)
@@ -44,6 +44,7 @@
 # [ARM_DEBUG] recreating panel 'graph' with the dimensions of 14/124
 # [ARM_DEBUG] redrawing the log panel with the corrected content height (estimat was off by 4)
 # [ARM_DEBUG] GETINFO accounting/bytes-left (runtime: 0.0006)
+# [ARM_DEBUG] GETCONF MyFamily (runtime: 0.0007)
 
 msg.BW READ:
 msg.DEBUG connection_handle_write(): After TLS write of
@@ -73,6 +74,7 @@
 msg.ARM_DEBUG GETINFO accounting/bytes-left
 msg.ARM_DEBUG GETINFO accounting/interval-end
 msg.ARM_DEBUG GETINFO accounting/hibernating
+msg.ARM_DEBUG GETCONF
 
 # some config options are fetched via special values
 torrc.map HiddenServiceDir => HiddenServiceOptions



More information about the tor-commits mailing list