[tor-commits] [stem/master] Stop tracking last-modified timestamps if file disappears

atagar at torproject.org atagar at torproject.org
Sun Apr 15 02:50:20 UTC 2012


commit 57398f137882e22d5ca278a7944212ec62e4d5a1
Author: Damian Johnson <atagar at torproject.org>
Date:   Thu Mar 29 09:07:44 2012 -0700

    Stop tracking last-modified timestamps if file disappears
    
    Improvement suggested by Karsten. For long-running processes with a rotating
    pool of descriptors we could accumulate a lot of 'path => last modified'
    entries, so we now only include the entries for files that still exist.
---
 stem/descriptor/reader.py |    8 +++++---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
index c81fe6e..bbedefc 100644
--- a/stem/descriptor/reader.py
+++ b/stem/descriptor/reader.py
@@ -226,7 +226,8 @@ class DescriptorReader:
     
     absolute path (str) => last modified unix timestamp (int)
     
-    This includes entries set through the set_processed_files() method.
+    This includes entries set through the set_processed_files() method. After
+    each run this is reset to only the files that were present during that run.
     
     Returns:
       dict with the absolute paths and unix timestamp for the last modified
@@ -305,6 +306,7 @@ class DescriptorReader:
       self._reader_thread = None
   
   def _read_descriptor_files(self):
+    new_processed_files = {}
     remaining_files = list(self._targets)
     
     while remaining_files and not self._is_stopped.is_set():
@@ -328,12 +330,11 @@ class DescriptorReader:
         
         last_modified = int(os.stat(target).st_mtime)
         last_used = self._processed_files.get(target)
+        new_processed_files[target] = last_modified
         
         if last_used and last_used >= last_modified:
           self._notify_skip_listeners(target, AlreadyRead(last_modified, last_used))
           continue
-        else:
-          self._processed_files[target] = last_modified
         
         # The mimetypes module only checks the file extension. To actually
         # check the content (like the 'file' command) we'd need something like
@@ -350,6 +351,7 @@ class DescriptorReader:
         else:
           self._notify_skip_listeners(target, UnrecognizedType(target_type))
     
+    self._processed_files = new_processed_files
     self._enqueue_descriptor(FINISHED)
     self._iter_notice.set()
   





More information about the tor-commits mailing list