commit 57398f137882e22d5ca278a7944212ec62e4d5a1
Author: Damian Johnson <atagar(a)torproject.org>
Date: Thu Mar 29 09:07:44 2012 -0700
Stop tracking last-modified timestamps if file disappears
Improvement suggested by Karsten. For long-running processes with a rotating
pool of descriptors we could accumulate a lot of 'path => last modified'
entries, so only include the entries for files that still exist.
---
stem/descriptor/reader.py | 8 +++++---
1 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
index c81fe6e..bbedefc 100644
--- a/stem/descriptor/reader.py
+++ b/stem/descriptor/reader.py
@@ -226,7 +226,8 @@ class DescriptorReader:
absolute path (str) => last modified unix timestamp (int)
- This includes entries set through the set_processed_files() method.
+ This includes entries set through the set_processed_files() method. After
+ each run this is reset to only the files that were present during that run.
Returns:
dict with the absolute paths and unix timestamp for the last modified
@@ -305,6 +306,7 @@ class DescriptorReader:
self._reader_thread = None
def _read_descriptor_files(self):
+ new_processed_files = {}
remaining_files = list(self._targets)
while remaining_files and not self._is_stopped.is_set():
@@ -328,12 +330,11 @@ class DescriptorReader:
last_modified = int(os.stat(target).st_mtime)
last_used = self._processed_files.get(target)
+ new_processed_files[target] = last_modified
if last_used and last_used >= last_modified:
self._notify_skip_listeners(target, AlreadyRead(last_modified, last_used))
continue
- else:
- self._processed_files[target] = last_modified
# The mimetypes module only checks the file extension. To actually
# check the content (like the 'file' command) we'd need something like
@@ -350,6 +351,7 @@ class DescriptorReader:
else:
self._notify_skip_listeners(target, UnrecognizedType(target_type))
+ self._processed_files = new_processed_files
self._enqueue_descriptor(FINISHED)
self._iter_notice.set()