commit 57398f137882e22d5ca278a7944212ec62e4d5a1 Author: Damian Johnson atagar@torproject.org Date: Thu Mar 29 09:07:44 2012 -0700
Stop tracking last-modified timestamps if file disappears
Improvement suggested by Karsten. For long-running processes with a rotating pool of descriptors we could accumulate a lot of 'path => last modified' entries, so we now only keep the entries for files that still exist. --- stem/descriptor/reader.py | 8 +++++--- 1 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py index c81fe6e..bbedefc 100644 --- a/stem/descriptor/reader.py +++ b/stem/descriptor/reader.py @@ -226,7 +226,8 @@ class DescriptorReader:
absolute path (str) => last modified unix timestamp (int)
- This includes entries set through the set_processed_files() method. + This includes entries set through the set_processed_files() method. After + each run is reset to only the files that were present during that run.
Returns: dict with the absolute paths and unix timestamp for the last modified @@ -305,6 +306,7 @@ class DescriptorReader: self._reader_thread = None
def _read_descriptor_files(self): + new_processed_files = {} remaining_files = list(self._targets)
while remaining_files and not self._is_stopped.is_set(): @@ -328,12 +330,11 @@ class DescriptorReader:
last_modified = int(os.stat(target).st_mtime) last_used = self._processed_files.get(target) + new_processed_files[target] = last_modified
if last_used and last_used >= last_modified: self._notify_skip_listeners(target, AlreadyRead(last_modified, last_used)) continue - else: - self._processed_files[target] = last_modified
# The mimetypes module only checks the file extension. To actually # check the content (like the 'file' command) we'd need something like @@ -350,6 +351,7 @@ class DescriptorReader: else: self._notify_skip_listeners(target, UnrecognizedType(target_type))
+ self._processed_files = new_processed_files self._enqueue_descriptor(FINISHED) self._iter_notice.set()