[stem/master] Making descriptor reader persistence more convenient

6 May 2012

commit 10cb30b546016f975559bdf1d8dd785ee516ae84
Author: Damian Johnson <atagar@torproject.org>
Date:   Sat May 5 16:54:24 2012 -0700

    Making descriptor reader persistence more convenient
    
    Our current functions for loading/saving processed file listings are fine if
    you want error handling and a great deal of control. However, I suspect that
    most callers would prefer for this to be an attribute of the reader itself.
    
    Adding an argument that performs best-effort persistence of our processed files
    listing.
---
 stem/descriptor/reader.py       |   22 +++++++++++++++++++++-
 test/integ/descriptor/reader.py |   29 +++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletions(-)

diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
index 0bddd53..8f00f5f 100644
--- a/stem/descriptor/reader.py
+++ b/stem/descriptor/reader.py
@@ -191,17 +191,25 @@ class DescriptorReader:
   waiting for our caller to fetch some of them. This is included to avoid
   unbounded memory usage.
   
+  Our persistence_path argument is a convenient method to persist the listing
+  of files we have processed between runs, however it doesn't allow for error
+  handling. If you want that then use the load/save_processed_files functions
+  instead.
+  
   Arguments:
     targets (list)      - paths for files or directories to be read from
     follow_links (bool) - determines if we'll follow symlinks when traversing
                           directories
     buffer_size (int)   - descriptors we'll buffer before waiting for some to
                           be read, this is unbounded if zero
+    persistence_path (str) - if set we will load and save processed file
+                          listings from this path, errors are ignored
   """
   
-  def __init__(self, targets, follow_links = False, buffer_size = 100):
+  def __init__(self, targets, follow_links = False, buffer_size = 100, persistence_path = None):
     self._targets = targets
     self._follow_links = follow_links
+    self._persistence_path = persistence_path
     self._skip_listeners = []
     self._processed_files = {}
     
@@ -218,6 +226,12 @@ class DescriptorReader:
     # FINISHED entry is used by the reading thread to indicate the end.
     
     self._unreturned_descriptors = Queue.Queue(buffer_size)
+    
+    if self._persistence_path:
+      try:
+        processed_files = load_processed_files(self._persistence_path)
+        self.set_processed_files(processed_files)
+      except: pass
   
   def get_processed_files(self):
     """
@@ -311,6 +325,12 @@ class DescriptorReader:
       
       self._reader_thread.join()
       self._reader_thread = None
+      
+      if self._persistence_path:
+        try:
+          processed_files = self.get_processed_files()
+          save_processed_files(self._persistence_path, processed_files)
+        except: pass
   
   def _read_descriptor_files(self):
     new_processed_files = {}
diff --git a/test/integ/descriptor/reader.py b/test/integ/descriptor/reader.py
index 5c84704..0e3467c 100644
--- a/test/integ/descriptor/reader.py
+++ b/test/integ/descriptor/reader.py
@@ -207,6 +207,35 @@ class TestDescriptorReader(unittest.TestCase):
       time.sleep(0.01)
       self.assertTrue(reader.get_buffered_descriptor_count() <= 2)
   
+  def test_persistence_path(self):
+    """
+    Check that the persistence_path argument loads and saves a processed
+    files listing.
+    """
+    
+    persistence_path = _get_processed_files_path()
+    descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, "example_descriptor")
+    
+    # First run where the persistence_path doesn't yet exist. This just tests
+    # the saving functionality.
+    
+    reader = stem.descriptor.reader.DescriptorReader([descriptor_path], persistence_path = persistence_path)
+    with reader: self.assertEqual(1, len(list(reader)))
+    
+    # check that we've saved reading example_descriptor
+    self.assertTrue(os.path.exists(persistence_path))
+    
+    with open(persistence_path) as persistence_file:
+      persistance_file_contents = persistence_file.read()
+      self.assertTrue(persistance_file_contents.startswith(descriptor_path))
+    
+    # Try running again with a new reader but the same persistence path, if it
+    # reads and takes the persistence_path into account then it won't read the
+    # descriptor file. This in essence just tests its loading functionality.
+    
+    reader = stem.descriptor.reader.DescriptorReader([descriptor_path], persistence_path = persistence_path)
+    with reader: self.assertEqual(0, len(list(reader)))
+  
   def test_archived_uncompressed(self):
     """
     Checks that we can read descriptors from an uncompressed archive.

    

atagar＠torproject.org

tags

participants (1)