[stem/master] Making descriptor reader persistence more convenient

commit 10cb30b546016f975559bdf1d8dd785ee516ae84 Author: Damian Johnson <atagar@torproject.org> Date: Sat May 5 16:54:24 2012 -0700 Making descriptor reader persistence more convenient Our current functions for loading/saving processed file listings are fine if you want error handling and a great deal of control. However, I suspect that most callers would prefer for this to be an attribute of the reader itself. Adding an argument that performs best-effort persistence of our processed files listing. --- stem/descriptor/reader.py | 22 +++++++++++++++++++++- test/integ/descriptor/reader.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletions(-) diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py index 0bddd53..8f00f5f 100644 --- a/stem/descriptor/reader.py +++ b/stem/descriptor/reader.py @@ -191,17 +191,25 @@ class DescriptorReader: waiting for our caller to fetch some of them. This is included to avoid unbounded memory usage. + Our persistence_path argument is a convenient method to persist the listing + of files we have processed between runs, however it doesn't allow for error + handling. If you want that then use the load/save_processed_files functions + instead. 
+ Arguments: targets (list) - paths for files or directories to be read from follow_links (bool) - determines if we'll follow symlinks when traversing directories buffer_size (int) - descriptors we'll buffer before waiting for some to be read, this is unbounded if zero + persistence_path (str) - if set we will load and save processed file + listings from this path, errors are ignored """ - def __init__(self, targets, follow_links = False, buffer_size = 100): + def __init__(self, targets, follow_links = False, buffer_size = 100, persistence_path = None): self._targets = targets self._follow_links = follow_links + self._persistence_path = persistence_path self._skip_listeners = [] self._processed_files = {} @@ -218,6 +226,12 @@ class DescriptorReader: # FINISHED entry is used by the reading thread to indicate the end. self._unreturned_descriptors = Queue.Queue(buffer_size) + + if self._persistence_path: + try: + processed_files = load_processed_files(self._persistence_path) + self.set_processed_files(processed_files) + except: pass def get_processed_files(self): """ @@ -311,6 +325,12 @@ class DescriptorReader: self._reader_thread.join() self._reader_thread = None + + if self._persistence_path: + try: + processed_files = self.get_processed_files() + save_processed_files(self._persistence_path, processed_files) + except: pass def _read_descriptor_files(self): new_processed_files = {} diff --git a/test/integ/descriptor/reader.py b/test/integ/descriptor/reader.py index 5c84704..0e3467c 100644 --- a/test/integ/descriptor/reader.py +++ b/test/integ/descriptor/reader.py @@ -207,6 +207,35 @@ class TestDescriptorReader(unittest.TestCase): time.sleep(0.01) self.assertTrue(reader.get_buffered_descriptor_count() <= 2) + def test_persistence_path(self): + """ + Check that the persistence_path argument loads and saves a a processed + files listing. 
+ """ + + persistence_path = _get_processed_files_path() + descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, "example_descriptor") + + # First run where the persistence_path doesn't yet exist. This just tests + # the saving functionality. + + reader = stem.descriptor.reader.DescriptorReader([descriptor_path], persistence_path = persistence_path) + with reader: self.assertEqual(1, len(list(reader))) + + # check that we've saved reading example_descriptor + self.assertTrue(os.path.exists(persistence_path)) + + with open(persistence_path) as persistence_file: + persistance_file_contents = persistence_file.read() + self.assertTrue(persistance_file_contents.startswith(descriptor_path)) + + # Try running again with a new reader but the same persistance path, if it + # reads and takes the persistence_path into account then it won't read the + # descriptor file. This in essence just tests its loading functionality. + + reader = stem.descriptor.reader.DescriptorReader([descriptor_path], persistence_path = persistence_path) + with reader: self.assertEqual(0, len(list(reader))) + def test_archived_uncompressed(self): """ Checks that we can read descriptors from an uncompressed archive.
participants (1)
-
atagar@torproject.org