[tor-commits] [stem/master] Skeleton for descriptor reader

atagar at torproject.org atagar at torproject.org
Mon Mar 26 00:10:01 UTC 2012


commit 5a7bb2584ec976b64a91986e5aa8ca60d8ab60a0
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Mar 4 01:22:36 2012 -0800

    Skeleton for descriptor reader
    
    Initial skeleton for a class to read descriptor data, not including most of the
    functional bits. This will be similar to the metrics-lib DescriptorReader
    class, but with a slightly different API and the ability to handle tarball
    archives.
---
 stem/descriptor/__init__.py |    6 ++
 stem/descriptor/reader.py   |  119 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+), 0 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
new file mode 100644
index 0000000..fef6750
--- /dev/null
+++ b/stem/descriptor/__init__.py
@@ -0,0 +1,6 @@
+"""
+Utilities for parsing and processing descriptor data.
+"""
+
+__all__ = ["reader"]
+
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
new file mode 100644
index 0000000..b6404f3
--- /dev/null
+++ b/stem/descriptor/reader.py
@@ -0,0 +1,119 @@
+"""
+Reads descriptors from local directories and archives.
+
+Example:
+  my_descriptors = [
+    "/tmp/server-descriptors-2012-03.tar.bz2",
+    "/tmp/archived_descriptors/",
+  ]
+  
+  reader = DescriptorReader(my_descriptors)
+  reader.start()
+  
+  # prints all of the descriptor contents
+  for descriptor in reader:
+    print descriptor
+  
+  reader.stop()
+  reader.join()
+"""
+
+import os
+import threading
+import mimetypes
+import Queue
+
+class DescriptorReader(threading.Thread):
+  """
+  Iterator for the descriptor data on the local file system. This can process
+  text files, tarball archives (gzip or bzip2), or recurse directories.
+  
+  This ignores files that cannot be processed (either due to read errors or
+  because they don't contain descriptor data). The caller can be notified of
+  files that are skipped by registering a listener with register_skip_listener().
+  """
+  
+  def __init__(self, targets):
+    self.targets = targets
+    self.skip_listeners = []
+    self.processed_files = {}
+  
+  def stop(self):
+    """
+    Stops further reading of descriptors.
+    """
+    
+    pass # TODO: implement
+  
+  def get_processed_files(self):
+    """
+    For each file that we have provided descriptor data for, this provides a
+    mapping of the form...
+    
+    absolute_path (str) => modified_time (int)
+    
+    This includes entries set through the set_processed_files() method.
+    
+    Returns:
+      dict with the paths and unix timestamp for the last modified times of the
+      files we have processed
+    """
+    
+    return self.processed_files
+  
+  def set_processed_files(self, processed_files):
+    """
+    Appends a dictionary of 'path => modified timestamp' mappings to our
+    listing of processed files. With the get_processed_files() method this can
+    be used to skip descriptors that we have already read. For instance...
+    
+    # gets the initial descriptors
+    reader = DescriptorReader(["/tmp/descriptor_data"])
+    
+    with reader:
+      initial_descriptors = list(reader)
+      processed_files = reader.get_processed_files()
+    
+    # only gets the descriptors that have changed since we last checked
+    reader = DescriptorReader(["/tmp/descriptor_data"])
+    reader.set_processed_files(processed_files)
+    
+    with reader:
+      new_descriptors = list(reader)
+    
+    Arguments:
+      processed_files (dict) - mapping of absolute paths (str) to unix
+                               timestamps for the last modified time (int)
+    """
+    
+    self.processed_files.update(processed_files)
+  
+  def register_skip_listener(self, listener):
+    """
+    Registers a listener for files that are skipped. This listener is expected
+    to be a functor of the form...
+    
+    my_listener(path, exception)
+    
+    Arguments:
+      listener (functor) - functor to be notified of files that are skipped
+                           due to read errors or because they couldn't be
+                           parsed as valid descriptor data
+    """
+    
+    self.skip_listeners.append(listener)
+  
+  def run(self):
+    pass # TODO: implement
+  
+  def _notify_skip_listener(self, path, exception):
+    for listener in self.skip_listeners:
+      listener(path, exception)
+  
+  def __enter__(self):
+    self.start()
+  
+  def __exit__(self, type, value, traceback):
+    self.stop()
+    self.join()
+





More information about the tor-commits mailing list