[tor-commits] [stem/master] Descriptor module with common functionality

atagar at torproject.org atagar at torproject.org
Mon Mar 26 00:10:01 UTC 2012


commit da4ccb0bedc122c4776048751c4de0b6cc0e010e
Author: Damian Johnson <atagar at torproject.org>
Date:   Wed Mar 14 09:55:38 2012 -0700

    Descriptor module with common functionality
    
    Adding the descriptor module which has the Descriptor class (the common parent
    for all descriptor types) and the function for parsing descriptors from a file.
    This parser will later do type recognition to figure out how descriptors
    should be parsed. This is identical to how metrics-lib does this.
    
    A nice advantage is that we can now move all of the remaining 'TODO' notes
    out of the reader. It's done. :)
---
 stem/descriptor/__init__.py   |    2 +-
 stem/descriptor/descriptor.py |   58 +++++++++++++++++++++++++++++++++++++++++
 stem/descriptor/reader.py     |   38 ++++++++++++++++----------
 3 files changed, 82 insertions(+), 16 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index fef6750..33970ef 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -2,5 +2,5 @@
 Utilities for parsing and processing descriptor data.
 """
 
-__all__ = ["reader"]
+__all__ = ["descriptor", "reader"]
 
diff --git a/stem/descriptor/descriptor.py b/stem/descriptor/descriptor.py
new file mode 100644
index 0000000..90d3bed
--- /dev/null
+++ b/stem/descriptor/descriptor.py
@@ -0,0 +1,58 @@
+"""
+Common functionality for descriptors.
+"""
+
+def parse_descriptors(path, descriptor_file):
+  """
+  Provides an iterator for the descriptors within a given file.
+  
+  Arguments:
+    path (str)             - absolute path to the file's location on disk
+    descriptor_file (file) - opened file with the descriptor contents
+  
+  Returns:
+    iterator that parses the file's contents into descriptors
+  
+  Raises:
+    TypeError if we can't match the contents of the file to a descriptor type
+    IOError if unable to read from the descriptor_file
+  """
+  
+  # TODO: implement actual descriptor type recognition and parsing
+  # TODO: add integ test for non-descriptor text content
+  yield Descriptor(path, descriptor_file.read())
+
+class Descriptor:
+  """
+  Common parent for all types of descriptors.
+  """
+  
+  def __init__(self, path, raw_contents):
+    self._path = path
+    self._raw_contents = raw_contents
+  
+  def get_path(self):
+    """
+    Provides the absolute path that we loaded this descriptor from.
+    
+    Returns:
+      str with the absolute path of the descriptor source
+    """
+    
+    return self._path
+  
+  def get_unrecognized_lines(self):
+    """
+    Provides a list of lines that were either ignored or had data that we did
+    not know how to process. This is most common due to new descriptor fields
+    that this library does not yet know how to process. Patches welcome!
+    
+    Returns:
+      list of lines of unrecognized content
+    """
+    
+    return []
+  
+  def __str__(self):
+    return self._raw_contents
+
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
index 8117361..3b6f5fd 100644
--- a/stem/descriptor/reader.py
+++ b/stem/descriptor/reader.py
@@ -70,6 +70,8 @@ import threading
 import mimetypes
 import Queue
 
+import stem.descriptor.descriptor
+
 # flag to indicate when the reader thread is out of descriptor files to read
 FINISHED = "DONE"
 
@@ -362,25 +364,31 @@ class DescriptorReader:
   
   def _handle_descriptor_file(self, target):
     try:
-      # TODO: replace with actual descriptor parsing when we have it
-      target_file = open(target)
-      self._enqueue_descriptor(target_file.read())
-      target_file.close()
-      
-      self._iter_notice.set()
+      with open(target) as target_file:
+        for desc in stem.descriptor.descriptor.parse_descriptors(target, target_file):
+          self._enqueue_descriptor(desc)
+          self._iter_notice.set()
+    except TypeError, exc:
+      self._notify_skip_listeners(target, ParsingFailure(exc))
     except IOError, exc:
       self._notify_skip_listeners(target, ReadFailed(exc))
   
   def _handle_archive(self, target):
-    with tarfile.open(target) as tar_file:
-      for tar_entry in tar_file:
-        if tar_entry.isfile():
-          # TODO: replace with actual descriptor parsing when we have it
-          entry = tar_file.extractfile(tar_entry)
-          self._enqueue_descriptor(entry.read())
-          entry.close()
-          
-          self._iter_notice.set()
+    try:
+      with tarfile.open(target) as tar_file:
+        for tar_entry in tar_file:
+          if tar_entry.isfile():
+            entry = tar_file.extractfile(tar_entry)
+            
+            for desc in stem.descriptor.descriptor.parse_descriptors(target, entry):
+              self._enqueue_descriptor(desc)
+              self._iter_notice.set()
+            
+            entry.close()
+    except TypeError, exc:
+      self._notify_skip_listeners(target, ParsingFailure(exc))
+    except IOError, exc:
+      self._notify_skip_listeners(target, ReadFailed(exc))
   
   def _enqueue_descriptor(self, descriptor):
     # blocks until their is either room for the descriptor or we're stopped





More information about the tor-commits mailing list