[stem/master] Descriptor module with common functionality

commit da4ccb0bedc122c4776048751c4de0b6cc0e010e
Author: Damian Johnson <atagar@torproject.org>
Date:   Wed Mar 14 09:55:38 2012 -0700

    Descriptor module with common functionality

    Adding the descriptor module which has the Descriptor class (the common
    parent for all descriptor types) and the function for parsing descriptors
    from a file. This parser will later do type recognition to figure out how
    descriptors should be parsed. This is identical to how metrics-lib does
    this. Nice advantage is that we can now move all of the remaining 'TODO'
    notes out of the reader. It's done. :)
---
 stem/descriptor/__init__.py   |  2 +-
 stem/descriptor/descriptor.py | 58 +++++++++++++++++++++++++++++++++++++++++
 stem/descriptor/reader.py     | 38 ++++++++++++++++----------
 3 files changed, 82 insertions(+), 16 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index fef6750..33970ef 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -2,5 +2,5 @@
 Utilities for parsing and processing descriptor data.
 """
 
-__all__ = ["reader"]
+__all__ = ["descriptor", "reader"]
 
diff --git a/stem/descriptor/descriptor.py b/stem/descriptor/descriptor.py
new file mode 100644
index 0000000..90d3bed
--- /dev/null
+++ b/stem/descriptor/descriptor.py
@@ -0,0 +1,58 @@
+"""
+Common functionality for descriptors.
+"""
+
+def parse_descriptors(path, descriptor_file):
+  """
+  Provides an iterator for the descriptors within a given file.
+
+  Arguments:
+    path (str) - absolute path to the file's location on disk
+    descriptor_file (file) - opened file with the descriptor contents
+
+  Returns:
+    iterator that parses the file's contents into descriptors
+
+  Raises:
+    TypeError if we can't match the contents of the file to a descriptor type
+    IOError if unable to read from the descriptor_file
+  """
+
+  # TODO: implement actual descriptor type recognition and parsing
+  # TODO: add integ test for non-descriptor text content
+  yield Descriptor(path, descriptor_file.read())
+
+class Descriptor:
+  """
+  Common parent for all types of descriptors.
+  """
+
+  def __init__(self, path, raw_contents):
+    self._path = path
+    self._raw_contents = raw_contents
+
+  def get_path(self):
+    """
+    Provides the absolute path that we loaded this descriptor from.
+
+    Returns:
+      str with the absolute path of the descriptor source
+    """
+
+    return self._path
+
+  def get_unrecognized_lines(self):
+    """
+    Provides a list of lines that were either ignored or had data that we did
+    not know how to process. This is most common due to new descriptor fields
+    that this library does not yet know how to process. Patches welcome!
+
+    Returns:
+      list of lines of unrecognized content
+    """
+
+    return []
+
+  def __str__(self):
+    return self._raw_contents
+
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
index 8117361..3b6f5fd 100644
--- a/stem/descriptor/reader.py
+++ b/stem/descriptor/reader.py
@@ -70,6 +70,8 @@ import threading
 import mimetypes
 import Queue
 
+import stem.descriptor.descriptor
+
 # flag to indicate when the reader thread is out of descriptor files to read
 FINISHED = "DONE"
 
@@ -362,25 +364,31 @@ class DescriptorReader:
 
   def _handle_descriptor_file(self, target):
     try:
-      # TODO: replace with actual descriptor parsing when we have it
-      target_file = open(target)
-      self._enqueue_descriptor(target_file.read())
-      target_file.close()
-
-      self._iter_notice.set()
+      with open(target) as target_file:
+        for desc in stem.descriptor.descriptor.parse_descriptors(target, target_file):
+          self._enqueue_descriptor(desc)
+          self._iter_notice.set()
+    except TypeError, exc:
+      self._notify_skip_listeners(target, ParsingFailure(exc))
     except IOError, exc:
       self._notify_skip_listeners(target, ReadFailed(exc))
 
   def _handle_archive(self, target):
-    with tarfile.open(target) as tar_file:
-      for tar_entry in tar_file:
-        if tar_entry.isfile():
-          # TODO: replace with actual descriptor parsing when we have it
-          entry = tar_file.extractfile(tar_entry)
-          self._enqueue_descriptor(entry.read())
-          entry.close()
-
-          self._iter_notice.set()
+    try:
+      with tarfile.open(target) as tar_file:
+        for tar_entry in tar_file:
+          if tar_entry.isfile():
+            entry = tar_file.extractfile(tar_entry)
+
+            for desc in stem.descriptor.descriptor.parse_descriptors(target, entry):
+              self._enqueue_descriptor(desc)
+              self._iter_notice.set()
+
+            entry.close()
+    except TypeError, exc:
+      self._notify_skip_listeners(target, ParsingFailure(exc))
+    except IOError, exc:
+      self._notify_skip_listeners(target, ReadFailed(exc))
 
   def _enqueue_descriptor(self, descriptor):
     # blocks until their is either room for the descriptor or we're stopped
participants (1)
-
atagar@torproject.org