commit 3d3e69417975499fbe5d2a3eb591d808c6800874 Author: Damian Johnson atagar@torproject.org Date: Sat Jan 19 15:53:06 2013 -0800
Always processing @type headers
The parse_file() function only consumed the @type annotation when we didn't have a descriptor_type nor recognized filename. Changing it so we always consume the @type header.
I'm also changing the priority order of parse_file() so if the file could be both a metrics archive or cached descriptor (for instance a file named 'cached-consensus' that starts with a @type annotation) then we process it as a metrics archive. --- stem/descriptor/__init__.py | 41 ++++++++++++++++++-------------- test/integ/descriptor/networkstatus.py | 26 +++++++++++-------- 2 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 7280222..f1fb86e 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -48,12 +48,12 @@ def parse_file(descriptor_file, descriptor_type = None, path = None): If you don't provide a **descriptor_type** argument then this automatically tries to determine the descriptor type based on the following...
- * The filename if it matches something from tor's data directory. For - instance, tor's 'cached-descriptors' contains server descriptors. - * The @type annotation on the first line. These are generally only found in the `descriptor archives https://metrics.torproject.org`_.
+ * The filename if it matches something from tor's data directory. For + instance, tor's 'cached-descriptors' contains server descriptors. + This is a handy function for simple usage, but if you're reading multiple descriptor files you might want to consider the :class:`~stem.descriptor.reader.DescriptorReader`. @@ -101,7 +101,12 @@ def parse_file(descriptor_file, descriptor_type = None, path = None): # by an annotation on their first line... # https://trac.torproject.org/5651
- # Cached descriptor handling. These contain multiple descriptors per file. + initial_position = descriptor_file.tell() + first_line = descriptor_file.readline().strip() + metrics_header_match = re.match("^@type (\S+) (\d+).(\d+)$", first_line) + + if not metrics_header_match: + descriptor_file.seek(initial_position)
filename = '<undefined>' if path is None else os.path.basename(path) file_parser = None @@ -114,22 +119,22 @@ def parse_file(descriptor_file, descriptor_type = None, path = None): file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f) else: raise ValueError("The descriptor_type must be of the form '<type> <major_version>.<minor_version>'") - elif filename == "cached-descriptors": - file_parser = stem.descriptor.server_descriptor._parse_file - elif filename == "cached-extrainfo": - file_parser = stem.descriptor.extrainfo_descriptor._parse_file - elif filename == "cached-consensus": - file_parser = stem.descriptor.networkstatus._parse_file - elif filename == "cached-microdesc-consensus": - file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, is_microdescriptor = True) - else: + elif metrics_header_match: # Metrics descriptor handling - first_line, desc = descriptor_file.readline().strip(), None - metrics_header_match = re.match("^@type (\S+) (\d+).(\d+)$", first_line)
- if metrics_header_match: - desc_type, major_version, minor_version = metrics_header_match.groups() - file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f) + desc_type, major_version, minor_version = metrics_header_match.groups() + file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f) + else: + # Cached descriptor handling. These contain multiple descriptors per file. + + if filename == "cached-descriptors": + file_parser = stem.descriptor.server_descriptor._parse_file + elif filename == "cached-extrainfo": + file_parser = stem.descriptor.extrainfo_descriptor._parse_file + elif filename == "cached-consensus": + file_parser = stem.descriptor.networkstatus._parse_file + elif filename == "cached-microdesc-consensus": + file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, is_microdescriptor = True)
if file_parser: for desc in file_parser(descriptor_file): diff --git a/test/integ/descriptor/networkstatus.py b/test/integ/descriptor/networkstatus.py index c654158..035289d 100644 --- a/test/integ/descriptor/networkstatus.py +++ b/test/integ/descriptor/networkstatus.py @@ -117,17 +117,21 @@ class TestNetworkStatus(unittest.TestCase):
consensus_path = test.integ.descriptor.get_resource("metrics_consensus")
- with open(consensus_path) as descriptor_file: - descriptors = stem.descriptor.parse_file(descriptor_file, path = consensus_path) - - router = next(descriptors) - self.assertEquals("sumkledi", router.nickname) - self.assertEquals("0013D22389CD50D0B784A3E4061CB31E8CE8CEB5", router.fingerprint) - self.assertEquals("8mCr8Sl7RF4ENU4jb0FZFA/3do8", router.digest) - self.assertEquals(datetime.datetime(2012, 7, 12, 4, 1, 55), router.published) - self.assertEquals("178.218.213.229", router.address) - self.assertEquals(80, router.or_port) - self.assertEquals(None, router.dir_port) + for specify_type in (True, False): + with open(consensus_path) as descriptor_file: + if specify_type: + descriptors = stem.descriptor.parse_file(descriptor_file, "network-status-consensus-3 1.0", path = consensus_path) + else: + descriptors = stem.descriptor.parse_file(descriptor_file, path = consensus_path) + + router = next(descriptors) + self.assertEquals("sumkledi", router.nickname) + self.assertEquals("0013D22389CD50D0B784A3E4061CB31E8CE8CEB5", router.fingerprint) + self.assertEquals("8mCr8Sl7RF4ENU4jb0FZFA/3do8", router.digest) + self.assertEquals(datetime.datetime(2012, 7, 12, 4, 1, 55), router.published) + self.assertEquals("178.218.213.229", router.address) + self.assertEquals(80, router.or_port) + self.assertEquals(None, router.dir_port)
def test_metrics_bridge_consensus(self): """
tor-commits@lists.torproject.org