[tor-commits] [stem/master] Utilising metrics descriptor type annotations

atagar at torproject.org atagar at torproject.org
Fri May 18 17:28:50 UTC 2012


commit 301401360337e4c02a5fd5e4e8520cc1ecf88633
Author: Damian Johnson <atagar at torproject.org>
Date:   Fri May 18 10:25:49 2012 -0700

    Utilising metrics descriptor type annotations
    
    Using the @type annotations added by Karsten to the metrics descriptors as per
    ticket 5651.
---
 stem/descriptor/__init__.py                        |   52 ++++++++++++++------
 stem/descriptor/server_descriptor.py               |   14 -----
 test/integ/descriptor/data/bridge_descriptor       |    1 +
 test/integ/descriptor/data/cr_in_contact_line      |    1 +
 test/integ/descriptor/data/descriptor_archive.tar  |  Bin 20480 -> 20480 bytes
 .../descriptor/data/descriptor_archive.tar.bz2     |  Bin 3322 -> 3352 bytes
 .../descriptor/data/descriptor_archive.tar.gz      |  Bin 2844 -> 2874 bytes
 test/integ/descriptor/data/example_descriptor      |    1 +
 test/integ/descriptor/data/extrainfo_descriptor    |    1 +
 test/integ/descriptor/data/negative_uptime         |    1 +
 test/integ/descriptor/data/non-ascii_descriptor    |    1 +
 test/integ/descriptor/data/old_descriptor          |    1 +
 test/integ/descriptor/extrainfo_descriptor.py      |    1 +
 test/integ/descriptor/reader.py                    |    2 +
 test/integ/descriptor/server_descriptor.py         |    6 ++
 15 files changed, 52 insertions(+), 30 deletions(-)

diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index f3a5983..1de1dac 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -35,30 +35,50 @@ def parse_file(path, descriptor_file):
     IOError if unable to read from the descriptor_file
   """
   
-  import stem.descriptor.extrainfo_descriptor
   import stem.descriptor.server_descriptor
+  import stem.descriptor.extrainfo_descriptor
   
   # The tor descriptor specifications do not provide a reliable method for
   # identifying a descriptor file's type and version so we need to guess
-  # based on...
-  # - its filename for resources from the tor data directory
-  # - first line of our contents for files provided by metrics
+  # based on its filename. Metrics descriptors, however, can be identified
+  # by an annotation on their first line...
+  # https://trac.torproject.org/5651
   
-  filename = os.path.basename(path)
-  first_line = descriptor_file.readline()
-  descriptor_file.seek(0)
+  # Cached descriptor handling. These contain multiple descriptors per file.
   
-  if filename == "cached-descriptors" or first_line.startswith("router "):
-    for desc in stem.descriptor.server_descriptor.parse_file(descriptor_file):
-      desc._set_path(path)
-      yield desc
-  elif filename == "cached-extrainfo" or first_line.startswith("extra-info "):
-    for desc in stem.descriptor.extrainfo_descriptor.parse_file(descriptor_file):
+  filename, file_parser = os.path.basename(path), None
+  
+  if filename == "cached-descriptors":
+    file_parser = stem.descriptor.server_descriptor.parse_file
+  elif filename == "cached-extrainfo":
+    file_parser = stem.descriptor.extrainfo_descriptor.parse_file
+  
+  if file_parser:
+    for desc in file_parser(descriptor_file):
       desc._set_path(path)
       yield desc
-  else:
-    # unrecognized descriptor type
-    raise TypeError("Unable to determine the descriptor's type. filename: '%s', first line: '%s'" % (filename, first_line))
+    
+    return
+  
+  # Metrics descriptor handling. These contain a single descriptor per file.
+  
+  first_line, desc = descriptor_file.readline().strip(), None
+  
+  if first_line == "@type server-descriptor 1.0":
+    desc = stem.descriptor.server_descriptor.RelayDescriptor(descriptor_file.read())
+  elif first_line == "@type bridge-server-descriptor 1.0":
+    desc = stem.descriptor.server_descriptor.BridgeDescriptor(descriptor_file.read())
+  elif first_line in ("@type extra-info 1.0", "@type bridge-extra-info 1.0"):
+    desc = stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor(descriptor_file.read())
+  
+  if desc:
+    desc._set_path(path)
+    yield desc
+    return
+  
+  # Not recognized as a descriptor file.
+  
+  raise TypeError("Unable to determine the descriptor's type. filename: '%s', first line: '%s'" % (filename, first_line))
 
 class Descriptor:
   """
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index d347b15..9a91d60 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -80,20 +80,6 @@ def parse_file(descriptor_file, validate = True):
     IOError if the file can't be read
   """
   
-  # Handler for bridge descriptors
-  #
-  # Bridge descriptors are scrubbed so their nickname is 'Unnamed' and their
-  # ip address is in the 10.x.x.x space, which is normally reserved for private
-  # networks. Bridge descriptors only come from metrics so a file only contains
-  # a single descriptor.
-  
-  first_line = descriptor_file.readline()
-  descriptor_file.seek(0)
-  
-  if first_line.startswith("router Unnamed 10."):
-    yield BridgeDescriptor(descriptor_file.read())
-    return
-  
   # Handler for relay descriptors
   #
   # Cached descriptors consist of annotations followed by the descriptor
diff --git a/test/integ/descriptor/data/bridge_descriptor b/test/integ/descriptor/data/bridge_descriptor
index d28ccfa..07dffe2 100644
--- a/test/integ/descriptor/data/bridge_descriptor
+++ b/test/integ/descriptor/data/bridge_descriptor
@@ -1,3 +1,4 @@
+@type bridge-server-descriptor 1.0
 router Unnamed 10.45.227.253 9001 0 0
 or-address [fd9f:2e19:3bcf::02:9970]:9001
 platform Tor 0.2.3.12-alpha (git-800942b4176ca31c) on Linux x86_64
diff --git a/test/integ/descriptor/data/cr_in_contact_line b/test/integ/descriptor/data/cr_in_contact_line
index f1d93fb..ca6636c 100644
--- a/test/integ/descriptor/data/cr_in_contact_line
+++ b/test/integ/descriptor/data/cr_in_contact_line
@@ -1,3 +1,4 @@
+@type server-descriptor 1.0
 router pogonip 75.5.248.48 9001 0 0
 platform Tor 0.1.2.17 on Darwin i386
 published 2007-09-03 10:15:53
diff --git a/test/integ/descriptor/data/descriptor_archive.tar b/test/integ/descriptor/data/descriptor_archive.tar
index 2c40716..55d29d1 100644
Binary files a/test/integ/descriptor/data/descriptor_archive.tar and b/test/integ/descriptor/data/descriptor_archive.tar differ
diff --git a/test/integ/descriptor/data/descriptor_archive.tar.bz2 b/test/integ/descriptor/data/descriptor_archive.tar.bz2
index ba1f239..2950c57 100644
Binary files a/test/integ/descriptor/data/descriptor_archive.tar.bz2 and b/test/integ/descriptor/data/descriptor_archive.tar.bz2 differ
diff --git a/test/integ/descriptor/data/descriptor_archive.tar.gz b/test/integ/descriptor/data/descriptor_archive.tar.gz
index 63a6a57..325adb7 100644
Binary files a/test/integ/descriptor/data/descriptor_archive.tar.gz and b/test/integ/descriptor/data/descriptor_archive.tar.gz differ
diff --git a/test/integ/descriptor/data/example_descriptor b/test/integ/descriptor/data/example_descriptor
index 5ab4f57..6ee442c 100644
--- a/test/integ/descriptor/data/example_descriptor
+++ b/test/integ/descriptor/data/example_descriptor
@@ -1,3 +1,4 @@
+@type server-descriptor 1.0
 router caerSidi 71.35.133.197 9001 0 0
 platform Tor 0.2.1.30 on Linux x86_64
 opt protocols Link 1 2 Circuit 1
diff --git a/test/integ/descriptor/data/extrainfo_descriptor b/test/integ/descriptor/data/extrainfo_descriptor
index 4525afe..ecaabe0 100644
--- a/test/integ/descriptor/data/extrainfo_descriptor
+++ b/test/integ/descriptor/data/extrainfo_descriptor
@@ -1,3 +1,4 @@
+@type extra-info 1.0
 extra-info NINJA B2289C3EAB83ECD6EB916A2F481A02E6B76A0A48
 published 2012-05-05 17:03:50
 write-history 2012-05-05 17:02:45 (900 s) 1082368,19456,50176,272384,485376,1850368,1132544,1790976,2459648,4091904,6310912,13701120,3209216,3871744,7873536,5440512,7287808,10561536,9979904,11247616,11982848,7590912,10611712,20728832,38534144,6839296,3173376,16678912
diff --git a/test/integ/descriptor/data/negative_uptime b/test/integ/descriptor/data/negative_uptime
index 4140d75..388d44b 100644
--- a/test/integ/descriptor/data/negative_uptime
+++ b/test/integ/descriptor/data/negative_uptime
@@ -1,3 +1,4 @@
+@type server-descriptor 1.0
 router TipTor 62.99.247.83 9001 0 9030
 platform Tor 0.1.1.25 on Darwin Power Macintosh
 published 2006-12-18 22:42:40
diff --git a/test/integ/descriptor/data/non-ascii_descriptor b/test/integ/descriptor/data/non-ascii_descriptor
index 7758276..2cd2a6b 100644
--- a/test/integ/descriptor/data/non-ascii_descriptor
+++ b/test/integ/descriptor/data/non-ascii_descriptor
@@ -1,3 +1,4 @@
+@type server-descriptor 1.0
 router torrelay389752132 130.243.230.116 9001 0 0
 platform Tor 0.2.2.35 (git-4f42b0a93422f70e) on Linux x86_64
 opt protocols Link 1 2 Circuit 1
diff --git a/test/integ/descriptor/data/old_descriptor b/test/integ/descriptor/data/old_descriptor
index 5407aa8..8be82f9 100644
--- a/test/integ/descriptor/data/old_descriptor
+++ b/test/integ/descriptor/data/old_descriptor
@@ -1,3 +1,4 @@
+@type server-descriptor 1.0
 router krypton 212.37.39.59 8000 0 0
 platform Tor 0.1.0.14 on FreeBSD i386
 published 2005-12-16 18:01:03
diff --git a/test/integ/descriptor/extrainfo_descriptor.py b/test/integ/descriptor/extrainfo_descriptor.py
index 479ff01..a753d27 100644
--- a/test/integ/descriptor/extrainfo_descriptor.py
+++ b/test/integ/descriptor/extrainfo_descriptor.py
@@ -19,6 +19,7 @@ class TestExtraInfoDescriptor(unittest.TestCase):
     descriptor_path = test.integ.descriptor.get_resource("extrainfo_descriptor")
     
     descriptor_file = open(descriptor_path)
+    descriptor_file.readline() # strip header
     descriptor_contents = descriptor_file.read()
     descriptor_file.close()
     
diff --git a/test/integ/descriptor/reader.py b/test/integ/descriptor/reader.py
index 81635e0..0ae882e 100644
--- a/test/integ/descriptor/reader.py
+++ b/test/integ/descriptor/reader.py
@@ -51,6 +51,7 @@ def _get_raw_tar_descriptors():
       for tar_entry in tar_file:
         if tar_entry.isfile():
           entry = tar_file.extractfile(tar_entry)
+          entry.readline() # strip header
           raw_descriptors.append(entry.read())
           entry.close()
     
@@ -149,6 +150,7 @@ class TestDescriptorReader(unittest.TestCase):
     
     descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, "example_descriptor")
     with open(descriptor_path) as descriptor_file:
+      descriptor_file.readline() # strip header
       descriptor_entries.append(descriptor_file.read())
     
     # running this test multiple times to flush out concurrency issues
diff --git a/test/integ/descriptor/server_descriptor.py b/test/integ/descriptor/server_descriptor.py
index 5d7e62d..afe3c93 100644
--- a/test/integ/descriptor/server_descriptor.py
+++ b/test/integ/descriptor/server_descriptor.py
@@ -21,6 +21,7 @@ class TestServerDescriptor(unittest.TestCase):
     descriptor_path = test.integ.descriptor.get_resource("example_descriptor")
     
     descriptor_file = open(descriptor_path)
+    descriptor_file.readline() # strip header
     descriptor_contents = descriptor_file.read()
     descriptor_file.close()
     
@@ -92,6 +93,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
     descriptor_path = test.integ.descriptor.get_resource("old_descriptor")
     
     descriptor_file = open(descriptor_path)
+    descriptor_file.readline() # strip header
     descriptor_contents = descriptor_file.read()
     descriptor_file.close()
     
@@ -176,6 +178,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
     descriptor_path = test.integ.descriptor.get_resource("non-ascii_descriptor")
     
     descriptor_file = open(descriptor_path)
+    descriptor_file.readline() # strip header
     descriptor_contents = descriptor_file.read()
     descriptor_file.close()
     
@@ -217,6 +220,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
     descriptor_path = test.integ.descriptor.get_resource("cr_in_contact_line")
     
     descriptor_file = open(descriptor_path)
+    descriptor_file.readline() # strip header
     descriptor_contents = descriptor_file.read()
     descriptor_file.close()
     
@@ -244,6 +248,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
     descriptor_path = test.integ.descriptor.get_resource("negative_uptime")
     
     descriptor_file = open(descriptor_path)
+    descriptor_file.readline() # strip header
     descriptor_contents = descriptor_file.read()
     descriptor_file.close()
     
@@ -267,6 +272,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
     descriptor_path = test.integ.descriptor.get_resource("bridge_descriptor")
     
     descriptor_file = open(descriptor_path)
+    descriptor_file.readline() # strip header
     descriptor_contents = descriptor_file.read()
     descriptor_file.close()
     



More information about the tor-commits mailing list