[tor-commits] [stem/master] Simplifying use of digest values for microdescriptors

atagar at torproject.org atagar at torproject.org
Mon Mar 25 03:54:22 UTC 2013


commit e10b6124e22fd9febd7bd23abcc4b820353fbf8b
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Mar 24 20:35:22 2013 -0700

    Simplifying use of digest values for microdescriptors
    
    To be useful a microdescriptor often needs to be matched against its
    corresponding router status entry. The design of microdescriptors makes this
    suck - the only way to do so is to iterate over all router status entries
    looking for one with the corresponding digest.
    
    Adding examples for doing this and making the following changes to make the
    matching easier...
    
    * changing the digest value of RouterStatusEntryMicroV3 from base64 to hex
    * adding an attribute with the hex digest to Microdescriptors
---
 stem/descriptor/microdescriptor.py          |   57 +++++++++++++++++++++++++++
 stem/descriptor/router_status_entry.py      |   27 +++++++------
 test/unit/control/controller.py             |    2 +-
 test/unit/descriptor/router_status_entry.py |   12 +++---
 4 files changed, 78 insertions(+), 20 deletions(-)

diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py
index 324ee32..4fc2c31 100644
--- a/stem/descriptor/microdescriptor.py
+++ b/stem/descriptor/microdescriptor.py
@@ -9,6 +9,56 @@ downloads server descriptors by default, opting for microdescriptors instead.
 Unlike most descriptor documents these aren't available on the metrics site
 (since they don't contain any information that the server descriptors don't).
 
+The limited information in microdescriptors makes them rather clunky to use
+compared with server descriptors. For instance microdescriptors lack the
+relay's fingerprint, making it difficult to use them to look up the relay's
+other descriptors.
+
+To do so you need to match the microdescriptor's digest against its
+corresponding router status entry. For added fun as of this writing the
+controller doesn't even surface those router status entries
+(`ticket <https://trac.torproject.org/7953>`_).
+
+For instance, here's an example that prints the nicknames and fingerprints of
+the exit relays.
+
+::
+
+  import os
+
+  from stem.control import Controller
+  from stem.descriptor import parse_file
+
+  with Controller.from_port(port = 9051) as controller:
+    controller.authenticate()
+
+    exit_digests = set()
+    data_dir = controller.get_conf("DataDirectory")
+
+    for desc in controller.get_microdescriptors():
+      if desc.exit_policy.is_exiting_allowed():
+        exit_digests.add(desc.digest)
+
+    print "Exit Relays:"
+
+    with open(os.path.join(data_dir, 'cached-microdesc-consensus')) as desc_file:
+      for desc in parse_file(desc_file):
+        if desc.digest in exit_digests:
+          print "  %s (%s)" % (desc.nickname, desc.fingerprint)
+
+Doing the same is trivial with server descriptors...
+
+::
+
+  from stem.descriptor import parse_file
+
+  print "Exit Relays:"
+
+  with open("/home/atagar/.tor/cached-descriptors") as desc_file:
+    for desc in parse_file(desc_file):
+      if desc.exit_policy.is_exiting_allowed():
+        print "  %s (%s)" % (desc.nickname, desc.fingerprint)
+
 **Module Overview:**
 
 ::
@@ -16,6 +66,8 @@ Unlike most descriptor documents these aren't available on the metrics site
   Microdescriptor - Tor microdescriptor.
 """
 
+import hashlib
+
 import stem.descriptor
 import stem.descriptor.router_status_entry
 import stem.exit_policy
@@ -91,6 +143,9 @@ class Microdescriptor(stem.descriptor.Descriptor):
   Microdescriptor (`descriptor specification
   <https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt>`_)
 
+  :var str digest: **\*** hex digest for this microdescriptor, this can be used
+    to match against the corresponding digest attribute of a
+    :class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3`
   :var str onion_key: **\*** key used to encrypt EXTEND cells
   :var str ntor_onion_key: base64 key used to encrypt EXTEND in the ntor protocol
   :var list or_addresses: **\*** alternative for our address/or_port attributes, each
@@ -107,6 +162,8 @@ class Microdescriptor(stem.descriptor.Descriptor):
     super(Microdescriptor, self).__init__(raw_contents)
     raw_contents = stem.util.str_tools._to_unicode(raw_contents)
 
+    self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()
+
     self.onion_key = None
     self.ntor_onion_key = None
     self.or_addresses = []
diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py
index 63cf24b..12f1c7f 100644
--- a/stem/descriptor/router_status_entry.py
+++ b/stem/descriptor/router_status_entry.py
@@ -403,7 +403,7 @@ class RouterStatusEntryMicroV3(RouterStatusEntry):
   :var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
     information that isn't yet recognized
 
-  :var str digest: **\*** router's base64 encoded router microdescriptor digest
+  :var str digest: **\*** router's hex encoded digest of our corresponding microdescriptor
 
   **\*** attribute is either required when we're parsed with validation or has
   a default value, others are left as **None** if undefined
@@ -433,7 +433,7 @@ class RouterStatusEntryMicroV3(RouterStatusEntry):
         # "m" digest
         # example: m aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70
 
-        self.digest = value
+        self.digest = _base64_to_hex(value, validate, False)
         del entries['m']
 
     RouterStatusEntry._parse(self, entries, validate)
@@ -505,10 +505,10 @@ def _parse_r_line(desc, value, validate, include_digest = True):
     return
 
   desc.nickname = r_comp[0]
-  desc.fingerprint = _decode_fingerprint(r_comp[1], validate)
+  desc.fingerprint = _base64_to_hex(r_comp[1], validate)
 
   if include_digest:
-    desc.digest = _decode_fingerprint(r_comp[2], validate)
+    desc.digest = _base64_to_hex(r_comp[2], validate)
 
   desc.address = r_comp[5]
   desc.or_port = int(r_comp[6])
@@ -687,18 +687,18 @@ def _parse_m_line(desc, value, validate):
   desc.microdescriptor_hashes.append((methods, hashes))
 
 
-def _decode_fingerprint(identity, validate):
+def _base64_to_hex(identity, validate, check_if_fingerprint = True):
   """
-  Decodes the 'identity' value found in consensuses into the more common hex
-  encoding of the relay's fingerprint. For example...
+  Decodes a base64 value to hex. For example...
 
   ::
 
-    >>> _decode_fingerprint('p1aag7VwarGxqctS7/fS0y5FU+s')
+    >>> _base64_to_hex('p1aag7VwarGxqctS7/fS0y5FU+s')
     'A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB'
 
   :param str identity: encoded fingerprint from the consensus
   :param bool validate: checks validity if **True**
+  :param bool check_if_fingerprint: asserts that the result is a fingerprint if **True**
 
   :returns: **str** with the uppercase hex encoding of the relay's fingerprint
 
@@ -706,7 +706,7 @@ def _decode_fingerprint(identity, validate):
   """
 
   # trailing equal signs were stripped from the identity
-  missing_padding = 28 - len(identity)
+  missing_padding = len(identity) % 4
   identity += "=" * missing_padding
 
   fingerprint = ""
@@ -733,10 +733,11 @@ def _decode_fingerprint(identity, validate):
     char_int = char if isinstance(char, int) else ord(char)
     fingerprint += hex(char_int)[2:].zfill(2).upper()
 
-  if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
-    if not validate:
-      return None
+  if check_if_fingerprint:
+    if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
+      if not validate:
+        return None
 
-    raise ValueError("Decoded '%s' to be '%s', which isn't a valid fingerprint" % (identity, fingerprint))
+      raise ValueError("Decoded '%s' to be '%s', which isn't a valid fingerprint" % (identity, fingerprint))
 
   return fingerprint
diff --git a/test/unit/control/controller.py b/test/unit/control/controller.py
index 746d9ce..ccce4df 100644
--- a/test/unit/control/controller.py
+++ b/test/unit/control/controller.py
@@ -253,7 +253,7 @@ class TestControl(unittest.TestCase):
     self.assertEqual(router, self.controller.get_network_status(nickname))
 
     # Pretend to get the router status entry with its fingerprint.
-    hex_fingerprint = stem.descriptor.router_status_entry._decode_fingerprint(fingerprint, False)
+    hex_fingerprint = stem.descriptor.router_status_entry._base64_to_hex(fingerprint, False)
     self.assertEqual(router, self.controller.get_network_status(hex_fingerprint))
 
     # Mangle hex fingerprint and try again.
diff --git a/test/unit/descriptor/router_status_entry.py b/test/unit/descriptor/router_status_entry.py
index e236a75..bf966e9 100644
--- a/test/unit/descriptor/router_status_entry.py
+++ b/test/unit/descriptor/router_status_entry.py
@@ -6,7 +6,7 @@ import datetime
 import unittest
 
 from stem import Flag
-from stem.descriptor.router_status_entry import RouterStatusEntryV3, _decode_fingerprint
+from stem.descriptor.router_status_entry import RouterStatusEntryV3, _base64_to_hex
 from stem.exit_policy import MicroExitPolicy
 from stem.version import Version
 
@@ -19,7 +19,7 @@ from test.mocking import get_router_status_entry_v2, \
 class TestRouterStatusEntry(unittest.TestCase):
   def test_fingerprint_decoding(self):
     """
-    Tests for the _decode_fingerprint() helper.
+    Tests for the _base64_to_hex() helper.
     """
 
     # consensus identity field and fingerprint for caerSidi and Amunet1-5
@@ -33,12 +33,12 @@ class TestRouterStatusEntry(unittest.TestCase):
     }
 
     for arg, expected in test_values.items():
-      self.assertEqual(expected, _decode_fingerprint(arg, True))
+      self.assertEqual(expected, _base64_to_hex(arg, True))
 
     # checks with some malformed inputs
     for arg in ('', '20wYcb', '20wYcb' * 30):
-      self.assertRaises(ValueError, _decode_fingerprint, arg, True)
-      self.assertEqual(None, _decode_fingerprint(arg, False))
+      self.assertRaises(ValueError, _base64_to_hex, arg, True)
+      self.assertEqual(None, _base64_to_hex(arg, False))
 
   def test_minimal_v2(self):
     """
@@ -105,7 +105,7 @@ class TestRouterStatusEntry(unittest.TestCase):
     self.assertEqual(expected_flags, set(entry.flags))
     self.assertEqual(None, entry.version_line)
     self.assertEqual(None, entry.version)
-    self.assertEqual("aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70", entry.digest)
+    self.assertEqual("6A252497006BB9AF36A1B1B902C4D7FA2129923400DBE0101F167B1B031F63BD", entry.digest)
     self.assertEqual([], entry.get_unrecognized_lines())
 
   def test_missing_fields(self):





More information about the tor-commits mailing list