[tor-commits] [stem/master] Drop server descriptor annotations

Mon Feb 10 03:14:50 UTC 2020

commit fb82decc3af768f480db7fe7e93c9ce258110567
Author: Damian Johnson <atagar at torproject.org>
Date:   Thu Feb 6 13:40:29 2020 -0800

    Drop server descriptor annotations
    
    Server descriptor files within tor's data directory begin with a couple
    annotations. This is the only place where these annotations exist, and
    is not part of tor's dir-spec.
    
    I parsed these annotations for completeness when I first authored Stem.
    However, we effectively never read these files nowadays for a few reasons...
    
      1. Stem's remote and collector modules provide better methods for getting
         descriptors.
    
      2. When tor is running locally we can get this information via GETINFO rather
         than reading directly from disk.
    
      3. Tor is trying to discourage us from reading the data directory from disk
         by restricting its permissions.
    
    This isn't to say there aren't merits to reading on-disk descriptors. Nyx does
    so, for instance, because it's a passive listener (no downloading allowed)
    and massive GETINFO responses noticably lag the control port.
    
    However, reading on-disk descriptors is rare and even in this edge case I'm
    unaware of anyone using the annotations.
---
 stem/descriptor/server_descriptor.py      | 80 ++++++-------------------------
 test/unit/descriptor/remote.py            |  4 +-
 test/unit/descriptor/server_descriptor.py | 28 -----------
 3 files changed, 15 insertions(+), 97 deletions(-)

diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 84ba8b65..5236c4db 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -26,9 +26,7 @@ etc). This information is provided from a few sources...
     |  |- is_scrubbed - checks if our content has been properly scrubbed
     |  +- get_scrubbing_issues - description of issues with our scrubbing
     |
-    |- digest - calculates the upper-case hex digest value for our content
-    |- get_annotations - dictionary of content prior to the descriptor entry
-    +- get_annotation_lines - lines that provided the annotations
+    +- digest - calculates the upper-case hex digest value for our content
 
 .. data:: BridgeDistribution (enum)
 
@@ -190,10 +188,14 @@ def _parse_file(descriptor_file, is_bridge = False, validate = False, **kwargs):
   # to the caller).
 
   while True:
-    annotations = _read_until_keywords('router', descriptor_file)
-    annotations = map(bytes.strip, annotations)                      # strip newlines
-    annotations = map(stem.util.str_tools._to_unicode, annotations)  # convert to unicode
-    annotations = list(filter(lambda x: x != '', annotations))       # drop any blanks
+    # skip annotations
+
+    while True:
+      pos = descriptor_file.tell()
+
+      if not descriptor_file.readline().startswith(b'@'):
+        descriptor_file.seek(pos)
+        break
 
     if not is_bridge:
       descriptor_content = _read_until_keywords('router-signature', descriptor_file)
@@ -212,13 +214,10 @@ def _parse_file(descriptor_file, is_bridge = False, validate = False, **kwargs):
       descriptor_text = bytes.join(b'', descriptor_content)
 
       if is_bridge:
-        yield BridgeDescriptor(descriptor_text, validate, annotations, **kwargs)
+        yield BridgeDescriptor(descriptor_text, validate, **kwargs)
       else:
-        yield RelayDescriptor(descriptor_text, validate, annotations, **kwargs)
+        yield RelayDescriptor(descriptor_text, validate, **kwargs)
     else:
-      if validate and annotations:
-        raise ValueError('Content conform to being a server descriptor:\n%s' % '\n'.join(annotations))
-
       break  # done parsing descriptors
 
 
@@ -590,7 +589,7 @@ class ServerDescriptor(Descriptor):
     'eventdns': _parse_eventdns_line,
   }
 
-  def __init__(self, raw_contents, validate = False, annotations = None):
+  def __init__(self, raw_contents, validate = False):
     """
     Server descriptor constructor, created from an individual relay's
     descriptor content (as provided by 'GETINFO desc/*', cached descriptors,
@@ -603,13 +602,11 @@ class ServerDescriptor(Descriptor):
     :param str raw_contents: descriptor content provided by the relay
     :param bool validate: checks the validity of the descriptor's content if
       **True**, skips these checks otherwise
-    :param list annotations: lines that appeared prior to the descriptor
 
     :raises: **ValueError** if the contents is malformed and validate is True
     """
 
     super(ServerDescriptor, self).__init__(raw_contents, lazy_load = not validate)
-    self._annotation_lines = annotations if annotations else []
 
     # A descriptor contains a series of 'keyword lines' which are simply a
     # keyword followed by an optional value. Lines can also be followed by a
@@ -663,55 +660,6 @@ class ServerDescriptor(Descriptor):
 
     raise NotImplementedError('Unsupported Operation: this should be implemented by the ServerDescriptor subclass')
 
-  @functools.lru_cache()
-  def get_annotations(self):
-    """
-    Provides content that appeared prior to the descriptor. If this comes from
-    the cached-descriptors file then this commonly contains content like...
-
-    ::
-
-      @downloaded-at 2012-03-18 21:18:29
-      @source "173.254.216.66"
-
-    .. deprecated:: 1.8.0
-       Users very rarely read from cached descriptor files any longer. This
-       method will be removed in Stem 2.x. If you have some need for us to keep
-       this please `let me know
-       <https://trac.torproject.org/projects/tor/wiki/doc/stem/bugs>`_.
-
-    :returns: **dict** with the key/value pairs in our annotations
-    """
-
-    annotation_dict = {}
-
-    for line in self._annotation_lines:
-      if ' ' in line:
-        key, value = line.split(' ', 1)
-        annotation_dict[key] = value
-      else:
-        annotation_dict[line] = None
-
-    return annotation_dict
-
-  def get_annotation_lines(self):
-    """
-    Provides the lines of content that appeared prior to the descriptor. This
-    is the same as the
-    :func:`~stem.descriptor.server_descriptor.ServerDescriptor.get_annotations`
-    results, but with the unparsed lines and ordering retained.
-
-    .. deprecated:: 1.8.0
-       Users very rarely read from cached descriptor files any longer. This
-       method will be removed in Stem 2.x. If you have some need for us to keep
-       this please `let me know
-       <https://trac.torproject.org/projects/tor/wiki/doc/stem/bugs>`_.
-
-    :returns: **list** with the lines of annotation that came before this descriptor
-    """
-
-    return self._annotation_lines
-
   def _check_constraints(self, entries):
     """
     Does a basic check that the entries conform to this descriptor type's
@@ -831,8 +779,8 @@ class RelayDescriptor(ServerDescriptor):
     'router-signature': _parse_router_signature_line,
   })
 
-  def __init__(self, raw_contents, validate = False, annotations = None, skip_crypto_validation = False):
-    super(RelayDescriptor, self).__init__(raw_contents, validate, annotations)
+  def __init__(self, raw_contents, validate = False, skip_crypto_validation = False):
+    super(RelayDescriptor, self).__init__(raw_contents, validate)
 
     if validate:
       if self.fingerprint:
diff --git a/test/unit/descriptor/remote.py b/test/unit/descriptor/remote.py
index d9893535..7cdeab46 100644
--- a/test/unit/descriptor/remote.py
+++ b/test/unit/descriptor/remote.py
@@ -333,12 +333,10 @@ class TestDescriptorDownloader(unittest.TestCase):
 
     # checking via the iterator
 
-    expected_error_msg = 'Content conform to being a server descriptor:\nsome malformed stuff'
-
     descriptors = list(query)
     self.assertEqual(0, len(descriptors))
     self.assertEqual(ValueError, type(query.error))
-    self.assertEqual(expected_error_msg, str(query.error))
+    self.assertEqual("Descriptor must have a 'router' entry", str(query.error))
 
     # check via the run() method
 
diff --git a/test/unit/descriptor/server_descriptor.py b/test/unit/descriptor/server_descriptor.py
index aeb58df1..ab419575 100644
--- a/test/unit/descriptor/server_descriptor.py
+++ b/test/unit/descriptor/server_descriptor.py
@@ -705,34 +705,6 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
     self.assertEqual(900, desc.read_history_interval)
     self.assertEqual([], desc.read_history_values)
 
-  @patch('stem.prereq.is_crypto_available', Mock(return_value = False))
-  def test_annotations(self):
-    """
-    Checks that content before a descriptor are parsed as annotations.
-    """
-
-    desc_text = b'@pepperjack very tasty\n at mushrooms not so much\n'
-    desc_text += RelayDescriptor.content()
-    desc_text += b'\ntrailing text that should be invalid, ho hum'
-
-    # running _parse_file should provide an iterator with a single descriptor
-    desc_iter = stem.descriptor.server_descriptor._parse_file(io.BytesIO(desc_text), validate = True)
-    self.assertRaises(ValueError, list, desc_iter)
-
-    desc_text = b'@pepperjack very tasty\n at mushrooms not so much\n'
-    desc_text += RelayDescriptor.content({'router': 'caerSidi 71.35.133.197 9001 0 0'})
-    desc_iter = stem.descriptor.server_descriptor._parse_file(io.BytesIO(desc_text))
-
-    desc_entries = list(desc_iter)
-    self.assertEqual(1, len(desc_entries))
-    desc = desc_entries[0]
-
-    self.assertEqual('caerSidi', desc.nickname)
-    self.assertEqual('@pepperjack very tasty', desc.get_annotation_lines()[0])
-    self.assertEqual('@mushrooms not so much', desc.get_annotation_lines()[1])
-    self.assertEqual({'@pepperjack': 'very tasty', '@mushrooms': 'not so much'}, desc.get_annotations())
-    self.assertEqual([], desc.get_unrecognized_lines())
-
   def test_duplicate_field(self):
     """
     Constructs with a field appearing twice.