[tor-commits] [stem/master] Replace parse_bytes() with a from_str() method

atagar at torproject.org atagar at torproject.org
Tue Nov 20 21:44:28 UTC 2018


commit 2192228e436fce49a2abfa6d44242e407f15d8dc
Author: Damian Johnson <atagar at torproject.org>
Date:   Tue Nov 20 13:29:43 2018 -0800

    Replace parse_bytes() with a from_str() method
    
    Shifting to the same pattern we used with the stem.response.ControlMessage
    method...
    
      https://stem.torproject.org/api/response.html#stem.response.ControlMessage.from_str
    
    Also making this provide a single descriptor by default (the more common use
    case) with a 'multiple = True' option, and tests.
---
 docs/change_log.rst                |  3 +-
 stem/descriptor/__init__.py        | 74 ++++++++++++++++++++++++++------------
 test/settings.cfg                  |  1 +
 test/unit/descriptor/descriptor.py | 51 ++++++++++++++++++++++++++
 4 files changed, 106 insertions(+), 23 deletions(-)

diff --git a/docs/change_log.rst b/docs/change_log.rst
index 2fb3ae7e..e99e933f 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -51,7 +51,8 @@ The following are only available within Stem's `git repository
 
  * **Descriptors**
 
-  * Added :func:`~stem.descriptor.Descriptor.type_annotation` method (:trac:`28397`)
+  * Added :func:`~stem.descriptor.__init__.Descriptor.from_str` method (:trac:`28450`)
+  * Added :func:`~stem.descriptor.__init__.Descriptor.type_annotation` method (:trac:`28397`)
   * Added the **hash_type** and **encoding** arguments to `ServerDescriptor <api/descriptor/server_descriptor.html#stem.descriptor.server_descriptor.ServerDescriptor.digest>`_ and `ExtraInfo's <api/descriptor/extrainfo_descriptor.html#stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor.digest>`_ digest methods (:trac:`28398`)
   * Added the network status vote's new bandwidth_file_digest attribute (:spec:`1b686ef`)
   * Added :func:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3.is_valid` and :func:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3.is_fresh` methods (:trac:`28448`)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index a003d603..17bea678 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -8,12 +8,12 @@ Package for parsing and processing descriptor data.
 
 ::
 
-  parse_bytes - Parses the descriptors in a :class:`bytes`.
   parse_file - Parses the descriptors in a file.
   create - Creates a new custom descriptor.
   create_signing_key - Cretes a signing key that can be used for creating descriptors.
 
   Descriptor - Common parent for all descriptor file types.
+    |- from_str - provides a parsed descriptor for the given string
     |- get_path - location of the descriptor on disk if it came from a file
     |- get_archive_path - location of the descriptor within the archive it came from
     |- get_bytes - similar to str(), but provides our original bytes content
@@ -117,16 +117,15 @@ __all__ = [
   'networkstatus',
   'router_status_entry',
   'tordnsel',
-  'parse_bytes',
   'parse_file',
   'Descriptor',
 ]
 
 UNSEEKABLE_MSG = """\
-File object isn't seekable. Try using parse_bytes() instead:
+File object isn't seekable. Try using Descriptor.from_str() instead:
 
   content = my_file.read()
-  parsed_descriptors = stem.descriptor.parse_bytes(content)
+  parsed_descriptors = stem.descriptor.Descriptor.from_str(content)
 """
 
 KEYWORD_CHAR = 'a-zA-Z0-9-'
@@ -195,24 +194,6 @@ class SigningKey(collections.namedtuple('SigningKey', ['private', 'public', 'pub
   """
 
 
-def parse_bytes(descriptor_bytes, **kwargs):
-  """
-  Read the descriptor contents from a :class:`bytes`, providing an iterator
-  for its :class:`~stem.descriptor.__init__.Descriptor` contents.
-
-  :param bytes descriptor_bytes: Raw descriptor
-  :param dict kwargs: Keyword arguments as used for :func:`parse_file`.
-
-  :returns: iterator for :class:`~stem.descriptor.__init__.Descriptor` instances in the file
-
-  :raises:
-    * **ValueError** if the contents is malformed and validate is True
-    * **TypeError** if we can't match the contents of the file to a descriptor type
-    * **IOError** if unable to read from the descriptor_file
-  """
-  return parse_file(io.BytesIO(descriptor_bytes), **kwargs)
-
-
 def parse_file(descriptor_file, descriptor_type = None, validate = False, document_handler = DocumentHandler.ENTRIES, normalize_newlines = None, **kwargs):
   """
   Simple function to read the descriptor contents from a file, providing an
@@ -721,6 +702,55 @@ class Descriptor(object):
     self._unrecognized_lines = []
 
   @classmethod
+  def from_str(cls, content, **kwargs):
+    """
+    Provides a :class:`~stem.descriptor.__init__.Descriptor` for the given content.
+
+    To parse a descriptor we must know its type. There are three ways to
+    convey this...
+
+    ::
+
+      # use a descriptor_type argument
+      desc = Descriptor.from_str(content, descriptor_type = 'server-descriptor 1.0')
+
+      # prefixing the content with a "@type" annotation
+      desc = Descriptor.from_str('@type server-descriptor 1.0\\n' + content)
+
+      # use this method from a subclass
+      desc = stem.descriptor.server_descriptor.RelayDescriptor.from_str(content)
+
+    .. versionadded:: 1.8.0
+
+    :param bytes content: string to construct the descriptor from
+    :param bool multiple: if provided with **True** this provides a list of
+      descriptors rather than a single one
+    :param dict kwargs: additional arguments for :func:`~stem.descriptor.__init__.parse_file`
+
+    :returns: :class:`~stem.descriptor.__init__.Descriptor` subclass for the
+      given content, or a **list** of descriptors if **multiple = True** is
+      provided
+
+    :raises:
+      * **ValueError** if the content is malformed and validate is True, or if it doesn't hold exactly one descriptor and **multiple** isn't True
+      * **TypeError** if we can't match the content to a descriptor type
+      * **IOError** if unable to read from the content
+    """
+
+    if 'descriptor_type' not in kwargs and cls.TYPE_ANNOTATION_NAME is not None:
+      kwargs['descriptor_type'] = str(TypeAnnotation(cls.TYPE_ANNOTATION_NAME, 1, 0))[6:]
+
+    is_multiple = kwargs.pop('multiple', False)
+    results = list(parse_file(io.BytesIO(content), **kwargs))
+
+    if is_multiple:
+      return results
+    elif len(results) == 1:
+      return results[0]
+    else:
+      raise ValueError("Descriptor.from_str() expected a single descriptor, but had %i instead. Please include 'multiple = True' if you want a list of results instead." % len(results))
+
+  @classmethod
   def content(cls, attr = None, exclude = (), sign = False):
     """
     Creates descriptor content with the given attributes. Mandatory fields are
diff --git a/test/settings.cfg b/test/settings.cfg
index 8423614b..727bfc80 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -207,6 +207,7 @@ test.unit_tests
 |test.unit.util.tor_tools.TestTorTools
 |test.unit.util.__init__.TestBaseUtil
 |test.unit.installation.TestInstallation
+|test.unit.descriptor.descriptor.TestDescriptor
 |test.unit.descriptor.export.TestExport
 |test.unit.descriptor.reader.TestDescriptorReader
 |test.unit.descriptor.remote.TestDescriptorDownloader
diff --git a/test/unit/descriptor/descriptor.py b/test/unit/descriptor/descriptor.py
new file mode 100644
index 00000000..cedb3832
--- /dev/null
+++ b/test/unit/descriptor/descriptor.py
@@ -0,0 +1,51 @@
+"""
+Unit tests for the base stem.descriptor module.
+"""
+
+import unittest
+
+from stem.descriptor import Descriptor
+from stem.descriptor.server_descriptor import RelayDescriptor
+
+
+class TestDescriptor(unittest.TestCase):
+  def test_from_str(self):
+    """
+    Basic exercise for Descriptor.from_str().
+    """
+
+    desc_text = RelayDescriptor.content({'router': 'caerSidi 71.35.133.197 9001 0 0'})
+    desc = Descriptor.from_str(desc_text, descriptor_type = 'server-descriptor 1.0')
+    self.assertEqual('caerSidi', desc.nickname)
+
+  def test_from_str_type_handling(self):
+    """
+    Check our various methods of conveying the descriptor type. There are three:
+    @type annotations, a descriptor_type argument, and using the from_str() of
+    a particular subclass.
+    """
+
+    desc_text = RelayDescriptor.content({'router': 'caerSidi 71.35.133.197 9001 0 0'})
+
+    desc = Descriptor.from_str(desc_text, descriptor_type = 'server-descriptor 1.0')
+    self.assertEqual('caerSidi', desc.nickname)
+
+    desc = Descriptor.from_str('@type server-descriptor 1.0\n' + desc_text)
+    self.assertEqual('caerSidi', desc.nickname)
+
+    desc = RelayDescriptor.from_str(desc_text)
+    self.assertEqual('caerSidi', desc.nickname)
+
+    self.assertRaisesWith(TypeError, "Unable to determine the descriptor's type. filename: '<undefined>', first line: 'router caerSidi 71.35.133.197 9001 0 0'", Descriptor.from_str, desc_text)
+
+  def test_from_str_multiple(self):
+    desc_text = '\n'.join((
+      '@type server-descriptor 1.0',
+      RelayDescriptor.content({'router': 'relay1 71.35.133.197 9001 0 0'}),
+      RelayDescriptor.content({'router': 'relay2 71.35.133.197 9001 0 0'}),
+    ))
+
+    self.assertEqual(2, len(RelayDescriptor.from_str(desc_text, multiple = True)))
+    self.assertEqual(0, len(RelayDescriptor.from_str('', multiple = True)))
+
+    self.assertRaisesWith(ValueError, "Descriptor.from_str() expected a single descriptor, but had 2 instead. Please include 'multiple = True' if you want a list of results instead.", RelayDescriptor.from_str, desc_text)





More information about the tor-commits mailing list