[tor-commits] [bridgedb/master] Read router status entries as bytes

phw at torproject.org phw at torproject.org
Wed Feb 19 18:26:37 UTC 2020


commit 8db4ba1efcdfa8dbe72e1c237eb45fef0f91d79b
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat Jan 11 13:41:20 2020 -0800

    Read router status entries as bytes
    
    Stem expects descriptors to be bytes. This fixes the following errors...
    
      Traceback (most recent call last):
        File "/home/atagar/Desktop/tor/bridgedb/bridgedb/test/test_parse_descriptors.py", line 392, in test_parse_descriptors_parseNetworkStatusFile_bad_nickname
        ...
        File "/home/atagar/Desktop/tor/bridgedb/bridgedb/parse/descriptors.py", line 135, in parseNetworkStatusFile
          routers.extend(list(document))
        File "/usr/local/lib/python3.5/dist-packages/stem/descriptor/router_status_entry.py", line 101, in _parse_file
          desc_content = bytes.join(b'', desc_lines)
      builtins.TypeError: sequence item 0: expected a bytes-like object, str found
    
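    Just fixing that caused the tests to hang due to an infinite loop in
    descriptor parsing. The loop called document.next() inside "while True"
    with a broad "except Exception" handler; under Python 3 iterators have no
    next() method, so the resulting AttributeError was apparently caught and
    logged on every pass and StopIteration was never reached. I am unsure why
    Isis implemented it that way, so I am simply pulling that loop out.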
    
    Test results changed as follows...
    
      before: FAILED (skips=103, failures=9, errors=189, successes=366)
      after:  FAILED (skips=103, failures=6, errors=183, successes=375)
---
 bridgedb/parse/descriptors.py | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/bridgedb/parse/descriptors.py b/bridgedb/parse/descriptors.py
index 1b01023..ff32226 100644
--- a/bridgedb/parse/descriptors.py
+++ b/bridgedb/parse/descriptors.py
@@ -122,10 +122,10 @@ def parseNetworkStatusFile(filename, validate=True, skipAnnotations=True,
     routers = []
 
     logging.info("Parsing networkstatus file: %s" % filename)
-    with open(filename) as fh:
+    with open(filename, 'rb') as fh:
         position = fh.tell()
         if skipAnnotations:
-            while not fh.readline().startswith('r '):
+            while not fh.readline().startswith(b'r '):
                 position = fh.tell()
         logging.debug("Skipping %d bytes of networkstatus file." % position)
         fh.seek(position)
@@ -161,20 +161,8 @@ def parseServerDescriptorsFile(filename, validate=True):
     logging.info("Parsing server descriptors with Stem: %s" % filename)
     descriptorType = 'server-descriptor 1.0'
     document = parse_file(filename, descriptorType, validate=validate)
-    routers = list()
+    return list(document)
 
-    # Work around https://bugs.torproject.org/26023 by parsing each descriptor
-    # at a time and catching any errors not handled in stem:
-    while True:
-        try:
-            routers.append(document.next())
-        except StopIteration:
-            break
-        except Exception as error:
-            logging.debug("Error while parsing a bridge server descriptor: %s"
-                          % error)
-
-    return routers
 
 def __cmp_published__(x, y):
     """A custom ``cmp()`` which sorts descriptors by published date.





More information about the tor-commits mailing list