commit 8db4ba1efcdfa8dbe72e1c237eb45fef0f91d79b
Author: Damian Johnson <atagar@torproject.org>
Date:   Sat Jan 11 13:41:20 2020 -0800
Read router status entries as bytes
Stem expects descriptors to be bytes. This fixes the following errors...
Traceback (most recent call last):
  File "/home/atagar/Desktop/tor/bridgedb/bridgedb/test/test_parse_descriptors.py", line 392, in test_parse_descriptors_parseNetworkStatusFile_bad_nickname
    ...
  File "/home/atagar/Desktop/tor/bridgedb/bridgedb/parse/descriptors.py", line 135, in parseNetworkStatusFile
    routers.extend(list(document))
  File "/usr/local/lib/python3.5/dist-packages/stem/descriptor/router_status_entry.py", line 101, in _parse_file
    desc_content = bytes.join(b'', desc_lines)
builtins.TypeError: sequence item 0: expected a bytes-like object, str found
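Fixing only that caused the tests to hang in an infinite loop in the descriptor parsing of parseServerDescriptorsFile: its `while True` loop exits only on StopIteration and logs and swallows every other exception, so an error that recurs on each `document.next()` call never terminates. I am unsure why Isis implemented it that way (the code comment cites https://bugs.torproject.org/26023 as the reason), so this commit simply removes the loop and returns `list(document)` instead.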
Test results changed as follows...
  before: FAILED (skips=103, failures=9, errors=189, successes=366)
  after:  FAILED (skips=103, failures=6, errors=183, successes=375)
---
 bridgedb/parse/descriptors.py | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)
diff --git a/bridgedb/parse/descriptors.py b/bridgedb/parse/descriptors.py index 1b01023..ff32226 100644 --- a/bridgedb/parse/descriptors.py +++ b/bridgedb/parse/descriptors.py @@ -122,10 +122,10 @@ def parseNetworkStatusFile(filename, validate=True, skipAnnotations=True, routers = []
logging.info("Parsing networkstatus file: %s" % filename) - with open(filename) as fh: + with open(filename, 'rb') as fh: position = fh.tell() if skipAnnotations: - while not fh.readline().startswith('r '): + while not fh.readline().startswith(b'r '): position = fh.tell() logging.debug("Skipping %d bytes of networkstatus file." % position) fh.seek(position) @@ -161,20 +161,8 @@ def parseServerDescriptorsFile(filename, validate=True): logging.info("Parsing server descriptors with Stem: %s" % filename) descriptorType = 'server-descriptor 1.0' document = parse_file(filename, descriptorType, validate=validate) - routers = list() + return list(document)
- # Work around https://bugs.torproject.org/26023 by parsing each descriptor - # at a time and catching any errors not handled in stem: - while True: - try: - routers.append(document.next()) - except StopIteration: - break - except Exception as error: - logging.debug("Error while parsing a bridge server descriptor: %s" - % error) - - return routers
def __cmp_published__(x, y): """A custom ``cmp()`` which sorts descriptors by published date.