[tor-commits] [bridgedb/master] Fix descriptor parsing tests

phw at torproject.org phw at torproject.org
Wed Feb 19 18:26:37 UTC 2020


commit dc07e4e34d8b5f31f015362ee239f07c0b203a44
Author: Damian Johnson <atagar at torproject.org>
Date:   Mon Jan 13 19:07:29 2020 -0800

    Fix descriptor parsing tests
    
    Mostly just simple byte normalizaiton issues. Test results changed as
    follows...
    
      before: FAILED (skips=109, failures=24, errors=364, successes=483)
      after:  FAILED (skips=109, failures=23, errors=349, successes=499)
---
 bridgedb/parse/descriptors.py           |  24 +------
 bridgedb/test/test_parse_descriptors.py | 110 ++++++++++++++++----------------
 2 files changed, 56 insertions(+), 78 deletions(-)

diff --git a/bridgedb/parse/descriptors.py b/bridgedb/parse/descriptors.py
index ff32226..f6dee83 100644
--- a/bridgedb/parse/descriptors.py
+++ b/bridgedb/parse/descriptors.py
@@ -164,28 +164,6 @@ def parseServerDescriptorsFile(filename, validate=True):
     return list(document)
 
 
-def __cmp_published__(x, y):
-    """A custom ``cmp()`` which sorts descriptors by published date.
-
-    :rtype: int
-    :returns: Return negative if x<y, zero if x==y, positive if x>y.
-    """
-    if x.published < y.published:
-        return -1
-    elif x.published == y.published:
-        # This *shouldn't* happen. It would mean that two descriptors for
-        # the same router had the same timestamps, probably meaning there
-        # is a severely-messed up OR implementation out there. Let's log
-        # its fingerprint (no matter what!) so that we can look up its
-        # ``platform`` line in its server-descriptor and tell whoever
-        # wrote that code that they're probably (D)DOSing the Tor network.
-        logging.warn(("Duplicate descriptor with identical timestamp (%s) "
-                      "for bridge %s with fingerprint %s !") %
-                     (x.published, x.nickname, x.fingerprint))
-        return 0
-    elif x.published > y.published:
-        return 1
-
 def deduplicate(descriptors, statistics=False):
     """Deduplicate some descriptors, returning only the newest for each router.
 
@@ -215,7 +193,7 @@ def deduplicate(descriptors, statistics=False):
             duplicates[fingerprint] = [descriptor,]
 
     for fingerprint, dupes in duplicates.items():
-        dupes.sort(cmp=__cmp_published__)
+        dupes.sort(key=lambda x: x.published)
         first = dupes.pop()
         newest[fingerprint] = first
         duplicates[fingerprint] = dupes
diff --git a/bridgedb/test/test_parse_descriptors.py b/bridgedb/test/test_parse_descriptors.py
index 7fd7582..eb86a0e 100644
--- a/bridgedb/test/test_parse_descriptors.py
+++ b/bridgedb/test/test_parse_descriptors.py
@@ -36,7 +36,7 @@ else:
 from bridgedb.test.util import Benchmarker
 
 
-BRIDGE_NETWORKSTATUS_0 = '''\
+BRIDGE_NETWORKSTATUS_0 = b'''\
 r MiserLandfalls 4IsyTSCtChPhFPAnq5rD8yymlqA /GMC4lz8RXT/62v6kZNdmzSmopk 2014-11-04 06:23:22 2.215.61.223 4056 0
 a [c5fd:4467:98a7:90be:c76a:b449:8e6f:f0a7]:4055
 s Fast Guard Running Stable Valid
@@ -44,7 +44,7 @@ w Bandwidth=1678904
 p reject 1-65535
 '''
 
-BRIDGE_NETWORKSTATUS_1 = '''\
+BRIDGE_NETWORKSTATUS_1 = b'''\
 r Unmentionable BgOrX0ViP5hNsK5ZvixAuPZ6EY0 NTg9NoE5ls9KjF96Dp/UdrabZ9Y 2014-11-04 12:23:37 80.44.173.87 51691 0
 a [da14:7d1e:ba8e:60d0:b078:3f88:382b:5c70]:51690
 s Fast Guard Running Stable Valid
@@ -52,7 +52,7 @@ w Bandwidth=24361
 p reject 1-65535
 '''
 
-BRIDGE_SERVER_DESCRIPTOR = '''\
+BRIDGE_SERVER_DESCRIPTOR = b'''\
 @purpose bridge
 router MiserLandfalls 2.215.61.223 4056 0 0
 or-address [c5fd:4467:98a7:90be:c76a:b449:8e6f:f0a7]:4055
@@ -86,7 +86,7 @@ P2aB/+XQfzFBA5TaWF83coDng4OGodhwHaOx10Kn7Bg=
 -----END SIGNATURE-----
 '''
 
-BRIDGE_EXTRA_INFO_DESCRIPTOR = '''\
+BRIDGE_EXTRA_INFO_DESCRIPTOR = b'''\
 extra-info MiserLandfalls E08B324D20AD0A13E114F027AB9AC3F32CA696A0
 published 2014-11-04 06:23:22
 write-history 2014-11-04 06:23:22 (900 s) 3188736,2226176,2866176
@@ -117,7 +117,7 @@ U36EY4UoN5ABPowhNZFeyr5A3vKiDr6j0hCOqYOhxPY=
 -----END SIGNATURE-----
 '''
 
-BRIDGE_EXTRA_INFO_DESCRIPTOR_NEWER_DUPLICATE = '''\
+BRIDGE_EXTRA_INFO_DESCRIPTOR_NEWER_DUPLICATE = b'''\
 extra-info MiserLandfalls E08B324D20AD0A13E114F027AB9AC3F32CA696A0
 published 2014-11-04 08:10:25
 write-history 2014-11-04 08:10:25 (900 s) 3188736,2226176,2866176,2226176
@@ -148,7 +148,7 @@ U36EY4UoN5ABPowhNZFeyr5A3vKiDr6j0hCOqYOhxPY=
 -----END SIGNATURE-----
 '''
 
-BRIDGE_EXTRA_INFO_DESCRIPTOR_NEWEST_DUPLICATE = '''\
+BRIDGE_EXTRA_INFO_DESCRIPTOR_NEWEST_DUPLICATE = b'''\
 extra-info MiserLandfalls E08B324D20AD0A13E114F027AB9AC3F32CA696A0
 published 2014-12-04 03:10:25
 write-history 2014-12-04 03:10:25 (900 s) 3188736,2226176,2866176,2226176
@@ -179,7 +179,7 @@ U36EY4UoN5ABPowhNZFeyr5A3vKiDr6j0hCOqYOhxPY=
 -----END SIGNATURE-----
 '''
 
-BRIDGE_SERVER_DESCRIPTOR_ED25519 = '''\
+BRIDGE_SERVER_DESCRIPTOR_ED25519 = b'''\
 @purpose bridge
 router piratepartei 80.92.79.70 80 0 0
 identity-ed25519
@@ -233,7 +233,7 @@ VC4FdHgFlAkXbiqkpWtD0ojJJjLlEeXbmGILjC1Ls2I=
 -----END SIGNATURE-----
 '''
 
-BRIDGE_EXTRA_INFO_DESCRIPTOR_ED25519 = '''\
+BRIDGE_EXTRA_INFO_DESCRIPTOR_ED25519 = b'''\
 extra-info piratepartei 312D64274C29156005843EECB19C6865FA3CC10C
 identity-ed25519
 -----BEGIN ED25519 CERT-----
@@ -307,7 +307,7 @@ class ParseDescriptorsTests(unittest.TestCase):
         :returns: The full path to the file which was written to.
         """
         descFilename = os.path.join(os.getcwd(), filename)
-        with open(descFilename, 'w') as fh:
+        with open(descFilename, 'wb') as fh:
             for desc in descriptors:
                 fh.write(desc)
                 fh.flush()
@@ -381,8 +381,8 @@ class ParseDescriptorsTests(unittest.TestCase):
         raise InvalidRouterNickname.
         """
         unparseable = BRIDGE_NETWORKSTATUS_0.replace(
-            'MiserLandfalls',
-            'MiserLandfallsWaterfallsSnowfallsAvalanche')
+            b'MiserLandfalls',
+            b'MiserLandfallsWaterfallsSnowfallsAvalanche')
         # Write the descriptor to a file for testing. This is necessary
         # because the function opens the networkstatus file to read it.
         descFile = self.writeTestDescriptorsToFile('networkstatus-bridges',
@@ -399,8 +399,8 @@ class ParseDescriptorsTests(unittest.TestCase):
         See also: :trac:`16616`
         """
         unparseable = BRIDGE_NETWORKSTATUS_0.replace(
-            's Fast Guard Running Stable Valid',
-            's Fast Guard Running Stable Valid HSDir')
+            b's Fast Guard Running Stable Valid',
+            b's Fast Guard Running Stable Valid HSDir')
         # Write the descriptor to a file for testing. This is necessary
         # because the function opens the networkstatus file to read it.
         descFile = self.writeTestDescriptorsToFile('networkstatus-bridges',
@@ -420,7 +420,7 @@ class ParseDescriptorsTests(unittest.TestCase):
         a ValueError.
         """
         unparseable = BRIDGE_NETWORKSTATUS_0.replace(
-            '2.215.61.223', '[2837:fcd2:387b:e376:34c:1ec7:11ff:1686]')
+            b'2.215.61.223', b'[2837:fcd2:387b:e376:34c:1ec7:11ff:1686]')
         descFile = self.writeTestDescriptorsToFile('networkstatus-bridges',
                                                    unparseable)
         self.assertRaises(ValueError,
@@ -434,9 +434,9 @@ class ParseDescriptorsTests(unittest.TestCase):
         expectedIPs = [self.expectedIPBridge0, self.expectedIPBridge1]
         descFile = 'networkstatus-bridges'
 
-        with open(descFile, 'w') as fh:
-            fh.write('signature and stuff from the BridgeAuth would go here\n')
-            fh.write('some more annotations with parameters and stuff\n')
+        with open(descFile, 'wb') as fh:
+            fh.write(b'signature and stuff from the BridgeAuth would go here\n')
+            fh.write(b'some more annotations with parameters and stuff\n')
             fh.write(BRIDGE_NETWORKSTATUS_0)
             fh.write(BRIDGE_NETWORKSTATUS_1)
             fh.flush()
@@ -454,9 +454,9 @@ class ParseDescriptorsTests(unittest.TestCase):
         expectedIPs = [self.expectedIPBridge0, self.expectedIPBridge1]
         descFile = 'networkstatus-bridges'
 
-        with open(descFile, 'w') as fh:
-            fh.write('signature and stuff from the BridgeAuth would go here\n')
-            fh.write('some more annotations with parameters and stuff\n')
+        with open(descFile, 'wb') as fh:
+            fh.write(b'signature and stuff from the BridgeAuth would go here\n')
+            fh.write(b'some more annotations with parameters and stuff\n')
             fh.write(BRIDGE_NETWORKSTATUS_0)
             fh.write(BRIDGE_NETWORKSTATUS_1)
             fh.flush()
@@ -479,7 +479,7 @@ class ParseDescriptorsTests(unittest.TestCase):
         """
         descFile = io.BytesIO(BRIDGE_EXTRA_INFO_DESCRIPTOR)
         routers = descriptors.parseExtraInfoFiles(descFile)
-        bridge = routers.values()[0]
+        bridge = list(routers.values())[0]
         self.assertIsInstance(bridge, RelayExtraInfoDescriptor)
 
     def test_parse_descriptors_parseExtraInfoFiles_one_file(self):
@@ -488,12 +488,12 @@ class ParseDescriptorsTests(unittest.TestCase):
         """
         descFile = io.BytesIO(BRIDGE_EXTRA_INFO_DESCRIPTOR)
         routers = descriptors.parseExtraInfoFiles(descFile)
-        bridge = routers.values()[0]
+        bridge = list(routers.values())[0]
 
         # The number of transports we parsed should be equal to the number of
         # 'transport' lines in the descriptor:
         self.assertEqual(len(bridge.transport),
-                         BRIDGE_EXTRA_INFO_DESCRIPTOR.count('transport '))
+                         BRIDGE_EXTRA_INFO_DESCRIPTOR.count(b'transport '))
 
         self.assertEqual(bridge.fingerprint, self.expectedFprBridge0)
 
@@ -522,7 +522,7 @@ class ParseDescriptorsTests(unittest.TestCase):
                          "We shouldn't have any duplicate descriptors.")
 
         # We should only have the newest descriptor:
-        bridge = routers.values()[0]
+        bridge = list(routers.values())[0]
         self.assertEqual(
             bridge.published,
             datetime.datetime.strptime("2014-11-04 08:10:25", "%Y-%m-%d %H:%M:%S"),
@@ -541,7 +541,7 @@ class ParseDescriptorsTests(unittest.TestCase):
         self.assertEqual(len(routers), 1,
                          "We shouldn't have any duplicate descriptors.")
 
-        bridge = routers.values()[0]
+        bridge = list(routers.values())[0]
         self.assertEqual(
             bridge.published,
             datetime.datetime.strptime("2014-11-04 08:10:25", "%Y-%m-%d %H:%M:%S"),
@@ -564,7 +564,7 @@ class ParseDescriptorsTests(unittest.TestCase):
                          "We shouldn't have any duplicate descriptors.")
 
         # We should only have the newest descriptor:
-        bridge = routers.values()[0]
+        bridge = list(routers.values())[0]
         self.assertEqual(
             bridge.published,
             datetime.datetime.strptime("2014-12-04 03:10:25", "%Y-%m-%d %H:%M:%S"),
@@ -581,10 +581,10 @@ class ParseDescriptorsTests(unittest.TestCase):
         descFiles = []
 
         # The timestamp and fingerprint from BRIDGE_EXTRA_INFO_DESCRIPTOR:
-        timestamp  = "2014-11-04 06:23:22"
-        Y, M, rest = timestamp.split("-")
-        fpr        = "E08B324D20AD0A13E114F027AB9AC3F32CA696A0"
-        newerFpr   = "E08B324D20AD0A13E114F027AB9AC3F32CA696A0"
+        timestamp  = b"2014-11-04 06:23:22"
+        Y, M, rest = timestamp.split(b"-")
+        fpr        = b"E08B324D20AD0A13E114F027AB9AC3F32CA696A0"
+        newerFpr   = b"E08B324D20AD0A13E114F027AB9AC3F32CA696A0"
 
         total = 0
         needed = b * n
@@ -592,15 +592,15 @@ class ParseDescriptorsTests(unittest.TestCase):
             if total >= needed:
                 break
             # Re-digest the fingerprint to create a "new" bridge
-            newerFpr = hashlib.sha1(newerFpr).hexdigest().upper()
+            newerFpr = hashlib.sha1(newerFpr).hexdigest().upper().encode('utf-8')
             # Generate n extrainfos with different timestamps:
             count = 0
-            for year in range(1, ((n + 1)/ 12) + 2):  # Start from the next year
+            for year in range(1, ((n + 1)// 12) + 2):  # Start from the next year
                 if count >= n:
                     break
                 for month in range(1, 13):
                     if count < n:
-                        newerTimestamp = "-".join([str(int(Y) + year), "%02d" % month, rest])
+                        newerTimestamp = b"-".join([str(int(Y) + year).encode('utf-8'), b"%02d" % month, rest])
                         newerDuplicate = BRIDGE_EXTRA_INFO_DESCRIPTOR[:].replace(
                             fpr, newerFpr).replace(
                                 timestamp, newerTimestamp)
@@ -663,10 +663,10 @@ class ParseDescriptorsTests(unittest.TestCase):
         """
         # Give it a bad geoip-db-digest:
         unparseable = BRIDGE_EXTRA_INFO_DESCRIPTOR.replace(
-            "MiserLandfalls E08B324D20AD0A13E114F027AB9AC3F32CA696A0",
-            "DontParseMe F373CC1D86D82267F1F1F5D39470F0E0A022122E").replace(
-                "geoip-db-digest 09A0E093100B279AD9CFF47A67B13A21C6E1483F",
-                "geoip-db-digest FOOOOOOOOOOOOOOOOOOBAAAAAAAAAAAAAAAAAARR")
+            b"MiserLandfalls E08B324D20AD0A13E114F027AB9AC3F32CA696A0",
+            b"DontParseMe F373CC1D86D82267F1F1F5D39470F0E0A022122E").replace(
+                b"geoip-db-digest 09A0E093100B279AD9CFF47A67B13A21C6E1483F",
+                b"geoip-db-digest FOOOOOOOOOOOOOOOOOOBAAAAAAAAAAAAAAAAAARR")
 
         descFileOne = io.BytesIO(BRIDGE_EXTRA_INFO_DESCRIPTOR_NEWEST_DUPLICATE)
         descFileTwo = io.BytesIO(BRIDGE_EXTRA_INFO_DESCRIPTOR)
@@ -684,7 +684,7 @@ class ParseDescriptorsTests(unittest.TestCase):
             "and one was unparseable, so that should only leave one "
             "descriptor remaining."))
 
-        bridge = routers.values()[0]
+        bridge = list(routers.values())[0]
         self.assertEqual(
             bridge.fingerprint,
             "E08B324D20AD0A13E114F027AB9AC3F32CA696A0",
@@ -703,14 +703,14 @@ class ParseDescriptorsTests(unittest.TestCase):
         """
         # Mess up the bridge-ip-transports line:
         unparseable = BRIDGE_EXTRA_INFO_DESCRIPTOR.replace(
-            "MiserLandfalls E08B324D20AD0A13E114F027AB9AC3F32CA696A0",
-            "DontParseMe F373CC1D86D82267F1F1F5D39470F0E0A022122E").replace(
-                "bridge-ip-transports <OR>=8",
-                "bridge-ip-transports <OR>")
+            b"MiserLandfalls E08B324D20AD0A13E114F027AB9AC3F32CA696A0",
+            b"DontParseMe F373CC1D86D82267F1F1F5D39470F0E0A022122E").replace(
+                b"bridge-ip-transports <OR>=8",
+                b"bridge-ip-transports <OR>")
 
         parseable = BRIDGE_EXTRA_INFO_DESCRIPTOR.replace(
-            "MiserLandfalls E08B324D20AD0A13E114F027AB9AC3F32CA696A0",
-            "ImOkWithBeingParsed 2B5DA67FBA13A6449DE625673B7AE9E3AA7DF75F")
+            b"MiserLandfalls E08B324D20AD0A13E114F027AB9AC3F32CA696A0",
+            b"ImOkWithBeingParsed 2B5DA67FBA13A6449DE625673B7AE9E3AA7DF75F")
 
         descFileOne = io.BytesIO(BRIDGE_EXTRA_INFO_DESCRIPTOR)
         descFileTwo = io.BytesIO(BRIDGE_EXTRA_INFO_DESCRIPTOR_NEWEST_DUPLICATE)
@@ -750,8 +750,8 @@ class ParseDescriptorsTests(unittest.TestCase):
         zero parsed descriptors.
         """
         unparseable = BRIDGE_EXTRA_INFO_DESCRIPTOR.replace(
-            '-----END SIGNATURE-----',
-            '-----END SIGNATURE FOR REALZ-----')
+            b'-----END SIGNATURE-----',
+            b'-----END SIGNATURE FOR REALZ-----')
         # This must be a "real" file or _copyUnparseableDescriptorFile() will
         # raise an AttributeError saying:
         # '_io.BytesIO' object has no attribute 'rpartition'"
@@ -766,7 +766,7 @@ class ParseDescriptorsTests(unittest.TestCase):
         missing the signature should return zero parsed descriptors.
         """
         # Remove the signature
-        BEGIN_SIG = '-----BEGIN SIGNATURE-----'
+        BEGIN_SIG = b'-----BEGIN SIGNATURE-----'
         unparseable, _ = BRIDGE_EXTRA_INFO_DESCRIPTOR.split(BEGIN_SIG)
         # This must be a "real" file or _copyUnparseableDescriptorFile() will
         # raise an AttributeError saying:
@@ -782,7 +782,7 @@ class ParseDescriptorsTests(unittest.TestCase):
         bad signature should raise an InvalidExtraInfoSignature exception.
         """
         # Truncate the signature to 50 bytes
-        BEGIN_SIG = '-----BEGIN SIGNATURE-----'
+        BEGIN_SIG = b'-----BEGIN SIGNATURE-----'
         doc, sig = BRIDGE_EXTRA_INFO_DESCRIPTOR.split(BEGIN_SIG)
         unparseable = BEGIN_SIG.join([doc, sig[:50]])
         # This must be a "real" file or _copyUnparseableDescriptorFile() will
@@ -803,10 +803,10 @@ class ParseDescriptorsTests(unittest.TestCase):
         """
         # Give it a bad geoip-db-digest:
         unparseable = BRIDGE_EXTRA_INFO_DESCRIPTOR.replace(
-            "MiserLandfalls E08B324D20AD0A13E114F027AB9AC3F32CA696A0",
-            "DontParseMe F373CC1D86D82267F1F1F5D39470F0E0A022122E").replace(
-                "geoip-db-digest 09A0E093100B279AD9CFF47A67B13A21C6E1483F",
-                "geoip-db-digest FOOOOOOOOOOOOOOOOOOBAAAAAAAAAAAAAAAAAARR")
+            b"MiserLandfalls E08B324D20AD0A13E114F027AB9AC3F32CA696A0",
+            b"DontParseMe F373CC1D86D82267F1F1F5D39470F0E0A022122E").replace(
+                b"geoip-db-digest 09A0E093100B279AD9CFF47A67B13A21C6E1483F",
+                b"geoip-db-digest FOOOOOOOOOOOOOOOOOOBAAAAAAAAAAAAAAAAAARR")
 
         descFileOne = io.BytesIO(BRIDGE_EXTRA_INFO_DESCRIPTOR)
         descFileTwo = io.BytesIO(BRIDGE_EXTRA_INFO_DESCRIPTOR_NEWEST_DUPLICATE)
@@ -817,7 +817,7 @@ class ParseDescriptorsTests(unittest.TestCase):
 
     def test_parse_descriptors_parseExtraInfoFiles_empty_file(self):
         """Test parsing an empty extrainfo descriptors file."""
-        routers = descriptors.parseExtraInfoFiles(io.BytesIO(''))
+        routers = descriptors.parseExtraInfoFiles(io.BytesIO(b''))
         self.assertIsInstance(routers, dict)
         self.assertEqual(len(routers), 0)
 
@@ -846,7 +846,7 @@ class ParseDescriptorsTests(unittest.TestCase):
         True when the new file is successfully created.
         """
         filename = "bridge-descriptors"
-        with open(filename, 'w') as fh:
+        with open(filename, 'wb') as fh:
             fh.write(BRIDGE_SERVER_DESCRIPTOR)
             fh.flush()
 
@@ -858,7 +858,7 @@ class ParseDescriptorsTests(unittest.TestCase):
         copy of the bad file with a specific filename format.
         """
         filename = "bridge-descriptors"
-        with open(filename, 'w') as fh:
+        with open(filename, 'wb') as fh:
             fh.write(BRIDGE_SERVER_DESCRIPTOR)
             fh.flush()
 





More information about the tor-commits mailing list