commit 60e809ffa7aa22de9cd7e5e152e6916606b387d5 Author: Cecylia Bocovich cohosh@torproject.org Date: Wed Feb 24 11:15:10 2021 -0500
Trust but verify bridge descriptors
These descriptors are already validated by the Bridge Authority. Instead of potentially DoS-ing ourselves, accept all descriptors but log an error if stem validation fails. --- bridgedb/parse/descriptors.py | 51 +++++++------ bridgedb/test/test_parse_descriptors.py | 128 +++++++++++++------------------- 2 files changed, 80 insertions(+), 99 deletions(-)
diff --git a/bridgedb/parse/descriptors.py b/bridgedb/parse/descriptors.py index d3a3802..8baf7a8 100644 --- a/bridgedb/parse/descriptors.py +++ b/bridgedb/parse/descriptors.py @@ -215,7 +215,7 @@ def deduplicate(descriptors, statistics=False):
return newest
-def parseExtraInfoFiles(*filenames, **kwargs): +def parseExtraInfoFiles(*descriptorFiles): """Open **filenames** and parse any ``@type bridge-extrainfo-descriptor`` contained within.
@@ -229,13 +229,6 @@ def parseExtraInfoFiles(*filenames, **kwargs): .. note:: This function will call :func:`deduplicate` to deduplicate the extrainfo descriptors parsed from all **filenames**.
- :kwargs validate: If there is a ``'validate'`` keyword argument, its value - will be passed along as the ``'validate'`` argument to - :class:`stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor`. - The ``'validate'`` keyword argument defaults to ``True``, meaning that - the hash digest stored in the ``router-digest`` line will be checked - against the actual contents of the descriptor and the extrainfo - document's signature will be verified. :rtype: dict :returns: A dictionary mapping bridge fingerprints to their corresponding, deduplicated @@ -254,33 +247,45 @@ def parseExtraInfoFiles(*filenames, **kwargs): # to be a signature). descriptorType = 'extra-info 1.0'
- validate = True - if ('validate' in kwargs) and (kwargs['validate'] is False): - validate = False - - for filename in filenames: - logging.info("Parsing %s descriptors in %s..." - % (descriptorType, filename)) + for descFile in descriptorFiles:
# Make sure we open the file rather than passing in a path so that # subsequent calls to parse_file will use the current file position # to continue parsing the remaining descriptors after an exception # is raised - descFile = open(filename, 'rb') + if isinstance(descFile, str): + descFile = open(descFile, 'rb') # open by filename + + filename = os.path.basename(descFile.name) + copied = False while True: try: - document = parse_file(descFile, descriptorType, validate=validate) - + document = parse_file(descFile, descriptorType, validate=True) for router in document: - descriptors.append(router) - + logging.info("Successfully parsed descriptor for %s." + % router.fingerprint) break # Reached end of file
except (ValueError, ProtocolError) as error: - logging.error( - ("Stem exception while parsing extrainfo descriptor from " + # For now we are just logging validation errors, we'll + # trust and parse all descriptors later + logging.warning( + ("Stem exception while validating extrainfo descriptor from " "file '%s':\n%s") % (filename, str(error))) - _copyUnparseableDescriptorFile(filename) + if not copied: + _copyUnparseableDescriptorFile(filename) + copied = True + + # Now seek to start of document and parse all descriptors again + descFile.seek(0) + try: + document = parse_file(descFile, descriptorType, validate=False) + for router in document: + descriptors.append(router) + except Exception as error: + logging.error("Stem exception while parsing extrainfo descriptor %s" + % str(error)) +
routers = deduplicate(descriptors) return routers diff --git a/bridgedb/test/test_parse_descriptors.py b/bridgedb/test/test_parse_descriptors.py index 86b7d1f..4874161 100644 --- a/bridgedb/test/test_parse_descriptors.py +++ b/bridgedb/test/test_parse_descriptors.py @@ -690,16 +690,6 @@ class ParseDescriptorsTests(unittest.TestCase): with Benchmarker(): routers = descriptors.parseExtraInfoFiles(*descFiles)
- def test_parse_descriptors_parseExtraInfoFiles_no_validate(self): - """Test for ``b.p.descriptors.parseExtraInfoFiles`` with - descriptor validation disabled. - """ - descFileOne = self.writeTestDescriptorsToFile('cached-extrainfo', - BRIDGE_EXTRA_INFO_DESCRIPTOR) - routers = descriptors.parseExtraInfoFiles(descFileOne, - validate=False) - self.assertGreaterEqual(len(routers), 1) - def test_parse_descriptors_parseExtraInfoFiles_unparseable(self): """Test parsing three extrainfo descriptors: one is a valid descriptor, one is an older duplicate, and one is unparseable (it has a bad @@ -742,11 +732,39 @@ class ParseDescriptorsTests(unittest.TestCase): datetime.datetime.strptime("2014-12-04 03:10:25", "%Y-%m-%d %H:%M:%S"), "We should have the newest available descriptor for this router.")
+ def test_parse_descriptors_parseExtraInfoFiles_invalid(self): + """Test parsing three extrainfo descriptors: one is a valid descriptor, + and the other is completely invalid (lacking a signature). + BridgeDB should parse the valid descriptor if it preceeds the mangled + one and then terminate. + """ + # Give it a bad geoip-db-digest: + unparseable = b'DontParseMe F373CC1D86D82267F1F1F5D39470F0E0A022122E' + + descFile = self.writeTestDescriptorsToFile('cached-extrainfo', + BRIDGE_EXTRA_INFO_DESCRIPTOR_NEWEST_DUPLICATE, unparseable, + BRIDGE_EXTRA_INFO_DESCRIPTOR) + routers = descriptors.parseExtraInfoFiles(descFile) + + self.assertIsInstance(routers, dict) + self.assertEqual(len(routers), 2, ( + "There were three extrainfo descriptors: one lacks a signature, " + "and one was duplicate. Given our trust-but-verify policy that should " + "give us two descriptors")) + + bridge = list(routers.values())[0] + self.assertEqual( + bridge.fingerprint, + "E08B324D20AD0A13E114F027AB9AC3F32CA696A0", + ("It looks like the (supposedly) unparseable bridge was returned " + "instead of the valid one!")) + def test_parse_descriptors_parseExtraInfoFiles_unparseable_and_parseable(self): """Test parsing four extrainfo descriptors: two are valid descriptors, - one is an older duplicate of one of the valid descriptors, and one is - unparseable (it has a line we shouldn't recognise). There should be - only two descriptors returned after parsing. + one is an older duplicate of one of the valid descriptors, and fails + Stem's validation. Since the bridge authority already does validation, + we should except the unparseable descriptor. There should be three + descriptors returned after parsing. """ # Mess up the bridge-ip-transports line: unparseable = BRIDGE_EXTRA_INFO_DESCRIPTOR.replace( @@ -770,13 +788,13 @@ class ParseDescriptorsTests(unittest.TestCase): routers = descriptors.parseExtraInfoFiles(descFile)
self.assertIsInstance(routers, dict) - self.assertEqual(len(routers), 2, ( + self.assertEqual(len(routers), 3, ( "There were four extrainfo descriptors: one was a duplicate, " - "and one was unparseable, so that should only leave two " - "descriptors remaining.")) + "and one will throw a validation error, but all three " + "descriptors should be returned."))
- self.assertNotIn("F373CC1D86D82267F1F1F5D39470F0E0A022122E", routers.keys(), - "The 'unparseable' descriptor was returned by the parser.") + self.assertIn("F373CC1D86D82267F1F1F5D39470F0E0A022122E", routers.keys(), + "The 'unparseable' descriptor was not returned by the parser.")
self.assertIn("E08B324D20AD0A13E114F027AB9AC3F32CA696A0", routers.keys(), ("A bridge extrainfo which had duplicates was completely missing " @@ -789,56 +807,6 @@ class ParseDescriptorsTests(unittest.TestCase): self.assertIn("2B5DA67FBA13A6449DE625673B7AE9E3AA7DF75F", routers.keys(), "The 'parseable' descriptor wasn't returned by the parser.")
- def test_parse_descriptors_parseExtraInfoFiles_bad_signature_footer(self): - """Calling parseExtraInfoFiles() with a descriptor which has a - signature with a bad "-----END SIGNATURE-----" footer should return - zero parsed descriptors. - """ - unparseable = BRIDGE_EXTRA_INFO_DESCRIPTOR.replace( - b'-----END SIGNATURE-----', - b'-----END SIGNATURE FOR REALZ-----') - # This must be a "real" file or _copyUnparseableDescriptorFile() will - # raise an AttributeError saying: - # '_io.BytesIO' object has no attribute 'rpartition'" - descFileOne = self.writeTestDescriptorsToFile( - "bad-signature-footer", unparseable) - routers = descriptors.parseExtraInfoFiles(descFileOne) - - self.assertEqual(len(routers), 0) - - def test_parse_descriptors_parseExtraInfoFiles_missing_signature(self): - """Calling parseExtraInfoFiles() with a descriptor which is - missing the signature should return zero parsed descriptors. - """ - # Remove the signature - BEGIN_SIG = b'-----BEGIN SIGNATURE-----' - unparseable, _ = BRIDGE_EXTRA_INFO_DESCRIPTOR.split(BEGIN_SIG) - # This must be a "real" file or _copyUnparseableDescriptorFile() will - # raise an AttributeError saying: - # '_io.BytesIO' object has no attribute 'rpartition'" - descFileOne = self.writeTestDescriptorsToFile( - "missing-signature", unparseable) - routers = descriptors.parseExtraInfoFiles(descFileOne) - - self.assertEqual(len(routers), 0) - - def test_parse_descriptors_parseExtraInfoFiles_bad_signature_too_short(self): - """Calling _verifyExtraInfoSignature() with a descriptor which has a - bad signature should raise an InvalidExtraInfoSignature exception. - """ - # Truncate the signature to 50 bytes - BEGIN_SIG = b'-----BEGIN SIGNATURE-----' - doc, sig = BRIDGE_EXTRA_INFO_DESCRIPTOR.split(BEGIN_SIG) - unparseable = BEGIN_SIG.join([doc, sig[:50]]) - # This must be a "real" file or _copyUnparseableDescriptorFile() will - # raise an AttributeError saying: - # '_io.BytesIO' object has no attribute 'rpartition'" - descFileOne = self.writeTestDescriptorsToFile( - "truncated-signature", unparseable) - routers = descriptors.parseExtraInfoFiles(descFileOne) - - self.assertEqual(len(routers), 0) - def test_parse_descriptors_parseExtraInfoFiles_unparseable(self): """Test parsing three extrainfo descriptors: one is a valid descriptor, one is an older duplicate, and one is unparseable (it has a bad @@ -863,15 +831,23 @@ class ParseDescriptorsTests(unittest.TestCase):
descriptors.parseExtraInfoFiles(descFileOne, descFileTwo, descFileThree)
- timestamp = datetime.datetime.now() - timestamp = timestamp.isoformat(sep=chr(0x2d)) - timestamp = timestamp.rsplit('.', 1)[0] + matchingFiles = glob.glob("*copy-unparseable-test.unparseable") + self.assertEqual(len(matchingFiles), 1) + + newFile = matchingFiles[-1] + self.assertTrue(os.path.isfile(newFile)) + + timestamp = datetime.datetime.strptime(newFile.split("_")[0], + "%Y-%m-%d-%H:%M:%S") + # The timestamp should be roughly today (unless we just passed + # midnight, then it might be +/- 1): + self.assertApproximates(timestamp.now().day, timestamp.day, 1)
- path, sep, fname = descFileThree.rpartition(os.path.sep) - newfilename = "%s%s%s_%s%sunparseable" % (path, sep, timestamp, - fname, os.path.extsep) + # The timestamp should be roughly this hour (+/- 1): + self.assertApproximates(timestamp.now().hour, timestamp.hour, 1)
- self.assertTrue(os.path.exists(newfilename)) + # The timestamp should be roughly this minute (+/- 2): + self.assertApproximates(timestamp.now().minute, timestamp.minute, 2)
def test_parse_descriptors_parseExtraInfoFiles_empty_file(self): """Test parsing an empty extrainfo descriptors file.""" @@ -930,7 +906,7 @@ class ParseDescriptorsTests(unittest.TestCase): self.assertTrue(os.path.isfile(newFile))
timestamp = datetime.datetime.strptime(newFile.split("_")[0], - "%Y-%m-%d-%H:%M:%S") + "%Y-%m-%d-%H:%M:%S") # The timestamp should be roughly today (unless we just passed # midnight, then it might be +/- 1): self.assertApproximates(timestamp.now().day, timestamp.day, 1)
tor-commits@lists.torproject.org