commit 1f19568e61142521ddaccbc92a461ce831ecb255 Author: Isis Lovecruft isis@torproject.org Date: Wed Nov 5 02:35:51 2014 +0000
Add error handling for cases when Stem cannot parse an extrainfo file.
If a file containing bridge-extrainfo descriptors cannot be parsed the first time, and validation was enabled, then we parse it again with Stem's validation functionality disabled. If we still can't parse the file, we save a timestamped copy of the file for later debugging and continue with the next file. --- lib/bridgedb/parse/descriptors.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/lib/bridgedb/parse/descriptors.py b/lib/bridgedb/parse/descriptors.py index 63d9bf2..6eac164 100644 --- a/lib/bridgedb/parse/descriptors.py +++ b/lib/bridgedb/parse/descriptors.py @@ -238,10 +238,38 @@ def parseBridgeExtraInfoFiles(*filenames, **kwargs): validate = False
for filename in filenames: + document = None + documentWasUnparseable = False + logging.info("Parsing %s descriptors with Stem: %s" % (descriptorType, filename)) - document = parse_file(filename, descriptorType, validate=validate) - descriptors.extend([router for router in document]) + try: + document = parse_file(filename, descriptorType, validate=validate) + except ValueError as error: + documentWasUnparseable = True + + if documentWasUnparseable: + logging.warn(("Stem ran into an exception while parsing extrainfo " + "file '%s'!") % filename) + logging.debug("Error while parsing extrainfo file:\n%s" + % str(error)) + + if documentWasUnparseable and (validate is True): + logging.info(("Retrying parsing of extrainfo file '%s' with " + "validation disabled...") % filename) + try: + document = parse_file(filename, descriptorType, validate=False) + except ValueError as another: + logging.critical(("We were still unable to parse extrainfo " + "file on the second attempt! Bailing!")) + else: + documentWasUnparseable = False + + if documentWasUnparseable: + _copyUnparseableDescriptorFile(filename) + + if document: + descriptors.extend([router for router in document])
routers = deduplicate(descriptors) return routers