commit 8ad310114b1ea7b743a868a8b70832eea5b8f3e2
Author: Ravi Chandra Padmala <neenaoffline@gmail.com>
Date:   Fri Aug 10 17:49:23 2012 +0530
    Add microdescriptor parsing
---
 run_tests.py                           |    1 +
 stem/descriptor/__init__.py            |    4 +
 stem/descriptor/networkstatus.py       |  218 ++++++++++++++++++++++++++++++--
 test/integ/descriptor/networkstatus.py |   42 ++++++
 4 files changed, 254 insertions(+), 11 deletions(-)
diff --git a/run_tests.py b/run_tests.py index 8d115f1..b0550d6 100755 --- a/run_tests.py +++ b/run_tests.py @@ -136,6 +136,7 @@ INTEG_TESTS = ( test.integ.descriptor.server_descriptor.TestServerDescriptor, test.integ.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor, test.integ.descriptor.networkstatus.TestNetworkStatusDocument, + test.integ.descriptor.networkstatus.TestMicrodescriptorConsensus, test.integ.version.TestVersion, test.integ.response.protocolinfo.TestProtocolInfo, test.integ.process.TestProcess, diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 40f03ad..d9ac21b 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -66,6 +66,8 @@ def parse_file(path, descriptor_file): file_parser = stem.descriptor.extrainfo_descriptor.parse_file elif filename == "cached-consensus": file_parser = stem.descriptor.networkstatus.parse_file + elif filename == "cached-microdesc-consensus": + file_parser = lambda f: stem.descriptor.networkstatus.parse_file(f, True, "microdesc")
if file_parser: for desc in file_parser(descriptor_file): @@ -103,6 +105,8 @@ def parse_file(path, descriptor_file): desc._set_path(path) yield desc return + elif desc_type == "network-status-microdesc-consensus-3" and major_version == 1: + desc = stem.descriptor.networkstatus.MicrodescriptorConsensus(descriptor_file.read())
if desc: desc._set_path(path) diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py index 214a33c..7effc7e 100644 --- a/stem/descriptor/networkstatus.py +++ b/stem/descriptor/networkstatus.py @@ -59,9 +59,30 @@ _bandwidth_weights_regex = re.compile(" ".join(["W%s=\d+" % weight for weight in
_router_desc_end_kws = ["r", "bandwidth-weights", "directory-footer", "directory-signature"]
+Flavour = stem.util.enum.Enum( + ("NONE", ""), + ("NS", "ns"), + ("MICRODESCRIPTOR", "microdesc"), + ) + +Flag = stem.util.enum.Enum( + ("AUTHORITY", "Authority"), + ("BADEXIT", "BadExit"), + ("EXIT", "Exit"), + ("FAST", "Fast"), + ("GUARD", "Guard"), + ("HSDIR", "HSDir"), + ("NAMED", "Named"), + ("RUNNING", "Running"), + ("STABLE", "Stable"), + ("UNNAMED", "Unnamed"), + ("V2DIR", "V2Dir"), + ("VALID", "Valid"), + ) + Flag = stem.util.enum.Enum(*[(flag.upper(), flag) for flag in ["Authority", "BadExit", "Exit", "Fast", "Guard", "HSDir", "Named", "Running", "Stable", "Unnamed", "V2Dir", "Valid"]])
-def parse_file(document_file, validate = True): +def parse_file(document_file, validate = True, flavour = Flavour.NONE): """ Iterates over the router descriptors in a network status document.
@@ -83,15 +104,27 @@ def parse_file(document_file, validate = True): _skip_until_keywords(["bandwidth-weights", "directory-footer", "directory-signature"], document_file) # parse until end document_data = document_data + document_file.read() - document = NetworkStatusDocument(document_data, validate) - document_file.seek(r_offset) - document.router_descriptors = _router_desc_generator(document_file, document.vote_status == "vote", validate, document.known_flags) - return document.router_descriptors + + if flavour == Flavour.NONE: + document = NetworkStatusDocument(document_data, validate) + document_file.seek(r_offset) + document.router_descriptors = _ns_router_desc_generator(document_file, document.vote_status == "vote", validate) + yield document + elif flavour == Flavour.MICRODESCRIPTOR: + document = MicrodescriptorConsensus(document_data, validate) + document_file.seek(r_offset) + document.router_descriptors = _router_microdesc_generator(document_file, validate, document.known_flags) + yield document + +def _ns_router_desc_generator(document_file, vote, validate): + while _peek_keyword(document_file) == "r": + desc_content = "".join(_read_until_keywords(_router_desc_end_kws, document_file, False, True)) + yield RouterDescriptor(desc_content, vote, validate)
-def _router_desc_generator(document_file, vote, validate, known_flags): +def _router_microdesc_generator(document_file, validate, known_flags): while _peek_keyword(document_file) == "r": desc_content = "".join(_read_until_keywords(_router_desc_end_kws, document_file, False, True)) - yield RouterDescriptor(desc_content, vote, validate, known_flags) + yield RouterMicrodescriptor(desc_content, validate, known_flags)
class NetworkStatusDocument(stem.descriptor.Descriptor): """ @@ -159,8 +192,10 @@ class NetworkStatusDocument(stem.descriptor.Descriptor):
self._parse(raw_content)
- def _generate_router(self, raw_content, vote, validate, known_flags): - return RouterDescriptor(raw_content, vote, validate, known_flags) + def _router_desc_generator(self, document_file): + while _peek_keyword(document_file) == "r": + desc_content = "".join(_read_until_keywords(_router_desc_end_kws, document_file, False, True)) + yield RouterDescriptor(desc_content, self.vote_status == "vote", self.validated, self.known_flags)
def _validate_network_status_version(self): return self.network_status_version == "3" @@ -223,7 +258,7 @@ class NetworkStatusDocument(stem.descriptor.Descriptor): # router descriptors if _peek_keyword(content) == "r": router_descriptors_data = "".join(_read_until_keywords(["bandwidth-weights", "directory-footer", "directory-signature"], content, False, True)) - self.router_descriptors = _router_desc_generator(StringIO(router_descriptors_data), vote, validate, self.known_flags) + self.router_descriptors = self._router_desc_generator(StringIO(router_descriptors_data))
# footer section if self.consensus_method > 9 or vote and filter(lambda x: x >= 9, self.consensus_methods): @@ -394,7 +429,7 @@ class RouterDescriptor(stem.descriptor.Descriptor): :param bool vote: True if the descriptor is from a vote document :param bool validate: whether the router descriptor should be validated :param bool known_flags: list of known router status flags - + :raises: ValueError if the descriptor data is invalid """
@@ -524,3 +559,164 @@ class RouterDescriptor(stem.descriptor.Descriptor):
return self.unrecognized_lines
+class MicrodescriptorConsensus(NetworkStatusDocument): + """ + A v3 microdescriptor consensus. + + :var bool validated: ***** whether the document is validated + :var str network_status_version: ***** a document format version. For v3 microdescriptor consensuses this is "3 microdesc" + :var str vote_status: ***** status of the vote (is "consensus") + :var int consensus_method: **~** consensus method used to generate a consensus + :var datetime valid_after: ***** time when the consensus becomes valid + :var datetime fresh_until: ***** time until when the consensus is considered to be fresh + :var datetime valid_until: ***** time until when the consensus is valid + :var int vote_delay: ***** number of seconds allowed for collecting votes from all authorities + :var int dist_delay: number of seconds allowed for collecting signatures from all authorities + :var list client_versions: list of recommended Tor client versions + :var list server_versions: list of recommended Tor server versions + :var list known_flags: ***** list of known router flags + :var list params: dict of parameter(str) => value(int) mappings + :var list router_descriptors: ***** iterator for RouterDescriptor objects defined in the document + :var list directory_authorities: ***** list of DirectoryAuthority objects that have generated this document + :var dict bandwidth_weights: **~** dict of weight(str) => value(int) mappings + :var list directory_signatures: ***** list of signatures this document has + + | ***** attribute is either required when we're parsed with validation or has a default value, others are left as None if undefined + | **~** attribute appears only in consensuses + """ + + def _router_desc_generator(self, document_file): + while _peek_keyword(document_file) == "r": + desc_content = "".join(_read_until_keywords(_router_desc_end_kws, document_file, False, True)) + yield RouterMicrodescriptor(desc_content, self.validated, self.known_flags) + + def 
_validate_network_status_version(self): + return self.network_status_version == "3 microdesc" + +class RouterMicrodescriptor(RouterDescriptor): + """ + Router microdescriptor object. Parses and stores router information in a router + microdescriptor from a v3 microdescriptor consensus. + + :var str nickname: ***** router's nickname + :var str identity: ***** router's identity + :var datetime publication: ***** router's publication + :var str ip: ***** router's IP address + :var int orport: ***** router's ORPort + :var int dirport: ***** router's DirPort + + :var list flags: ***** list of status flags + :var list unknown_flags: ***** list of unidentified status flags + + :var :class:`stem.version.Version`,str version: Version of the Tor protocol this router is running + + :var int bandwidth: router's claimed bandwidth + :var int measured_bandwidth: router's measured bandwidth + + :var str digest: base64 of the hash of the router's microdescriptor with trailing =s omitted + + | ***** attribute is either required when we're parsed with validation or has a default value, others are left as None if undefined + """ + + def __init__(self, raw_contents, validate = True, known_flags = Flag): + """ + Parse a router descriptor in a v3 microdescriptor consensus and provide a new + RouterMicrodescriptor object. 
+ + :param str raw_content: router descriptor content to be parsed + :param bool validate: whether the router descriptor should be validated + :param bool known_flags: list of known router status flags + + :raises: ValueError if the descriptor data is invalid + """ + + super(RouterMicrodescriptor, self).__init__(raw_contents, False, validate, known_flags) + + def _parse(self, raw_content, _, validate, known_flags): + """ + :param dict raw_content: router descriptor contents to be parsed + :param bool validate: checks the validity of descriptor content if True + :param bool known_flags: list of known router status flags + + :raises: ValueError if an error occures in validation + """ + + content = StringIO(raw_content) + seen_keywords = set() + peek_check_kw = lambda keyword: keyword == _peek_keyword(content) + + r = _read_keyword_line("r", content, validate) + # r mauer BD7xbfsCFku3+tgybEZsg8Yjhvw itcuKQ6PuPLJ7m/Oi928WjO2j8g 2012-06-22 13:19:32 80.101.105.103 9001 0 + # "r" SP nickname SP identity SP digest SP publication SP IP SP ORPort SP DirPort NL + if r: + seen_keywords.add("r") + values = r.split(" ") + self.nickname, self.identity = values[0], values[1] + self.publication = _strptime(" ".join((values[2], values[3])), validate) + self.ip, self.orport, self.dirport = values[4], int(values[5]), int(values[6]) + if self.dirport == 0: self.dirport = None + elif validate: raise ValueError("Invalid router descriptor: empty 'r' line") + + while _peek_line(content): + if peek_check_kw("s"): + if "s" in seen_keywords: raise ValueError("Invalid router descriptor: 's' line appears twice") + line = _read_keyword_line("s", content, validate) + if not line: continue + seen_keywords.add("s") + # s Named Running Stable Valid + #A series of space-separated status flags, in *lexical order* + self.flags = line.split(" ") + + self.unknown_flags = filter(lambda f: not f in known_flags, self.flags) + if validate and self.unknown_flags: + raise ValueError("Router contained unknown 
flags: %s", " ".join(self.unknown_flags)) + + elif peek_check_kw("v"): + if "v" in seen_keywords: raise ValueError("Invalid router descriptor: 'v' line appears twice") + line = _read_keyword_line("v", content, validate, True) + seen_keywords.add("v") + # v Tor 0.2.2.35 + if line: + if line.startswith("Tor "): + self.version = stem.version.Version(line[4:]) + else: + self.version = line + elif validate: raise ValueError("Invalid router descriptor: empty 'v' line" ) + + elif peek_check_kw("w"): + if "w" in seen_keywords: raise ValueError("Invalid router descriptor: 'w' line appears twice") + w = _read_keyword_line("w", content, validate, True) + # "w" SP "Bandwidth=" INT [SP "Measured=" INT] NL + seen_keywords.add("w") + if w: + values = w.split(" ") + if len(values) <= 2 and len(values) > 0: + key, value = values[0].split("=") + if key == "Bandwidth": self.bandwidth = int(value) + elif validate: raise ValueError("Router descriptor contains invalid 'w' line: expected Bandwidth, read " + key) + + if len(values) == 2: + key, value = values[1].split("=") + if key == "Measured": self.measured_bandwidth = int(value) + elif validate: raise ValueError("Router descriptor contains invalid 'w' line: expected Measured, read " + key) + elif validate: raise ValueError("Router descriptor contains invalid 'w' line") + elif validate: raise ValueError("Router descriptor contains empty 'w' line") + + elif peek_check_kw("m"): + # microdescriptor hashes + self.digest = _read_keyword_line("m", content, validate, True) + + elif validate: + raise ValueError("Router descriptor contains unrecognized trailing lines: %s" % content.readline()) + + else: + self.unrecognized_lines.append(content.readline()) # ignore unrecognized lines if we aren't validating + + def get_unrecognized_lines(self): + """ + Returns any unrecognized lines. 
+ + :returns: a list of unrecognized lines + """ + + return self.unrecognized_lines diff --git a/test/integ/descriptor/networkstatus.py b/test/integ/descriptor/networkstatus.py index 07414c3..484e67d 100644 --- a/test/integ/descriptor/networkstatus.py +++ b/test/integ/descriptor/networkstatus.py @@ -13,6 +13,7 @@ import stem.exit_policy import stem.version import stem.descriptor.networkstatus import test.integ.descriptor +from stem.descriptor.networkstatus import Flavour
def _strptime(string): return datetime.datetime.strptime(string, "%Y-%m-%d %H:%M:%S") @@ -256,3 +257,44 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w= self.assertEquals("D5C30C15BB3F1DA27669C2D88439939E8F418FCF", desc.directory_signatures[0].key_digest) self.assertEquals(expected_signature, desc.directory_signatures[0].signature)
+class TestMicrodescriptorConsensus(unittest.TestCase): + def test_cached_microdesc_consensus(self): + """ + Parses the cached-microdesc-consensus file in our data directory. + """ + + # lengthy test and uneffected by targets, so only run once + if test.runner.only_run_once(self, "test_cached_microdesc_consensus"): return + + descriptor_path = test.runner.get_runner().get_test_dir("cached-microdesc-consensus") + + if not os.path.exists(descriptor_path): + test.runner.skip(self, "(no cached-microdesc-consensus)") + + count = 0 + with open(descriptor_path) as descriptor_file: + for desc in next(stem.descriptor.networkstatus.parse_file(descriptor_file, True, flavour = Flavour.MICRODESCRIPTOR)).router_descriptors: + assert desc.nickname # check that the router has a nickname + count += 1 + + assert count > 100 # sanity check - assuming atleast 100 relays in the consensus + + def test_metrics_microdesc_consensus(self): + """ + Checks if consensus documents from Metrics are parsed properly. + """ + + descriptor_path = test.integ.descriptor.get_resource("metrics_microdesc_consensus") + + with file(descriptor_path) as descriptor_file: + desc = stem.descriptor.parse_file(descriptor_path, descriptor_file) + + router = next(next(desc).router_descriptors) + self.assertEquals("JapanAnon", router.nickname) + self.assertEquals("AGw/p8P246zRPQ3ZsQx9+pM8I3s", router.identity) + self.assertEquals("9LDw0XiFeLQDXK9t8ht4+MK9tWx6Jxp1RwP36eatRWs", router.digest) + self.assertEquals(_strptime("2012-07-18 15:55:42"), router.publication) + self.assertEquals("220.0.231.71", router.ip) + self.assertEquals(443, router.orport) + self.assertEquals(9030, router.dirport) +
tor-commits@lists.torproject.org