commit a9f35d5e86bffc4b04cb525736a23a7e72009efc Author: Ravi Chandra Padmala neenaoffline@gmail.com Date: Thu Jul 19 16:46:09 2012 +0200
Add support for network status parsing --- stem/descriptor/networkstatus_descriptor.py | 635 +++++++++++++++++++++++++++ 1 files changed, 635 insertions(+), 0 deletions(-)
# stem/descriptor/networkstatus_descriptor.py (new file added by this commit)

"""
Parsing for Tor network status documents. Currently supports parsing v3 network
status documents (both votes and consensuses).

The network status documents also contain a list of router descriptors,
directory authorities, signatures etc.

The votes and consensuses can be obtained from any of the following sources...

* the 'cached-consensus' file in tor's data directory
* tor metrics, at https://metrics.torproject.org/data.html
* directory authorities and mirrors via their DirPort

**Module Overview:**

::

  parse_file - parses a network status file and provides a NetworkStatusDocument
  NetworkStatusDocument - Tor v3 network status document
    +- MicrodescriptorConsensus - Tor microdescriptor consensus document
  RouterDescriptor - Router descriptor; contains information about a Tor relay
    +- RouterMicrodescriptor - Router microdescriptor; contains information that doesn't change often
  DirectorySignature
  DirectoryAuthority
"""

import re
import base64
import hashlib
import datetime

import stem.prereq
import stem.descriptor
import stem.descriptor.extrainfo_descriptor
import stem.version
import stem.exit_policy
import stem.util.log as log
import stem.util.connection
import stem.util.tor_tools

# Weight names, in the order the "bandwidth-weights" line is expected to list
# them. The regex is only applied with match(), so it checks the line's prefix.
_BANDWIDTH_WEIGHT_NAMES = (
  "bd", "be", "bg", "bm", "db", "eb", "ed", "ee", "eg", "em",
  "gb", "gd", "gg", "gm", "mb", "md", "me", "mg", "mm",
)

_bandwidth_weights_regex = re.compile(" ".join([r"W%s=\d+" % weight for weight in _BANDWIDTH_WEIGHT_NAMES]))

# Mapping of the flags on a router's "s" line to the attribute each one sets.
_ROUTER_FLAG_ATTRIBUTES = {
  "Valid": "is_valid",
  "Guard": "is_guard",
  "Named": "is_named",
  "Unnamed": "is_unnamed",
  "Running": "is_running",
  "Stable": "is_stable",
  "Exit": "is_exit",
  "Fast": "is_fast",
  "Authority": "is_authority",
  "V2Dir": "supports_v2dir",
  "V3Dir": "supports_v3dir",
  "HSDir": "is_hsdir",
  "BadExit": "is_badexit",
  "BadDirectory": "is_baddirectory",
}

def parse_file(document_file, validate = True):
  """
  Parses a network status document file, and returns a NetworkStatusDocument
  object.

  :param file document_file: file with network status document content
  :param bool validate: checks the validity of the document's contents if True, skips these checks otherwise

  :returns: NetworkStatusDocument object created by parsing the file

  :raises:
    * ValueError if the contents is malformed and validate is True
    * IOError if the file can't be read
  """

  # The file is read exactly once: a second read() would return an empty
  # string since the file position is already at the end.
  data = document_file.read()

  # if the file has Metrics metadata, drop that first annotation line
  if data.startswith(("@type network-status-consensus-3 1.0\n", "@type network-status-vote-3 1.0\n")):
    data = data[data.find("\n") + 1:]

  return NetworkStatusDocument(data, validate)

def _strptime(string, validate = True, optional = False):
  """
  Parses a "YYYY-MM-DD HH:MM:SS" timestamp.

  :param str string: timestamp to be parsed, None if the line was missing
  :param bool validate: raises for missing or malformed values if True
  :param bool optional: if True then a missing (None) value is acceptable

  :returns: datetime for the timestamp, None if it couldn't be parsed and that's permitted

  :raises: ValueError if validating and the value is malformed, or missing without being optional
  """

  if string is None:
    if validate and not optional:
      raise ValueError("Missing timestamp")

    return None

  try:
    return datetime.datetime.strptime(string, "%Y-%m-%d %H:%M:%S")
  except ValueError:
    if validate: raise
    return None

def _parse_int(value, description, validate):
  """
  Converts a value to an int.

  :param str value: text to be converted
  :param str description: what the value is, used in the error message
  :param bool validate: raises for malformed values if True

  :returns: int for the value, None if malformed and we aren't validating

  :raises: ValueError if validating and the value isn't an integer
  """

  try:
    return int(value)
  except (ValueError, TypeError):
    if validate:
      raise ValueError("%s must be an integer, got: %r" % (description, value))

    return None

def _parse_int_mapping(line, description, validate):
  """
  Parses space separated "key=value" entries that have integer values.

  :param str line: content to be parsed
  :param str description: what the line is, used in error messages
  :param bool validate: raises for malformed entries if True, skips them otherwise

  :returns: dict of key(str) => value(int) mappings

  :raises: ValueError if validating and an entry is malformed
  """

  mapping = {}

  for entry in line.split(" "):
    key, separator, value = entry.partition("=")

    if not separator:
      if validate:
        raise ValueError("%s entries must be key=value pairs, got: %r" % (description, entry))

      continue

    number = _parse_int(value, "%s value for %s" % (description, key), validate)

    if number is not None:
      mapping[key] = number

  return mapping

def _parse_versions(line, validate):
  """
  Parses a comma separated list of tor versions.

  :param str line: content to be parsed
  :param bool validate: raises for malformed versions if True, skips them otherwise

  :returns: list of stem.version.Version instances

  :raises: ValueError if validating and a version is malformed
  """

  versions = []

  for version_string in line.split(","):
    try:
      versions.append(stem.version.Version(version_string))
    except ValueError:
      # presumably Version raises a ValueError for malformed input - confirm
      # against stem.version
      if validate: raise

  return versions

class DocumentParser(object):
  """
  Helper class to parse documents.

  An empty line is treated the same as the end of the content by the read
  methods.

  :var str line: current line being parsed, None once we run out of lines
  :var list lines: list of remaining lines to be parsed
  """

  def __init__(self, raw_content, validate):
    """
    Create a new DocumentParser.

    :param str raw_content: content to be parsed
    :param bool validate: if False, treats every keyword line as optional
    """

    self._raw_content = raw_content
    self.lines = raw_content.split("\n")
    self.validate = validate
    self.line = self.lines.pop(0)  # split() always provides at least one entry

  def _advance(self):
    """
    Shifts the parser to the next line, setting the current line to None if
    there are no more.

    :returns: the line that was current before shifting
    """

    consumed = self.line
    self.line = self.lines.pop(0) if self.lines else None
    return consumed

  def peek_keyword(self):
    """
    Returns the first keyword in the next line. Respects the opt keyword and
    returns the actual keyword if the first is "opt".

    :returns: the first keyword of the next line, None if there isn't one
    """

    if not self.line:
      return None

    words = self.line.split(" ")

    if self.line.startswith("opt "):
      return words[1]

    return words[0]

  def read_keyword_line(self, keyword, optional = False):
    """
    Returns the content of the next line if it begins with the given keyword.

    If it doesn't match, a ValueError is raised if optional is False and if the
    DocumentParser was created with validation enabled. If not, None is returned.

    Respects the opt keyword and returns the next keyword if the first is "opt".
    Lines beginning with "opt" that have some other keyword are skipped.

    :param str keyword: keyword the line must begin with
    :param bool optional: if False then the current line must begin with the given keyword

    :returns: the text after the keyword if the keyword matches the one provided, otherwise returns None or raises an exception

    :raises: ValueError if a non-optional keyword doesn't match when validation is enabled
    """

    if not self.line:
      if not optional and self.validate:
        raise ValueError("Unexpected end of document")

      return None

    keyword_regex = re.compile("(opt )?" + re.escape(keyword) + "($| )")

    if keyword_regex.match(self.line):
      line = self._advance()

      if line == "opt " + keyword or line == keyword:
        return ""
      elif line.startswith("opt "):
        return line.split(" ", 2)[2]
      else:
        return line.split(" ", 1)[1]
    elif self.line.startswith("opt "):
      # if this was something new introduced at some point in the future
      # ignore it and go to the next line
      self.read_line()
      return self.read_keyword_line(keyword, optional)
    elif not optional and self.validate:
      raise ValueError("Error parsing network status document: Expected %s, received: %s" % (keyword, self.line))

    return None

  def read_line(self):
    """
    Returns the current line and shifts the parser to the next line.

    :returns: the current line if it exists, None otherwise
    """

    if self.line:
      return self._advance()

    return None

  def read_block(self, keyword):
    """
    Returns a keyword block that begins with "-----BEGIN keyword-----\\n" and
    ends with "-----END keyword-----\\n". Both marker lines are consumed.

    :param str keyword: keyword block that must be read

    :returns: the data in the keyword block, an empty string if the current line doesn't begin the block

    :raises: ValueError if validating and the block is never terminated
    """

    begin_marker = "-----BEGIN " + keyword + "-----"
    end_marker = "-----END " + keyword + "-----"
    lines = []

    if self.line != begin_marker:
      return ""

    self._advance()

    while self.line is not None and self.line != end_marker:
      lines.append(self._advance())

    if self.line == end_marker:
      # consume the terminator so it isn't reported as trailing data
      self._advance()
    elif self.validate:
      raise ValueError("Unterminated %s block" % keyword)

    return "\n".join(lines)

  def read_until(self, terminals = ()):
    """
    Returns the data in the parser until a line that begins with one of the
    keywords in terminals is found. The current line is always included, even
    if it begins with a terminal.

    :param list terminals: list of strings at which we should stop reading and return the data

    :returns: the lines that were read joined by newlines, None if we've run out of lines
    """

    if self.line is None:
      return None

    lines = [self._advance()]

    while self.line and self.line.split(" ")[0] not in terminals:
      lines.append(self._advance())

    return "\n".join(lines)

  def remaining(self):
    """
    Returns the data remaining in the parser.

    :returns: a list of all unparsed lines
    """

    if not self.line:
      return []

    lines, self.lines = self.lines, []
    lines.insert(0, self.line)
    return lines

class NetworkStatusDocument(stem.descriptor.Descriptor):
  """
  A v3 network status document.

  This could be a v3 consensus or vote document.

  :var bool validated: ***** whether the document is validated
  :var str network_status_version: ***** a document format version. For v3 documents this is "3"
  :var str vote_status: ***** status of the vote. Is either "vote" or "consensus"
  :var list consensus_methods: A list of supported consensus generation methods (integers)
  :var datetime published: time when the document was published
  :var int consensus_method: consensus method used to generate a consensus
  :var datetime valid_after: ***** time when the consensus becomes valid
  :var datetime fresh_until: ***** time until when the consensus is considered to be fresh
  :var datetime valid_until: ***** time until when the consensus is valid
  :var int vote_delay: ***** number of seconds allowed for collecting votes from all authorities
  :var int dist_delay: number of seconds allowed for collecting signatures from all authorities
  :var list client_versions: list of recommended Tor client versions
  :var list server_versions: list of recommended Tor server versions
  :var list known_flags: ***** list of known router flags
  :var dict params: dict of parameter(str) => value(int) mappings
  :var list router_descriptors: ***** list of RouterDescriptor objects defined in the document
  :var list directory_authorities: ***** list of DirectoryAuthority objects that have generated this document
  :var dict bandwidth_weights: dict of weight(str) => value(int) mappings
  :var list directory_signatures: ***** list of signatures this document has

  ***** attribute is either required when we're parsed with validation or has a default value, others are left as None if undefined
  """

  def __init__(self, raw_content, validate = True):
    """
    Parse a v3 network status document and provide a new NetworkStatusDocument object.

    :param str raw_content: raw network status document data
    :param bool validate: True if the document is to be validated, False otherwise

    :raises: ValueError if the document is invalid
    """

    super(NetworkStatusDocument, self).__init__(raw_content)

    self.router_descriptors = []
    self.directory_authorities = []
    self.directory_signatures = []
    self.validated = validate

    self.network_status_version = None
    self.vote_status = None
    self.consensus_methods = []
    self.published = None
    self.consensus_method = None
    self.valid_after = None
    self.fresh_until = None
    self.valid_until = None
    self.vote_delay = None
    self.dist_delay = None
    self.client_versions = []
    self.server_versions = []
    self.known_flags = []
    self.params = {}
    self.bandwidth_weights = {}

    self._unrecognized_lines = []

    self._parse(raw_content)

  def _generate_router(self, raw_content, vote, validate):
    return RouterDescriptor(raw_content, vote, validate)

  def _validate_network_status_version(self):
    return self.network_status_version == "3"

  def get_unrecognized_lines(self):
    """
    Returns any unrecognized trailing lines.

    :returns: a list of unrecognized trailing lines
    """

    return self._unrecognized_lines

  def _parse(self, raw_content):
    """
    Parses the document, populating our attributes. Optional lines that are
    missing leave their attribute at its default.

    :param str raw_content: raw network status document data

    :raises: ValueError if validating and the document is malformed
    """

    validate = self.validated
    doc_parser = DocumentParser(raw_content, validate)

    # preamble
    self.network_status_version = doc_parser.read_keyword_line("network-status-version")
    self.vote_status = doc_parser.read_keyword_line("vote-status")

    if validate and not self._validate_network_status_version():
      raise ValueError("Invalid network-status-version: %s" % self.network_status_version)

    if self.vote_status == "vote":
      vote = True
    elif self.vote_status == "consensus":
      vote = False
    elif validate:
      raise ValueError("Unrecognized document type specified in vote-status")
    else:
      # we aren't validating, so read the remainder as a consensus rather
      # than giving up
      vote = False

    if vote:
      methods_line = doc_parser.read_keyword_line("consensus-methods", True)

      if methods_line:
        methods = [_parse_int(method, "consensus-methods entry", validate) for method in methods_line.split(" ")]
        self.consensus_methods = [method for method in methods if method is not None]

      self.published = _strptime(doc_parser.read_keyword_line("published", True), validate, True)
    else:
      method_line = doc_parser.read_keyword_line("consensus-method", True)

      if method_line:
        self.consensus_method = _parse_int(method_line, "consensus-method", validate)

    self.valid_after = _strptime(doc_parser.read_keyword_line("valid-after"), validate)
    self.fresh_until = _strptime(doc_parser.read_keyword_line("fresh-until"), validate)
    self.valid_until = _strptime(doc_parser.read_keyword_line("valid-until"), validate)

    voting_delay = doc_parser.read_keyword_line("voting-delay")
    delays = voting_delay.split(" ") if voting_delay else []

    if len(delays) == 2:
      self.vote_delay = _parse_int(delays[0], "voting-delay vote seconds", validate)
      self.dist_delay = _parse_int(delays[1], "voting-delay dist seconds", validate)
    elif validate:
      raise ValueError("voting-delay must have two values, got: %s" % voting_delay)

    client_versions = doc_parser.read_keyword_line("client-versions", True)

    if client_versions:
      self.client_versions = _parse_versions(client_versions, validate)

    server_versions = doc_parser.read_keyword_line("server-versions", True)

    if server_versions:
      self.server_versions = _parse_versions(server_versions, validate)

    known_flags = doc_parser.read_keyword_line("known-flags")

    if known_flags:
      self.known_flags = known_flags.split(" ")

    params = doc_parser.read_keyword_line("params", True)

    if params:
      self.params = _parse_int_mapping(params, "params", validate)

    # authority section
    #
    # The footer and signature keywords are terminals too so a document
    # without any router entries doesn't fold them into the last authority.

    while doc_parser.line and doc_parser.line.startswith("dir-source "):
      dirauth_data = doc_parser.read_until(["dir-source", "r", "directory-footer", "directory-signature"])
      self.directory_authorities.append(DirectoryAuthority(dirauth_data, vote, validate))

    # router descriptors
    while doc_parser.line and doc_parser.line.startswith("r "):
      router_data = doc_parser.read_until(["r", "directory-footer", "directory-signature"])
      self.router_descriptors.append(self._generate_router(router_data, vote, validate))

    # footer section
    #
    # The directory-footer line is required from consensus method nine on, for
    # both votes and consensuses.

    if vote:
      footer_expected = any(method >= 9 for method in self.consensus_methods)
    else:
      footer_expected = self.consensus_method is not None and self.consensus_method >= 9

    if doc_parser.line == "directory-footer":
      doc_parser.read_line()
    elif footer_expected and validate:
      raise ValueError("Network status document missing directory-footer")

    if not vote:
      weights_line = doc_parser.read_keyword_line("bandwidth-weights", True)

      if weights_line is not None:
        if _bandwidth_weights_regex.match(weights_line):
          self.bandwidth_weights = _parse_int_mapping(weights_line, "bandwidth-weights", validate)
        elif validate:
          raise ValueError("Invalid bandwidth-weights line")

    while doc_parser.line and doc_parser.line.startswith("directory-signature "):
      signature_data = doc_parser.read_until(["directory-signature"])
      self.directory_signatures.append(DirectorySignature(signature_data, validate))

    self._unrecognized_lines = doc_parser.remaining()

    if validate and self._unrecognized_lines:
      raise ValueError("Unrecognized trailing data")

class DirectoryAuthority(stem.descriptor.Descriptor):
  """
  Contains directory authority information obtained from v3 network status
  documents.

  :var str nickname: directory authority's nickname
  :var str identity: uppercase hex fingerprint of the authority's identity key
  :var str address: hostname
  :var str ip: current IP address
  :var int dirport: current directory port
  :var int orport: current orport
  :var str contact: directory authority's contact information
  :var str legacy_dir_key: fingerprint of an obsolete identity key, only read from votes
  :var str vote_digest: digest of the vote that the authority contributed to the consensus, only read from consensuses
  """

  def __init__(self, raw_content, vote = True, validate = True):
    """
    Parse a directory authority entry in a v3 network status document and
    provide a DirectoryAuthority object.

    :param str raw_content: raw directory authority entry information
    :param bool vote: True if the entry is from a vote, False if it's from a consensus
    :param bool validate: True if the document is to be validated, False otherwise

    :raises: ValueError if the raw data is invalid
    """

    super(DirectoryAuthority, self).__init__(raw_content)

    self.nickname = None
    self.identity = None
    self.address = None
    self.ip = None
    self.dirport = None
    self.orport = None
    self.contact = None
    self.legacy_dir_key = None
    self.vote_digest = None

    parser = DocumentParser(raw_content, validate)

    dir_source = parser.read_keyword_line("dir-source")
    values = dir_source.split(" ") if dir_source else []

    if len(values) == 6:
      self.nickname, self.identity, self.address, self.ip = values[:4]
      self.dirport = _parse_int(values[4], "dir-source dirport", validate)
      self.orport = _parse_int(values[5], "dir-source orport", validate)
    elif validate:
      raise ValueError("dir-source line must have six values, got: %s" % dir_source)

    self.contact = parser.read_keyword_line("contact")

    if vote:
      self.legacy_dir_key = parser.read_keyword_line("legacy-dir-key", True)
    else:
      self.vote_digest = parser.read_keyword_line("vote-digest", True)

    if parser.remaining() and validate:
      raise ValueError("Unrecognized trailing data in directory authority information")

class DirectorySignature(stem.descriptor.Descriptor):
  """
  Contains directory signature information described in a v3 network status
  document.

  :var str identity: signature identity
  :var str key_digest: signature key digest
  :var str method: method used to generate the signature, None if the line doesn't specify one
  :var str signature: the signature data
  """

  def __init__(self, raw_content, validate = True):
    """
    Parse a directory signature entry in a v3 network status document and
    provide a DirectorySignature object.

    :param str raw_content: raw directory signature entry information
    :param bool validate: True if the document is to be validated, False otherwise

    :raises: ValueError if the raw data is invalid
    """

    super(DirectorySignature, self).__init__(raw_content)

    self.identity = None
    self.key_digest = None
    self.method = None
    self.signature = None

    parser = DocumentParser(raw_content, validate)

    signature_line = parser.read_keyword_line("directory-signature")
    values = signature_line.split(" ") if signature_line else []

    if len(values) == 2:
      self.identity, self.key_digest = values
    elif len(values) == 3:  # for microdescriptor consensuses
      self.method, self.identity, self.key_digest = values
    elif validate:
      raise ValueError("Malformed directory-signature line: %s" % signature_line)

    self.signature = parser.read_block("SIGNATURE")

    if parser.remaining() and validate:
      raise ValueError("Unrecognized trailing data in directory signature")

class RouterDescriptor(stem.descriptor.Descriptor):
  """
  Router descriptor object. Parses and stores router information in a router
  entry read from a v3 network status document.

  :var str nickname: ***** router's nickname
  :var str identity: ***** router's identity
  :var str digest: ***** router's digest
  :var datetime publication: ***** router's publication
  :var str ip: ***** router's IP address
  :var int orport: ***** router's ORPort
  :var int dirport: ***** router's DirPort, None if it's zero

  :var bool is_valid: ***** router is valid
  :var bool is_guard: ***** router is suitable for use as an entry guard
  :var bool is_named: ***** router is named
  :var bool is_unnamed: ***** router is unnamed
  :var bool is_running: ***** router is running and currently usable
  :var bool is_stable: ***** router is stable, i.e., it's suitable for long-lived circuits
  :var bool is_exit: ***** router is an exit router
  :var bool is_fast: ***** router is Fast, i.e., it's usable for high-bandwidth circuits
  :var bool is_authority: ***** router is a directory authority
  :var bool supports_v2dir: ***** router supports v2dir
  :var bool supports_v3dir: ***** router supports v3dir
  :var bool is_hsdir: ***** router is a hidden service directory
  :var bool is_badexit: ***** router is marked a bad exit
  :var bool is_baddirectory: ***** router is a bad directory

  :var :class:`stem.version.Version`,str version: Version of the Tor protocol this router is running

  :var int bandwidth: router's claimed bandwidth
  :var int measured_bandwidth: router's measured bandwidth

  :var :class:`stem.exit_policy.MicrodescriptorExitPolicy` exit_policy: router's exit policy

  :var list microdescriptor_hashes: (methods, digests) tuples from "m" SP methods 1*(SP algorithm "=" digest) NL lines, where methods is a list and digests is a dict of algorithm => digest. This is also available under its original misspelled name, mircodescriptor_hashes.

  ***** attribute is either required when we're parsed with validation or has a default value, others are left as None if undefined
  """

  def __init__(self, raw_contents, vote = True, validate = True):
    """
    Parse a router descriptor in a v3 network status document and provide a new
    RouterDescriptor object.

    :param str raw_contents: router descriptor content to be parsed
    :param bool vote: True if the entry is from a vote, False if it's from a consensus
    :param bool validate: whether the router descriptor should be validated

    :raises: ValueError if validating and the content is malformed
    """

    super(RouterDescriptor, self).__init__(raw_contents)

    self.nickname = None
    self.identity = None
    self.digest = None
    self.publication = None
    self.ip = None
    self.orport = None
    self.dirport = None

    self.is_valid = None
    self.is_guard = None
    self.is_named = None
    self.is_unnamed = None
    self.is_running = None
    self.is_stable = None
    self.is_exit = None
    self.is_fast = None
    self.is_authority = None
    self.supports_v2dir = None
    self.supports_v3dir = None
    self.is_hsdir = None
    self.is_badexit = None
    self.is_baddirectory = None

    self.version = None

    self.bandwidth = None
    self.measured_bandwidth = None

    self.exit_policy = None

    self.microdescriptor_hashes = []

    # the attribute was originally created under this misspelled name, so keep
    # it as an alias of the same list for anything that reads it
    self.mircodescriptor_hashes = self.microdescriptor_hashes

    self._unrecognized_lines = []

    self._parse(raw_contents, vote, validate)

  def _parse(self, raw_content, vote, validate):
    """
    Parses the router entry, populating our attributes.

    :param str raw_content: router descriptor content to be parsed
    :param bool vote: True if the entry is from a vote, in which case "m" lines are read
    :param bool validate: checks the validity of descriptor content if True

    :raises: ValueError if an error occurs in validation
    """

    parser = DocumentParser(raw_content, validate)
    self._parse_r_line(parser.read_keyword_line("r"), validate)

    # keywords that are allowed once at most, and what parses their line
    handlers = {
      "s": self._parse_s_line,
      "v": self._parse_v_line,
      "w": self._parse_w_line,
      "p": self._parse_p_line,
    }

    seen_keywords = set()

    while parser.line:
      keyword = parser.peek_keyword()

      if keyword in handlers:
        if keyword in seen_keywords and validate:
          raise ValueError("Invalid router descriptor: '%s' line appears twice" % keyword)

        seen_keywords.add(keyword)
        handlers[keyword](parser.read_keyword_line(keyword, True), validate)
      elif vote and keyword == "m":
        self._parse_m_line(parser.read_keyword_line("m", True), validate)
      elif validate:
        raise ValueError("Router descriptor contains unrecognized trailing lines: %s" % parser.line)
      else:
        self._unrecognized_lines.append(parser.read_line())  # ignore unrecognized lines if we aren't validating

  def _parse_r_line(self, line, validate):
    """
    Parses the content of the router's "r" line.

    :param str line: line content, without its keyword
    :param bool validate: raises for malformed content if True
    """

    # r mauer BD7xbfsCFku3+tgybEZsg8Yjhvw itcuKQ6PuPLJ7m/Oi928WjO2j8g 2012-06-22 13:19:32 80.101.105.103 9001 0
    # "r" SP nickname SP identity SP digest SP publication SP IP SP ORPort SP DirPort NL

    if not line:
      if validate:
        raise ValueError("Invalid router descriptor: empty 'r' line")

      return

    values = line.split(" ")

    if len(values) < 8:
      if validate:
        raise ValueError("Invalid router descriptor: malformed 'r' line: %s" % line)

      return

    self.nickname, self.identity, self.digest = values[:3]
    self.publication = _strptime(" ".join(values[3:5]), validate)  # date and time are separate fields
    self.ip = values[5]
    self.orport = _parse_int(values[6], "router's ORPort", validate)
    self.dirport = _parse_int(values[7], "router's DirPort", validate)

    if self.dirport == 0:
      self.dirport = None

  def _parse_s_line(self, line, validate):
    """
    Parses the content of the router's "s" line, which lists its flags. Flags
    that we don't have an attribute for are ignored.

    :param str line: line content, without its keyword
    :param bool validate: unused, accepted so all line parsers are called alike
    """

    # s Named Running Stable Valid
    # A series of space-separated status flags, in *lexical order*

    if not line:
      return

    for flag in line.split(" "):
      attribute = _ROUTER_FLAG_ATTRIBUTES.get(flag)

      if attribute:
        setattr(self, attribute, True)

    if self.is_unnamed:
      self.is_named = False
    elif self.is_named:
      self.is_unnamed = False

  def _parse_v_line(self, line, validate):
    """
    Parses the content of the router's "v" line, which has its version.

    :param str line: line content, without its keyword
    :param bool validate: raises for malformed content if True
    """

    # v Tor 0.2.2.35

    if not line:
      if validate:
        raise ValueError("Invalid router descriptor: empty 'v' line")

      return

    if line.startswith("Tor "):
      try:
        self.version = stem.version.Version(line[4:])
      except ValueError:
        if validate: raise
        self.version = line  # fall back to the unparsed version
    else:
      self.version = line

  def _parse_w_line(self, line, validate):
    """
    Parses the content of the router's "w" line, which has its bandwidth.

    :param str line: line content, without its keyword
    :param bool validate: raises for malformed content if True
    """

    # "w" SP "Bandwidth=" INT [SP "Measured=" INT] NL

    if not line:
      if validate:
        raise ValueError("Router descriptor contains empty 'w' line")

      return

    values = line.split(" ")

    if len(values) > 2:
      if validate:
        raise ValueError("Router descriptor contains invalid 'w' line")

      return

    expected_entries = (("Bandwidth", "bandwidth"), ("Measured", "measured_bandwidth"))

    for entry, (expected_key, attribute) in zip(values, expected_entries):
      key, separator, value = entry.partition("=")

      if separator and key == expected_key:
        setattr(self, attribute, _parse_int(value, "'w' line %s value" % key, validate))
      elif validate:
        raise ValueError("Router descriptor contains invalid 'w' line: expected %s, read %s" % (expected_key, key))

  def _parse_p_line(self, line, validate):
    """
    Parses the content of the router's "p" line, which has its exit policy.

    :param str line: line content, without its keyword
    :param bool validate: unused, accepted so all line parsers are called alike
    """

    # "p" SP ("accept" / "reject") SP PortList NL

    if line:
      self.exit_policy = stem.exit_policy.MicrodescriptorExitPolicy(line)

  def _parse_m_line(self, line, validate):
    """
    Parses the content of one of the router's "m" lines, which has
    microdescriptor hashes.

    :param str line: line content, without its keyword
    :param bool validate: raises for malformed content if True
    """

    # "m" SP methods 1*(SP algorithm "=" digest) NL

    if not line or " " not in line:
      if validate:
        raise ValueError("Invalid router descriptor: malformed 'm' line: %s" % line)

      return

    methods, digests = line.split(" ", 1)
    digest_for_algorithm = {}

    for entry in digests.split(" "):
      algorithm, separator, digest = entry.partition("=")

      if separator:
        digest_for_algorithm[algorithm] = digest
      elif validate:
        raise ValueError("Invalid router descriptor: malformed 'm' line: %s" % line)

    self.microdescriptor_hashes.append((methods.split(","), digest_for_algorithm))

  def get_unrecognized_lines(self):
    """
    Returns any unrecognized lines.

    :returns: a list of unrecognized lines
    """

    return self._unrecognized_lines
tor-commits@lists.torproject.org