[stem/master] Move extrainfo descriptor parsing to helpers

commit b717bef5bb2a0d8a37fb154df28501af574c04df Author: Damian Johnson <atagar@torproject.org> Date: Wed Jan 14 10:40:27 2015 -0800 Move extrainfo descriptor parsing to helpers Intermediate point so we can do the same pattern of lazy loading I did for server descriptors. --- stem/descriptor/extrainfo_descriptor.py | 772 ++++++++++++++------------ test/unit/descriptor/extrainfo_descriptor.py | 28 +- 2 files changed, 431 insertions(+), 369 deletions(-) diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py index d6e6102..408cc92 100644 --- a/stem/descriptor/extrainfo_descriptor.py +++ b/stem/descriptor/extrainfo_descriptor.py @@ -69,6 +69,7 @@ Extra-info descriptors are available from a few sources... ===================== =========== """ +import functools import hashlib import re @@ -225,6 +226,349 @@ def _parse_timestamp_and_interval(keyword, content): raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line)) +def _value(line, entries): + return entries[line][0][0] + + +def _values(line, entries): + return [entry[0] for entry in entries[line]] + + +def _parse_extra_info_line(descriptor, entries): + # "extra-info" Nickname Fingerprint + + value = _value('extra-info', entries) + extra_info_comp = value.split() + + if len(extra_info_comp) < 2: + raise ValueError('Extra-info line must have two values: extra-info %s' % value) + elif not stem.util.tor_tools.is_valid_nickname(extra_info_comp[0]): + raise ValueError("Extra-info line entry isn't a valid nickname: %s" % extra_info_comp[0]) + elif not stem.util.tor_tools.is_valid_fingerprint(extra_info_comp[1]): + raise ValueError('Tor relay fingerprints consist of forty hex digits: %s' % extra_info_comp[1]) + + descriptor.nickname = extra_info_comp[0] + descriptor.fingerprint = extra_info_comp[1] + + +def _parse_geoip_db_digest_line(descriptor, entries): + # "geoip-db-digest" Digest + + value = _value('geoip-db-digest', entries) + + if not stem.util.tor_tools.is_hex_digits(value, 40): + raise ValueError('Geoip digest line had an invalid sha1 digest: geoip-db-digest %s' % value) + + descriptor.geoip_db_digest = value + + +def _parse_geoip6_db_digest_line(descriptor, entries): + # "geoip6-db-digest" Digest + + value = _value('geoip6-db-digest', entries) + + if not stem.util.tor_tools.is_hex_digits(value, 40): + raise ValueError('Geoip v6 digest line had an invalid sha1 digest: geoip6-db-digest %s' % value) + + descriptor.geoip6_db_digest = value + + +def _parse_transport_line(descriptor, entries): + # "transport" transportname address:port [arglist] + # Everything after the transportname is scrubbed in published bridge + # descriptors, so we'll never see it in practice. + # + # These entries really only make sense for bridges, but have been seen + # on non-bridges in the wild when the relay operator configured it this + # way. 
+ + for value in _values('transport', entries): + name, address, port, args = None, None, None, None + + if ' ' not in value: + # scrubbed + name = value + else: + # not scrubbed + value_comp = value.split() + + if len(value_comp) < 1: + raise ValueError('Transport line is missing its transport name: transport %s' % value) + elif len(value_comp) < 2: + raise ValueError('Transport line is missing its address:port value: transport %s' % value) + elif ':' not in value_comp[1]: + raise ValueError("Transport line's address:port entry is missing a colon: transport %s" % value) + + name = value_comp[0] + address, port_str = value_comp[1].split(':', 1) + + if not stem.util.connection.is_valid_ipv4_address(address) or \ + stem.util.connection.is_valid_ipv6_address(address): + raise ValueError('Transport line has a malformed address: transport %s' % value) + elif not stem.util.connection.is_valid_port(port_str): + raise ValueError('Transport line has a malformed port: transport %s' % value) + + port = int(port_str) + args = value_comp[2:] if len(value_comp) >= 3 else [] + + descriptor.transport[name] = (address, port, args) + + +def _parse_cell_circuits_per_decline_line(descriptor, entries): + # "cell-circuits-per-decile" num + + value = _value('cell-circuits-per-decile', entries) + + if not value.isdigit(): + raise ValueError('Non-numeric cell-circuits-per-decile value: %s' % value) + elif int(value) < 0: + raise ValueError('Negative cell-circuits-per-decile value: %s' % value) + + descriptor.cell_circuits_per_decile = int(value) + + +def _parse_dirreq_line(keyword, recognized_counts_attr, unrecognized_counts_attr, descriptor, entries): + value = _value(keyword, entries) + + recognized_counts = {} + unrecognized_counts = {} + + is_response_stats = keyword in ('dirreq-v2-resp', 'dirreq-v3-resp') + key_set = DirResponse if is_response_stats else DirStat + + key_type = 'STATUS' if is_response_stats else 'STAT' + error_msg = '%s lines should contain %s=COUNT mappings: %s %s' % (keyword, key_type, keyword, value) + + if value: + for entry in value.split(','): + if '=' not in entry: + raise ValueError(error_msg) + + status, count = entry.split('=', 1) + + if count.isdigit(): + if status in key_set: + recognized_counts[status] = int(count) + else: + unrecognized_counts[status] = int(count) + else: + raise ValueError(error_msg) + + setattr(descriptor, recognized_counts_attr, recognized_counts) + setattr(descriptor, unrecognized_counts_attr, unrecognized_counts) + + +def _parse_dirreq_share_line(keyword, attribute, descriptor, entries): + value = _value(keyword, entries) + + if not value.endswith('%'): + raise ValueError('%s lines should be a percentage: %s %s' % (keyword, keyword, value)) + elif float(value[:-1]) < 0: + raise ValueError('Negative percentage value: %s %s' % (keyword, value)) + + # bug means it might be above 100%: https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html + + setattr(descriptor, attribute, float(value[:-1]) / 100) + + +def _parse_cell_line(keyword, attribute, descriptor, entries): + # "<keyword>" num,...,num + + value = _value(keyword, entries) + entries, exc = [], None + + if value: + for entry in value.split(','): + try: + # Values should be positive but as discussed in ticket #5849 + # there was a bug around this. It was fixed in tor 0.2.2.1. 
+ + entries.append(float(entry)) + except ValueError: + exc = ValueError('Non-numeric entry in %s listing: %s %s' % (keyword, keyword, value)) + + setattr(descriptor, attribute, entries) + + if exc: + raise exc + + +def _parse_timestamp_line(keyword, attribute, descriptor, entries): + # "<keyword>" YYYY-MM-DD HH:MM:SS + + value = _value(keyword, entries) + + try: + setattr(descriptor, attribute, stem.util.str_tools._parse_timestamp(value)) + except ValueError: + raise ValueError("Timestamp on %s line wasn't parsable: %s %s" % (keyword, keyword, value)) + + +def _parse_timestamp_and_interval_line(keyword, end_attribute, interval_attribute, descriptor, entries): + # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) + + timestamp, interval, _ = _parse_timestamp_and_interval(keyword, _value(keyword, entries)) + setattr(descriptor, end_attribute, timestamp) + setattr(descriptor, interval_attribute, interval) + + +def _parse_conn_bi_direct_line(descriptor, entries): + # "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH + + value = _value('conn-bi-direct', entries) + timestamp, interval, remainder = _parse_timestamp_and_interval('conn-bi-direct', value) + stats = remainder.split(',') + + if len(stats) != 4 or not (stats[0].isdigit() and stats[1].isdigit() and stats[2].isdigit() and stats[3].isdigit()): + raise ValueError('conn-bi-direct line should end with four numeric values: conn-bi-direct %s' % value) + + descriptor.conn_bi_direct_end = timestamp + descriptor.conn_bi_direct_interval = interval + descriptor.conn_bi_direct_below = int(stats[0]) + descriptor.conn_bi_direct_read = int(stats[1]) + descriptor.conn_bi_direct_write = int(stats[2]) + descriptor.conn_bi_direct_both = int(stats[3]) + + +def _parse_history_line(keyword, end_attribute, interval_attribute, values_attribute, descriptor, entries): + # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM... + + value = _value(keyword, entries) + timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value) + history_values = [] + + if remainder: + try: + history_values = [int(entry) for entry in remainder.split(',')] + except ValueError: + raise ValueError('%s line has non-numeric values: %s %s' % (keyword, keyword, value)) + + setattr(descriptor, end_attribute, timestamp) + setattr(descriptor, interval_attribute, interval) + setattr(descriptor, values_attribute, history_values) + + +def _parse_port_count_line(keyword, attribute, descriptor, entries): + # "<keyword>" port=N,port=N,... + + value, port_mappings = _value(keyword, entries), {} + error_msg = 'Entries in %s line should only be PORT=N entries: %s %s' % (keyword, keyword, value) + + if value: + for entry in value.split(','): + if '=' not in entry: + raise ValueError(error_msg) + + port, stat = entry.split('=', 1) + + if (port == 'other' or stem.util.connection.is_valid_port(port)) and stat.isdigit(): + if port != 'other': + port = int(port) + + port_mappings[port] = int(stat) + else: + raise ValueError(error_msg) + + setattr(descriptor, attribute, port_mappings) + + +def _parse_geoip_to_count_line(keyword, attribute, descriptor, entries): + # "<keyword>" CC=N,CC=N,... + # + # The maxmind geoip (https://www.maxmind.com/app/iso3166) has numeric + # locale codes for some special values, for instance... 
+ # A1,"Anonymous Proxy" + # A2,"Satellite Provider" + # ??,"Unknown" + + value, locale_usage = _value(keyword, entries), {} + error_msg = 'Entries in %s line should only be CC=N entries: %s %s' % (keyword, keyword, value) + + if value: + for entry in value.split(','): + if '=' not in entry: + raise ValueError(error_msg) + + locale, count = entry.split('=', 1) + + if _locale_re.match(locale) and count.isdigit(): + locale_usage[locale] = int(count) + else: + raise ValueError(error_msg) + + setattr(descriptor, attribute, locale_usage) + + +def _parse_bridge_ip_versions_line(descriptor, entries): + value, ip_versions = _value('bridge-ip-versions', entries), {} + + if value: + for entry in value.split(','): + if '=' not in entry: + raise stem.ProtocolError("The bridge-ip-versions should be a comma separated listing of '<protocol>=<count>' mappings: bridge-ip-versions %s" % value) + + protocol, count = entry.split('=', 1) + + if not count.isdigit(): + raise stem.ProtocolError('IP protocol count was non-numeric (%s): bridge-ip-versions %s' % (count, value)) + + ip_versions[protocol] = int(count) + + descriptor.ip_versions = ip_versions + + +def _parse_bridge_ip_transports_line(descriptor, entries): + value, ip_transports = _value('bridge-ip-transports', entries), {} + + if value: + for entry in value.split(','): + if '=' not in entry: + raise stem.ProtocolError("The bridge-ip-transports should be a comma separated listing of '<protocol>=<count>' mappings: bridge-ip-transports %s" % value) + + protocol, count = entry.split('=', 1) + + if not count.isdigit(): + raise stem.ProtocolError('Transport count was non-numeric (%s): bridge-ip-transports %s' % (count, value)) + + ip_transports[protocol] = int(count) + + descriptor.ip_transports = ip_transports + + +_parse_dirreq_v2_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-resp', 'dir_v2_responses', 'dir_v2_responses_unknown') +_parse_dirreq_v3_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-resp', 'dir_v3_responses', 'dir_v3_responses_unknown') +_parse_dirreq_v2_direct_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-direct-dl', 'dir_v2_direct_dl', 'dir_v2_direct_dl_unknown') +_parse_dirreq_v3_direct_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-direct-dl', 'dir_v3_direct_dl', 'dir_v3_direct_dl_unknown') +_parse_dirreq_v2_tunneled_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-tunneled-dl', 'dir_v2_tunneled_dl', 'dir_v2_tunneled_dl_unknown') +_parse_dirreq_v3_tunneled_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-tunneled-dl', 'dir_v3_tunneled_dl', 'dir_v3_tunneled_dl_unknown') +_parse_dirreq_v2_share_line = functools.partial(_parse_dirreq_share_line, 'dirreq-v2-share', 'dir_v2_share') +_parse_dirreq_v3_share_line = functools.partial(_parse_dirreq_share_line, 'dirreq-v3-share', 'dir_v3_share') +_parse_cell_processed_cells_line = functools.partial(_parse_cell_line, 'cell-processed-cells', 'cell_processed_cells') +_parse_cell_queued_cells_line = functools.partial(_parse_cell_line, 'cell-queued-cells', 'cell_queued_cells') +_parse_cell_time_in_queue_line = functools.partial(_parse_cell_line, 'cell-time-in-queue', 'cell_time_in_queue') +_parse_published_line = functools.partial(_parse_timestamp_line, 'published', 'published') +_parse_geoip_start_time_line = functools.partial(_parse_timestamp_line, 'geoip-start-time', 'geoip_start_time') +_parse_cell_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'cell-stats-end', 'cell_stats_end', 'cell_stats_interval') 
+_parse_entry_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'entry-stats-end', 'entry_stats_end', 'entry_stats_interval') +_parse_exit_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'exit-stats-end', 'exit_stats_end', 'exit_stats_interval') +_parse_bridge_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'bridge-stats-end', 'bridge_stats_end', 'bridge_stats_interval') +_parse_dirreq_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'dirreq-stats-end', 'dir_stats_end', 'dir_stats_interval') +_parse_read_history_line = functools.partial(_parse_history_line, 'read-history', 'read_history_end', 'read_history_interval', 'read_history_values') +_parse_write_history_line = functools.partial(_parse_history_line, 'write-history', 'write_history_end', 'write_history_interval', 'write_history_values') +_parse_dirreq_read_history_line = functools.partial(_parse_history_line, 'dirreq-read-history', 'dir_read_history_end', 'dir_read_history_interval', 'dir_read_history_values') +_parse_dirreq_write_history_line = functools.partial(_parse_history_line, 'dirreq-write-history', 'dir_write_history_end', 'dir_write_history_interval', 'dir_write_history_values') +_parse_exit_kibibytes_written_line = functools.partial(_parse_port_count_line, 'exit-kibibytes-written', 'exit_kibibytes_written') +_parse_exit_kibibytes_read_line = functools.partial(_parse_port_count_line, 'exit-kibibytes-read', 'exit_kibibytes_read') +_parse_exit_streams_opened_line = functools.partial(_parse_port_count_line, 'exit-streams-opened', 'exit_streams_opened') +_parse_dirreq_v2_ips_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v2-ips', 'dir_v2_ips') +_parse_dirreq_v3_ips_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v3-ips', 'dir_v3_ips') +_parse_dirreq_v2_reqs_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v2-reqs', 'dir_v2_requests') +_parse_dirreq_v3_reqs_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v3-reqs', 'dir_v3_requests') +_parse_geoip_client_origins_line = functools.partial(_parse_geoip_to_count_line, 'geoip-client-origins', 'geoip_client_origins') +_parse_entry_ips_line = functools.partial(_parse_geoip_to_count_line, 'entry-ips', 'entry_ips') +_parse_bridge_ips_line = functools.partial(_parse_geoip_to_count_line, 'bridge-ips', 'bridge_ips') + + class ExtraInfoDescriptor(Descriptor): """ Extra-info descriptor document. 
@@ -465,376 +809,94 @@ class ExtraInfoDescriptor(Descriptor): value, _, _ = values[0] line = '%s %s' % (keyword, value) # original line - if keyword == 'extra-info': - # "extra-info" Nickname Fingerprint - extra_info_comp = value.split() - - if len(extra_info_comp) < 2: - if not validate: - continue - - raise ValueError('Extra-info line must have two values: %s' % line) - - if validate: - if not stem.util.tor_tools.is_valid_nickname(extra_info_comp[0]): - raise ValueError("Extra-info line entry isn't a valid nickname: %s" % extra_info_comp[0]) - elif not stem.util.tor_tools.is_valid_fingerprint(extra_info_comp[1]): - raise ValueError('Tor relay fingerprints consist of forty hex digits: %s' % extra_info_comp[1]) - - self.nickname = extra_info_comp[0] - self.fingerprint = extra_info_comp[1] - elif keyword == 'geoip-db-digest': - # "geoip-db-digest" Digest - - if validate and not stem.util.tor_tools.is_hex_digits(value, 40): - raise ValueError('Geoip digest line had an invalid sha1 digest: %s' % line) - - self.geoip_db_digest = value - elif keyword == 'geoip6-db-digest': - # "geoip6-db-digest" Digest - - if validate and not stem.util.tor_tools.is_hex_digits(value, 40): - raise ValueError('Geoip v6 digest line had an invalid sha1 digest: %s' % line) - - self.geoip6_db_digest = value - elif keyword == 'transport': - # "transport" transportname address:port [arglist] - # Everything after the transportname is scrubbed in published bridge - # descriptors, so we'll never see it in practice. - # - # These entries really only make sense for bridges, but have been seen - # on non-bridges in the wild when the relay operator configured it this - # way. - - for transport_value, _, _ in values: - name, address, port, args = None, None, None, None - - if ' ' not in transport_value: - # scrubbed - name = transport_value - else: - # not scrubbed - value_comp = transport_value.split() - - if len(value_comp) < 1: - raise ValueError('Transport line is missing its transport name: %s' % line) - else: - name = value_comp[0] - - if len(value_comp) < 2: - raise ValueError('Transport line is missing its address:port value: %s' % line) - elif ':' not in value_comp[1]: - raise ValueError("Transport line's address:port entry is missing a colon: %s" % line) - else: - address, port_str = value_comp[1].split(':', 1) - - if not stem.util.connection.is_valid_ipv4_address(address) or \ - stem.util.connection.is_valid_ipv6_address(address): - raise ValueError('Transport line has a malformed address: %s' % line) - elif not stem.util.connection.is_valid_port(port_str): - raise ValueError('Transport line has a malformed port: %s' % line) - - port = int(port_str) - - if len(value_comp) >= 3: - args = value_comp[2:] - else: - args = [] - - self.transport[name] = (address, port, args) - elif keyword == 'cell-circuits-per-decile': - # "cell-circuits-per-decile" num - - if not value.isdigit(): - if validate: - raise ValueError('Non-numeric cell-circuits-per-decile value: %s' % line) - else: - continue - - stat = int(value) - - if validate and stat < 0: - raise ValueError('Negative cell-circuits-per-decile value: %s' % line) - - self.cell_circuits_per_decile = stat - elif keyword in ('dirreq-v2-resp', 'dirreq-v3-resp', 'dirreq-v2-direct-dl', 'dirreq-v3-direct-dl', 'dirreq-v2-tunneled-dl', 'dirreq-v3-tunneled-dl'): - recognized_counts = {} - unrecognized_counts = {} - - is_response_stats = keyword in ('dirreq-v2-resp', 'dirreq-v3-resp') - key_set = DirResponse if is_response_stats else DirStat - - key_type = 'STATUS' if 
is_response_stats else 'STAT' - error_msg = '%s lines should contain %s=COUNT mappings: %s' % (keyword, key_type, line) - - if value: - for entry in value.split(','): - if '=' not in entry: - if validate: - raise ValueError(error_msg) - else: - continue - - status, count = entry.split('=', 1) - - if count.isdigit(): - if status in key_set: - recognized_counts[status] = int(count) - else: - unrecognized_counts[status] = int(count) - elif validate: - raise ValueError(error_msg) - - if keyword == 'dirreq-v2-resp': - self.dir_v2_responses = recognized_counts - self.dir_v2_responses_unknown = unrecognized_counts + try: + if keyword == 'extra-info': + _parse_extra_info_line(self, entries) + elif keyword == 'geoip-db-digest': + _parse_geoip_db_digest_line(self, entries) + elif keyword == 'geoip6-db-digest': + _parse_geoip6_db_digest_line(self, entries) + elif keyword == 'transport': + _parse_transport_line(self, entries) + elif keyword == 'cell-circuits-per-decile': + _parse_cell_circuits_per_decline_line(self, entries) + elif keyword == 'dirreq-v2-resp': + _parse_dirreq_v2_resp_line(self, entries) elif keyword == 'dirreq-v3-resp': - self.dir_v3_responses = recognized_counts - self.dir_v3_responses_unknown = unrecognized_counts + _parse_dirreq_v3_resp_line(self, entries) elif keyword == 'dirreq-v2-direct-dl': - self.dir_v2_direct_dl = recognized_counts - self.dir_v2_direct_dl_unknown = unrecognized_counts + _parse_dirreq_v2_direct_dl_line(self, entries) elif keyword == 'dirreq-v3-direct-dl': - self.dir_v3_direct_dl = recognized_counts - self.dir_v3_direct_dl_unknown = unrecognized_counts + _parse_dirreq_v3_direct_dl_line(self, entries) elif keyword == 'dirreq-v2-tunneled-dl': - self.dir_v2_tunneled_dl = recognized_counts - self.dir_v2_tunneled_dl_unknown = unrecognized_counts + _parse_dirreq_v2_tunneled_dl_line(self, entries) elif keyword == 'dirreq-v3-tunneled-dl': - self.dir_v3_tunneled_dl = recognized_counts - self.dir_v3_tunneled_dl_unknown = unrecognized_counts - elif keyword in ('dirreq-v2-share', 'dirreq-v3-share'): - # "<keyword>" num% - - try: - if not value.endswith('%'): - raise ValueError() - - percentage = float(value[:-1]) / 100 - - # Bug lets these be above 100%, however they're soon going away... - # https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html - - if validate and percentage < 0: - raise ValueError('Negative percentage value: %s' % line) - - if keyword == 'dirreq-v2-share': - self.dir_v2_share = percentage - elif keyword == 'dirreq-v3-share': - self.dir_v3_share = percentage - except ValueError as exc: - if validate: - raise ValueError("Value can't be parsed as a percentage: %s" % line) - elif keyword in ('cell-processed-cells', 'cell-queued-cells', 'cell-time-in-queue'): - # "<keyword>" num,...,num - - entries = [] - - if value: - for entry in value.split(','): - try: - # Values should be positive but as discussed in ticket #5849 - # there was a bug around this. It was fixed in tor 0.2.2.1. 
- - entries.append(float(entry)) - except ValueError: - if validate: - raise ValueError('Non-numeric entry in %s listing: %s' % (keyword, line)) - - if keyword == 'cell-processed-cells': - self.cell_processed_cells = entries + _parse_dirreq_v3_tunneled_dl_line(self, entries) + elif keyword == 'dirreq-v2-share': + _parse_dirreq_v2_share_line(self, entries) + elif keyword == 'dirreq-v3-share': + _parse_dirreq_v3_share_line(self, entries) + elif keyword == 'cell-processed-cells': + _parse_cell_processed_cells_line(self, entries) elif keyword == 'cell-queued-cells': - self.cell_queued_cells = entries + _parse_cell_queued_cells_line(self, entries) elif keyword == 'cell-time-in-queue': - self.cell_time_in_queue = entries - elif keyword in ('published', 'geoip-start-time'): - # "<keyword>" YYYY-MM-DD HH:MM:SS - - try: - timestamp = stem.util.str_tools._parse_timestamp(value) - - if keyword == 'published': - self.published = timestamp - elif keyword == 'geoip-start-time': - self.geoip_start_time = timestamp - except ValueError: - if validate: - raise ValueError("Timestamp on %s line wasn't parsable: %s" % (keyword, line)) - elif keyword in ('cell-stats-end', 'entry-stats-end', 'exit-stats-end', 'bridge-stats-end', 'dirreq-stats-end'): - # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) - - try: - timestamp, interval, _ = _parse_timestamp_and_interval(keyword, value) - - if keyword == 'cell-stats-end': - self.cell_stats_end = timestamp - self.cell_stats_interval = interval - elif keyword == 'entry-stats-end': - self.entry_stats_end = timestamp - self.entry_stats_interval = interval - elif keyword == 'exit-stats-end': - self.exit_stats_end = timestamp - self.exit_stats_interval = interval - elif keyword == 'bridge-stats-end': - self.bridge_stats_end = timestamp - self.bridge_stats_interval = interval - elif keyword == 'dirreq-stats-end': - self.dir_stats_end = timestamp - self.dir_stats_interval = interval - except ValueError as exc: - if validate: - raise exc - elif keyword == 'conn-bi-direct': - # "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH - - try: - timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value) - stats = remainder.split(',') - - if len(stats) != 4 or not \ - (stats[0].isdigit() and stats[1].isdigit() and stats[2].isdigit() and stats[3].isdigit()): - raise ValueError('conn-bi-direct line should end with four numeric values: %s' % line) - - self.conn_bi_direct_end = timestamp - self.conn_bi_direct_interval = interval - self.conn_bi_direct_below = int(stats[0]) - self.conn_bi_direct_read = int(stats[1]) - self.conn_bi_direct_write = int(stats[2]) - self.conn_bi_direct_both = int(stats[3]) - except ValueError as exc: - if validate: - raise exc - elif keyword in ('read-history', 'write-history', 'dirreq-read-history', 'dirreq-write-history'): - # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM... 
- try: - timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value) - history_values = [] - - if remainder: - try: - history_values = [int(entry) for entry in remainder.split(",")] - except ValueError: - raise ValueError('%s line has non-numeric values: %s' % (keyword, line)) - - if keyword == 'read-history': - self.read_history_end = timestamp - self.read_history_interval = interval - self.read_history_values = history_values - elif keyword == 'write-history': - self.write_history_end = timestamp - self.write_history_interval = interval - self.write_history_values = history_values - elif keyword == 'dirreq-read-history': - self.dir_read_history_end = timestamp - self.dir_read_history_interval = interval - self.dir_read_history_values = history_values - elif keyword == 'dirreq-write-history': - self.dir_write_history_end = timestamp - self.dir_write_history_interval = interval - self.dir_write_history_values = history_values - except ValueError as exc: - if validate: - raise exc - elif keyword in ('exit-kibibytes-written', 'exit-kibibytes-read', 'exit-streams-opened'): - # "<keyword>" port=N,port=N,... - - port_mappings = {} - error_msg = 'Entries in %s line should only be PORT=N entries: %s' % (keyword, line) - - if value: - for entry in value.split(','): - if '=' not in entry: - if validate: - raise ValueError(error_msg) - else: - continue - - port, stat = entry.split('=', 1) - - if (port == 'other' or stem.util.connection.is_valid_port(port)) and stat.isdigit(): - if port != 'other': - port = int(port) - port_mappings[port] = int(stat) - elif validate: - raise ValueError(error_msg) - - if keyword == 'exit-kibibytes-written': - self.exit_kibibytes_written = port_mappings + _parse_cell_time_in_queue_line(self, entries) + elif keyword == 'published': + _parse_published_line(self, entries) + elif keyword == 'geoip-start-time': + _parse_geoip_start_time_line(self, entries) + elif keyword == 'cell-stats-end': + _parse_cell_stats_end_line(self, entries) + elif keyword == 'entry-stats-end': + _parse_entry_stats_end_line(self, entries) + elif keyword == 'exit-stats-end': + _parse_exit_stats_end_line(self, entries) + elif keyword == 'bridge-stats-end': + _parse_bridge_stats_end_line(self, entries) + elif keyword == 'dirreq-stats-end': + _parse_dirreq_stats_end_line(self, entries) + elif keyword == 'conn-bi-direct': + _parse_conn_bi_direct_line(self, entries) + elif keyword == 'read-history': + _parse_read_history_line(self, entries) + elif keyword == 'write-history': + _parse_write_history_line(self, entries) + elif keyword == 'dirreq-read-history': + _parse_dirreq_read_history_line(self, entries) + elif keyword == 'dirreq-write-history': + _parse_dirreq_write_history_line(self, entries) + elif keyword == 'exit-kibibytes-written': + _parse_exit_kibibytes_written_line(self, entries) elif keyword == 'exit-kibibytes-read': - self.exit_kibibytes_read = port_mappings + _parse_exit_kibibytes_read_line(self, entries) elif keyword == 'exit-streams-opened': - self.exit_streams_opened = port_mappings - elif keyword in ('dirreq-v2-ips', 'dirreq-v3-ips', 'dirreq-v2-reqs', 'dirreq-v3-reqs', 'geoip-client-origins', 'entry-ips', 'bridge-ips'): - # "<keyword>" CC=N,CC=N,... - # - # The maxmind geoip (https://www.maxmind.com/app/iso3166) has numeric - # locale codes for some special values, for instance... 
- # A1,"Anonymous Proxy" - # A2,"Satellite Provider" - # ??,"Unknown" - - locale_usage = {} - error_msg = 'Entries in %s line should only be CC=N entries: %s' % (keyword, line) - - if value: - for entry in value.split(','): - if '=' not in entry: - if validate: - raise ValueError(error_msg) - else: - continue - - locale, count = entry.split('=', 1) - - if _locale_re.match(locale) and count.isdigit(): - locale_usage[locale] = int(count) - elif validate: - raise ValueError(error_msg) - - if keyword == 'dirreq-v2-ips': - self.dir_v2_ips = locale_usage + _parse_exit_streams_opened_line(self, entries) + elif keyword == 'dirreq-v2-ips': + _parse_dirreq_v2_ips_line(self, entries) elif keyword == 'dirreq-v3-ips': - self.dir_v3_ips = locale_usage + _parse_dirreq_v3_ips_line(self, entries) elif keyword == 'dirreq-v2-reqs': - self.dir_v2_requests = locale_usage + _parse_dirreq_v2_reqs_line(self, entries) elif keyword == 'dirreq-v3-reqs': - self.dir_v3_requests = locale_usage + _parse_dirreq_v3_reqs_line(self, entries) elif keyword == 'geoip-client-origins': - self.geoip_client_origins = locale_usage + _parse_geoip_client_origins_line(self, entries) elif keyword == 'entry-ips': - self.entry_ips = locale_usage + _parse_entry_ips_line(self, entries) elif keyword == 'bridge-ips': - self.bridge_ips = locale_usage - elif keyword == 'bridge-ip-versions': - self.ip_versions = {} - - if value: - for entry in value.split(','): - if '=' not in entry: - raise stem.ProtocolError("The bridge-ip-versions should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line) - - protocol, count = entry.split('=', 1) - - if not count.isdigit(): - raise stem.ProtocolError('IP protocol count was non-numeric (%s): %s' % (count, line)) - - self.ip_versions[protocol] = int(count) - elif keyword == 'bridge-ip-transports': - self.ip_transports = {} - - if value: - for entry in value.split(','): - if '=' not in entry: - raise stem.ProtocolError("The bridge-ip-transports should be a comma separated listing of '<protocol>=<count>' mappings: %s" % line) - - protocol, count = entry.split('=', 1) - - if not count.isdigit(): - raise stem.ProtocolError('Transport count was non-numeric (%s): %s' % (count, line)) - - self.ip_transports[protocol] = int(count) - else: - self._unrecognized_lines.append(line) + _parse_bridge_ips_line(self, entries) + elif keyword == 'bridge-ip-versions': + _parse_bridge_ip_versions_line(self, entries) + elif keyword == 'bridge-ip-transports': + _parse_bridge_ip_transports_line(self, entries) + else: + self._unrecognized_lines.append(line) + except ValueError as exc: + if validate: + raise exc + else: + continue def digest(self): """ diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py index 7e67019..525ff06 100644 --- a/test/unit/descriptor/extrainfo_descriptor.py +++ b/test/unit/descriptor/extrainfo_descriptor.py @@ -200,10 +200,10 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw for entry in test_entries: desc_text = get_relay_extrainfo_descriptor({'geoip-db-digest': entry}, content = True) - self._expect_invalid_attr(desc_text, 'geoip_db_digest', entry) + self._expect_invalid_attr(desc_text, 'geoip_db_digest') desc_text = get_relay_extrainfo_descriptor({'geoip6-db-digest': entry}, content = True) - self._expect_invalid_attr(desc_text, 'geoip6_db_digest', entry) + self._expect_invalid_attr(desc_text, 'geoip6_db_digest') def test_cell_circuits_per_decile(self): """ @@ -257,8 +257,8 @@ 
k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw for entry in test_entries: desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True) desc = self._expect_invalid_attr(desc_text) - self.assertEqual({}, getattr(desc, attr)) - self.assertEqual({}, getattr(desc, unknown_attr)) + self.assertEqual(None, getattr(desc, attr)) + self.assertEqual(None, getattr(desc, unknown_attr)) def test_dir_stat_lines(self): """ @@ -299,8 +299,8 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw for entry in test_entries: desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True) desc = self._expect_invalid_attr(desc_text) - self.assertEqual({}, getattr(desc, attr)) - self.assertEqual({}, getattr(desc, unknown_attr)) + self.assertEqual(None, getattr(desc, attr)) + self.assertEqual(None, getattr(desc, unknown_attr)) def test_conn_bi_direct(self): """ @@ -360,15 +360,15 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw self.assertEqual(expected_value, getattr(desc, attr)) test_entries = ( - ('', None), - (' ', None), - ('100', None), - ('-5%', -0.05), + (''), + (' '), + ('100'), + ('-5%'), ) - for entry, expected in test_entries: + for entry in test_entries: desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True) - self._expect_invalid_attr(desc_text, attr, expected) + self._expect_invalid_attr(desc_text, attr) def test_number_list_lines(self): """ @@ -525,7 +525,7 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw for entry in test_entries: desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True) - self._expect_invalid_attr(desc_text, attr, {}) + self._expect_invalid_attr(desc_text, attr) def test_locale_mapping_lines(self): """ @@ -554,7 +554,7 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw for entry in test_entries: desc_text = get_relay_extrainfo_descriptor({keyword: entry}, content = True) - self._expect_invalid_attr(desc_text, attr, {}) + self._expect_invalid_attr(desc_text, attr) def test_minimal_bridge_descriptor(self): """
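The heart of this patch is that each descriptor keyword now gets a dedicated module-level helper: the helper pulls the line's raw value out of the parsed `entries` mapping via `_value()` / `_values()`, validates it, and assigns the matching attribute, while `functools.partial` stamps out the many keyword/attribute variants from a few generic parsers such as `_parse_timestamp_line`. Below is a minimal, self-contained sketch of that pattern; the simplified `entries` layout, the `datetime.strptime` call standing in for `stem.util.str_tools._parse_timestamp`, and the bare `Example` class are illustrative assumptions, not stem's real code.

  import functools
  from datetime import datetime


  def _value(line, entries):
    # entries maps a keyword to a list of (value, ...) tuples; use the first value
    return entries[line][0][0]


  def _parse_timestamp_line(keyword, attribute, descriptor, entries):
    # generic parser: read the keyword's value and store it on the descriptor
    value = _value(keyword, entries)

    try:
      setattr(descriptor, attribute, datetime.strptime(value, '%Y-%m-%d %H:%M:%S'))
    except ValueError:
      raise ValueError("Timestamp on %s line wasn't parsable: %s %s" % (keyword, keyword, value))


  # functools.partial binds the generic parser to concrete keyword/attribute pairs

  _parse_published_line = functools.partial(_parse_timestamp_line, 'published', 'published')
  _parse_geoip_start_time_line = functools.partial(_parse_timestamp_line, 'geoip-start-time', 'geoip_start_time')


  class Example(object):
    published = None


  entries = {'published': [('2015-01-14 10:40:27', None, None)]}
  desc = Example()
  _parse_published_line(desc, entries)
  print(desc.published)  # 2015-01-14 10:40:27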
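The rewritten _parse() loop then wraps each helper call in a single try/except ValueError: with validation on the error propagates, and with it off the line is skipped, leaving the attribute at its default of None rather than assigning a partial or empty mapping (which is why the unit tests above now expect None instead of {}). Building on the sketch above, here is a compact illustration of that dispatch-and-validate shape; the PARSER_FOR_KEYWORD table is a hypothetical stand-in for the commit's long elif chain, not code from the patch.

  PARSER_FOR_KEYWORD = {
    'published': _parse_published_line,
    'geoip-start-time': _parse_geoip_start_time_line,
  }


  def parse(descriptor, entries, validate = True):
    unrecognized = []

    for keyword in entries:
      parser = PARSER_FOR_KEYWORD.get(keyword)

      if parser is None:
        unrecognized.append(keyword)
        continue

      try:
        parser(descriptor, entries)
      except ValueError:
        if validate:
          raise  # surface malformed lines when validating

        # otherwise skip the line, leaving the attribute at its default (None)

    return unrecognized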
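The commit message frames this as an intermediate step toward the lazy loading already used for server descriptors, where lines are only parsed when the corresponding attribute is first read. Continuing the sketches above, the snippet below shows one way such deferral can work once every keyword has a standalone helper; the LazyDescriptor class and its __getattr__ hook are a hypothetical illustration of the idea, not stem's actual implementation.

  class LazyDescriptor(object):
    def __init__(self, entries):
      self._entries = entries
      self._lazy_loading = True

    def __getattr__(self, name):
      # only reached when the attribute hasn't been set yet: run all known
      # helpers on first access, then retry the lookup
      if self.__dict__.get('_lazy_loading'):
        self.__dict__['_lazy_loading'] = False

        for keyword, parser in PARSER_FOR_KEYWORD.items():
          if keyword in self._entries:
            parser(self, self._entries)

        return getattr(self, name)

      raise AttributeError(name)


  lazy = LazyDescriptor({'published': [('2015-01-14 10:40:27', None, None)]})
  print(lazy.published)  # helpers run here, on first attribute access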