tor-commits
January 2015
commit 8870510e4194da6c7ee8fb143e898a7c83932234
Author: Damian Johnson <atagar@torproject.org>
Date: Sun Jan 18 09:57:03 2015 -0800
Transport parsing wasn't idempotent
Oops, this parsing function appended to the descriptor's existing dictionary
rather than assigning one of its own. Assuming it ran over the same content
this wasn't an issue in practice since it would clobber the existing results,
but still not right.
---
stem/descriptor/extrainfo_descriptor.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index 124ce16..1eebe46 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -256,6 +256,8 @@ def _parse_transport_line(descriptor, entries):
# on non-bridges in the wild when the relay operator configured it this
# way.
+ transports = {}
+
for value in _values('transport', entries):
name, address, port, args = None, None, None, None
@@ -285,7 +287,9 @@ def _parse_transport_line(descriptor, entries):
port = int(port_str)
args = value_comp[2:] if len(value_comp) >= 3 else []
- descriptor.transport[name] = (address, port, args)
+ transports[name] = (address, port, args)
+
+ descriptor.transport = transports
def _parse_cell_circuits_per_decline_line(descriptor, entries):
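The fix builds the transport mapping locally and assigns it in one step. A minimal standalone sketch (hypothetical names, not the stem code itself) of why that makes the parse idempotent:

class Desc(object):
  def __init__(self):
    self.transport = {}


def parse_appending(descriptor, lines):
  # old behaviour: mutate whatever dict the descriptor already holds
  for line in lines:
    name, addr_port = line.split(' ', 1)
    descriptor.transport[name] = addr_port


def parse_assigning(descriptor, lines):
  # fixed behaviour: build a fresh dict, then assign it
  transports = {}

  for line in lines:
    name, addr_port = line.split(' ', 1)
    transports[name] = addr_port

  descriptor.transport = transports


desc = Desc()
parse_appending(desc, ['obfs3 198.51.100.5:443'])
parse_appending(desc, ['obfs4 203.0.113.7:80'])
print(sorted(desc.transport))  # ['obfs3', 'obfs4'] - the stale obfs3 entry lingers

desc = Desc()
parse_assigning(desc, ['obfs3 198.51.100.5:443'])
parse_assigning(desc, ['obfs4 203.0.113.7:80'])
print(sorted(desc.transport))  # ['obfs4'] - each run fully replaces the prior result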
[stem/master] Moving common parsing helpers to descriptor module
by atagar@torproject.org 25 Jan '15
commit 79eee85c1b4cac47d91d6a2dfa29867822f1bf31
Author: Damian Johnson <atagar@torproject.org>
Date: Sat Jan 17 15:31:15 2015 -0800
Moving common parsing helpers to descriptor module
Moving a couple common helpers to the common descriptor __init__.py module.
---
stem/descriptor/__init__.py | 32 +++++++++++++++
stem/descriptor/extrainfo_descriptor.py | 60 +++++------------------------
stem/descriptor/server_descriptor.py | 60 ++++++++---------------------
test/unit/descriptor/server_descriptor.py | 2 +-
4 files changed, 58 insertions(+), 96 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 0baacdb..bc1b462 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -311,6 +311,38 @@ def _parse_metrics_file(descriptor_type, major_version, minor_version, descripto
raise TypeError("Unrecognized metrics descriptor format. type: '%s', version: '%i.%i'" % (descriptor_type, major_version, minor_version))
+def _value(line, entries):
+ return entries[line][0][0]
+
+
+def _values(line, entries):
+ return [entry[0] for entry in entries[line]]
+
+
+def _parse_sha1_digest_line(keyword, attribute):
+ def _parse(descriptor, entries):
+ value = _value(keyword, entries)
+
+ if not stem.util.tor_tools.is_hex_digits(value, 40):
+ raise ValueError('%s line had an invalid sha1 digest: %s %s' % (keyword, keyword, value))
+
+ setattr(descriptor, attribute, value)
+
+ return _parse
+
+
+def _parse_key_block(keyword, attribute, expected_block_type):
+ def _parse(descriptor, entries):
+ value, block_type, block_contents = entries[keyword][0]
+
+ if not block_contents or block_type != expected_block_type:
+ raise ValueError("'%s' should be followed by a %s block" % (keyword, expected_block_type))
+
+ setattr(descriptor, attribute, block_contents)
+
+ return _parse
+
+
class Descriptor(object):
"""
Common parent for all types of descriptors.
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index 97623e7..9e86798 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -81,6 +81,10 @@ from stem.descriptor import (
Descriptor,
_read_until_keywords,
_get_descriptor_components,
+ _value,
+ _values,
+ _parse_sha1_digest_line,
+ _parse_key_block,
)
try:
@@ -225,14 +229,6 @@ def _parse_timestamp_and_interval(keyword, content):
raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))
-def _value(line, entries):
- return entries[line][0][0]
-
-
-def _values(line, entries):
- return [entry[0] for entry in entries[line]]
-
-
def _parse_extra_info_line(descriptor, entries):
# "extra-info" Nickname Fingerprint
@@ -250,28 +246,6 @@ def _parse_extra_info_line(descriptor, entries):
descriptor.fingerprint = extra_info_comp[1]
-def _parse_geoip_db_digest_line(descriptor, entries):
- # "geoip-db-digest" Digest
-
- value = _value('geoip-db-digest', entries)
-
- if not stem.util.tor_tools.is_hex_digits(value, 40):
- raise ValueError('Geoip digest line had an invalid sha1 digest: geoip-db-digest %s' % value)
-
- descriptor.geoip_db_digest = value
-
-
-def _parse_geoip6_db_digest_line(descriptor, entries):
- # "geoip6-db-digest" Digest
-
- value = _value('geoip6-db-digest', entries)
-
- if not stem.util.tor_tools.is_hex_digits(value, 40):
- raise ValueError('Geoip v6 digest line had an invalid sha1 digest: geoip6-db-digest %s' % value)
-
- descriptor.geoip6_db_digest = value
-
-
def _parse_transport_line(descriptor, entries):
# "transport" transportname address:port [arglist]
# Everything after the transportname is scrubbed in published bridge
@@ -534,24 +508,8 @@ def _parse_bridge_ip_transports_line(descriptor, entries):
descriptor.ip_transports = ip_transports
-def _parse_router_signature_line(descriptor, entries):
- value, block_type, block_contents = entries['router-signature'][0]
-
- if not block_contents or block_type != 'SIGNATURE':
- raise ValueError("'router-signature' should be followed by a SIGNATURE block rather than a '%s'" % block_type)
-
- descriptor.signature = block_contents
-
-
-def _parse_router_digest(descriptor, entries):
- value = _value('router-digest', entries)
-
- if not stem.util.tor_tools.is_hex_digits(value, 40):
- raise ValueError('Router digest line had an invalid sha1 digest: router-digest %s' % value)
-
- descriptor._digest = value
-
-
+_parse_geoip_db_digest_line = _parse_sha1_digest_line('geoip-db-digest', 'geoip_db_digest')
+_parse_geoip6_db_digest_line = _parse_sha1_digest_line('geoip6-db-digest', 'geoip6_db_digest')
_parse_dirreq_v2_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-resp', 'dir_v2_responses', 'dir_v2_responses_unknown')
_parse_dirreq_v3_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-resp', 'dir_v3_responses', 'dir_v3_responses_unknown')
_parse_dirreq_v2_direct_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-direct-dl', 'dir_v2_direct_dl', 'dir_v2_direct_dl_unknown')
@@ -584,6 +542,8 @@ _parse_dirreq_v3_reqs_line = functools.partial(_parse_geoip_to_count_line, 'dirr
_parse_geoip_client_origins_line = functools.partial(_parse_geoip_to_count_line, 'geoip-client-origins', 'geoip_client_origins')
_parse_entry_ips_line = functools.partial(_parse_geoip_to_count_line, 'entry-ips', 'entry_ips')
_parse_bridge_ips_line = functools.partial(_parse_geoip_to_count_line, 'bridge-ips', 'bridge_ips')
+_parse_router_digest_line = _parse_sha1_digest_line('router-digest', '_digest')
+_parse_router_signature_line = _parse_key_block('router-signature', 'signature', 'SIGNATURE')
class ExtraInfoDescriptor(Descriptor):
@@ -908,11 +868,11 @@ class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
"""
ATTRIBUTES = dict(ExtraInfoDescriptor.ATTRIBUTES, **{
- '_digest': (None, _parse_router_digest),
+ '_digest': (None, _parse_router_digest_line),
})
PARSER_FOR_LINE = dict(ExtraInfoDescriptor.PARSER_FOR_LINE, **{
- 'router-digest': _parse_router_digest,
+ 'router-digest': _parse_router_digest_line,
})
def digest(self):
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 5a2ca1e..b6af898 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -33,6 +33,7 @@ etc). This information is provided from a few sources...
import base64
import codecs
+import functools
import hashlib
import re
@@ -53,6 +54,10 @@ from stem.descriptor import (
_get_bytes_field,
_get_descriptor_components,
_read_until_keywords,
+ _value,
+ _values,
+ _parse_sha1_digest_line,
+ _parse_key_block,
)
try:
@@ -166,14 +171,6 @@ def _parse_file(descriptor_file, is_bridge = False, validate = True, **kwargs):
break # done parsing descriptors
-def _value(line, entries):
- return entries[line][0][0]
-
-
-def _values(line, entries):
- return [entry[0] for entry in entries[line]]
-
-
def _parse_router_line(descriptor, entries):
# "router" nickname address ORPort SocksPort DirPort
@@ -364,23 +361,7 @@ def _parse_or_address_line(descriptor, entries):
descriptor.or_addresses = or_addresses
-def _parse_read_history_line(descriptor, entries):
- timestamp, interval, history_values = _parse_history_line(descriptor, entries, 'read-history')
-
- descriptor.read_history_end = timestamp
- descriptor.read_history_interval = interval
- descriptor.read_history_values = history_values
-
-
-def _parse_write_history_line(descriptor, entries):
- timestamp, interval, history_values = _parse_history_line(descriptor, entries, 'write-history')
-
- descriptor.write_history_end = timestamp
- descriptor.write_history_interval = interval
- descriptor.write_history_values = history_values
-
-
-def _parse_history_line(descriptor, entries, keyword):
+def _parse_history_line(keyword, history_end_attribute, history_interval_attribute, history_values_attribute, descriptor, entries):
value = _value(keyword, entries)
timestamp, interval, remainder = stem.descriptor.extrainfo_descriptor._parse_timestamp_and_interval(keyword, value)
@@ -392,23 +373,9 @@ def _parse_history_line(descriptor, entries, keyword):
except ValueError:
raise ValueError('%s line has non-numeric values: %s %s' % (keyword, keyword, value))
- return timestamp, interval, history_values
-
-
-def _parse_router_digest_line(descriptor, entries):
- descriptor._digest = _value('router-digest', entries)
-
- if not stem.util.tor_tools.is_hex_digits(descriptor._digest, 40):
- raise ValueError('Router digest line had an invalid sha1 digest: router-digest %s' % descriptor._digest)
-
-
-def _key_block(entries, keyword, expected_block_type):
- value, block_type, block_contents = entries[keyword][0]
-
- if not block_contents or block_type != expected_block_type:
- raise ValueError("'%s' should be followed by a %s block" % (keyword, expected_block_type))
-
- return block_contents
+ setattr(descriptor, history_end_attribute, timestamp)
+ setattr(descriptor, history_interval_attribute, interval)
+ setattr(descriptor, history_values_attribute, history_values)
def _parse_exit_policy(descriptor, entries):
@@ -421,15 +388,18 @@ def _parse_exit_policy(descriptor, entries):
del descriptor._unparsed_exit_policy
+_parse_read_history_line = functools.partial(_parse_history_line, 'read-history', 'read_history_end', 'read_history_interval', 'read_history_values')
+_parse_write_history_line = functools.partial(_parse_history_line, 'write-history', 'write_history_end', 'write_history_interval', 'write_history_values')
_parse_ipv6_policy_line = lambda descriptor, entries: setattr(descriptor, 'exit_policy_v6', stem.exit_policy.MicroExitPolicy(_value('ipv6-policy', entries)))
_parse_allow_single_hop_exits_line = lambda descriptor, entries: setattr(descriptor, 'allow_single_hop_exits', True)
_parse_caches_extra_info_line = lambda descriptor, entries: setattr(descriptor, 'extra_info_cache', True)
_parse_family_line = lambda descriptor, entries: setattr(descriptor, 'family', set(_value('family', entries).split(' ')))
_parse_eventdns_line = lambda descriptor, entries: setattr(descriptor, 'eventdns', _value('eventdns', entries) == '1')
-_parse_onion_key_line = lambda descriptor, entries: setattr(descriptor, 'onion_key', _key_block(entries, 'onion-key', 'RSA PUBLIC KEY'))
-_parse_signing_key_line = lambda descriptor, entries: setattr(descriptor, 'signing_key', _key_block(entries, 'signing-key', 'RSA PUBLIC KEY'))
-_parse_router_signature_line = lambda descriptor, entries: setattr(descriptor, 'signature', _key_block(entries, 'router-signature', 'SIGNATURE'))
+_parse_onion_key_line = _parse_key_block('onion-key', 'onion_key', 'RSA PUBLIC KEY')
+_parse_signing_key_line = _parse_key_block('signing-key', 'signing_key', 'RSA PUBLIC KEY')
+_parse_router_signature_line = _parse_key_block('router-signature', 'signature', 'SIGNATURE')
_parse_ntor_onion_key_line = lambda descriptor, entries: setattr(descriptor, 'ntor_onion_key', _value('ntor-onion-key', entries))
+_parse_router_digest_line = _parse_sha1_digest_line('router-digest', '_digest')
class ServerDescriptor(Descriptor):
diff --git a/test/unit/descriptor/server_descriptor.py b/test/unit/descriptor/server_descriptor.py
index 61654a7..f261798 100644
--- a/test/unit/descriptor/server_descriptor.py
+++ b/test/unit/descriptor/server_descriptor.py
@@ -672,7 +672,7 @@ Qlx9HNCqCY877ztFRC624ja2ql6A2hBcuoYMbkHjcQ4=
self.assertRaises(ValueError, BridgeDescriptor, desc_text)
desc = BridgeDescriptor(desc_text, validate = False)
- self.assertEqual(value, desc.digest())
+ self.assertEqual(None, desc.digest())
def test_or_address_v4(self):
"""
commit f19d87ab0f16928be370114c993f512b3d2dfac5
Author: Damian Johnson <atagar@torproject.org>
Date: Sat Jan 17 17:36:54 2015 -0800
Microdescriptor lazy loading
Simplest descriptor type, so a pretty simple switch.
---
stem/descriptor/__init__.py | 4 +-
stem/descriptor/extrainfo_descriptor.py | 3 +-
stem/descriptor/microdescriptor.py | 115 ++++++++++++++-----------------
stem/descriptor/server_descriptor.py | 6 +-
4 files changed, 56 insertions(+), 72 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index bc1b462..ef96dd7 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -351,11 +351,11 @@ class Descriptor(object):
ATTRIBUTES = {} # mapping of 'attribute' => (default_value, parsing_function)
PARSER_FOR_LINE = {} # line keyword to its associated parsing function
- def __init__(self, contents):
+ def __init__(self, contents, lazy_load = False):
self._path = None
self._archive_path = None
self._raw_contents = contents
- self._lazy_loading = False
+ self._lazy_loading = lazy_load
self._unrecognized_lines = []
def get_path(self):
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index 9e86798..4137e47 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -786,11 +786,10 @@ class ExtraInfoDescriptor(Descriptor):
:raises: **ValueError** if the contents is malformed and validate is True
"""
- super(ExtraInfoDescriptor, self).__init__(raw_contents)
+ super(ExtraInfoDescriptor, self).__init__(raw_contents, lazy_load = not validate)
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
entries = _get_descriptor_components(raw_contents, validate)
- self._lazy_loading = not validate
if validate:
for keyword in self._required_fields():
diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py
index 093e6f0..0590cbb 100644
--- a/stem/descriptor/microdescriptor.py
+++ b/stem/descriptor/microdescriptor.py
@@ -73,6 +73,9 @@ from stem.descriptor import (
Descriptor,
_get_descriptor_components,
_read_until_keywords,
+ _value,
+ _values,
+ _parse_key_block,
)
try:
@@ -151,6 +154,29 @@ def _parse_file(descriptor_file, validate = True, **kwargs):
break # done parsing descriptors
+def _parse_a_line(descriptor, entries):
+ for value in _values('a', entries):
+ stem.descriptor.router_status_entry._parse_a_line(descriptor, value, True)
+
+
+def _parse_id_line(descriptor, entries):
+ value = _value('id', entries)
+ value_comp = value.split()
+
+ if len(value_comp) >= 2:
+ descriptor.identifier_type = value_comp[0]
+ descriptor.identifier = value_comp[1]
+ else:
+ raise ValueError("'id' lines should contain both the key type and digest: id %s" % value)
+
+
+_parse_onion_key_line = _parse_key_block('onion-key', 'onion_key', 'RSA PUBLIC KEY')
+_parse_ntor_onion_key_line = lambda descriptor, entries: setattr(descriptor, 'ntor_onion_key', _value('ntor-onion-key', entries))
+_parse_family_line = lambda descriptor, entries: setattr(descriptor, 'family', _value('family', entries).split(' '))
+_parse_p_line = lambda descriptor, entries: stem.descriptor.router_status_entry._parse_p_line(descriptor, _value('p', entries), True)
+_parse_p6_line = lambda descriptor, entries: setattr(descriptor, 'exit_policy_v6', stem.exit_policy.MicroExitPolicy(_value('p6', entries)))
+
+
class Microdescriptor(Descriptor):
"""
Microdescriptor (`descriptor specification
@@ -173,33 +199,41 @@ class Microdescriptor(Descriptor):
**\*** attribute is required when we're parsed with validation
"""
+ ATTRIBUTES = {
+ 'onion_key': (None, _parse_onion_key_line),
+ 'ntor_onion_key': (None, _parse_ntor_onion_key_line),
+ 'or_addresses': ([], _parse_a_line),
+ 'family': ([], _parse_family_line),
+ 'exit_policy': (stem.exit_policy.MicroExitPolicy('reject 1-65535'), _parse_p_line),
+ 'exit_policy_v6': (None, _parse_p6_line),
+ 'identifier_type': (None, _parse_id_line),
+ 'identifier': (None, _parse_id_line),
+ }
+
+ PARSER_FOR_LINE = {
+ 'onion-key': _parse_onion_key_line,
+ 'ntor-onion-key': _parse_ntor_onion_key_line,
+ 'a': _parse_a_line,
+ 'family': _parse_family_line,
+ 'p': _parse_p_line,
+ 'p6': _parse_p6_line,
+ 'id': _parse_id_line,
+ }
+
def __init__(self, raw_contents, validate = True, annotations = None):
- super(Microdescriptor, self).__init__(raw_contents)
+ super(Microdescriptor, self).__init__(raw_contents, lazy_load = not validate)
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()
-
- self.onion_key = None
- self.ntor_onion_key = None
- self.or_addresses = []
- self.family = []
- self.exit_policy = stem.exit_policy.MicroExitPolicy('reject 1-65535')
- self.exit_policy_v6 = None
- self.identifier_type = None
- self.identifier = None
-
- self._unrecognized_lines = []
-
self._annotation_lines = annotations if annotations else []
entries = _get_descriptor_components(raw_contents, validate)
- self._parse(entries, validate)
if validate:
+ self._parse(entries, validate)
self._check_constraints(entries)
-
- def get_unrecognized_lines(self):
- return list(self._unrecognized_lines)
+ else:
+ self._entries = entries
@lru_cache()
def get_annotations(self):
@@ -237,53 +271,6 @@ class Microdescriptor(Descriptor):
return self._annotation_lines
- def _parse(self, entries, validate):
- """
- Parses a series of 'keyword => (value, pgp block)' mappings and applies
- them as attributes.
-
- :param dict entries: descriptor contents to be applied
- :param bool validate: checks the validity of descriptor content if **True**
-
- :raises: **ValueError** if an error occurs in validation
- """
-
- for keyword, values in list(entries.items()):
- # most just work with the first (and only) value
- value, block_type, block_contents = values[0]
-
- line = '%s %s' % (keyword, value) # original line
-
- if block_contents:
- line += '\n%s' % block_contents
-
- if keyword == 'onion-key':
- if validate and (not block_contents or block_type != 'RSA PUBLIC KEY'):
- raise ValueError("'onion-key' should be followed by a RSA PUBLIC KEY block: %s" % line)
-
- self.onion_key = block_contents
- elif keyword == 'ntor-onion-key':
- self.ntor_onion_key = value
- elif keyword == 'a':
- for entry, _, _ in values:
- stem.descriptor.router_status_entry._parse_a_line(self, entry, validate)
- elif keyword == 'family':
- self.family = value.split(' ')
- elif keyword == 'p':
- stem.descriptor.router_status_entry._parse_p_line(self, value, validate)
- elif keyword == 'p6':
- self.exit_policy_v6 = stem.exit_policy.MicroExitPolicy(value)
- elif keyword == 'id':
- value_comp = value.split()
-
- if len(value_comp) >= 2:
- self.identifier_type = value_comp[0]
- self.identifier = value_comp[1]
- elif validate:
- raise ValueError("'id' lines should contain both the key type and digest: %s" % line)
- else:
- self._unrecognized_lines.append(line)
-
def _check_constraints(self, entries):
"""
Does a basic check that the entries conform to this descriptor type's
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index b6af898..a696d6a 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -533,7 +533,7 @@ class ServerDescriptor(Descriptor):
:raises: **ValueError** if the contents is malformed and validate is True
"""
- super(ServerDescriptor, self).__init__(raw_contents)
+ super(ServerDescriptor, self).__init__(raw_contents, lazy_load = not validate)
# Only a few things can be arbitrary bytes according to the dir-spec, so
# parsing them separately.
@@ -541,9 +541,6 @@ class ServerDescriptor(Descriptor):
self.platform = _get_bytes_field('platform', raw_contents)
self.contact = _get_bytes_field('contact', raw_contents)
- raw_contents = stem.util.str_tools._to_unicode(raw_contents)
-
- self._lazy_loading = not validate
self._annotation_lines = annotations if annotations else []
# A descriptor contains a series of 'keyword lines' which are simply a
@@ -554,6 +551,7 @@ class ServerDescriptor(Descriptor):
# influences the resulting exit policy, but for everything else the order
# does not matter so breaking it into key / value pairs.
+ raw_contents = stem.util.str_tools._to_unicode(raw_contents)
entries, self._unparsed_exit_policy = _get_descriptor_components(raw_contents, validate, ('accept', 'reject'))
if validate:
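Lazy loading hinges on the constructor stashing the raw entries when validate is False and parsing only on demand. A simplified standalone sketch of that idea (stem's real mechanism lives in the Descriptor base class; names here are illustrative):

class LazyDescriptor(object):
  ATTRIBUTES = {}  # 'attribute' => (default_value, parsing_function)

  def __init__(self, entries, lazy_load = False):
    self._entries = entries
    self._lazy_loading = lazy_load

    if not lazy_load:
      for default, parse in self.ATTRIBUTES.values():
        parse(self, entries)

  def __getattr__(self, name):
    # only reached when normal lookup fails, ie. the attribute isn't set yet
    if name in self.ATTRIBUTES and self.__dict__.get('_lazy_loading'):
      default, parse = self.ATTRIBUTES[name]

      try:
        parse(self, self._entries)
      except (KeyError, ValueError):
        setattr(self, name, default)

      return getattr(self, name)

    raise AttributeError(name)


def _parse_ntor_onion_key_line(descriptor, entries):
  descriptor.ntor_onion_key = entries['ntor-onion-key'][0][0]


class MicrodescriptorSketch(LazyDescriptor):
  ATTRIBUTES = {
    'ntor_onion_key': (None, _parse_ntor_onion_key_line),
  }


desc = MicrodescriptorSketch({'ntor-onion-key': [('xyz', None, None)]}, lazy_load = True)
print(desc.ntor_onion_key)  # parsed only now, on first access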
commit 4f63cbca6f88772c079bb1726c72bd30f6ed8901
Author: Damian Johnson <atagar@torproject.org>
Date: Sat Jan 17 19:44:03 2015 -0800
KeyCertificate lazy loading
Lazy loading support for part of network status documents.
---
stem/descriptor/__init__.py | 14 ++
stem/descriptor/extrainfo_descriptor.py | 16 +-
stem/descriptor/networkstatus.py | 210 ++++++++------------
stem/descriptor/server_descriptor.py | 13 +-
.../descriptor/networkstatus/key_certificate.py | 24 +--
5 files changed, 116 insertions(+), 161 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index ef96dd7..1e1acb5 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -319,6 +319,20 @@ def _values(line, entries):
return [entry[0] for entry in entries[line]]
+def _parse_timestamp_line(keyword, attribute):
+ # "<keyword>" YYYY-MM-DD HH:MM:SS
+
+ def _parse(descriptor, entries):
+ value = _value(keyword, entries)
+
+ try:
+ setattr(descriptor, attribute, stem.util.str_tools._parse_timestamp(value))
+ except ValueError:
+ raise ValueError("Timestamp on %s line wasn't parsable: %s %s" % (keyword, keyword, value))
+
+ return _parse
+
+
def _parse_sha1_digest_line(keyword, attribute):
def _parse(descriptor, entries):
value = _value(keyword, entries)
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index 4137e47..3413711 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -83,6 +83,7 @@ from stem.descriptor import (
_get_descriptor_components,
_value,
_values,
+ _parse_timestamp_line,
_parse_sha1_digest_line,
_parse_key_block,
)
@@ -366,17 +367,6 @@ def _parse_cell_line(keyword, attribute, descriptor, entries):
raise exc
-def _parse_timestamp_line(keyword, attribute, descriptor, entries):
- # "<keyword>" YYYY-MM-DD HH:MM:SS
-
- value = _value(keyword, entries)
-
- try:
- setattr(descriptor, attribute, stem.util.str_tools._parse_timestamp(value))
- except ValueError:
- raise ValueError("Timestamp on %s line wasn't parsable: %s %s" % (keyword, keyword, value))
-
-
def _parse_timestamp_and_interval_line(keyword, end_attribute, interval_attribute, descriptor, entries):
# "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s)
@@ -521,8 +511,8 @@ _parse_dirreq_v3_share_line = functools.partial(_parse_dirreq_share_line, 'dirre
_parse_cell_processed_cells_line = functools.partial(_parse_cell_line, 'cell-processed-cells', 'cell_processed_cells')
_parse_cell_queued_cells_line = functools.partial(_parse_cell_line, 'cell-queued-cells', 'cell_queued_cells')
_parse_cell_time_in_queue_line = functools.partial(_parse_cell_line, 'cell-time-in-queue', 'cell_time_in_queue')
-_parse_published_line = functools.partial(_parse_timestamp_line, 'published', 'published')
-_parse_geoip_start_time_line = functools.partial(_parse_timestamp_line, 'geoip-start-time', 'geoip_start_time')
+_parse_published_line = _parse_timestamp_line('published', 'published')
+_parse_geoip_start_time_line = _parse_timestamp_line('geoip-start-time', 'geoip_start_time')
_parse_cell_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'cell-stats-end', 'cell_stats_end', 'cell_stats_interval')
_parse_entry_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'entry-stats-end', 'entry_stats_end', 'entry_stats_interval')
_parse_exit_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'exit-stats-end', 'exit_stats_end', 'exit_stats_interval')
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 33beb4a..a70a7cf 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -62,6 +62,9 @@ from stem.descriptor import (
DocumentHandler,
_get_descriptor_components,
_read_until_keywords,
+ _value,
+ _parse_timestamp_line,
+ _parse_key_block,
)
# Version 2 network status document fields, tuples of the form...
@@ -1244,6 +1247,58 @@ class DirectoryAuthority(Descriptor):
return self._compare(other, lambda s, o: s <= o)
+def _parse_dir_key_certificate_version_line(descriptor, entries):
+ # "dir-key-certificate-version" version
+
+ value = _value('dir-key-certificate-version', entries)
+
+ if not value.isdigit():
+ raise ValueError('Key certificate has a non-integer version: dir-key-certificate-version %s' % value)
+
+ descriptor.version = int(value)
+
+ if descriptor.version != 3:
+ raise ValueError("Expected a version 3 key certificate, got version '%i' instead" % descriptor.version)
+
+
+def _parse_dir_address_line(descriptor, entries):
+ # "dir-address" IPPort
+
+ value = _value('dir-address', entries)
+
+ if ':' not in value:
+ raise ValueError("Key certificate's 'dir-address' is expected to be of the form ADDRESS:PORT: dir-address %s" % value)
+
+ address, dirport = value.split(':', 1)
+
+ if not stem.util.connection.is_valid_ipv4_address(address):
+ raise ValueError("Key certificate's address isn't a valid IPv4 address: dir-address %s" % value)
+ elif not stem.util.connection.is_valid_port(dirport):
+ raise ValueError("Key certificate's dirport is invalid: dir-address %s" % value)
+
+ descriptor.address = address
+ descriptor.dir_port = int(dirport)
+
+
+def _parse_fingerprint_line(descriptor, entries):
+ # "fingerprint" fingerprint
+
+ value = _value('fingerprint', entries)
+
+ if not stem.util.tor_tools.is_valid_fingerprint(value):
+ raise ValueError("Key certificate's fingerprint is malformed: fingerprint %s" % value)
+
+ descriptor.fingerprint = value
+
+
+_parse_dir_key_published_line = _parse_timestamp_line('dir-key-published', 'published')
+_parse_dir_key_expires_line = _parse_timestamp_line('dir-key-expires', 'expires')
+_parse_identity_key_line = _parse_key_block('dir-identity-key', 'identity_key', 'RSA PUBLIC KEY')
+_parse_signing_key_line = _parse_key_block('dir-signing-key', 'signing_key', 'RSA PUBLIC KEY')
+_parse_dir_key_crosscert_line = _parse_key_block('dir-key-crosscert', 'crosscert', 'ID SIGNATURE')
+_parse_dir_key_certification_line = _parse_key_block('dir-key-certification', 'certification', 'SIGNATURE')
+
+
class KeyCertificate(Descriptor):
"""
Directory key certificate for a v3 network status document.
@@ -1263,35 +1318,35 @@ class KeyCertificate(Descriptor):
**\*** mandatory attribute
"""
- def __init__(self, raw_content, validate = True):
- super(KeyCertificate, self).__init__(raw_content)
- raw_content = stem.util.str_tools._to_unicode(raw_content)
-
- self.version = None
- self.address = None
- self.dir_port = None
- self.fingerprint = None
- self.identity_key = None
- self.published = None
- self.expires = None
- self.signing_key = None
- self.crosscert = None
- self.certification = None
-
- self._unrecognized_lines = []
-
- self._parse(raw_content, validate)
+ ATTRIBUTES = {
+ 'version': (None, _parse_dir_key_certificate_version_line),
+ 'address': (None, _parse_dir_address_line),
+ 'dir_port': (None, _parse_dir_address_line),
+ 'fingerprint': (None, _parse_fingerprint_line),
+ 'identity_key': (None, _parse_identity_key_line),
+ 'published': (None, _parse_dir_key_published_line),
+ 'expires': (None, _parse_dir_key_expires_line),
+ 'signing_key': (None, _parse_signing_key_line),
+ 'crosscert': (None, _parse_dir_key_crosscert_line),
+ 'certification': (None, _parse_dir_key_certification_line),
+ }
+
+ PARSER_FOR_LINE = {
+ 'dir-key-certificate-version': _parse_dir_key_certificate_version_line,
+ 'dir-address': _parse_dir_address_line,
+ 'fingerprint': _parse_fingerprint_line,
+ 'dir-key-published': _parse_dir_key_published_line,
+ 'dir-key-expires': _parse_dir_key_expires_line,
+ 'dir-identity-key': _parse_identity_key_line,
+ 'dir-signing-key': _parse_signing_key_line,
+ 'dir-key-crosscert': _parse_dir_key_crosscert_line,
+ 'dir-key-certification': _parse_dir_key_certification_line,
+ }
- def _parse(self, content, validate):
- """
- Parses the given content and applies the attributes.
-
- :param str content: descriptor content
- :param bool validate: checks validity if **True**
-
- :raises: **ValueError** if a validity check fails
- """
+ def __init__(self, raw_content, validate = True):
+ super(KeyCertificate, self).__init__(raw_content, lazy_load = not validate)
+ content = stem.util.str_tools._to_unicode(raw_content)
entries = _get_descriptor_components(content, validate)
if validate:
@@ -1311,104 +1366,9 @@ class KeyCertificate(Descriptor):
if entry_count > 1:
raise ValueError("Key certificates can only have a single '%s' line, got %i:\n%s" % (keyword, entry_count, content))
- for keyword, values in list(entries.items()):
- value, block_type, block_contents = values[0]
- line = '%s %s' % (keyword, value)
-
- if keyword == 'dir-key-certificate-version':
- # "dir-key-certificate-version" version
-
- if not value.isdigit():
- if not validate:
- continue
-
- raise ValueError('Key certificate has a non-integer version: %s' % line)
-
- self.version = int(value)
-
- if validate and self.version != 3:
- raise ValueError("Expected a version 3 key certificate, got version '%i' instead" % self.version)
- elif keyword == 'dir-address':
- # "dir-address" IPPort
-
- if ':' not in value:
- if not validate:
- continue
-
- raise ValueError("Key certificate's 'dir-address' is expected to be of the form ADDRESS:PORT: %s" % line)
-
- address, dirport = value.split(':', 1)
-
- if validate:
- if not stem.util.connection.is_valid_ipv4_address(address):
- raise ValueError("Key certificate's address isn't a valid IPv4 address: %s" % line)
- elif not stem.util.connection.is_valid_port(dirport):
- raise ValueError("Key certificate's dirport is invalid: %s" % line)
- elif not dirport.isdigit():
- continue
-
- self.address = address
- self.dir_port = int(dirport)
- elif keyword == 'fingerprint':
- # "fingerprint" fingerprint
-
- if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
- raise ValueError("Key certificate's fingerprint is malformed: %s" % line)
-
- self.fingerprint = value
- elif keyword in ('dir-key-published', 'dir-key-expires'):
- # "dir-key-published" YYYY-MM-DD HH:MM:SS
- # "dir-key-expires" YYYY-MM-DD HH:MM:SS
-
- try:
- date_value = stem.util.str_tools._parse_timestamp(value)
-
- if keyword == 'dir-key-published':
- self.published = date_value
- elif keyword == 'dir-key-expires':
- self.expires = date_value
- except ValueError:
- if validate:
- raise ValueError("Key certificate's '%s' time wasn't parsable: %s" % (keyword, value))
- elif keyword == 'dir-identity-key':
- # "dir-identity-key" NL a public key in PEM format
-
- if validate and (not block_contents or block_type != 'RSA PUBLIC KEY'):
- raise ValueError("'dir-identity-key' should be followed by a RSA PUBLIC KEY block: %s" % line)
-
- self.identity_key = block_contents
- elif keyword == 'dir-signing-key':
- # "dir-signing-key" NL a key in PEM format
-
- if validate and (not block_contents or block_type != 'RSA PUBLIC KEY'):
- raise ValueError("'dir-signing-key' should be followed by a RSA PUBLIC KEY block: %s" % line)
-
- self.signing_key = block_contents
- elif keyword == 'dir-key-crosscert':
- # "dir-key-crosscert" NL CrossSignature
-
- if validate and (not block_contents or block_type != 'ID SIGNATURE'):
- raise ValueError("'dir-key-crosscert' should be followed by a ID SIGNATURE block: %s" % line)
-
- self.crosscert = block_contents
- elif keyword == 'dir-key-certification':
- # "dir-key-certification" NL Signature
-
- if validate and (not block_contents or block_type != 'SIGNATURE'):
- raise ValueError("'dir-key-certification' should be followed by a SIGNATURE block: %s" % line)
-
- self.certification = block_contents
- else:
- self._unrecognized_lines.append(line)
-
- def get_unrecognized_lines(self):
- """
- Returns any unrecognized lines.
-
- :returns: **list** of unrecognized lines
- """
-
- return self._unrecognized_lines
+ self._parse(entries, validate)
+ else:
+ self._entries = entries
def _compare(self, other, method):
if not isinstance(other, KeyCertificate):
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index a696d6a..09dc9c3 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -56,6 +56,7 @@ from stem.descriptor import (
_read_until_keywords,
_value,
_values,
+ _parse_timestamp_line,
_parse_sha1_digest_line,
_parse_key_block,
)
@@ -241,17 +242,6 @@ def _parse_platform_line(descriptor, entries):
pass
-def _parse_published_line(descriptor, entries):
- # "published" YYYY-MM-DD HH:MM:SS
-
- value = _value('published', entries)
-
- try:
- descriptor.published = stem.util.str_tools._parse_timestamp(value)
- except ValueError:
- raise ValueError("Published line's time wasn't parsable: published %s" % value)
-
-
def _parse_fingerprint_line(descriptor, entries):
# This is forty hex digits split into space separated groups of four.
# Checking that we match this pattern.
@@ -388,6 +378,7 @@ def _parse_exit_policy(descriptor, entries):
del descriptor._unparsed_exit_policy
+_parse_published_line = _parse_timestamp_line('published', 'published')
_parse_read_history_line = functools.partial(_parse_history_line, 'read-history', 'read_history_end', 'read_history_interval', 'read_history_values')
_parse_write_history_line = functools.partial(_parse_history_line, 'write-history', 'write_history_end', 'write_history_interval', 'write_history_values')
_parse_ipv6_policy_line = lambda descriptor, entries: setattr(descriptor, 'exit_policy_v6', stem.exit_policy.MicroExitPolicy(_value('ipv6-policy', entries)))
diff --git a/test/unit/descriptor/networkstatus/key_certificate.py b/test/unit/descriptor/networkstatus/key_certificate.py
index 8939c7e..12facaa 100644
--- a/test/unit/descriptor/networkstatus/key_certificate.py
+++ b/test/unit/descriptor/networkstatus/key_certificate.py
@@ -112,23 +112,23 @@ class TestKeyCertificate(unittest.TestCase):
self.assertEqual(80, certificate.dir_port)
test_values = (
- ('', None, None),
- (' ', None, None),
- ('127.0.0.1', None, None),
- ('127.0.0.1:', None, None),
- ('80', None, None),
- (':80', '', 80),
- ('127.0.0.1a:80', '127.0.0.1a', 80),
- ('127.0.0.1:80a', None, None),
+ (''),
+ (' '),
+ ('127.0.0.1'),
+ ('127.0.0.1:'),
+ ('80'),
+ (':80'),
+ ('127.0.0.1a:80'),
+ ('127.0.0.1:80a'),
)
- for test_value, expected_address, expected_port in test_values:
+ for test_value in test_values:
content = get_key_certificate({'dir-address': test_value}, content = True)
self.assertRaises(ValueError, KeyCertificate, content)
certificate = KeyCertificate(content, False)
- self.assertEqual(expected_address, certificate.address)
- self.assertEqual(expected_port, certificate.dir_port)
+ self.assertEqual(None, certificate.address)
+ self.assertEqual(None, certificate.dir_port)
def test_fingerprint(self):
"""
@@ -147,7 +147,7 @@ class TestKeyCertificate(unittest.TestCase):
self.assertRaises(ValueError, KeyCertificate, content)
certificate = KeyCertificate(content, False)
- self.assertEqual(test_value.strip(), certificate.fingerprint)
+ self.assertEqual(None, certificate.fingerprint)
def test_time_fields(self):
"""
commit a1575e9ff1aa053fa19fde4d715ecd1f5a3ace0a
Author: Damian Johnson <atagar@torproject.org>
Date: Sun Jan 18 13:00:12 2015 -0800
Moving DocumentHeader parsing to helpers
The v3 network status document is gonna be a bit trickier since it delegates
parsing to sub-objects. Essentially it acts as a collection of sub-documents,
then adds those attributes to itself.
Starting by moving the header parsing to helpers like the other document types.
---
stem/descriptor/networkstatus.py | 327 ++++++++++++---------
test/unit/descriptor/networkstatus/document_v3.py | 30 +-
2 files changed, 199 insertions(+), 158 deletions(-)
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 29d2593..910f684 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -569,7 +569,175 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
return self._compare(other, lambda s, o: s <= o)
+def _parse_network_status_version_line(descriptor, entries):
+ # "network-status-version" version
+
+ value = _value('network-status-version', entries)
+
+ if ' ' in value:
+ version, flavor = value.split(' ', 1)
+ else:
+ version, flavor = value, None
+
+ if not version.isdigit():
+ raise ValueError('Network status document has a non-numeric version: network-status-version %s' % value)
+
+ descriptor.version = int(version)
+ descriptor.version_flavor = flavor
+ descriptor.is_microdescriptor = flavor == 'microdesc'
+
+ if descriptor.version != 3:
+ raise ValueError("Expected a version 3 network status document, got version '%s' instead" % descriptor.version)
+
+
+def _parse_vote_status_line(descriptor, entries):
+ # "vote-status" type
+ #
+ # The consensus-method and consensus-methods fields are optional since
+ # they weren't included in version 1. Setting a default now that we
+ # know if we're a vote or not.
+
+ value = _value('vote-status', entries)
+
+ if value == 'consensus':
+ descriptor.is_consensus, descriptor.is_vote = True, False
+ elif value == 'vote':
+ descriptor.is_consensus, descriptor.is_vote = False, True
+ else:
+ raise ValueError("A network status document's vote-status line can only be 'consensus' or 'vote', got '%s' instead" % value)
+
+
+def _parse_consensus_methods_line(descriptor, entries):
+ # "consensus-methods" IntegerList
+
+ value, consensus_methods = _value('consensus-methods', entries), []
+
+ for entry in value.split(' '):
+ if not entry.isdigit():
+ raise ValueError("A network status document's consensus-methods must be a list of integer values, but was '%s'" % value)
+
+ consensus_methods.append(int(entry))
+
+ descriptor.consensus_methods = consensus_methods
+
+
+def _parse_consensus_method_line(descriptor, entries):
+ # "consensus-method" Integer
+
+ value = _value('consensus-method', entries)
+
+ if not value.isdigit():
+ raise ValueError("A network status document's consensus-method must be an integer, but was '%s'" % value)
+
+ descriptor.consensus_method = int(value)
+
+
+def _parse_voting_delay_line(descriptor, entries):
+ # "voting-delay" VoteSeconds DistSeconds
+
+ value = _value('voting-delay', entries)
+ value_comp = value.split(' ')
+
+ if len(value_comp) == 2 and value_comp[0].isdigit() and value_comp[1].isdigit():
+ descriptor.vote_delay = int(value_comp[0])
+ descriptor.dist_delay = int(value_comp[1])
+ else:
+ raise ValueError("A network status document's 'voting-delay' line must be a pair of integer values, but was '%s'" % value)
+
+
+def _parse_versions_line(keyword, attribute):
+ def _parse(descriptor, entries):
+ value, entries = _value(keyword, entries), []
+
+ for entry in value.split(','):
+ try:
+ entries.append(stem.version._get_version(entry))
+ except ValueError:
+ raise ValueError("Network status document's '%s' line had '%s', which isn't a parsable tor version: %s %s" % (keyword, entry, keyword, value))
+
+ setattr(descriptor, attribute, entries)
+
+ return _parse
+
+
+def _parse_flag_thresholds_line(descriptor, entries):
+ # "flag-thresholds" SP THRESHOLDS
+
+ value, thresholds = _value('flag-thresholds', entries).strip(), {}
+
+ if value:
+ for entry in value.split(' '):
+ if '=' not in entry:
+ raise ValueError("Network status document's 'flag-thresholds' line is expected to be space separated key=value mappings, got: flag-thresholds %s" % value)
+
+ entry_key, entry_value = entry.split('=', 1)
+
+ try:
+ if entry_value.endswith('%'):
+ # opting for string manipulation rather than just
+ # 'float(entry_value) / 100' because floating point arithmetic
+ # will lose precision
+
+ thresholds[entry_key] = float('0.' + entry_value[:-1].replace('.', '', 1))
+ elif '.' in entry_value:
+ thresholds[entry_key] = float(entry_value)
+ else:
+ thresholds[entry_key] = int(entry_value)
+ except ValueError:
+ raise ValueError("Network status document's 'flag-thresholds' line is expected to have float values, got: flag-thresholds %s" % value)
+
+ descriptor.flag_thresholds = thresholds
+
+
+def _parse_parameters_line(descriptor, entries):
+ # "params" [Parameters]
+ # Parameter ::= Keyword '=' Int32
+ # Int32 ::= A decimal integer between -2147483648 and 2147483647.
+ # Parameters ::= Parameter | Parameters SP Parameter
+
+ value = _value('params', entries)
+
+ # should only appear in consensus-method 7 or later
+
+ if not descriptor.meets_consensus_method(7):
+ raise ValueError("A network status document's 'params' line should only appear in consensus-method 7 or later")
+
+ # skip if this is a blank line
+
+ params = dict(DEFAULT_PARAMS) if descriptor._default_params else {}
+
+ if value != '':
+ params.update(_parse_int_mappings('params', value, True))
+ descriptor.params = params
+ descriptor._check_params_constraints()
+
+
+_parse_valid_after_line = _parse_timestamp_line('valid-after', 'valid_after')
+_parse_fresh_until_line = _parse_timestamp_line('fresh-until', 'fresh_until')
+_parse_valid_until_line = _parse_timestamp_line('valid-until', 'valid_until')
+_parse_client_versions_line = _parse_versions_line('client-versions', 'client_versions')
+_parse_server_versions_line = _parse_versions_line('server-versions', 'server_versions')
+_parse_known_flags_line = lambda descriptor, entries: setattr(descriptor, 'known_flags', [entry for entry in _value('known-flags', entries).split(' ') if entry])
+
+
class _DocumentHeader(object):
+ PARSER_FOR_LINE = {
+ 'network-status-version': _parse_network_status_version_line,
+ 'vote-status': _parse_vote_status_line,
+ 'consensus-methods': _parse_consensus_methods_line,
+ 'consensus-method': _parse_consensus_method_line,
+ 'published': _parse_published_line,
+ 'valid-after': _parse_valid_after_line,
+ 'fresh-until': _parse_fresh_until_line,
+ 'valid-until': _parse_valid_until_line,
+ 'voting-delay': _parse_voting_delay_line,
+ 'client-versions': _parse_client_versions_line,
+ 'server-versions': _parse_server_versions_line,
+ 'known-flags': _parse_known_flags_line,
+ 'flag-thresholds': _parse_flag_thresholds_line,
+ 'params': _parse_parameters_line,
+ }
+
def __init__(self, document_file, validate, default_params):
self.version = None
self.version_flavor = None
@@ -590,6 +758,8 @@ class _DocumentHeader(object):
self.flag_thresholds = {}
self.params = dict(DEFAULT_PARAMS) if default_params else {}
+ self._default_params = default_params
+
self._unrecognized_lines = []
content = bytes.join(b'', _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
@@ -621,152 +791,21 @@ class _DocumentHeader(object):
if validate and len(values) > 1 and keyword in HEADER_FIELDS:
raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
- if keyword == 'network-status-version':
- # "network-status-version" version
-
- if ' ' in value:
- version, flavor = value.split(' ', 1)
+ try:
+ if keyword in self.PARSER_FOR_LINE:
+ self.PARSER_FOR_LINE[keyword](self, entries)
else:
- version, flavor = value, None
-
- if not version.isdigit():
- if not validate:
- continue
-
- raise ValueError('Network status document has a non-numeric version: %s' % line)
-
- self.version = int(version)
- self.version_flavor = flavor
- self.is_microdescriptor = flavor == 'microdesc'
-
- if validate and self.version != 3:
- raise ValueError("Expected a version 3 network status document, got version '%s' instead" % self.version)
- elif keyword == 'vote-status':
- # "vote-status" type
- #
- # The consensus-method and consensus-methods fields are optional since
- # they weren't included in version 1. Setting a default now that we
- # know if we're a vote or not.
-
- if value == 'consensus':
- self.is_consensus, self.is_vote = True, False
- self.consensus_method = 1
- elif value == 'vote':
- self.is_consensus, self.is_vote = False, True
- self.consensus_methods = [1]
- elif validate:
- raise ValueError("A network status document's vote-status line can only be 'consensus' or 'vote', got '%s' instead" % value)
- elif keyword == 'consensus-methods':
- # "consensus-methods" IntegerList
-
- consensus_methods = []
- for entry in value.split(' '):
- if entry.isdigit():
- consensus_methods.append(int(entry))
- elif validate:
- raise ValueError("A network status document's consensus-methods must be a list of integer values, but was '%s'" % value)
-
- self.consensus_methods = consensus_methods
- elif keyword == 'consensus-method':
- # "consensus-method" Integer
-
- if value.isdigit():
- self.consensus_method = int(value)
- elif validate:
- raise ValueError("A network status document's consensus-method must be an integer, but was '%s'" % value)
- elif keyword in ('published', 'valid-after', 'fresh-until', 'valid-until'):
- try:
- date_value = stem.util.str_tools._parse_timestamp(value)
-
- if keyword == 'published':
- self.published = date_value
- elif keyword == 'valid-after':
- self.valid_after = date_value
- elif keyword == 'fresh-until':
- self.fresh_until = date_value
- elif keyword == 'valid-until':
- self.valid_until = date_value
- except ValueError:
- if validate:
- raise ValueError("Network status document's '%s' time wasn't parsable: %s" % (keyword, value))
- elif keyword == 'voting-delay':
- # "voting-delay" VoteSeconds DistSeconds
-
- value_comp = value.split(' ')
-
- if len(value_comp) == 2 and value_comp[0].isdigit() and value_comp[1].isdigit():
- self.vote_delay = int(value_comp[0])
- self.dist_delay = int(value_comp[1])
- elif validate:
- raise ValueError("A network status document's 'voting-delay' line must be a pair of integer values, but was '%s'" % value)
- elif keyword in ('client-versions', 'server-versions'):
- for entry in value.split(','):
- try:
- version_value = stem.version._get_version(entry)
-
- if keyword == 'client-versions':
- self.client_versions.append(version_value)
- elif keyword == 'server-versions':
- self.server_versions.append(version_value)
- except ValueError:
- if validate:
- raise ValueError("Network status document's '%s' line had '%s', which isn't a parsable tor version: %s" % (keyword, entry, line))
- elif keyword == 'known-flags':
- # "known-flags" FlagList
-
- # simply fetches the entries, excluding empty strings
- self.known_flags = [entry for entry in value.split(' ') if entry]
- elif keyword == 'flag-thresholds':
- # "flag-thresholds" SP THRESHOLDS
-
- value = value.strip()
-
- if value:
- for entry in value.split(' '):
- if '=' not in entry:
- if not validate:
- continue
-
- raise ValueError("Network status document's '%s' line is expected to be space separated key=value mappings, got: %s" % (keyword, line))
-
- entry_key, entry_value = entry.split('=', 1)
-
- try:
- if entry_value.endswith('%'):
- # opting for string manipulation rather than just
- # 'float(entry_value) / 100' because floating point arithmetic
- # will lose precision
-
- self.flag_thresholds[entry_key] = float('0.' + entry_value[:-1].replace('.', '', 1))
- elif '.' in entry_value:
- self.flag_thresholds[entry_key] = float(entry_value)
- else:
- self.flag_thresholds[entry_key] = int(entry_value)
- except ValueError:
- if validate:
- raise ValueError("Network status document's '%s' line is expected to have float values, got: %s" % (keyword, line))
- elif keyword == 'params':
- # "params" [Parameters]
- # Parameter ::= Keyword '=' Int32
- # Int32 ::= A decimal integer between -2147483648 and 2147483647.
- # Parameters ::= Parameter | Parameters SP Parameter
-
- # should only appear in consensus-method 7 or later
-
- if validate and not self.meets_consensus_method(7):
- raise ValueError("A network status document's 'params' line should only appear in consensus-method 7 or later")
-
- # skip if this is a blank line
-
- if value == '':
- continue
-
- self.params.update(_parse_int_mappings(keyword, value, validate))
-
+ self._unrecognized_lines.append(line)
+ except ValueError as exc:
if validate:
- self._check_params_constraints()
- else:
- self._unrecognized_lines.append(line)
+ raise exc
+
+ # default consensus_method and consensus_methods based on if we're a consensus or vote
+
+ if self.is_consensus and not self.consensus_method:
+ self.consensus_method = 1
+ elif self.is_vote and not self.consensus_methods:
+ self.consensus_methods = [1]
def _check_params_constraints(self):
"""
diff --git a/test/unit/descriptor/networkstatus/document_v3.py b/test/unit/descriptor/networkstatus/document_v3.py
index 4b8efc4..e31600f 100644
--- a/test/unit/descriptor/networkstatus/document_v3.py
+++ b/test/unit/descriptor/networkstatus/document_v3.py
@@ -586,19 +586,21 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
self.assertEqual(None, document.consensus_method)
test_values = (
- ('', []),
- (' ', []),
- ('1 2 3 a 5', [1, 2, 3, 5]),
- ('1 2 3 4.0 5', [1, 2, 3, 5]),
- ('2 3 4', [2, 3, 4]), # spec says version one must be included
+ (''),
+ (' '),
+ ('1 2 3 a 5'),
+ ('1 2 3 4.0 5'),
+ ('2 3 4'), # spec says version one must be included
)
- for test_value, expected_consensus_methods in test_values:
+ for test_value in test_values:
content = get_network_status_document_v3({'vote-status': 'vote', 'consensus-methods': test_value}, content = True)
self.assertRaises(ValueError, NetworkStatusDocumentV3, content)
+ expected_value = [2, 3, 4] if test_value == '2 3 4' else [1]
+
document = NetworkStatusDocumentV3(content, False)
- self.assertEqual(expected_consensus_methods, document.consensus_methods)
+ self.assertEqual(expected_value, document.consensus_methods)
def test_consensus_method(self):
"""
@@ -708,21 +710,21 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
self.assertEqual(expected, document.server_versions)
test_values = (
- ('', []),
- (' ', []),
- ('1.2.3.4,', [stem.version.Version('1.2.3.4')]),
- ('1.2.3.4,1.2.3.a', [stem.version.Version('1.2.3.4')]),
+ (''),
+ (' '),
+ ('1.2.3.4,'),
+ ('1.2.3.4,1.2.3.a'),
)
for field in ('client-versions', 'server-versions'):
attr = field.replace('-', '_')
- for test_value, expected_value in test_values:
+ for test_value in test_values:
content = get_network_status_document_v3({field: test_value}, content = True)
self.assertRaises(ValueError, NetworkStatusDocumentV3, content)
document = NetworkStatusDocumentV3(content, False)
- self.assertEqual(expected_value, getattr(document, attr))
+ self.assertEqual([], getattr(document, attr))
def test_known_flags(self):
"""
@@ -872,7 +874,7 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
self.assertRaises(ValueError, NetworkStatusDocumentV3, content)
document = NetworkStatusDocumentV3(content, False, default_params = False)
- self.assertEqual({'unrecognized': -122, 'bwauthpid': 1}, document.params)
+ self.assertEqual({}, document.params)
def test_footer_consensus_method_requirement(self):
"""
commit 65595d3e0d3bb771b40ef3ea99281b6110036c47
Author: Damian Johnson <atagar@torproject.org>
Date: Sun Jan 18 13:34:25 2015 -0800
Moving DocumentFooter parsing to helpers
---
stem/descriptor/networkstatus.py | 72 ++++++++++++++-------
test/unit/descriptor/networkstatus/document_v3.py | 6 +-
2 files changed, 49 insertions(+), 29 deletions(-)
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 910f684..a712eb2 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -871,7 +871,46 @@ class _DocumentHeader(object):
raise ValueError("'%s' value on the params line must be in the range of %i - %i, was %i" % (key, minimum, maximum, value))
+def _parse_directory_footer_line(descriptor, entries):
+ # nothing to parse, simply checking that we don't have a value
+
+ value = _value('directory-footer', entries)
+
+ if value:
+ raise ValueError("A network status document's 'directory-footer' line shouldn't have any content, got 'directory-footer %s'" % value)
+
+
+def _parse_directory_signature_line(descriptor, entries):
+ signatures = []
+
+ for sig_value, block_type, block_contents in entries['directory-signature']:
+ if sig_value.count(' ') not in (1, 2):
+ raise ValueError("Authority signatures in a network status document are expected to be of the form 'directory-signature [METHOD] FINGERPRINT KEY_DIGEST', received: %s" % sig_value)
+
+ if not block_contents or block_type != 'SIGNATURE':
+ raise ValueError("'directory-signature' should be followed by a SIGNATURE block, but was a %s" % block_type)
+
+ if sig_value.count(' ') == 1:
+ method = 'sha1' # default if none was provided
+ fingerprint, key_digest = sig_value.split(' ', 1)
+ else:
+ method, fingerprint, key_digest = sig_value.split(' ', 2)
+
+ signatures.append(DocumentSignature(method, fingerprint, key_digest, block_contents, True))
+
+ descriptor.signatures = signatures
+
+
+_parse_bandwidth_weights_line = lambda descriptor, entries: setattr(descriptor, 'bandwidth_weights', _parse_int_mappings('bandwidth-weights', _value('bandwidth-weights', entries), True))
+
+
class _DocumentFooter(object):
+ PARSER_FOR_LINE = {
+ 'directory-footer': _parse_directory_footer_line,
+ 'bandwidth-weights': _parse_bandwidth_weights_line,
+ 'directory-signature': _parse_directory_signature_line,
+ }
+
def __init__(self, document_file, validate, header):
self.signatures = []
self.bandwidth_weights = {}
@@ -912,31 +951,14 @@ class _DocumentFooter(object):
if not (keyword == 'directory-signature' and header.is_consensus):
raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
- if keyword == 'directory-footer':
- # nothing to parse, simply checking that we don't have a value
-
- if validate and value:
- raise ValueError("A network status document's 'directory-footer' line shouldn't have any content, got '%s'" % line)
- elif keyword == 'bandwidth-weights':
- self.bandwidth_weights = _parse_int_mappings(keyword, value, validate)
- elif keyword == 'directory-signature':
- for sig_value, block_type, block_contents in values:
- if sig_value.count(' ') not in (1, 2):
- if not validate:
- continue
-
- raise ValueError("Authority signatures in a network status document are expected to be of the form 'directory-signature [METHOD] FINGERPRINT KEY_DIGEST', received: %s" % sig_value)
-
- if validate and (not block_contents or block_type != 'SIGNATURE'):
- raise ValueError("'directory-signature' should be followed by a SIGNATURE block: %s" % line)
-
- if sig_value.count(' ') == 1:
- method = 'sha1' # default if none was provided
- fingerprint, key_digest = sig_value.split(' ', 1)
- else:
- method, fingerprint, key_digest = sig_value.split(' ', 2)
-
- self.signatures.append(DocumentSignature(method, fingerprint, key_digest, block_contents, validate))
+ try:
+ if keyword in self.PARSER_FOR_LINE:
+ self.PARSER_FOR_LINE[keyword](self, entries)
+ else:
+ self._unrecognized_lines.append(line)
+ except ValueError as exc:
+ if validate:
+ raise exc
def _check_for_missing_and_disallowed_fields(header, entries, fields):
diff --git a/test/unit/descriptor/networkstatus/document_v3.py b/test/unit/descriptor/networkstatus/document_v3.py
index e31600f..b0bb14a 100644
--- a/test/unit/descriptor/networkstatus/document_v3.py
+++ b/test/unit/descriptor/networkstatus/document_v3.py
@@ -953,7 +953,6 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
)
base_weight_entry = ' '.join(['%s=5' % e for e in BANDWIDTH_WEIGHT_ENTRIES])
- expected = dict([(e, 5) for e in BANDWIDTH_WEIGHT_ENTRIES if e != 'Wbe'])
for test_value in test_values:
weight_entry = base_weight_entry.replace('Wbe=5', test_value)
@@ -961,7 +960,7 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
self.assertRaises(ValueError, NetworkStatusDocumentV3, content)
document = NetworkStatusDocumentV3(content, False)
- self.assertEqual(expected, document.bandwidth_weights)
+ self.assertEqual({}, document.bandwidth_weights)
def test_bandwidth_wights_misordered(self):
"""
@@ -969,13 +968,12 @@ DnN5aFtYKiTc19qIC7Nmo+afPdDEf0MlJvEOP5EWl3w=
"""
weight_entry = ' '.join(['%s=5' % e for e in reversed(BANDWIDTH_WEIGHT_ENTRIES)])
- expected = dict([(e, 5) for e in BANDWIDTH_WEIGHT_ENTRIES])
content = get_network_status_document_v3({'bandwidth-weights': weight_entry}, content = True)
self.assertRaises(ValueError, NetworkStatusDocumentV3, content)
document = NetworkStatusDocumentV3(content, False)
- self.assertEqual(expected, document.bandwidth_weights)
+ self.assertEqual({}, document.bandwidth_weights)
def test_bandwidth_wights_in_vote(self):
"""
[stem/master] Merging DocumentHeader into NetworkStatusDocumentV3
by atagar@torproject.org 25 Jan '15
commit 9ebc08da696a8607908761d2b54a622b24eb4b3f
Author: Damian Johnson <atagar(a)torproject.org>
Date: Thu Jan 22 09:32:59 2015 -0800
Merging DocumentHeader into NetworkStatusDocumentV3
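This merge leans on the Descriptor ATTRIBUTES table, which maps each attribute to a (default value, parser function) pair, plus the __init__.py tweak below that only applies a default when the attribute hasn't already been set (so params can be seeded from DEFAULT_PARAMS before _parse() runs). A rough sketch of that mechanism, with hypothetical attribute names and parsers:

import copy


def _parse_version_line(descriptor, entries):
  descriptor.version = int(entries['version'][0][0])


ATTRIBUTES = {
  'version': (None, _parse_version_line),
  'params': ({}, None),
}


def set_defaults(descriptor):
  # mirrors the __init__.py change below: leave pre-assigned values alone,
  # otherwise install a copy of the attribute's documented default
  for attr, (default, _) in ATTRIBUTES.items():
    if not hasattr(descriptor, attr):
      setattr(descriptor, attr, copy.copy(default))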
---
stem/descriptor/__init__.py | 3 +-
stem/descriptor/networkstatus.py | 505 ++++++++++++++++++--------------------
2 files changed, 240 insertions(+), 268 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 13f0e56..1a7a097 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -451,7 +451,8 @@ class Descriptor(object):
# set defaults
for attr in self.ATTRIBUTES:
- setattr(self, attr, copy.copy(self.ATTRIBUTES[attr][0]))
+ if not hasattr(self, attr):
+ setattr(self, attr, copy.copy(self.ATTRIBUTES[attr][0]))
for keyword, values in list(entries.items()):
try:
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index ae1ab48..2289cdf 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -432,6 +432,153 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
raise ValueError("Network status document (v2) are expected to start with a 'network-status-version' line:\n%s" % str(self))
+def _parse_header_network_status_version_line(descriptor, entries):
+ # "network-status-version" version
+
+ value = _value('network-status-version', entries)
+
+ if ' ' in value:
+ version, flavor = value.split(' ', 1)
+ else:
+ version, flavor = value, None
+
+ if not version.isdigit():
+ raise ValueError('Network status document has a non-numeric version: network-status-version %s' % value)
+
+ descriptor.version = int(version)
+ descriptor.version_flavor = flavor
+ descriptor.is_microdescriptor = flavor == 'microdesc'
+
+ if descriptor.version != 3:
+ raise ValueError("Expected a version 3 network status document, got version '%s' instead" % descriptor.version)
+
+
+def _parse_header_vote_status_line(descriptor, entries):
+ # "vote-status" type
+ #
+ # The consensus-method and consensus-methods fields are optional since
+ # they weren't included in version 1. Setting a default now that we
+ # know if we're a vote or not.
+
+ value = _value('vote-status', entries)
+
+ if value == 'consensus':
+ descriptor.is_consensus, descriptor.is_vote = True, False
+ elif value == 'vote':
+ descriptor.is_consensus, descriptor.is_vote = False, True
+ else:
+ raise ValueError("A network status document's vote-status line can only be 'consensus' or 'vote', got '%s' instead" % value)
+
+
+def _parse_header_consensus_methods_line(descriptor, entries):
+ # "consensus-methods" IntegerList
+
+ if descriptor._lazy_loading and descriptor.is_vote:
+ descriptor.consensus_methods = [1]
+
+ value, consensus_methods = _value('consensus-methods', entries), []
+
+ for entry in value.split(' '):
+ if not entry.isdigit():
+ raise ValueError("A network status document's consensus-methods must be a list of integer values, but was '%s'" % value)
+
+ consensus_methods.append(int(entry))
+
+ descriptor.consensus_methods = consensus_methods
+
+
+def _parse_header_consensus_method_line(descriptor, entries):
+ # "consensus-method" Integer
+
+ if descriptor._lazy_loading and descriptor.is_consensus:
+ descriptor.consensus_method = 1
+
+ value = _value('consensus-method', entries)
+
+ if not value.isdigit():
+ raise ValueError("A network status document's consensus-method must be an integer, but was '%s'" % value)
+
+ descriptor.consensus_method = int(value)
+
+
+def _parse_header_voting_delay_line(descriptor, entries):
+ # "voting-delay" VoteSeconds DistSeconds
+
+ value = _value('voting-delay', entries)
+ value_comp = value.split(' ')
+
+ if len(value_comp) == 2 and value_comp[0].isdigit() and value_comp[1].isdigit():
+ descriptor.vote_delay = int(value_comp[0])
+ descriptor.dist_delay = int(value_comp[1])
+ else:
+ raise ValueError("A network status document's 'voting-delay' line must be a pair of integer values, but was '%s'" % value)
+
+
+def _parse_versions_line(keyword, attribute):
+ def _parse(descriptor, entries):
+ value, entries = _value(keyword, entries), []
+
+ for entry in value.split(','):
+ try:
+ entries.append(stem.version._get_version(entry))
+ except ValueError:
+ raise ValueError("Network status document's '%s' line had '%s', which isn't a parsable tor version: %s %s" % (keyword, entry, keyword, value))
+
+ setattr(descriptor, attribute, entries)
+
+ return _parse
+
+
+def _parse_header_flag_thresholds_line(descriptor, entries):
+ # "flag-thresholds" SP THRESHOLDS
+
+ value, thresholds = _value('flag-thresholds', entries).strip(), {}
+
+ if value:
+ for entry in value.split(' '):
+ if '=' not in entry:
+ raise ValueError("Network status document's 'flag-thresholds' line is expected to be space separated key=value mappings, got: flag-thresholds %s" % value)
+
+ entry_key, entry_value = entry.split('=', 1)
+
+ try:
+ if entry_value.endswith('%'):
+ # opting for string manipulation rather than just
+ # 'float(entry_value) / 100' because floating point arithmetic
+ # will lose precision
+
+ thresholds[entry_key] = float('0.' + entry_value[:-1].replace('.', '', 1))
+ elif '.' in entry_value:
+ thresholds[entry_key] = float(entry_value)
+ else:
+ thresholds[entry_key] = int(entry_value)
+ except ValueError:
+ raise ValueError("Network status document's 'flag-thresholds' line is expected to have float values, got: flag-thresholds %s" % value)
+
+ descriptor.flag_thresholds = thresholds
+
+
+def _parse_header_parameters_line(descriptor, entries):
+ # "params" [Parameters]
+ # Parameter ::= Keyword '=' Int32
+ # Int32 ::= A decimal integer between -2147483648 and 2147483647.
+ # Parameters ::= Parameter | Parameters SP Parameter
+
+ if descriptor._lazy_loading and descriptor._default_params:
+ descriptor.params = dict(DEFAULT_PARAMS)
+
+ value = _value('params', entries)
+
+ # should only appear in consensus-method 7 or later
+
+ if not descriptor.meets_consensus_method(7):
+ raise ValueError("A network status document's 'params' line should only appear in consensus-method 7 or later")
+
+ if value != '':
+ descriptor.params = _parse_int_mappings('params', value, True)
+ descriptor._check_params_constraints()
+
+
def _parse_directory_footer_line(descriptor, entries):
# nothing to parse, simply checking that we don't have a value
@@ -462,7 +609,13 @@ def _parse_footer_directory_signature_line(descriptor, entries):
descriptor.signatures = signatures
-_parse_bandwidth_weights_line = lambda descriptor, entries: setattr(descriptor, 'bandwidth_weights', _parse_int_mappings('bandwidth-weights', _value('bandwidth-weights', entries), True))
+_parse_header_valid_after_line = _parse_timestamp_line('valid-after', 'valid_after')
+_parse_header_fresh_until_line = _parse_timestamp_line('fresh-until', 'fresh_until')
+_parse_header_valid_until_line = _parse_timestamp_line('valid-until', 'valid_until')
+_parse_header_client_versions_line = _parse_versions_line('client-versions', 'client_versions')
+_parse_header_server_versions_line = _parse_versions_line('server-versions', 'server_versions')
+_parse_header_known_flags_line = lambda descriptor, entries: setattr(descriptor, 'known_flags', [entry for entry in _value('known-flags', entries).split(' ') if entry])
+_parse_footer_bandwidth_weights_line = lambda descriptor, entries: setattr(descriptor, 'bandwidth_weights', _parse_int_mappings('bandwidth-weights', _value('bandwidth-weights', entries), True))
class NetworkStatusDocumentV3(NetworkStatusDocument):
@@ -510,13 +663,49 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
"""
ATTRIBUTES = {
+ 'version': (None, _parse_header_network_status_version_line),
+ 'version_flavor': (None, _parse_header_network_status_version_line),
+ 'is_consensus': (True, _parse_header_vote_status_line),
+ 'is_vote': (False, _parse_header_vote_status_line),
+ 'is_microdescriptor': (False, _parse_header_network_status_version_line),
+ 'consensus_methods': ([], _parse_header_consensus_methods_line),
+ 'published': (None, _parse_published_line),
+ 'consensus_method': (None, _parse_header_consensus_method_line),
+ 'valid_after': (None, _parse_header_valid_after_line),
+ 'fresh_until': (None, _parse_header_fresh_until_line),
+ 'valid_until': (None, _parse_header_valid_until_line),
+ 'vote_delay': (None, _parse_header_voting_delay_line),
+ 'dist_delay': (None, _parse_header_voting_delay_line),
+ 'client_versions': ([], _parse_header_client_versions_line),
+ 'server_versions': ([], _parse_header_server_versions_line),
+ 'known_flags': ([], _parse_header_known_flags_line),
+ 'flag_thresholds': ({}, _parse_header_flag_thresholds_line),
+ 'params': ({}, _parse_header_parameters_line),
+
'signatures': ([], _parse_footer_directory_signature_line),
- 'bandwidth_weights': ({}, _parse_bandwidth_weights_line),
+ 'bandwidth_weights': ({}, _parse_footer_bandwidth_weights_line),
+ }
+
+ HEADER_PARSER_FOR_LINE = {
+ 'network-status-version': _parse_header_network_status_version_line,
+ 'vote-status': _parse_header_vote_status_line,
+ 'consensus-methods': _parse_header_consensus_methods_line,
+ 'consensus-method': _parse_header_consensus_method_line,
+ 'published': _parse_published_line,
+ 'valid-after': _parse_header_valid_after_line,
+ 'fresh-until': _parse_header_fresh_until_line,
+ 'valid-until': _parse_header_valid_until_line,
+ 'voting-delay': _parse_header_voting_delay_line,
+ 'client-versions': _parse_header_client_versions_line,
+ 'server-versions': _parse_header_server_versions_line,
+ 'known-flags': _parse_header_known_flags_line,
+ 'flag-thresholds': _parse_header_flag_thresholds_line,
+ 'params': _parse_header_parameters_line,
}
FOOTER_PARSER_FOR_LINE = {
'directory-footer': _parse_directory_footer_line,
- 'bandwidth-weights': _parse_bandwidth_weights_line,
+ 'bandwidth-weights': _parse_footer_bandwidth_weights_line,
'directory-signature': _parse_footer_directory_signature_line,
}
@@ -535,14 +724,8 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
super(NetworkStatusDocumentV3, self).__init__(raw_content, lazy_load = not validate)
document_file = io.BytesIO(raw_content)
- header = _DocumentHeader(document_file, validate, default_params)
-
- # merge header attributes into us
- for attr, value in vars(header).items():
- if attr != '_unrecognized_lines':
- setattr(self, attr, value)
- else:
- self._unrecognized_lines += value
+ self._default_params = default_params
+ self._header(document_file, validate)
self.directory_authorities = tuple(stem.descriptor.router_status_entry._parse_file(
document_file,
@@ -576,6 +759,7 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
def get_unrecognized_lines(self):
if self._lazy_loading:
+ self._parse(self._header_entries, False, parser_for_line = self.HEADER_PARSER_FOR_LINE)
self._parse(self._footer_entries, False, parser_for_line = self.FOOTER_PARSER_FOR_LINE)
self._lazy_loading = False
@@ -605,13 +789,39 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
return method(str(self).strip(), str(other).strip())
- def _footer(self, document_file, validate):
- content = stem.util.str_tools._to_unicode(document_file.read())
+ def _header(self, document_file, validate):
+ content = bytes.join(b'', _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
+ content = stem.util.str_tools._to_unicode(content)
+ entries = _get_descriptor_components(content, validate)
+
+ if validate:
+ # all known header fields can only appear once except
+
+ for keyword, values in list(entries.items()):
+ if len(values) > 1 and keyword in HEADER_FIELDS:
+ raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
+
+ if self._default_params:
+ self.params = dict(DEFAULT_PARAMS)
+
+ self._parse(entries, validate, parser_for_line = self.HEADER_PARSER_FOR_LINE)
- if content:
- entries = _get_descriptor_components(content, validate)
+ _check_for_missing_and_disallowed_fields(self, entries, HEADER_STATUS_DOCUMENT_FIELDS)
+ _check_for_misordered_fields(entries, HEADER_FIELDS)
+
+ # default consensus_method and consensus_methods based on if we're a consensus or vote
+
+ if self.is_consensus and not self.consensus_method:
+ self.consensus_method = 1
+ elif self.is_vote and not self.consensus_methods:
+ self.consensus_methods = [1]
else:
- entries = {}
+ self._header_entries = entries
+ self._entries.update(entries)
+
+ def _footer(self, document_file, validate):
+ content = stem.util.str_tools._to_unicode(document_file.read())
+ entries = _get_descriptor_components(content, validate) if content else {}
if validate:
for keyword, values in list(entries.items()):
@@ -642,257 +852,6 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
self._footer_entries = entries
self._entries.update(entries)
- def __hash__(self):
- return hash(str(self).strip())
-
- def __eq__(self, other):
- return self._compare(other, lambda s, o: s == o)
-
- def __lt__(self, other):
- return self._compare(other, lambda s, o: s < o)
-
- def __le__(self, other):
- return self._compare(other, lambda s, o: s <= o)
-
-
-def _parse_network_status_version_line(descriptor, entries):
- # "network-status-version" version
-
- value = _value('network-status-version', entries)
-
- if ' ' in value:
- version, flavor = value.split(' ', 1)
- else:
- version, flavor = value, None
-
- if not version.isdigit():
- raise ValueError('Network status document has a non-numeric version: network-status-version %s' % value)
-
- descriptor.version = int(version)
- descriptor.version_flavor = flavor
- descriptor.is_microdescriptor = flavor == 'microdesc'
-
- if descriptor.version != 3:
- raise ValueError("Expected a version 3 network status document, got version '%s' instead" % descriptor.version)
-
-
-def _parse_vote_status_line(descriptor, entries):
- # "vote-status" type
- #
- # The consensus-method and consensus-methods fields are optional since
- # they weren't included in version 1. Setting a default now that we
- # know if we're a vote or not.
-
- value = _value('vote-status', entries)
-
- if value == 'consensus':
- descriptor.is_consensus, descriptor.is_vote = True, False
- elif value == 'vote':
- descriptor.is_consensus, descriptor.is_vote = False, True
- else:
- raise ValueError("A network status document's vote-status line can only be 'consensus' or 'vote', got '%s' instead" % value)
-
-
-def _parse_consensus_methods_line(descriptor, entries):
- # "consensus-methods" IntegerList
-
- value, consensus_methods = _value('consensus-methods', entries), []
-
- for entry in value.split(' '):
- if not entry.isdigit():
- raise ValueError("A network status document's consensus-methods must be a list of integer values, but was '%s'" % value)
-
- consensus_methods.append(int(entry))
-
- descriptor.consensus_methods = consensus_methods
-
-
-def _parse_consensus_method_line(descriptor, entries):
- # "consensus-method" Integer
-
- value = _value('consensus-method', entries)
-
- if not value.isdigit():
- raise ValueError("A network status document's consensus-method must be an integer, but was '%s'" % value)
-
- descriptor.consensus_method = int(value)
-
-
-def _parse_voting_delay_line(descriptor, entries):
- # "voting-delay" VoteSeconds DistSeconds
-
- value = _value('voting-delay', entries)
- value_comp = value.split(' ')
-
- if len(value_comp) == 2 and value_comp[0].isdigit() and value_comp[1].isdigit():
- descriptor.vote_delay = int(value_comp[0])
- descriptor.dist_delay = int(value_comp[1])
- else:
- raise ValueError("A network status document's 'voting-delay' line must be a pair of integer values, but was '%s'" % value)
-
-
-def _parse_versions_line(keyword, attribute):
- def _parse(descriptor, entries):
- value, entries = _value(keyword, entries), []
-
- for entry in value.split(','):
- try:
- entries.append(stem.version._get_version(entry))
- except ValueError:
- raise ValueError("Network status document's '%s' line had '%s', which isn't a parsable tor version: %s %s" % (keyword, entry, keyword, value))
-
- setattr(descriptor, attribute, entries)
-
- return _parse
-
-
-def _parse_flag_thresholds_line(descriptor, entries):
- # "flag-thresholds" SP THRESHOLDS
-
- value, thresholds = _value('flag-thresholds', entries).strip(), {}
-
- if value:
- for entry in value.split(' '):
- if '=' not in entry:
- raise ValueError("Network status document's 'flag-thresholds' line is expected to be space separated key=value mappings, got: flag-thresholds %s" % value)
-
- entry_key, entry_value = entry.split('=', 1)
-
- try:
- if entry_value.endswith('%'):
- # opting for string manipulation rather than just
- # 'float(entry_value) / 100' because floating point arithmetic
- # will lose precision
-
- thresholds[entry_key] = float('0.' + entry_value[:-1].replace('.', '', 1))
- elif '.' in entry_value:
- thresholds[entry_key] = float(entry_value)
- else:
- thresholds[entry_key] = int(entry_value)
- except ValueError:
- raise ValueError("Network status document's 'flag-thresholds' line is expected to have float values, got: flag-thresholds %s" % value)
-
- descriptor.flag_thresholds = thresholds
-
-
-def _parse_parameters_line(descriptor, entries):
- # "params" [Parameters]
- # Parameter ::= Keyword '=' Int32
- # Int32 ::= A decimal integer between -2147483648 and 2147483647.
- # Parameters ::= Parameter | Parameters SP Parameter
-
- value = _value('params', entries)
-
- # should only appear in consensus-method 7 or later
-
- if not descriptor.meets_consensus_method(7):
- raise ValueError("A network status document's 'params' line should only appear in consensus-method 7 or later")
-
- # skip if this is a blank line
-
- params = dict(DEFAULT_PARAMS) if descriptor._default_params else {}
-
- if value != '':
- params.update(_parse_int_mappings('params', value, True))
- descriptor.params = params
- descriptor._check_params_constraints()
-
-
-_parse_valid_after_line = _parse_timestamp_line('valid-after', 'valid_after')
-_parse_fresh_until_line = _parse_timestamp_line('fresh-until', 'fresh_until')
-_parse_valid_until_line = _parse_timestamp_line('valid-until', 'valid_until')
-_parse_client_versions_line = _parse_versions_line('client-versions', 'client_versions')
-_parse_server_versions_line = _parse_versions_line('server-versions', 'server_versions')
-_parse_known_flags_line = lambda descriptor, entries: setattr(descriptor, 'known_flags', [entry for entry in _value('known-flags', entries).split(' ') if entry])
-
-
-class _DocumentHeader(object):
- PARSER_FOR_LINE = {
- 'network-status-version': _parse_network_status_version_line,
- 'vote-status': _parse_vote_status_line,
- 'consensus-methods': _parse_consensus_methods_line,
- 'consensus-method': _parse_consensus_method_line,
- 'published': _parse_published_line,
- 'valid-after': _parse_valid_after_line,
- 'fresh-until': _parse_fresh_until_line,
- 'valid-until': _parse_valid_until_line,
- 'voting-delay': _parse_voting_delay_line,
- 'client-versions': _parse_client_versions_line,
- 'server-versions': _parse_server_versions_line,
- 'known-flags': _parse_known_flags_line,
- 'flag-thresholds': _parse_flag_thresholds_line,
- 'params': _parse_parameters_line,
- }
-
- def __init__(self, document_file, validate, default_params):
- self.version = None
- self.version_flavor = None
- self.is_consensus = True
- self.is_vote = False
- self.is_microdescriptor = False
- self.consensus_methods = []
- self.published = None
- self.consensus_method = None
- self.valid_after = None
- self.fresh_until = None
- self.valid_until = None
- self.vote_delay = None
- self.dist_delay = None
- self.client_versions = []
- self.server_versions = []
- self.known_flags = []
- self.flag_thresholds = {}
- self.params = dict(DEFAULT_PARAMS) if default_params else {}
-
- self._default_params = default_params
-
- self._unrecognized_lines = []
-
- content = bytes.join(b'', _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
- content = stem.util.str_tools._to_unicode(content)
- entries = _get_descriptor_components(content, validate)
- self._parse(entries, validate)
-
- # doing this validation afterward so we know our 'is_consensus' and
- # 'is_vote' attributes
-
- if validate:
- _check_for_missing_and_disallowed_fields(self, entries, HEADER_STATUS_DOCUMENT_FIELDS)
- _check_for_misordered_fields(entries, HEADER_FIELDS)
-
- def meets_consensus_method(self, method):
- if self.consensus_method is not None:
- return self.consensus_method >= method
- elif self.consensus_methods is not None:
- return bool([x for x in self.consensus_methods if x >= method])
- else:
- return False # malformed document
-
- def _parse(self, entries, validate):
- for keyword, values in list(entries.items()):
- value, _, _ = values[0]
- line = '%s %s' % (keyword, value)
-
- # all known header fields can only appear once except
- if validate and len(values) > 1 and keyword in HEADER_FIELDS:
- raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
-
- try:
- if keyword in self.PARSER_FOR_LINE:
- self.PARSER_FOR_LINE[keyword](self, entries)
- else:
- self._unrecognized_lines.append(line)
- except ValueError as exc:
- if validate:
- raise exc
-
- # default consensus_method and consensus_methods based on if we're a consensus or vote
-
- if self.is_consensus and not self.consensus_method:
- self.consensus_method = 1
- elif self.is_vote and not self.consensus_methods:
- self.consensus_methods = [1]
-
def _check_params_constraints(self):
"""
Checks that the params we know about are within their documented ranges.
@@ -956,6 +915,18 @@ class _DocumentHeader(object):
if value < minimum or value > maximum:
raise ValueError("'%s' value on the params line must be in the range of %i - %i, was %i" % (key, minimum, maximum, value))
+ def __hash__(self):
+ return hash(str(self).strip())
+
+ def __eq__(self, other):
+ return self._compare(other, lambda s, o: s == o)
+
+ def __lt__(self, other):
+ return self._compare(other, lambda s, o: s < o)
+
+ def __le__(self, other):
+ return self._compare(other, lambda s, o: s <= o)
+
def _check_for_missing_and_disallowed_fields(document, entries, fields):
"""
commit 13d71d0bfc93ef771419ddeec65a24ea99680867
Author: Damian Johnson <atagar(a)torproject.org>
Date: Mon Jan 19 08:32:05 2015 -0800
Limit header and footer's scope
Limiting their scope to locals so we can merge them into the document itself.
---
stem/descriptor/networkstatus.py | 41 +++++++++++++++++++++-----------------
1 file changed, 23 insertions(+), 18 deletions(-)
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index a712eb2..5530049 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -491,10 +491,10 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
super(NetworkStatusDocumentV3, self).__init__(raw_content)
document_file = io.BytesIO(raw_content)
- self._header = _DocumentHeader(document_file, validate, default_params)
+ header = _DocumentHeader(document_file, validate, default_params)
# merge header attributes into us
- for attr, value in vars(self._header).items():
+ for attr, value in vars(header).items():
if attr != '_unrecognized_lines':
setattr(self, attr, value)
else:
@@ -506,13 +506,13 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
entry_class = DirectoryAuthority,
entry_keyword = AUTH_START,
section_end_keywords = (ROUTERS_START, FOOTER_START, V2_FOOTER_START),
- extra_args = (self._header.is_vote,),
+ extra_args = (self.is_vote,),
))
- if validate and self._header.is_vote and len(self.directory_authorities) != 1:
+ if validate and self.is_vote and len(self.directory_authorities) != 1:
raise ValueError('Votes should only have an authority entry for the one that issued it, got %i: %s' % (len(self.directory_authorities), self.directory_authorities))
- if not self._header.is_microdescriptor:
+ if not self.is_microdescriptor:
router_type = stem.descriptor.router_status_entry.RouterStatusEntryV3
else:
router_type = stem.descriptor.router_status_entry.RouterStatusEntryMicroV3
@@ -528,10 +528,10 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
self.routers = dict((desc.fingerprint, desc) for desc in router_iter)
- self._footer = _DocumentFooter(document_file, validate, self._header)
+ footer = _DocumentFooter(document_file, validate, self)
# merge header attributes into us
- for attr, value in vars(self._footer).items():
+ for attr, value in vars(footer).items():
if attr != '_unrecognized_lines':
setattr(self, attr, value)
else:
@@ -548,7 +548,12 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
:returns: **True** if we meet the given consensus-method, and **False** otherwise
"""
- return self._header.meets_consensus_method(method)
+ if self.consensus_method is not None:
+ return self.consensus_method >= method
+ elif self.consensus_methods is not None:
+ return bool([x for x in self.consensus_methods if x >= method])
+ else:
+ return False # malformed document
def _compare(self, other, method):
if not isinstance(other, NetworkStatusDocumentV3):
@@ -911,7 +916,7 @@ class _DocumentFooter(object):
'directory-signature': _parse_directory_signature_line,
}
- def __init__(self, document_file, validate, header):
+ def __init__(self, document_file, validate, document):
self.signatures = []
self.bandwidth_weights = {}
self._unrecognized_lines = []
@@ -922,24 +927,24 @@ class _DocumentFooter(object):
return # footer is optional and there's nothing to parse
entries = _get_descriptor_components(content, validate)
- self._parse(entries, validate, header)
+ self._parse(entries, validate, document)
if validate:
# Check that the footer has the right initial line. Prior to consensus
# method 9 it's a 'directory-signature' and after that footers start with
# 'directory-footer'.
- if header.meets_consensus_method(9):
+ if document.meets_consensus_method(9):
if list(entries.keys())[0] != 'directory-footer':
raise ValueError("Network status document's footer should start with a 'directory-footer' line in consensus-method 9 or later")
else:
if list(entries.keys())[0] != 'directory-signature':
raise ValueError("Network status document's footer should start with a 'directory-signature' line prior to consensus-method 9")
- _check_for_missing_and_disallowed_fields(header, entries, FOOTER_STATUS_DOCUMENT_FIELDS)
+ _check_for_missing_and_disallowed_fields(document, entries, FOOTER_STATUS_DOCUMENT_FIELDS)
_check_for_misordered_fields(entries, FOOTER_FIELDS)
- def _parse(self, entries, validate, header):
+ def _parse(self, entries, validate, document):
for keyword, values in list(entries.items()):
value, block_type, block_contents = values[0]
line = '%s %s' % (keyword, value)
@@ -948,7 +953,7 @@ class _DocumentFooter(object):
# * 'directory-signature' in a consensus
if validate and len(values) > 1 and keyword in FOOTER_FIELDS:
- if not (keyword == 'directory-signature' and header.is_consensus):
+ if not (keyword == 'directory-signature' and document.is_consensus):
raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
try:
@@ -961,13 +966,13 @@ class _DocumentFooter(object):
raise exc
-def _check_for_missing_and_disallowed_fields(header, entries, fields):
+def _check_for_missing_and_disallowed_fields(document, entries, fields):
"""
Checks that we have mandatory fields for our type, and that we don't have
any fields exclusive to the other (ie, no vote-only fields appear in a
consensus or vice versa).
- :param _DocumentHeader header: document header
+ :param NetworkStatusDocumentV3 document: network status document
:param dict entries: ordered keyword/value mappings of the header or footer
:param list fields: expected field attributes (either
**HEADER_STATUS_DOCUMENT_FIELDS** or **FOOTER_STATUS_DOCUMENT_FIELDS**)
@@ -978,11 +983,11 @@ def _check_for_missing_and_disallowed_fields(header, entries, fields):
missing_fields, disallowed_fields = [], []
for field, in_votes, in_consensus, mandatory in fields:
- if mandatory and ((header.is_consensus and in_consensus) or (header.is_vote and in_votes)):
+ if mandatory and ((document.is_consensus and in_consensus) or (document.is_vote and in_votes)):
# mandatory field, check that we have it
if field not in entries.keys():
missing_fields.append(field)
- elif (header.is_consensus and not in_consensus) or (header.is_vote and not in_votes):
+ elif (document.is_consensus and not in_consensus) or (document.is_vote and not in_votes):
# field we shouldn't have, check that we don't
if field in entries.keys():
disallowed_fields.append(field)
[stem/master] Merging DocumentFooter into NetworkStatusDocumentV3
by atagar@torproject.org 25 Jan '15
commit ecb6bbda8008e03b54b6127b14b132c9b14f58c1
Author: Damian Johnson <atagar(a)torproject.org>
Date: Mon Jan 19 09:42:18 2015 -0800
Merging DocumentFooter into NetworkStatusDocumentV3
For lazy loading to work we need this class to behave more like other
descriptor types. Starting by merging the footer rather than the header since
it's simpler.
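The goal is the lazy-load behaviour the other descriptor types already have: when validate is False, the footer's entries are stashed unparsed and only applied once something actually needs them (here via get_unrecognized_lines()). A simplified sketch of that flow, not stem's actual class:

class LazyFooterSketch(object):
  def __init__(self, footer_entries, validate):
    self._lazy_loading = not validate
    self._footer_entries = footer_entries
    self._unrecognized_lines = []
    self.signatures = []

    if validate:
      self._apply(footer_entries)

  def get_unrecognized_lines(self):
    if self._lazy_loading:
      self._apply(self._footer_entries)  # deferred until first needed
      self._lazy_loading = False

    return list(self._unrecognized_lines)

  def _apply(self, entries):
    for keyword, value in entries.items():
      if keyword == 'directory-signature':
        self.signatures.append(value)
      else:
        self._unrecognized_lines.append('%s %s' % (keyword, value))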
---
stem/descriptor/__init__.py | 11 ++-
stem/descriptor/networkstatus.py | 187 ++++++++++++++++++--------------------
2 files changed, 97 insertions(+), 101 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 9a3412d..13f0e56 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -382,6 +382,7 @@ class Descriptor(object):
self._archive_path = None
self._raw_contents = contents
self._lazy_loading = lazy_load
+ self._entries = {}
self._unrecognized_lines = []
def get_path(self):
@@ -432,17 +433,21 @@ class Descriptor(object):
return list(self._unrecognized_lines)
- def _parse(self, entries, validate):
+ def _parse(self, entries, validate, parser_for_line = None):
"""
Parses a series of 'keyword => (value, pgp block)' mappings and applies
them as attributes.
:param dict entries: descriptor contents to be applied
:param bool validate: checks the validity of descriptor content if True
+ :param dict parsers: mapping of lines to the function for parsing it
:raises: **ValueError** if an error occurs in validation
"""
+ if parser_for_line is None:
+ parser_for_line = self.PARSER_FOR_LINE
+
# set defaults
for attr in self.ATTRIBUTES:
@@ -450,8 +455,8 @@ class Descriptor(object):
for keyword, values in list(entries.items()):
try:
- if keyword in self.PARSER_FOR_LINE:
- self.PARSER_FOR_LINE[keyword](self, entries)
+ if keyword in parser_for_line:
+ parser_for_line[keyword](self, entries)
else:
for value, block_type, block_contents in values:
line = '%s %s' % (keyword, value)
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 5530049..ae1ab48 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -432,6 +432,39 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
raise ValueError("Network status document (v2) are expected to start with a 'network-status-version' line:\n%s" % str(self))
+def _parse_directory_footer_line(descriptor, entries):
+ # nothing to parse, simply checking that we don't have a value
+
+ value = _value('directory-footer', entries)
+
+ if value:
+ raise ValueError("A network status document's 'directory-footer' line shouldn't have any content, got 'directory-footer %s'" % value)
+
+
+def _parse_footer_directory_signature_line(descriptor, entries):
+ signatures = []
+
+ for sig_value, block_type, block_contents in entries['directory-signature']:
+ if sig_value.count(' ') not in (1, 2):
+ raise ValueError("Authority signatures in a network status document are expected to be of the form 'directory-signature [METHOD] FINGERPRINT KEY_DIGEST', received: %s" % sig_value)
+
+ if not block_contents or block_type != 'SIGNATURE':
+ raise ValueError("'directory-signature' should be followed by a SIGNATURE block, but was a %s" % block_type)
+
+ if sig_value.count(' ') == 1:
+ method = 'sha1' # default if none was provided
+ fingerprint, key_digest = sig_value.split(' ', 1)
+ else:
+ method, fingerprint, key_digest = sig_value.split(' ', 2)
+
+ signatures.append(DocumentSignature(method, fingerprint, key_digest, block_contents, True))
+
+ descriptor.signatures = signatures
+
+
+_parse_bandwidth_weights_line = lambda descriptor, entries: setattr(descriptor, 'bandwidth_weights', _parse_int_mappings('bandwidth-weights', _value('bandwidth-weights', entries), True))
+
+
class NetworkStatusDocumentV3(NetworkStatusDocument):
"""
Version 3 network status document. This could be either a vote or consensus.
@@ -476,6 +509,17 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
a default value, others are left as None if undefined
"""
+ ATTRIBUTES = {
+ 'signatures': ([], _parse_footer_directory_signature_line),
+ 'bandwidth_weights': ({}, _parse_bandwidth_weights_line),
+ }
+
+ FOOTER_PARSER_FOR_LINE = {
+ 'directory-footer': _parse_directory_footer_line,
+ 'bandwidth-weights': _parse_bandwidth_weights_line,
+ 'directory-signature': _parse_footer_directory_signature_line,
+ }
+
def __init__(self, raw_content, validate = True, default_params = True):
"""
Parse a v3 network status document.
@@ -488,7 +532,7 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
:raises: **ValueError** if the document is invalid
"""
- super(NetworkStatusDocumentV3, self).__init__(raw_content)
+ super(NetworkStatusDocumentV3, self).__init__(raw_content, lazy_load = not validate)
document_file = io.BytesIO(raw_content)
header = _DocumentHeader(document_file, validate, default_params)
@@ -528,14 +572,14 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
self.routers = dict((desc.fingerprint, desc) for desc in router_iter)
- footer = _DocumentFooter(document_file, validate, self)
+ self._footer(document_file, validate)
- # merge header attributes into us
- for attr, value in vars(footer).items():
- if attr != '_unrecognized_lines':
- setattr(self, attr, value)
- else:
- self._unrecognized_lines += value
+ def get_unrecognized_lines(self):
+ if self._lazy_loading:
+ self._parse(self._footer_entries, False, parser_for_line = self.FOOTER_PARSER_FOR_LINE)
+ self._lazy_loading = False
+
+ return super(NetworkStatusDocumentV3, self).get_unrecognized_lines()
def meets_consensus_method(self, method):
"""
@@ -561,6 +605,43 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
return method(str(self).strip(), str(other).strip())
+ def _footer(self, document_file, validate):
+ content = stem.util.str_tools._to_unicode(document_file.read())
+
+ if content:
+ entries = _get_descriptor_components(content, validate)
+ else:
+ entries = {}
+
+ if validate:
+ for keyword, values in list(entries.items()):
+ # all known footer fields can only appear once except...
+ # * 'directory-signature' in a consensus
+
+ if len(values) > 1 and keyword in FOOTER_FIELDS:
+ if not (keyword == 'directory-signature' and self.is_consensus):
+ raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
+
+ self._parse(entries, validate, parser_for_line = self.FOOTER_PARSER_FOR_LINE)
+
+ # Check that the footer has the right initial line. Prior to consensus
+ # method 9 it's a 'directory-signature' and after that footers start with
+ # 'directory-footer'.
+
+ if entries:
+ if self.meets_consensus_method(9):
+ if list(entries.keys())[0] != 'directory-footer':
+ raise ValueError("Network status document's footer should start with a 'directory-footer' line in consensus-method 9 or later")
+ else:
+ if list(entries.keys())[0] != 'directory-signature':
+ raise ValueError("Network status document's footer should start with a 'directory-signature' line prior to consensus-method 9")
+
+ _check_for_missing_and_disallowed_fields(self, entries, FOOTER_STATUS_DOCUMENT_FIELDS)
+ _check_for_misordered_fields(entries, FOOTER_FIELDS)
+ else:
+ self._footer_entries = entries
+ self._entries.update(entries)
+
def __hash__(self):
return hash(str(self).strip())
@@ -876,96 +957,6 @@ class _DocumentHeader(object):
raise ValueError("'%s' value on the params line must be in the range of %i - %i, was %i" % (key, minimum, maximum, value))
-def _parse_directory_footer_line(descriptor, entries):
- # nothing to parse, simply checking that we don't have a value
-
- value = _value('directory-footer', entries)
-
- if value:
- raise ValueError("A network status document's 'directory-footer' line shouldn't have any content, got 'directory-footer %s'" % value)
-
-
-def _parse_directory_signature_line(descriptor, entries):
- signatures = []
-
- for sig_value, block_type, block_contents in entries['directory-signature']:
- if sig_value.count(' ') not in (1, 2):
- raise ValueError("Authority signatures in a network status document are expected to be of the form 'directory-signature [METHOD] FINGERPRINT KEY_DIGEST', received: %s" % sig_value)
-
- if not block_contents or block_type != 'SIGNATURE':
- raise ValueError("'directory-signature' should be followed by a SIGNATURE block, but was a %s" % block_type)
-
- if sig_value.count(' ') == 1:
- method = 'sha1' # default if none was provided
- fingerprint, key_digest = sig_value.split(' ', 1)
- else:
- method, fingerprint, key_digest = sig_value.split(' ', 2)
-
- signatures.append(DocumentSignature(method, fingerprint, key_digest, block_contents, True))
-
- descriptor.signatures = signatures
-
-
-_parse_bandwidth_weights_line = lambda descriptor, entries: setattr(descriptor, 'bandwidth_weights', _parse_int_mappings('bandwidth-weights', _value('bandwidth-weights', entries), True))
-
-
-class _DocumentFooter(object):
- PARSER_FOR_LINE = {
- 'directory-footer': _parse_directory_footer_line,
- 'bandwidth-weights': _parse_bandwidth_weights_line,
- 'directory-signature': _parse_directory_signature_line,
- }
-
- def __init__(self, document_file, validate, document):
- self.signatures = []
- self.bandwidth_weights = {}
- self._unrecognized_lines = []
-
- content = stem.util.str_tools._to_unicode(document_file.read())
-
- if not content:
- return # footer is optional and there's nothing to parse
-
- entries = _get_descriptor_components(content, validate)
- self._parse(entries, validate, document)
-
- if validate:
- # Check that the footer has the right initial line. Prior to consensus
- # method 9 it's a 'directory-signature' and after that footers start with
- # 'directory-footer'.
-
- if document.meets_consensus_method(9):
- if list(entries.keys())[0] != 'directory-footer':
- raise ValueError("Network status document's footer should start with a 'directory-footer' line in consensus-method 9 or later")
- else:
- if list(entries.keys())[0] != 'directory-signature':
- raise ValueError("Network status document's footer should start with a 'directory-signature' line prior to consensus-method 9")
-
- _check_for_missing_and_disallowed_fields(document, entries, FOOTER_STATUS_DOCUMENT_FIELDS)
- _check_for_misordered_fields(entries, FOOTER_FIELDS)
-
- def _parse(self, entries, validate, document):
- for keyword, values in list(entries.items()):
- value, block_type, block_contents = values[0]
- line = '%s %s' % (keyword, value)
-
- # all known footer fields can only appear once except...
- # * 'directory-signature' in a consensus
-
- if validate and len(values) > 1 and keyword in FOOTER_FIELDS:
- if not (keyword == 'directory-signature' and document.is_consensus):
- raise ValueError("Network status documents can only have a single '%s' line, got %i" % (keyword, len(values)))
-
- try:
- if keyword in self.PARSER_FOR_LINE:
- self.PARSER_FOR_LINE[keyword](self, entries)
- else:
- self._unrecognized_lines.append(line)
- except ValueError as exc:
- if validate:
- raise exc
-
-
def _check_for_missing_and_disallowed_fields(document, entries, fields):
"""
Checks that we have mandatory fields for our type, and that we don't have
[stem/master] Do unicode conversion in _get_descriptor_components()
by atagar@torproject.org 25 Jan '15
commit e2a55771cfc85882b0e00a071c3a8dd6ea126fd3
Author: Damian Johnson <atagar(a)torproject.org>
Date: Sun Jan 25 11:15:44 2015 -0800
Do unicode conversion in _get_descriptor_components()
Every descriptor type does the bytes => unicode conversion before calling this helper. Might as
well just do the conversion in the helper itself.
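A sketch of the helper-side normalization; the decode() call here stands in for stem.util.str_tools._to_unicode():

def _get_descriptor_components_sketch(raw_contents):
  # accept either bytes or unicode so callers no longer convert beforehand
  if isinstance(raw_contents, bytes):
    raw_contents = raw_contents.decode('utf-8', 'replace')

  return raw_contents.splitlines()

Callers such as KeyCertificate can then pass raw_content straight through, whether it arrived as bytes or unicode.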
---
stem/descriptor/__init__.py | 3 +++
stem/descriptor/extrainfo_descriptor.py | 2 --
stem/descriptor/microdescriptor.py | 1 -
stem/descriptor/networkstatus.py | 21 +++++++--------------
stem/descriptor/router_status_entry.py | 2 --
stem/descriptor/server_descriptor.py | 3 +--
6 files changed, 11 insertions(+), 21 deletions(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index a5eb87d..0d2295d 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -649,6 +649,9 @@ def _get_descriptor_components(raw_contents, validate, extra_keywords = ()):
value tuple, the second being a list of those entries.
"""
+ if isinstance(raw_contents, bytes):
+ raw_contents = stem.util.str_tools._to_unicode(raw_contents)
+
entries = OrderedDict()
extra_entries = [] # entries with a keyword in extra_keywords
remaining_lines = raw_contents.split('\n')
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index 1eebe46..5b0339c 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -781,8 +781,6 @@ class ExtraInfoDescriptor(Descriptor):
"""
super(ExtraInfoDescriptor, self).__init__(raw_contents, lazy_load = not validate)
- raw_contents = stem.util.str_tools._to_unicode(raw_contents)
-
entries = _get_descriptor_components(raw_contents, validate)
if validate:
diff --git a/stem/descriptor/microdescriptor.py b/stem/descriptor/microdescriptor.py
index 3d9447f..5b8d0a3 100644
--- a/stem/descriptor/microdescriptor.py
+++ b/stem/descriptor/microdescriptor.py
@@ -220,7 +220,6 @@ class Microdescriptor(Descriptor):
def __init__(self, raw_contents, validate = True, annotations = None):
super(Microdescriptor, self).__init__(raw_contents, lazy_load = not validate)
- raw_contents = stem.util.str_tools._to_unicode(raw_contents)
self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()
self._annotation_lines = annotations if annotations else []
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index 2289cdf..a8ec38a 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -399,10 +399,7 @@ class NetworkStatusDocumentV2(NetworkStatusDocument):
self.routers = dict((desc.fingerprint, desc) for desc in router_iter)
- document_content += b'\n' + document_file.read()
- document_content = stem.util.str_tools._to_unicode(document_content)
-
- entries = _get_descriptor_components(document_content, validate)
+ entries = _get_descriptor_components(document_content + b'\n' + document_file.read(), validate)
if validate:
self._check_constraints(entries)
@@ -791,7 +788,6 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
def _header(self, document_file, validate):
content = bytes.join(b'', _read_until_keywords((AUTH_START, ROUTERS_START, FOOTER_START), document_file))
- content = stem.util.str_tools._to_unicode(content)
entries = _get_descriptor_components(content, validate)
if validate:
@@ -820,8 +816,7 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
self._entries.update(entries)
def _footer(self, document_file, validate):
- content = stem.util.str_tools._to_unicode(document_file.read())
- entries = _get_descriptor_components(content, validate) if content else {}
+ entries = _get_descriptor_components(document_file.read(), validate)
if validate:
for keyword, values in list(entries.items()):
@@ -1282,26 +1277,24 @@ class KeyCertificate(Descriptor):
def __init__(self, raw_content, validate = True):
super(KeyCertificate, self).__init__(raw_content, lazy_load = not validate)
-
- content = stem.util.str_tools._to_unicode(raw_content)
- entries = _get_descriptor_components(content, validate)
+ entries = _get_descriptor_components(raw_content, validate)
if validate:
if 'dir-key-certificate-version' != list(entries.keys())[0]:
- raise ValueError("Key certificates must start with a 'dir-key-certificate-version' line:\n%s" % (content))
+ raise ValueError("Key certificates must start with a 'dir-key-certificate-version' line:\n%s" % (raw_content))
elif 'dir-key-certification' != list(entries.keys())[-1]:
- raise ValueError("Key certificates must end with a 'dir-key-certification' line:\n%s" % (content))
+ raise ValueError("Key certificates must end with a 'dir-key-certification' line:\n%s" % (raw_content))
# check that we have mandatory fields and that our known fields only
# appear once
for keyword, is_mandatory in KEY_CERTIFICATE_PARAMS:
if is_mandatory and keyword not in entries:
- raise ValueError("Key certificates must have a '%s' line:\n%s" % (keyword, content))
+ raise ValueError("Key certificates must have a '%s' line:\n%s" % (keyword, raw_content))
entry_count = len(entries.get(keyword, []))
if entry_count > 1:
- raise ValueError("Key certificates can only have a single '%s' line, got %i:\n%s" % (keyword, entry_count, content))
+ raise ValueError("Key certificates can only have a single '%s' line, got %i:\n%s" % (keyword, entry_count, raw_content))
self._parse(entries, validate)
else:
diff --git a/stem/descriptor/router_status_entry.py b/stem/descriptor/router_status_entry.py
index f80c56b..292ec7e 100644
--- a/stem/descriptor/router_status_entry.py
+++ b/stem/descriptor/router_status_entry.py
@@ -399,8 +399,6 @@ class RouterStatusEntry(Descriptor):
"""
super(RouterStatusEntry, self).__init__(content, lazy_load = not validate)
- content = stem.util.str_tools._to_unicode(content)
-
self.document = document
entries = _get_descriptor_components(content, validate)
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 5786ab9..b107b33 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -530,8 +530,7 @@ class ServerDescriptor(Descriptor):
# influences the resulting exit policy, but for everything else the order
# does not matter so breaking it into key / value pairs.
- raw_contents = stem.util.str_tools._to_unicode(raw_contents)
- entries, self._unparsed_exit_policy = _get_descriptor_components(raw_contents, validate, ('accept', 'reject'))
+ entries, self._unparsed_exit_policy = _get_descriptor_components(stem.util.str_tools._to_unicode(raw_contents), validate, ('accept', 'reject'))
if validate:
self._parse(entries, validate)