commit 91103d1a7d94bdde7edfa537ee62554891bbc54f Author: Damian Johnson atagar@torproject.org Date: Wed Mar 25 09:27:17 2015 -0700
Read multiple descriptors from files with bridge descriptors
Usually descriptors have a keyword that indicates the end of the descriptor. Sanitized bridge descriptors, however, omit the keyword we check for so they have no defined ending. This caused our parser to treat the whole file as one big bridge descriptor.
To fix this, we're now splitting these up by their @type annotation. This is a bit of a hack, but in practice metrics is the only place that provides sanitized bridge descriptors, so this should be fine. --- docs/change_log.rst | 1 + stem/descriptor/__init__.py | 2 +- stem/descriptor/extrainfo_descriptor.py | 15 +++-- stem/descriptor/server_descriptor.py | 16 +++-- .../data/extrainfo_bridge_descriptor_multiple | 62 ++++++++++++++++++++ test/unit/descriptor/extrainfo_descriptor.py | 16 +++++ 6 files changed, 103 insertions(+), 9 deletions(-)
diff --git a/docs/change_log.rst b/docs/change_log.rst index d6bbec1..6a6c23c 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -60,6 +60,7 @@ conversion (:trac:`14075`).
* Lazy-loading descriptors, improving performance by 25-70% depending on what type it is (:trac:`14011`) * Added `support for hidden service descriptors <api/descriptor/hidden_service_descriptor.html>`_ (:trac:`15004`) + * When reading sanitised bridge descriptors (server or extrainfo), :func:`~stem.descriptor.__init__.parse_file` treated the whole file as a single descriptor * The :class:`~stem.descriptor.networkstatus.DirectoryAuthority` 'fingerprint' attribute was actually its 'v3ident' * Added consensus' new package attribute (:spec:`ab64534`) * Added extra info' new hs_stats_end, hs_rend_cells, hs_rend_cells_attr, hs_dir_onions_seen, and hs_dir_onions_seen_attr attributes (:spec:`ddb630d`) diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index 8c0df9d..1ebe578 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -651,7 +651,7 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi if ignore_first: first_line = descriptor_file.readline()
- if first_line is not None: + if first_line: content_append(first_line)
keyword_match = re.compile(SPECIFIC_KEYWORD_LINE % '|'.join(keywords)) diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py index e255320..716340e 100644 --- a/stem/descriptor/extrainfo_descriptor.py +++ b/stem/descriptor/extrainfo_descriptor.py @@ -178,11 +178,18 @@ def _parse_file(descriptor_file, is_bridge = False, validate = False, **kwargs): """
while True: - extrainfo_content = _read_until_keywords('router-signature', descriptor_file) + if not is_bridge: + extrainfo_content = _read_until_keywords('router-signature', descriptor_file)
- # we've reached the 'router-signature', now include the pgp style block - block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0] - extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True) + # we've reached the 'router-signature', now include the pgp style block + + block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0] + extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True) + else: + # bridge descriptors lack a well defined ending, so checking for a @type + # annotation + + extrainfo_content = _read_until_keywords('@type', descriptor_file, ignore_first = True)
if extrainfo_content: if extrainfo_content[0].startswith(b'@type'): diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py index 54ee645..80245ff 100644 --- a/stem/descriptor/server_descriptor.py +++ b/stem/descriptor/server_descriptor.py @@ -144,11 +144,19 @@ def _parse_file(descriptor_file, is_bridge = False, validate = False, **kwargs):
while True: annotations = _read_until_keywords('router', descriptor_file) - descriptor_content = _read_until_keywords('router-signature', descriptor_file)
- # we've reached the 'router-signature', now include the pgp style block - block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0] - descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True) + if not is_bridge: + descriptor_content = _read_until_keywords('router-signature', descriptor_file) + + # we've reached the 'router-signature', now include the pgp style block + + block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0] + descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True) + else: + # bridge descriptors lack a well defined ending, so checking for a @type + # annotation + + descriptor_content = _read_until_keywords('@type', descriptor_file, ignore_first = True)
if descriptor_content: if descriptor_content[0].startswith(b'@type'): diff --git a/test/unit/descriptor/data/extrainfo_bridge_descriptor_multiple b/test/unit/descriptor/data/extrainfo_bridge_descriptor_multiple new file mode 100644 index 0000000..12b1232 --- /dev/null +++ b/test/unit/descriptor/data/extrainfo_bridge_descriptor_multiple @@ -0,0 +1,62 @@ +@type bridge-extra-info 1.2 +extra-info default 909B07DB17E21D263C55794AB815BF1DB195FDD9 +published 2015-03-20 10:49:48 +router-digest 560269C81DEC146D94399FF5E95505A6DE591F3C +@type bridge-extra-info 1.2 +extra-info AndreasBaader 7F7798A3CBB0F643B1CFCE3FD4F2B7C553764498 +published 2015-03-20 10:52:01 +write-history 2015-03-20 10:37:14 (900 s) 7168,24576,89088,14336,9216,9216,215040,62464,5120,10240,4096,10240,7168,45056,79872,7317504,566272,3484672,1227776,19456,92160,9216,11264,176128,17408,58368,46080,7168,10240,15360,8192,17408,115712,8268800,5313536,4535296,4857856,4766720,2446336,4061184,17056768,12321792,9737216,4689920,41984,128000,10240,9216,75776,3697664,8046592,3249152,3329024,15947776,30720,5165056,11705344,10077184,2409472,1417216,944128,3110912,558080,14336,9216,63488,10240,70656,15360,72704,14336,6144,11264,8192,225280,40960,15360,8192,13312,7168,44032,11264,7168,94208,10240,5120,21504,38912,39936,71680,11264,5120,49152,15360,114688,17408 +read-history 2015-03-20 10:37:14 (900 s) 
8192,26624,896000,26624,12288,12288,2320384,107520,4096,12288,5120,11264,6144,644096,907264,7287808,723968,3475456,1826816,17408,914432,14336,13312,2001920,54272,663552,657408,9216,17408,24576,15360,19456,966656,8250368,5293056,4518912,6098944,5287936,2457600,4047872,17041408,12282880,9706496,4666368,683008,1456128,19456,10240,1010688,3686400,8024064,3246080,5412864,15894528,31744,5157888,12541952,11269120,2410496,1417216,947200,3341312,557056,11264,9216,874496,12288,1241088,16384,935936,13312,6144,11264,11264,2270208,279552,26624,12288,21504,8192,628736,13312,10240,1213440,8192,7168,21504,626688,635904,905216,8192,8192,649216,18432,1274880,40960 +dirreq-write-history 2015-03-20 01:52:14 (900 s) 1024,0,0,0,0,0,0,0,0,0,0,0,20480,2048,0,2048,1024,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29696,2048,5120,2048,0,0,0,0,0,0,0,0,0,0,0,0,0,1024,1024,4096,0,0,613376,1024,0,0,0,1024,0,0,0,0,0,0,7168,2048,0,1024,2048,0,0,21504,4096,2048,0,3072 +dirreq-read-history 2015-03-20 01:52:14 (900 s) 0,0,0,0,0,0,0,0,0,0,0,0,2048,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4096,0,2048,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4096,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3072,0,0,0,0 +geoip-db-digest 9EF0A1874377BFB6413ED3F9EB5504B1DB17BE13 +geoip6-db-digest 542D349827A88738A04332DAFF2516A384BCC8FF +dirreq-stats-end 2015-03-20 07:52:16 (86400 s) +dirreq-v3-ips mx=8 +dirreq-v3-reqs mx=8 +dirreq-v3-resp ok=8,not-enough-sigs=0,unavailable=0,not-found=0,not-modified=0,busy=0 +dirreq-v3-direct-dl complete=0,timeout=0,running=0 +dirreq-v3-tunneled-dl complete=4,timeout=0,running=0 +transport obfs3 +bridge-stats-end 2015-03-20 07:52:22 (86400 s) +bridge-ips ??=8,mx=8 +bridge-ip-versions v4=8,v6=8 +bridge-ip-transports <OR>=8,obfs3=8 +router-digest A493F46D74014938951862D30A71C59E652AEBE7 +@type bridge-extra-info 1.2 +extra-info UlrikeMeinhof B4869206C1EEA4A090FE614155BD6942701F80F1 +published 2015-03-20 10:54:03 +write-history 2015-03-20 10:40:27 
(14400 s) 440320,487424,483328,495616,435200,1073152 +read-history 2015-03-20 10:40:27 (14400 s) 5354496,5104640,4907008,4946944,4852736,6280192 +geoip-db-digest C1EB5237F2FBAF63381D8551157F13D12EFCCA25 +geoip6-db-digest 1F99B6B0EC78E9DB34D61AE7E0FC261D558E8E5D +dirreq-stats-end 2015-03-20 07:55:27 (86400 s) +dirreq-v3-ips +dirreq-v3-reqs +dirreq-v3-resp ok=0,not-enough-sigs=0,unavailable=0,not-found=0,not-modified=0,busy=0 +dirreq-v3-direct-dl complete=0,timeout=0,running=0 +dirreq-v3-tunneled-dl complete=0,timeout=0,running=0 +transport obfs3 +bridge-stats-end 2015-03-20 07:55:31 (86400 s) +bridge-ips +bridge-ip-versions v4=0,v6=0 +bridge-ip-transports +router-digest 2BEA3A5A03D1125617892D5A4E69A1BFDAE07AB6 +@type bridge-extra-info 1.2 +extra-info Unnamed C18896EB6274DC8123491FAE1DD17E1769C54C4F +published 2015-03-20 10:55:41 +write-history 2015-03-20 10:52:59 (900 s) 1520640,17408,24576,19456,11264,5120,130048,25600,20480,14336,11264,100352,12288 +read-history 2015-03-20 10:52:59 (900 s) 7316480,16384,24576,23552,11264,4096,2151424,33792,23552,13312,12288,1253376,8192 +router-digest 5D531EB5BC5B4D887B6AE1CFE8B20D61E9E4C43D +@type bridge-extra-info 1.2 +extra-info default 478B4CB438302981DE9AAF246F48DBE57F69050A +published 2015-03-20 10:56:17 +write-history 2015-03-20 10:54:10 (900 s) 1387520 +read-history 2015-03-20 10:54:10 (900 s) 7402496 +router-digest 132180906EAF02DD018CBD939AFE56236B2D3218 +@type bridge-extra-info 1.2 +extra-info Unnamed 25D9D52A0350B42E69C8AB7CE945DB1CA38DA0CF +published 2015-03-20 10:59:53 +write-history 2015-03-20 10:51:53 (900 s) 1398784,70656,20480,21504,9216,7168,55296,35840,104448,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +read-history 2015-03-20 10:51:53 (900 s) 8119296,885760,26624,32768,18432,16384,1226752,60416,1311744,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +router-digest 1AA6F27BB6A55EE97FF8C22D8BC2026A437BC1EF diff --git a/test/unit/descriptor/extrainfo_descriptor.py 
b/test/unit/descriptor/extrainfo_descriptor.py index c418ff3..5cb3205 100644 --- a/test/unit/descriptor/extrainfo_descriptor.py +++ b/test/unit/descriptor/extrainfo_descriptor.py @@ -121,6 +121,22 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw self.assertEqual({}, desc.dir_v2_responses_unknown) self.assertEqual({}, desc.dir_v2_responses_unknown)
+ def test_multiple_metrics_bridge_descriptors(self): + """ + Check that we can read bridge descriptors when there's multiple in a file. + """ + + descriptor_file = open(get_resource('extrainfo_bridge_descriptor_multiple'), 'rb') + desc_list = list(stem.descriptor.parse_file(descriptor_file)) + + self.assertEqual(6, len(desc_list)) + self.assertEqual('909B07DB17E21D263C55794AB815BF1DB195FDD9', desc_list[0].fingerprint) + self.assertEqual('7F7798A3CBB0F643B1CFCE3FD4F2B7C553764498', desc_list[1].fingerprint) + self.assertEqual('B4869206C1EEA4A090FE614155BD6942701F80F1', desc_list[2].fingerprint) + self.assertEqual('C18896EB6274DC8123491FAE1DD17E1769C54C4F', desc_list[3].fingerprint) + self.assertEqual('478B4CB438302981DE9AAF246F48DBE57F69050A', desc_list[4].fingerprint) + self.assertEqual('25D9D52A0350B42E69C8AB7CE945DB1CA38DA0CF', desc_list[5].fingerprint) + def test_minimal_extrainfo_descriptor(self): """ Basic sanity check that we can parse an extrainfo descriptor with minimal
tor-commits@lists.torproject.org