[tor-commits] [stem/master] Read multiple descriptors from files with bridge descriptors

atagar at torproject.org atagar at torproject.org
Wed Mar 25 16:33:59 UTC 2015


commit 91103d1a7d94bdde7edfa537ee62554891bbc54f
Author: Damian Johnson <atagar at torproject.org>
Date:   Wed Mar 25 09:27:17 2015 -0700

    Read multiple descriptors from files with bridge descriptors
    
    Usually descriptors have a keyword that indicates the end of the descriptor.
    Sanitized bridge descriptors, however, omit the keyword we check for so they
    have no defined ending. This caused our parser to treat the whole file as one
    big bridge descriptor.
    
    To fix this we're now splitting these up by their @type annotation. This is a
    bit of a hack, but in practice metrics is the only place that provides
    sanitized bridge descriptors, so this should be fine.
---
 docs/change_log.rst                                |    1 +
 stem/descriptor/__init__.py                        |    2 +-
 stem/descriptor/extrainfo_descriptor.py            |   15 +++--
 stem/descriptor/server_descriptor.py               |   16 +++--
 .../data/extrainfo_bridge_descriptor_multiple      |   62 ++++++++++++++++++++
 test/unit/descriptor/extrainfo_descriptor.py       |   16 +++++
 6 files changed, 103 insertions(+), 9 deletions(-)

diff --git a/docs/change_log.rst b/docs/change_log.rst
index d6bbec1..6a6c23c 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -60,6 +60,7 @@ conversion (:trac:`14075`).
 
   * Lazy-loading descriptors, improving performance by 25-70% depending on what type it is (:trac:`14011`)
   * Added `support for hidden service descriptors <api/descriptor/hidden_service_descriptor.html>`_ (:trac:`15004`)
+  * When reading sanitised bridge descriptors (server or extrainfo), :func:`~stem.descriptor.__init__.parse_file` treated the whole file as a single descriptor
   * The :class:`~stem.descriptor.networkstatus.DirectoryAuthority` 'fingerprint' attribute was actually its 'v3ident'
   * Added consensus' new package attribute (:spec:`ab64534`)
   * Added extra info' new hs_stats_end, hs_rend_cells, hs_rend_cells_attr, hs_dir_onions_seen, and hs_dir_onions_seen_attr attributes (:spec:`ddb630d`)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 8c0df9d..1ebe578 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -651,7 +651,7 @@ def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_fi
   if ignore_first:
     first_line = descriptor_file.readline()
 
-    if first_line is not None:
+    if first_line:
       content_append(first_line)
 
   keyword_match = re.compile(SPECIFIC_KEYWORD_LINE % '|'.join(keywords))
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index e255320..716340e 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -178,11 +178,18 @@ def _parse_file(descriptor_file, is_bridge = False, validate = False, **kwargs):
   """
 
   while True:
-    extrainfo_content = _read_until_keywords('router-signature', descriptor_file)
+    if not is_bridge:
+      extrainfo_content = _read_until_keywords('router-signature', descriptor_file)
 
-    # we've reached the 'router-signature', now include the pgp style block
-    block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
-    extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
+      # we've reached the 'router-signature', now include the pgp style block
+
+      block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+      extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
+    else:
+      # bridge descriptors lack a well defined ending, so checking for a @type
+      # annotation
+
+      extrainfo_content = _read_until_keywords('@type', descriptor_file, ignore_first = True)
 
     if extrainfo_content:
       if extrainfo_content[0].startswith(b'@type'):
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 54ee645..80245ff 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -144,11 +144,19 @@ def _parse_file(descriptor_file, is_bridge = False, validate = False, **kwargs):
 
   while True:
     annotations = _read_until_keywords('router', descriptor_file)
-    descriptor_content = _read_until_keywords('router-signature', descriptor_file)
 
-    # we've reached the 'router-signature', now include the pgp style block
-    block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
-    descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
+    if not is_bridge:
+      descriptor_content = _read_until_keywords('router-signature', descriptor_file)
+
+      # we've reached the 'router-signature', now include the pgp style block
+
+      block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+      descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
+    else:
+      # bridge descriptors lack a well defined ending, so checking for a @type
+      # annotation
+
+      descriptor_content = _read_until_keywords('@type', descriptor_file, ignore_first = True)
 
     if descriptor_content:
       if descriptor_content[0].startswith(b'@type'):
diff --git a/test/unit/descriptor/data/extrainfo_bridge_descriptor_multiple b/test/unit/descriptor/data/extrainfo_bridge_descriptor_multiple
new file mode 100644
index 0000000..12b1232
--- /dev/null
+++ b/test/unit/descriptor/data/extrainfo_bridge_descriptor_multiple
@@ -0,0 +1,62 @@
+@type bridge-extra-info 1.2
+extra-info default 909B07DB17E21D263C55794AB815BF1DB195FDD9
+published 2015-03-20 10:49:48
+router-digest 560269C81DEC146D94399FF5E95505A6DE591F3C
+@type bridge-extra-info 1.2
+extra-info AndreasBaader 7F7798A3CBB0F643B1CFCE3FD4F2B7C553764498
+published 2015-03-20 10:52:01
+write-history 2015-03-20 10:37:14 (900 s) 7168,24576,89088,14336,9216,9216,215040,62464,5120,10240,4096,10240,7168,45056,79872,7317504,566272,3484672,1227776,19456,92160,9216,11264,176128,17408,58368,46080,7168,10240,15360,8192,17408,115712,8268800,5313536,4535296,4857856,4766720,2446336,4061184,17056768,12321792,9737216,4689920,41984,128000,10240,9216,75776,3697664,8046592,3249152,3329024,15947776,30720,5165056,11705344,10077184,2409472,1417216,944128,3110912,558080,14336,9216,63488,10240,70656,15360,72704,14336,6144,11264,8192,225280,40960,15360,8192,13312,7168,44032,11264,7168,94208,10240,5120,21504,38912,39936,71680,11264,5120,49152,15360,114688,17408
+read-history 2015-03-20 10:37:14 (900 s) 8192,26624,896000,26624,12288,12288,2320384,107520,4096,12288,5120,11264,6144,644096,907264,7287808,723968,3475456,1826816,17408,914432,14336,13312,2001920,54272,663552,657408,9216,17408,24576,15360,19456,966656,8250368,5293056,4518912,6098944,5287936,2457600,4047872,17041408,12282880,9706496,4666368,683008,1456128,19456,10240,1010688,3686400,8024064,3246080,5412864,15894528,31744,5157888,12541952,11269120,2410496,1417216,947200,3341312,557056,11264,9216,874496,12288,1241088,16384,935936,13312,6144,11264,11264,2270208,279552,26624,12288,21504,8192,628736,13312,10240,1213440,8192,7168,21504,626688,635904,905216,8192,8192,649216,18432,1274880,40960
+dirreq-write-history 2015-03-20 01:52:14 (900 s) 1024,0,0,0,0,0,0,0,0,0,0,0,20480,2048,0,2048,1024,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29696,2048,5120,2048,0,0,0,0,0,0,0,0,0,0,0,0,0,1024,1024,4096,0,0,613376,1024,0,0,0,1024,0,0,0,0,0,0,7168,2048,0,1024,2048,0,0,21504,4096,2048,0,3072
+dirreq-read-history 2015-03-20 01:52:14 (900 s) 0,0,0,0,0,0,0,0,0,0,0,0,2048,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4096,0,2048,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4096,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3072,0,0,0,0
+geoip-db-digest 9EF0A1874377BFB6413ED3F9EB5504B1DB17BE13
+geoip6-db-digest 542D349827A88738A04332DAFF2516A384BCC8FF
+dirreq-stats-end 2015-03-20 07:52:16 (86400 s)
+dirreq-v3-ips mx=8
+dirreq-v3-reqs mx=8
+dirreq-v3-resp ok=8,not-enough-sigs=0,unavailable=0,not-found=0,not-modified=0,busy=0
+dirreq-v3-direct-dl complete=0,timeout=0,running=0
+dirreq-v3-tunneled-dl complete=4,timeout=0,running=0
+transport obfs3
+bridge-stats-end 2015-03-20 07:52:22 (86400 s)
+bridge-ips ??=8,mx=8
+bridge-ip-versions v4=8,v6=8
+bridge-ip-transports <OR>=8,obfs3=8
+router-digest A493F46D74014938951862D30A71C59E652AEBE7
+@type bridge-extra-info 1.2
+extra-info UlrikeMeinhof B4869206C1EEA4A090FE614155BD6942701F80F1
+published 2015-03-20 10:54:03
+write-history 2015-03-20 10:40:27 (14400 s) 440320,487424,483328,495616,435200,1073152
+read-history 2015-03-20 10:40:27 (14400 s) 5354496,5104640,4907008,4946944,4852736,6280192
+geoip-db-digest C1EB5237F2FBAF63381D8551157F13D12EFCCA25
+geoip6-db-digest 1F99B6B0EC78E9DB34D61AE7E0FC261D558E8E5D
+dirreq-stats-end 2015-03-20 07:55:27 (86400 s)
+dirreq-v3-ips 
+dirreq-v3-reqs 
+dirreq-v3-resp ok=0,not-enough-sigs=0,unavailable=0,not-found=0,not-modified=0,busy=0
+dirreq-v3-direct-dl complete=0,timeout=0,running=0
+dirreq-v3-tunneled-dl complete=0,timeout=0,running=0
+transport obfs3
+bridge-stats-end 2015-03-20 07:55:31 (86400 s)
+bridge-ips 
+bridge-ip-versions v4=0,v6=0
+bridge-ip-transports 
+router-digest 2BEA3A5A03D1125617892D5A4E69A1BFDAE07AB6
+@type bridge-extra-info 1.2
+extra-info Unnamed C18896EB6274DC8123491FAE1DD17E1769C54C4F
+published 2015-03-20 10:55:41
+write-history 2015-03-20 10:52:59 (900 s) 1520640,17408,24576,19456,11264,5120,130048,25600,20480,14336,11264,100352,12288
+read-history 2015-03-20 10:52:59 (900 s) 7316480,16384,24576,23552,11264,4096,2151424,33792,23552,13312,12288,1253376,8192
+router-digest 5D531EB5BC5B4D887B6AE1CFE8B20D61E9E4C43D
+@type bridge-extra-info 1.2
+extra-info default 478B4CB438302981DE9AAF246F48DBE57F69050A
+published 2015-03-20 10:56:17
+write-history 2015-03-20 10:54:10 (900 s) 1387520
+read-history 2015-03-20 10:54:10 (900 s) 7402496
+router-digest 132180906EAF02DD018CBD939AFE56236B2D3218
+@type bridge-extra-info 1.2
+extra-info Unnamed 25D9D52A0350B42E69C8AB7CE945DB1CA38DA0CF
+published 2015-03-20 10:59:53
+write-history 2015-03-20 10:51:53 (900 s) 1398784,70656,20480,21504,9216,7168,55296,35840,104448,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+read-history 2015-03-20 10:51:53 (900 s) 8119296,885760,26624,32768,18432,16384,1226752,60416,1311744,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+router-digest 1AA6F27BB6A55EE97FF8C22D8BC2026A437BC1EF
diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py
index c418ff3..5cb3205 100644
--- a/test/unit/descriptor/extrainfo_descriptor.py
+++ b/test/unit/descriptor/extrainfo_descriptor.py
@@ -121,6 +121,22 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw
     self.assertEqual({}, desc.dir_v2_responses_unknown)
     self.assertEqual({}, desc.dir_v2_responses_unknown)
 
+  def test_multiple_metrics_bridge_descriptors(self):
+    """
+    Check that we can read bridge descriptors when there's multiple in a file.
+    """
+
+    descriptor_file = open(get_resource('extrainfo_bridge_descriptor_multiple'), 'rb')
+    desc_list = list(stem.descriptor.parse_file(descriptor_file))
+
+    self.assertEqual(6, len(desc_list))
+    self.assertEqual('909B07DB17E21D263C55794AB815BF1DB195FDD9', desc_list[0].fingerprint)
+    self.assertEqual('7F7798A3CBB0F643B1CFCE3FD4F2B7C553764498', desc_list[1].fingerprint)
+    self.assertEqual('B4869206C1EEA4A090FE614155BD6942701F80F1', desc_list[2].fingerprint)
+    self.assertEqual('C18896EB6274DC8123491FAE1DD17E1769C54C4F', desc_list[3].fingerprint)
+    self.assertEqual('478B4CB438302981DE9AAF246F48DBE57F69050A', desc_list[4].fingerprint)
+    self.assertEqual('25D9D52A0350B42E69C8AB7CE945DB1CA38DA0CF', desc_list[5].fingerprint)
+
   def test_minimal_extrainfo_descriptor(self):
     """
     Basic sanity check that we can parse an extrainfo descriptor with minimal



More information about the tor-commits mailing list