[tor-commits] [stem/master] Helper to guess descriptor type

atagar at torproject.org atagar at torproject.org
Sat Aug 17 20:44:27 UTC 2019


commit bee688e60c19c5ce8879ffbcc1e0c6b6c2e8597c
Author: Damian Johnson <atagar at torproject.org>
Date:   Thu Jul 11 14:35:40 2019 -0700

    Helper to guess descriptor type
    
    Ideally our index would include descriptor types, but without that the next
    best thing is to guess based on file paths.
---
 stem/descriptor/collector.py      | 55 +++++++++++++++++++++++++++++++++++++++
 test/unit/descriptor/collector.py | 15 ++++++++++-
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index 1742c596..d94d5871 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -70,6 +70,40 @@ import stem.util.str_tools
 COLLECTOR_URL = 'https://collector.torproject.org/'
 REFRESH_INDEX_RATE = 3600  # get new index if cached copy is an hour old
 
+# Mapping of path prefixes to their descriptor @type (sampled 2019-07-11).
+# Values are a single type string or a tuple of them (possibly empty).
+COLLECTOR_DESC_TYPES = {
+  'archive/bridge-descriptors/server-descriptors/': 'bridge-server-descriptor 1.2',
+  'archive/bridge-descriptors/extra-infos/': 'bridge-extra-info 1.3',
+  'archive/bridge-descriptors/statuses/': 'bridge-network-status 1.1',
+  'archive/bridge-pool-assignments/': 'bridge-pool-assignment 1.0',
+  'archive/exit-lists/': 'tordnsel 1.0',
+  'archive/relay-descriptors/bandwidths/': 'bandwidth-file 1.0',
+  'archive/relay-descriptors/certs': 'dir-key-certificate-3 1.0',  # NOTE(review): no trailing slash, unlike every other key - confirm intended
+  'archive/relay-descriptors/consensuses/': 'network-status-consensus-3 1.0',
+  'archive/relay-descriptors/extra-infos/': 'extra-info 1.0',
+  'archive/relay-descriptors/microdescs/': ('network-status-microdesc-consensus-3 1.0', 'microdescriptor 1.0'),
+  'archive/relay-descriptors/server-descriptors/': 'server-descriptor 1.0',
+  'archive/relay-descriptors/statuses/': 'network-status-2 1.0',
+  'archive/relay-descriptors/tor/': 'directory 1.0',
+  'archive/relay-descriptors/votes/': 'network-status-vote-3 1.0',
+  'archive/torperf/': 'torperf 1.0',
+  'archive/webstats/': (),
+  'recent/bridge-descriptors/extra-infos/': 'bridge-extra-info 1.3',
+  'recent/bridge-descriptors/server-descriptors/': 'bridge-server-descriptor 1.2',
+  'recent/bridge-descriptors/statuses/': 'bridge-network-status 1.2',
+  'recent/exit-lists/': 'tordnsel 1.0',
+  'recent/relay-descriptors/bandwidths/': 'bandwidth-file 1.0',
+  'recent/relay-descriptors/consensuses/': 'network-status-consensus-3 1.0',
+  'recent/relay-descriptors/extra-infos/': 'extra-info 1.0',
+  'recent/relay-descriptors/microdescs/consensus-microdesc/': 'network-status-microdesc-consensus-3 1.0',
+  'recent/relay-descriptors/microdescs/micro/': 'microdescriptor 1.0',
+  'recent/relay-descriptors/server-descriptors/': 'server-descriptor 1.0',
+  'recent/relay-descriptors/votes/': 'network-status-vote-3 1.0',
+  'recent/torperf/': 'torperf 1.1',
+  'recent/webstats/': (),
+}
+
 
 def _download(url, compression, timeout, retries):
   """
@@ -135,6 +169,27 @@ class File(object):
     self.path = path
     self.size = size
     self.last_modified = datetime.datetime.strptime(last_modified, '%Y-%m-%d %H:%M')
+    self._guessed_type = None
+
+  def guess_descriptor_types(self):
+    """
+    Descriptor @types this file is expected to have, guessed from the
+    COLLECTOR_DESC_TYPES prefix its path starts with. Empty if none are known.
+
+    :returns: **tuple** of descriptor type strings (cached after first call)
+    """
+
+    if self._guessed_type is None:  # not guessed yet, so compute and cache
+      guessed_type = ()
+
+      for path_prefix, types in COLLECTOR_DESC_TYPES.items():
+        if self.path.startswith(path_prefix):
+          guessed_type = (types,) if isinstance(types, str) else types  # lone string is wrapped so we always return a tuple
+          break  # first matching prefix wins
+
+      self._guessed_type = guessed_type
+
+    return self._guessed_type
 
 
 class CollecTor(object):
diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py
index 3403ee50..609a4929 100644
--- a/test/unit/descriptor/collector.py
+++ b/test/unit/descriptor/collector.py
@@ -9,7 +9,7 @@ import unittest
 import stem.prereq
 
 from stem.descriptor import Compression
-from stem.descriptor.collector import CollecTor
+from stem.descriptor.collector import CollecTor, File
 from test.unit.descriptor import get_resource
 from test.unit.descriptor.data.collector_index import EXAMPLE_INDEX
 
@@ -135,3 +135,16 @@ class TestCollector(unittest.TestCase):
     self.assertEqual(test_path, extrainfo_file.path)
     self.assertEqual(6459884, extrainfo_file.size)
     self.assertEqual(datetime.datetime(2016, 6, 23, 9, 54), extrainfo_file.last_modified)
+
+  def test_guess_descriptor_types(self):
+    f = File('archive/bridge-descriptors/extra-infos/bridge-extra-infos-2008-05.tar.xz', 377644, '2016-09-04 09:21')
+    self.assertEqual(('bridge-extra-info 1.3',), f.guess_descriptor_types())  # prefix mapped to a single type string
+
+    f = File('archive/relay-descriptors/microdescs/microdescs-2014-01.tar.xz', 7515396, '2014-02-07 03:59')
+    self.assertEqual(('network-status-microdesc-consensus-3 1.0', 'microdescriptor 1.0'), f.guess_descriptor_types())  # prefix mapped to multiple types
+
+    f = File('archive/webstats/webstats-2015-03.tar', 20480, '2018-03-19 16:07')
+    self.assertEqual((), f.guess_descriptor_types())  # prefix mapped to an empty tuple
+
+    f = File('archive/no_such_file.tar', 20480, '2018-03-19 16:07')
+    self.assertEqual((), f.guess_descriptor_types())  # path matching no known prefix





More information about the tor-commits mailing list