commit bee688e60c19c5ce8879ffbcc1e0c6b6c2e8597c Author: Damian Johnson <atagar@torproject.org> Date: Thu Jul 11 14:35:40 2019 -0700
Helper to guess descriptor type
Ideally our index would include descriptor types, but without that, the next best thing is to guess based on file paths. --- stem/descriptor/collector.py | 55 +++++++++++++++++++++++++++++++++++++++ test/unit/descriptor/collector.py | 15 ++++++++++- 2 files changed, 69 insertions(+), 1 deletion(-)
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py index 1742c596..d94d5871 100644 --- a/stem/descriptor/collector.py +++ b/stem/descriptor/collector.py @@ -70,6 +70,40 @@ import stem.util.str_tools COLLECTOR_URL = 'https://collector.torproject.org/' REFRESH_INDEX_RATE = 3600 # get new index if cached copy is an hour old
+# mapping of path prefixes to their descriptor type (sampled 7/11/19) + +COLLECTOR_DESC_TYPES = { + 'archive/bridge-descriptors/server-descriptors/': 'bridge-server-descriptor 1.2', + 'archive/bridge-descriptors/extra-infos/': 'bridge-extra-info 1.3', + 'archive/bridge-descriptors/statuses/': 'bridge-network-status 1.1', + 'archive/bridge-pool-assignments/': 'bridge-pool-assignment 1.0', + 'archive/exit-lists/': 'tordnsel 1.0', + 'archive/relay-descriptors/bandwidths/': 'bandwidth-file 1.0', + 'archive/relay-descriptors/certs': 'dir-key-certificate-3 1.0', + 'archive/relay-descriptors/consensuses/': 'network-status-consensus-3 1.0', + 'archive/relay-descriptors/extra-infos/': 'extra-info 1.0', + 'archive/relay-descriptors/microdescs/': ('network-status-microdesc-consensus-3 1.0', 'microdescriptor 1.0'), + 'archive/relay-descriptors/server-descriptors/': 'server-descriptor 1.0', + 'archive/relay-descriptors/statuses/': 'network-status-2 1.0', + 'archive/relay-descriptors/tor/': 'directory 1.0', + 'archive/relay-descriptors/votes/': 'network-status-vote-3 1.0', + 'archive/torperf/': 'torperf 1.0', + 'archive/webstats/': (), + 'recent/bridge-descriptors/extra-infos/': 'bridge-extra-info 1.3', + 'recent/bridge-descriptors/server-descriptors/': 'bridge-server-descriptor 1.2', + 'recent/bridge-descriptors/statuses/': 'bridge-network-status 1.2', + 'recent/exit-lists/': 'tordnsel 1.0', + 'recent/relay-descriptors/bandwidths/': 'bandwidth-file 1.0', + 'recent/relay-descriptors/consensuses/': 'network-status-consensus-3 1.0', + 'recent/relay-descriptors/extra-infos/': 'extra-info 1.0', + 'recent/relay-descriptors/microdescs/consensus-microdesc/': 'network-status-microdesc-consensus-3 1.0', + 'recent/relay-descriptors/microdescs/micro/': 'microdescriptor 1.0', + 'recent/relay-descriptors/server-descriptors/': 'server-descriptor 1.0', + 'recent/relay-descriptors/votes/': 'network-status-vote-3 1.0', + 'recent/torperf/': 'torperf 1.1', + 'recent/webstats/': (), +} +
def _download(url, compression, timeout, retries): """ @@ -135,6 +169,27 @@ class File(object): self.path = path self.size = size self.last_modified = datetime.datetime.strptime(last_modified, '%Y-%m-%d %H:%M') + self._guessed_type = None + + def guess_descriptor_types(self): + """ + Descriptor @type this file is expected to have based on its path. If unable + to determine any this tuple is empty. + + :returns: **tuple** with the descriptor types this file is expected to have + """ + + if self._guessed_type is None: + guessed_type = () + + for path_prefix, types in COLLECTOR_DESC_TYPES.items(): + if self.path.startswith(path_prefix): + guessed_type = (types,) if isinstance(types, str) else types + break + + self._guessed_type = guessed_type + + return self._guessed_type
class CollecTor(object): diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py index 3403ee50..609a4929 100644 --- a/test/unit/descriptor/collector.py +++ b/test/unit/descriptor/collector.py @@ -9,7 +9,7 @@ import unittest import stem.prereq
from stem.descriptor import Compression -from stem.descriptor.collector import CollecTor +from stem.descriptor.collector import CollecTor, File from test.unit.descriptor import get_resource from test.unit.descriptor.data.collector_index import EXAMPLE_INDEX
@@ -135,3 +135,16 @@ class TestCollector(unittest.TestCase): self.assertEqual(test_path, extrainfo_file.path) self.assertEqual(6459884, extrainfo_file.size) self.assertEqual(datetime.datetime(2016, 6, 23, 9, 54), extrainfo_file.last_modified) + + def test_guess_descriptor_types(self): + f = File('archive/bridge-descriptors/extra-infos/bridge-extra-infos-2008-05.tar.xz', 377644, '2016-09-04 09:21') + self.assertEqual(('bridge-extra-info 1.3',), f.guess_descriptor_types()) + + f = File('archive/relay-descriptors/microdescs/microdescs-2014-01.tar.xz', 7515396, '2014-02-07 03:59') + self.assertEqual(('network-status-microdesc-consensus-3 1.0', 'microdescriptor 1.0'), f.guess_descriptor_types()) + + f = File('archive/webstats/webstats-2015-03.tar', 20480, '2018-03-19 16:07') + self.assertEqual((), f.guess_descriptor_types()) + + f = File('archive/no_such_file.tar', 20480, '2018-03-19 16:07') + self.assertEqual((), f.guess_descriptor_types())
tor-commits@lists.torproject.org