commit 4631228400e0cca43d8c7ba514c40cbcbf2bda34 Author: Arlo Breault arlolra@gmail.com Date: Mon Aug 19 22:49:40 2013 -0700
Parse exit list entry.
Exit list entries are published by DNSEL or TorBEL to indicate which IP address exit relay X had at timestamp Y.
See #8255 --- stem/descriptor/__init__.py | 9 ++- stem/descriptor/tordnsel.py | 120 ++++++++++++++++++++++++++++++++++++++ test/settings.cfg | 1 + test/unit/descriptor/tordnsel.py | 80 +++++++++++++++++++++++++ 4 files changed, 209 insertions(+), 1 deletion(-)
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py index cd1f42a..b4fc54a 100644 --- a/stem/descriptor/__init__.py +++ b/stem/descriptor/__init__.py @@ -46,6 +46,7 @@ __all__ = [ "microdescriptor", "networkstatus", "router_status_entry", + "tordnsel", "parse_file", "Descriptor", ] @@ -115,7 +116,7 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen bridge-extra-info 1.1 :class:`~stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor` torperf 1.0 **unsupported** bridge-pool-assignment 1.0 **unsupported** - tordnsel 1.0 **unsupported** + tordnsel 1.0 :class:`~stem.descriptor.tordnsel.TorDNSEL` ========================================= =====
If you're using **python 3** then beware that the open() function defaults to @@ -255,6 +256,11 @@ def _parse_metrics_file(descriptor_type, major_version, minor_version, descripto
for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs): yield desc + elif descriptor_type == "tordnsel" and major_version == 1: + document_type = stem.descriptor.tordnsel.TorDNSEL + + for desc in stem.descriptor.tordnsel._parse_file(descriptor_file, validate = validate, **kwargs): + yield desc else: raise TypeError("Unrecognized metrics descriptor format. type: '%s', version: '%i.%i'" % (descriptor_type, major_version, minor_version))
@@ -542,3 +548,4 @@ import stem.descriptor.server_descriptor import stem.descriptor.extrainfo_descriptor import stem.descriptor.networkstatus import stem.descriptor.microdescriptor +import stem.descriptor.tordnsel diff --git a/stem/descriptor/tordnsel.py b/stem/descriptor/tordnsel.py new file mode 100644 index 0000000..ddeef2e --- /dev/null +++ b/stem/descriptor/tordnsel.py @@ -0,0 +1,120 @@ +# Copyright 2012-2013, Damian Johnson +# See LICENSE for licensing information + +""" +Parsing for TorDNSEL files. +""" + +import datetime + +import stem.util.connection +import stem.util.str_tools +import stem.util.tor_tools + +from stem.descriptor import ( + Descriptor, + _read_until_keywords, + _get_descriptor_components, +) + + +def _parse_file(tordnsel_file, validate = True, **kwargs): + """ + Iterates over a tordnsel file. + + :returns: iterator for :class:`~stem.descriptor.tordnsel.TorDNSEL` + instances in the file + + :raises: + * **ValueError** if the contents is malformed and validate is **True** + * **IOError** if the file can't be read + """ + + # skip content prior to the first ExitNode + _read_until_keywords("ExitNode", tordnsel_file, skip = True) + + while True: + contents = _read_until_keywords("ExitAddress", tordnsel_file) + contents += _read_until_keywords("ExitNode", tordnsel_file) + if contents: + yield TorDNSEL(bytes.join(b"", contents), validate, **kwargs) + else: + break # done parsing file + + +class TorDNSEL(Descriptor): + """ + TorDNSEL descriptor (`exitlist specification + https://www.torproject.org/tordnsel/exitlist-spec.txt`_) + + :var str fingerprint: ***** authority's fingerprint + :var datetime published: ***** time in UTC when this descriptor was made + :var datetime last_status: ***** time in UTC when the relay was seen in a v2 network status + :var list exit_addresses: ***** list of (str address, datetime date) tuples consisting of the found IPv4 exit address and the time + + ***** attribute is either required when we're parsed with 
validation or has + a default value, others are left as **None** if undefined + """ + + def __init__(self, raw_contents, validate): + super(TorDNSEL, self).__init__(raw_contents) + raw_contents = stem.util.str_tools._to_unicode(raw_contents) + entries = _get_descriptor_components(raw_contents, validate) + + self.fingerprint = None + self.published = None + self.last_status = None + self.exit_addresses = [] + + self._parse(entries, validate) + + def _parse(self, entries, validate): + + for keyword, values in entries.items(): + value, block_content = values[0] + + if validate and block_content: + raise ValueError("Unexpected block content: %s" % block_content) + + if keyword == "ExitNode": + if validate and not stem.util.tor_tools.is_valid_fingerprint(value): + raise ValueError("Tor relay fingerprints consist of forty hex digits: %s" % value) + self.fingerprint = value + + elif keyword == "Published": + try: + self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + except ValueError: + if validate: + raise ValueError("Published time wasn't parsable: %s" % value) + + elif keyword == "LastStatus": + try: + self.last_status = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + except ValueError: + if validate: + raise ValueError("LastStatus time wasn't parsable: %s" % value) + + elif keyword == "ExitAddress": + for value, block_content in values: + + if validate and block_content: + raise ValueError("Unexpected block content: %s" % block_content) + + address, date = value.split(" ", 1) + + if validate and not stem.util.connection.is_valid_ipv4_address(address): + raise ValueError("ExitAddress isn't a valid IPv4 address: %s" % address) + try: + date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S") + except ValueError: + if validate: + raise ValueError("ExitAddress found time wasn't parsable: %s" % value) + else: + continue + + self.exit_addresses.append((address, date)) + + else: + if validate: + raise ValueError("Saw a keyword that wasn't 
expected.") diff --git a/test/settings.cfg b/test/settings.cfg index 80aaf9a..b97c57f 100644 --- a/test/settings.cfg +++ b/test/settings.cfg @@ -161,6 +161,7 @@ test.unit_tests |test.unit.descriptor.extrainfo_descriptor.TestExtraInfoDescriptor |test.unit.descriptor.microdescriptor.TestMicrodescriptor |test.unit.descriptor.router_status_entry.TestRouterStatusEntry +|test.unit.descriptor.tordnsel.TestTorDNSELDescriptor |test.unit.descriptor.networkstatus.directory_authority.TestDirectoryAuthority |test.unit.descriptor.networkstatus.key_certificate.TestKeyCertificate |test.unit.descriptor.networkstatus.document_v2.TestNetworkStatusDocument diff --git a/test/unit/descriptor/tordnsel.py b/test/unit/descriptor/tordnsel.py new file mode 100644 index 0000000..4861b38 --- /dev/null +++ b/test/unit/descriptor/tordnsel.py @@ -0,0 +1,80 @@ +""" +Unit tests for stem.descriptor.tordnsel. +""" + +import io +import unittest +import datetime + +from stem.util.tor_tools import is_valid_fingerprint +from stem.descriptor.tordnsel import TorDNSEL, _parse_file + + +class TestTorDNSELDescriptor(unittest.TestCase): + def test_parse_file(self): + """ + Try parsing a document via the _parse_file() function. 
+ """ + desc_text = """ +@type tordnsel 1.0 +Downloaded 2013-08-19 04:02:03 +ExitNode 003A71137D959748C8157C4A76ECA639CEF5E33E +Published 2013-08-19 02:13:53 +LastStatus 2013-08-19 03:02:47 +ExitAddress 66.223.170.168 2013-08-19 03:18:51 +ExitNode 00FF300624FECA7F40515C8D854EE925332580D6 +Published 2013-08-18 07:02:14 +LastStatus 2013-08-18 09:02:58 +ExitAddress 82.252.181.153 2013-08-18 08:03:01 +ExitAddress 82.252.181.154 2013-08-18 08:03:02 +ExitAddress 82.252.181.155 2013-08-18 08:03:03 +ExitNode 030B22437D99B2DB2908B747B6962EAD13AB4039 +Published 2013-08-18 12:44:20 +LastStatus 2013-08-18 13:02:57 +ExitAddress 46.10.211.205 2013-08-18 13:18:48 +""" + + # parse file and assert values + descriptors = list(_parse_file(io.BytesIO(desc_text))) + self.assertEqual(3, len(descriptors)) + self.assertTrue(isinstance(descriptors[0], TorDNSEL)) + desc = descriptors[1] + self.assertTrue(is_valid_fingerprint(desc.fingerprint)) + self.assertEqual("00FF300624FECA7F40515C8D854EE925332580D6", desc.fingerprint) + self.assertEqual(datetime.datetime(2013, 8, 18, 7, 2, 14), desc.published) + self.assertEqual(datetime.datetime(2013, 8, 18, 9, 2, 58), desc.last_status) + self.assertEqual(3, len(desc.exit_addresses)) + exit = desc.exit_addresses[0] + self.assertEqual("82.252.181.153", exit[0]) + self.assertEqual(datetime.datetime(2013, 8, 18, 8, 3, 1), exit[1]) + + # block content raises value error + extra = "ExtraContent goes here\n" + descriptors = _parse_file(io.BytesIO(desc_text + extra)) + self.assertRaises(ValueError, list, descriptors) + + # malformed fingerprint raises value errors + extra = "ExitNode 030B22437D99B2DB2908B747B6" + self.assertRaises(ValueError, list, _parse_file(io.BytesIO(desc_text + extra))) + + # malformed date raises value errors + extra = """ +ExitNode 030B22437D99B2DB2908B747B6962EAD13AB4038 +Published Today! 
+LastStatus 2013-08-18 13:02:57 +ExitAddress 46.10.211.205 2013-08-18 13:18:48 +""" + self.assertRaises(ValueError, list, _parse_file(io.BytesIO(desc_text + extra))) + + # skip exit address if malformed date and validate is False + extra = """ +@type tordnsel 1.0 +ExitNode 030B22437D99B2DB2908B747B6962EAD13AB4038 +Published Today! +LastStatus 2013-08-18 13:02:57 +ExitAddress 46.10.211.205 2013-08-18 Never +""" + desc = _parse_file(io.BytesIO(extra), validate=False).next() + self.assertTrue(is_valid_fingerprint(desc.fingerprint)) + self.assertEqual("030B22437D99B2DB2908B747B6962EAD13AB4038", desc.fingerprint) + self.assertEqual(0, len(desc.exit_addresses))