[stem/master] Parsing dirreq-v*-direct-dl and dirreq-v*-tunneled-dl lines

commit bbf0e80cf889cc4b0738acf93c698fc54720c767 Author: Damian Johnson <atagar@torproject.org> Date: Sun May 13 13:35:33 2012 -0700 Parsing dirreq-v*-direct-dl and dirreq-v*-tunneled-dl lines Parsing four extrainfo descriptor fields related to directory mirroring stat collection. These are similar to dirreq-v*-resp in that they're key=value mappings, with largely known key sets. --- stem/descriptor/extrainfo_descriptor.py | 69 ++++++++++++++++++++++--- test/integ/descriptor/extrainfo_descriptor.py | 6 ++ test/unit/descriptor/extrainfo_descriptor.py | 45 ++++++++++++++++- 3 files changed, 111 insertions(+), 9 deletions(-) diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py index d1967a9..9493f2b 100644 --- a/stem/descriptor/extrainfo_descriptor.py +++ b/stem/descriptor/extrainfo_descriptor.py @@ -24,6 +24,16 @@ DirResponses - known statuses for ExtraInfoDescriptor's dir_*_responses |- NOT_MODIFIED - network status unmodified since If-Modified-Since time +- BUSY - directory was busy +DirStats - known stats for ExtraInfoDescriptor's dir_*_direct_dl and dir_*_tunneled_dl + |- COMPLETE - requests that completed successfully + |- TIMEOUT - requests that didn't complete within a ten minute timeout + |- RUNNING - requests still in progress when measurement's taken + |- MIN - smallest rate at which a descriptor was downloaded in B/s + |- MAX - largest rate at which a descriptor was downloaded in B/s + |- D1-4 and D6-9 - rate of the slowest x/10 download rates in B/s + |- Q1 and Q3 - rate of the slowest and fastest quarter download rates in B/s + +- MD - median download rate in B/s + parse_file - Iterates over the extra-info descriptors in a file. ExtraInfoDescriptor - Tor extra-info descriptor. +- get_unrecognized_lines - lines with unrecognized content @@ -45,6 +55,12 @@ DirResponses = stem.util.enum.Enum( ("BUSY", "busy"), ) +# known stats for dirreq-v2/3-direct-dl and dirreq-v2/3-tunneled-dl... 
+dir_stats = ['complete', 'timeout', 'running', 'min', 'max', 'q1', 'q3', 'md'] +dir_stats += ['d%i' % i for i in range(1, 5)] +dir_stats += ['d%i' % i for i in range(6, 10)] +DirStats = stem.util.enum.Enum(*[(stat.upper(), stat) for stat in dir_stats]) + # relay descriptors must have exactly one of the following REQUIRED_FIELDS = ( "extra-info", @@ -178,14 +194,25 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor): dir_stats_interval (int) - length in seconds of the interval dir_v2_ips (dict) - mapping of locales to rounded count of requester ips dir_v3_ips (dict) - mapping of locales to rounded count of requester ips + dir_v2_share (float) - percent of total directory traffic it expects to serve + dir_v3_share (float) - percent of total directory traffic it expects to serve dir_v2_requests (dict) - mapping of locales to rounded count of requests dir_v3_requests (dict) - mapping of locales to rounded count of requests + dir_v2_responses (dict) - mapping of DirResponses to their rounded count dir_v3_responses (dict) - mapping of DirResponses to their rounded count dir_v2_responses_unknown (dict) - mapping of unrecognized statuses to their count dir_v3_responses_unknown (dict) - mapping of unrecognized statuses to their count - dir_v2_share (float) - percent of total directory traffic it expects to serve - dir_v3_share (float) - percent of total directory traffic it expects to serve + + dir_v2_direct_dl (dict) - mapping of DirStats to measurement over DirPort + dir_v3_direct_dl (dict) - mapping of DirStats to measurement over DirPort + dir_v2_direct_dl_unknown (dict) - mapping of unrecognized stats to their measurement + dir_v3_direct_dl_unknown (dict) - mapping of unrecognized stats to their measurement + + dir_v2_tunneled_dl (dict) - mapping of DirStats to measurement over ORPort + dir_v3_tunneled_dl (dict) - mapping of DirStats to measurement over ORPort + dir_v2_tunneled_dl_unknown (dict) - mapping of unrecognized stats to their measurement + 
dir_v3_tunneled_dl_unknown (dict) - mapping of unrecognized stats to their measurement Bytes read/written for directory mirroring dir_read_history_end (datetime) - end of the sampling interval @@ -246,14 +273,22 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor): self.dir_stats_interval = None self.dir_v2_ips = None self.dir_v3_ips = None + self.dir_v2_share = None + self.dir_v3_share = None self.dir_v2_requests = None self.dir_v3_requests = None self.dir_v2_responses = None self.dir_v3_responses = None self.dir_v2_responses_unknown = None self.dir_v3_responses_unknown = None - self.dir_v2_share = None - self.dir_v3_share = None + self.dir_v2_direct_dl = None + self.dir_v3_direct_dl = None + self.dir_v2_direct_dl_unknown = None + self.dir_v3_direct_dl_unknown = None + self.dir_v2_tunneled_dl = None + self.dir_v3_tunneled_dl = None + self.dir_v2_tunneled_dl_unknown = None + self.dir_v3_tunneled_dl_unknown = None self.dir_read_history_end = None self.dir_read_history_interval = None @@ -336,10 +371,16 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor): raise ValueError("Geoip digest line had an invalid sha1 digest: %s" % line) self.geoip_db_digest = value - elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp"): + elif keyword in ("dirreq-v2-resp", "dirreq-v3-resp", "dirreq-v2-direct-dl", "dirreq-v3-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v3-tunneled-dl"): recognized_counts = {} unrecognized_counts = {} - error_msg = "%s lines should contain STATUS=COUNT mappings: %s" % (keyword, line) + + is_response_stats = keyword in ("dirreq-v2-resp", "dirreq-v3-resp") + key_set = DirResponses if is_response_stats else DirStats + + key_type = "STATUS" if is_response_stats else "STAT" + error_msg = "%s lines should contain %s=COUNT mappings: %s" % (keyword, key_type, line) + if value: for entry in value.split(","): @@ -350,7 +391,7 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor): status, count = entry.split("=", 1) if count.isdigit(): - if status in 
DirResponses: + if status in key_set: recognized_counts[status] = int(count) else: unrecognized_counts[status] = int(count) @@ -360,9 +401,21 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor): if keyword == "dirreq-v2-resp": self.dir_v2_responses = recognized_counts self.dir_v2_responses_unknown = unrecognized_counts - else: + elif keyword == "dirreq-v3-resp": self.dir_v3_responses = recognized_counts self.dir_v3_responses_unknown = unrecognized_counts + elif keyword == "dirreq-v2-direct-dl": + self.dir_v2_direct_dl = recognized_counts + self.dir_v2_direct_dl_unknown = unrecognized_counts + elif keyword == "dirreq-v3-direct-dl": + self.dir_v3_direct_dl = recognized_counts + self.dir_v3_direct_dl_unknown = unrecognized_counts + elif keyword == "dirreq-v2-tunneled-dl": + self.dir_v2_tunneled_dl = recognized_counts + self.dir_v2_tunneled_dl_unknown = unrecognized_counts + elif keyword == "dirreq-v3-tunneled-dl": + self.dir_v3_tunneled_dl = recognized_counts + self.dir_v3_tunneled_dl_unknown = unrecognized_counts elif keyword in ("dirreq-v2-share", "dirreq-v3-share"): # "<keyword>" num% diff --git a/test/integ/descriptor/extrainfo_descriptor.py b/test/integ/descriptor/extrainfo_descriptor.py index 841aa54..99edfea 100644 --- a/test/integ/descriptor/extrainfo_descriptor.py +++ b/test/integ/descriptor/extrainfo_descriptor.py @@ -87,6 +87,12 @@ k0d2aofcVbHr4fPQOSST0LXDrhFl5Fqo5um296zpJGvRUeO6S44U/EfJAGShtqWw self.fail("Unrecognized statuses on dirreq-v2-resp lines: %s" % desc.dir_v2_responses_unknown) elif desc.dir_v3_responses_unknown: self.fail("Unrecognized statuses on dirreq-v3-resp lines: %s" % desc.dir_v3_responses_unknown) + elif desc.dir_v2_direct_dl_unknown: + self.fail("Unrecognized stats on dirreq-v2-direct-dl lines: %s" % desc.dir_v2_direct_dl_unknown) + elif desc.dir_v3_direct_dl_unknown: + self.fail("Unrecognized stats on dirreq-v3-direct-dl lines: %s" % desc.dir_v2_direct_dl_unknown) + elif desc.dir_v2_tunneled_dl_unknown: + self.fail("Unrecognized 
stats on dirreq-v2-tunneled-dl lines: %s" % desc.dir_v2_tunneled_dl_unknown) elif unrecognized_lines: # TODO: This isn't actually a problem, and rather than failing we # should alert the user about these entries at the end of the tests diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py index b167ad4..e855a1d 100644 --- a/test/unit/descriptor/extrainfo_descriptor.py +++ b/test/unit/descriptor/extrainfo_descriptor.py @@ -4,7 +4,7 @@ Unit tests for stem.descriptor.extrainfo_descriptor. import datetime import unittest -from stem.descriptor.extrainfo_descriptor import ExtraInfoDescriptor, DirResponses +from stem.descriptor.extrainfo_descriptor import ExtraInfoDescriptor, DirResponses, DirStats CRYPTO_BLOB = """ K5FSywk7qvw/boA4DQcqkls6Ize5vcBYfhQ8JnOeRQC9+uDxbnpm3qaYN9jZ8myj @@ -162,6 +162,49 @@ class TestExtraInfoDescriptor(unittest.TestCase): self.assertEqual({}, getattr(desc, attr)) self.assertEqual({}, getattr(desc, unknown_attr)) + def test_dir_stat_lines(self): + """ + Parses the dirreq-v2-direct-dl, dirreq-v3-direct-dl, dirreq-v2-tunneled-dl, + and dirreq-v3-tunneled-dl lines with valid and invalid data. 
+ """ + + for keyword in ("dirreq-v2-direct-dl", "dirreq-v2-direct-dl", "dirreq-v2-tunneled-dl", "dirreq-v2-tunneled-dl"): + attr = keyword.replace('-', '_').replace('dirreq', 'dir') + unknown_attr = attr + "_unknown" + + test_value = "complete=2712,timeout=32,running=4,min=741,d1=14507,d2=22702,q1=28881,d3=38277,d4=73729,md=111455,d6=168231,d7=257218,q3=319833,d8=390507,d9=616301,something-new=11,max=29917857" + desc_text = _make_descriptor({keyword: test_value}) + desc = ExtraInfoDescriptor(desc_text) + self.assertEquals(2712, getattr(desc, attr)[DirStats.COMPLETE]) + self.assertEquals(32, getattr(desc, attr)[DirStats.TIMEOUT]) + self.assertEquals(4, getattr(desc, attr)[DirStats.RUNNING]) + self.assertEquals(741, getattr(desc, attr)[DirStats.MIN]) + self.assertEquals(14507, getattr(desc, attr)[DirStats.D1]) + self.assertEquals(22702, getattr(desc, attr)[DirStats.D2]) + self.assertEquals(28881, getattr(desc, attr)[DirStats.Q1]) + self.assertEquals(38277, getattr(desc, attr)[DirStats.D3]) + self.assertEquals(73729, getattr(desc, attr)[DirStats.D4]) + self.assertEquals(111455, getattr(desc, attr)[DirStats.MD]) + self.assertEquals(168231, getattr(desc, attr)[DirStats.D6]) + self.assertEquals(257218, getattr(desc, attr)[DirStats.D7]) + self.assertEquals(319833, getattr(desc, attr)[DirStats.Q3]) + self.assertEquals(390507, getattr(desc, attr)[DirStats.D8]) + self.assertEquals(616301, getattr(desc, attr)[DirStats.D9]) + self.assertEquals(29917857, getattr(desc, attr)[DirStats.MAX]) + self.assertEquals(11, getattr(desc, unknown_attr)["something-new"]) + + test_entries = ( + "complete=-4", + "complete:4", + "complete=4.timeout=3", + ) + + for entry in test_entries: + desc_text = _make_descriptor({keyword: entry}) + desc = self._expect_invalid_attr(desc_text) + self.assertEqual({}, getattr(desc, attr)) + self.assertEqual({}, getattr(desc, unknown_attr)) + def test_percentage_lines(self): """ Uses valid and invalid data to tests lines of the form...
participants (1)
-
atagar@torproject.org