[tor-commits] [stem/master] Reorganizing extrainfo fields by value type

atagar at torproject.org atagar at torproject.org
Mon May 14 00:14:27 UTC 2012


commit b9cd295c09afe662247cc6a89442b40860b9157b
Author: Damian Johnson <atagar at torproject.org>
Date:   Sat May 12 15:07:02 2012 -0700

    Reorganizing extrainfo fields by value type
    
    Grouping both the parsing and unit tests by value format to make them more
    maintainable, and to ease adding the dozens of other attributes. Also using a
    'dir_' prefix for all directory mirroring attributes rather than 'dirreq_' (I
    was previously being inconsistent on this).
    
    This expands the testing to cover more attributes, along with adding support
    for the "dirreq-v2-ips" and "dirreq-v3-ips" lines. Those lines revealed some
    misses in how I was parsing the maxmind locales. In particular I was missing
    '??' and alphanumeric locales like 'a1', which maxmind uses for an 'Anonymous Proxy'.
---
 stem/descriptor/extrainfo_descriptor.py      |  128 +++++++++++++------------
 test/unit/descriptor/extrainfo_descriptor.py |  130 ++++++++++++++++++--------
 2 files changed, 157 insertions(+), 101 deletions(-)

diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index 93b438c..ce39ee3 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -155,8 +155,10 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
       write_history_values (list)  - bytes written during each interval
     
     Directory Mirror Attributes:
-      dirreq_stats_end (datetime) - end of the period when stats were gathered
-      dirreq_stats_interval (int) - length in seconds of the interval
+      dir_stats_end (datetime) - end of the period when stats were gathered
+      dir_stats_interval (int) - length in seconds of the interval
+      dir_v2_ips (dict) - mapping of locales to rounded count of requester ips
+      dir_v3_ips (dict) - mapping of locales to rounded count of requester ips
       
       Bytes read/written for directory mirroring
         dir_read_history_end (datetime) - end of the sampling interval
@@ -213,8 +215,10 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
     self.write_history_interval = None
     self.write_history_values = None
     
-    self.dirreq_stats_end = None
-    self.dirreq_stats_interval = None
+    self.dir_stats_end = None
+    self.dir_stats_interval = None
+    self.dir_v2_ips = None
+    self.dir_v3_ips = None
     
     self.dir_read_history_end = None
     self.dir_read_history_interval = None
@@ -290,14 +294,6 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
         
         self.nickname = extra_info_comp[0]
         self.fingerprint = extra_info_comp[1]
-      elif keyword == "published":
-        # "published" YYYY-MM-DD HH:MM:SS
-        
-        try:
-          self.published = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
-        except ValueError:
-          if validate:
-            raise ValueError("Published line's time wasn't parseable: %s" % line)
       elif keyword == "geoip-db-digest":
         # "geoip-db-digest" Digest
         
@@ -305,65 +301,44 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
           raise ValueError("Geoip digest line had an invalid sha1 digest: %s" % line)
         
         self.geoip_db_digest = value
-      elif keyword == "geoip-start-time":
-        # "geoip-start-time" YYYY-MM-DD HH:MM:SS
+      elif keyword in ("published", "geoip-start-time"):
+        # "<keyword>" YYYY-MM-DD HH:MM:SS
         
         try:
-          self.geoip_start_time = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
+          timestamp = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
+          
+          if keyword == "published":
+            self.published = timestamp
+          elif keyword == "geoip-start-time":
+            self.geoip_start_time = timestamp
         except ValueError:
           if validate:
-            raise ValueError("Geoip start time line's time wasn't parseable: %s" % line)
-      elif keyword == "bridge-stats-end":
-        # "bridge-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s)
+            raise ValueError("Timestamp on %s line wasn't parseable: %s" % (keyword, line))
+      elif keyword in ("bridge-stats-end", "dirreq-stats-end"):
+        # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s)
         
         try:
           timestamp, interval, _ = _parse_timestamp_and_interval(keyword, value)
-          self.bridge_stats_end = timestamp
-          self.bridge_stats_interval = interval
-        except ValueError, exc:
-          if validate: raise exc
-      elif keyword in ("geoip-client-origins", "bridge-ips"):
-        # "geoip-client-origins" CC=N,CC=N,...
-        
-        locale_usage = {}
-        error_msg = "Entries in %s line should only be CC=N entries: %s" % (keyword, line)
-        
-        for entry in value.split(","):
-          if not "=" in entry:
-            if validate: raise ValueError(error_msg)
-            else: continue
           
-          locale, count = entry.split("=", 1)
-          
-          if re.match("^[a-zA-Z]{2}$", locale) and count.isdigit():
-            locale_usage[locale] = int(count)
-          elif validate:
-            raise ValueError(error_msg)
-        
-        if keyword == "geoip-client-origins":
-          self.geoip_client_origins = locale_usage
-        elif keyword == "bridge-ips":
-          self.bridge_ips = locale_usage
-      elif keyword == "dirreq-stats-end":
-        # "dirreq-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s)
-        
-        try:
-          timestamp, interval, _ = _parse_timestamp_and_interval(keyword, value)
-          self.dirreq_stats_end = timestamp
-          self.dirreq_stats_interval = interval
+          if keyword == "bridge-stats-end":
+            self.bridge_stats_end = timestamp
+            self.bridge_stats_interval = interval
+          elif keyword == "dirreq-stats-end":
+            self.dir_stats_end = timestamp
+            self.dir_stats_interval = interval
         except ValueError, exc:
           if validate: raise exc
       elif keyword in ("read-history", "write-history", "dirreq-read-history", "dirreq-write-history"):
+        # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM...
         try:
           timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
+          history_values = []
           
-          try:
-            if remainder:
+          if remainder:
+            try:
               history_values = [int(entry) for entry in remainder.split(",")]
-            else:
-              history_values = []
-          except ValueError:
-            raise ValueError("%s line has non-numeric values: %s" % (keyword, line))
+            except ValueError:
+              raise ValueError("%s line has non-numeric values: %s" % (keyword, line))
           
           if keyword == "read-history":
             self.read_history_end = timestamp
@@ -381,12 +356,45 @@ class ExtraInfoDescriptor(stem.descriptor.Descriptor):
             self.dir_write_history_end = timestamp
             self.dir_write_history_interval = interval
             self.dir_write_history_values = history_values
-          else:
-            # not gonna happen unless we change the main loop's conditional
-            # without fixing this one
-            raise ValueError("BUG: unrecognized keyword '%s'" % keyword)
         except ValueError, exc:
           if validate: raise exc
+      elif keyword in ("dirreq-v2-ips", "dirreq-v3-ips", "geoip-client-origins", "bridge-ips"):
+        # "<keyword>" CC=N,CC=N,...
+        
+        locale_usage = {}
+        error_msg = "Entries in %s line should only be CC=N entries: %s" % (keyword, line)
+        
+        if value:
+          for entry in value.split(","):
+            if not "=" in entry:
+              if validate: raise ValueError(error_msg)
+              else: continue
+            
+            # The maxmind geoip has numeric locale codes for some special
+            # values, for instance...
+            #
+            #   A1,"Anonymous Proxy"
+            #   A2,"Satellite Provider"
+            #   ??,"Unknown"
+            #
+            # https://www.maxmind.com/app/iso3166
+            
+            
+            locale, count = entry.split("=", 1)
+            
+            if re.match("^[a-zA-Z0-9\?]{2}$", locale) and count.isdigit():
+              locale_usage[locale] = int(count)
+            elif validate:
+              raise ValueError(error_msg)
+        
+        if keyword == "dirreq-v2-ips":
+          self.dir_v2_ips = locale_usage
+        elif keyword == "dirreq-v3-ips":
+          self.dir_v3_ips = locale_usage
+        elif keyword == "geoip-client-origins":
+          self.geoip_client_origins = locale_usage
+        elif keyword == "bridge-ips":
+          self.bridge_ips = locale_usage
       elif keyword == "router-signature":
         if validate and not block_contents:
           raise ValueError("Router signature line must be followed by a signature block: %s" % line)
diff --git a/test/unit/descriptor/extrainfo_descriptor.py b/test/unit/descriptor/extrainfo_descriptor.py
index b4c5fbe..21f49a4 100644
--- a/test/unit/descriptor/extrainfo_descriptor.py
+++ b/test/unit/descriptor/extrainfo_descriptor.py
@@ -96,14 +96,14 @@ class TestExtraInfoDescriptor(unittest.TestCase):
     None.
     """
     
-    test_entry = (
+    test_entries = (
       "ninja",
       "ninja ",
       "B2289C3EAB83ECD6EB916A2F481A02E6B76A0A48",
       " B2289C3EAB83ECD6EB916A2F481A02E6B76A0A48",
     )
     
-    for entry in test_entry:
+    for entry in test_entries:
       desc_text = _make_descriptor({"extra-info": entry})
       desc = self._expect_invalid_attr(desc_text, "nickname")
       self.assertEquals(None, desc.nickname)
@@ -119,7 +119,7 @@ class TestExtraInfoDescriptor(unittest.TestCase):
     desc = ExtraInfoDescriptor(desc_text)
     self.assertEquals(geoip_db_digest, desc.geoip_db_digest)
     
-    test_entry = (
+    test_entries = (
       "",
       "916A3CA8B7DF61473D5AE5B21711F35F301CE9E",
       "916A3CA8B7DF61473D5AE5B21711F35F301CE9E88",
@@ -127,38 +127,42 @@ class TestExtraInfoDescriptor(unittest.TestCase):
       "916A3CA8B7DF61473D5AE5B21711F35F301CE9E-",
     )
     
-    for entry in test_entry:
+    for entry in test_entries:
       desc_text = _make_descriptor({"geoip-db-digest": entry})
       desc = self._expect_invalid_attr(desc_text, "geoip_db_digest", entry)
   
-  def test_geoip_start_time(self):
+  def test_timestamp_lines(self):
     """
-    Parses the geoip-start-time line with valid and invalid data.
+    Uses valid and invalid data to tests lines of the form...
+    "<keyword>" YYYY-MM-DD HH:MM:SS
     """
     
-    desc_text = _make_descriptor({"geoip-start-time": "2012-05-03 12:07:50"})
-    desc = ExtraInfoDescriptor(desc_text)
-    self.assertEquals(datetime.datetime(2012, 5, 3, 12, 7, 50), desc.geoip_start_time)
-    
-    test_entry = (
-      "",
-      "2012-05-03 12:07:60",
-      "2012-05-03 ",
-      "2012-05-03",
-    )
-    
-    for entry in test_entry:
-      desc_text = _make_descriptor({"geoip-start-time": entry})
-      desc = self._expect_invalid_attr(desc_text, "geoip_start_time")
+    for keyword in ('published', 'geoip-start-time'):
+      attr = keyword.replace('-', '_')
+      
+      desc_text = _make_descriptor({keyword: "2012-05-03 12:07:50"})
+      desc = ExtraInfoDescriptor(desc_text)
+      self.assertEquals(datetime.datetime(2012, 5, 3, 12, 7, 50), getattr(desc, attr))
+      
+      test_entries = (
+        "",
+        "2012-05-03 12:07:60",
+        "2012-05-03 ",
+        "2012-05-03",
+      )
+      
+      for entry in test_entries:
+        desc_text = _make_descriptor({keyword: entry})
+        self._expect_invalid_attr(desc_text, attr)
   
-  def test_stats_end(self):
+  def test_timestamp_and_interval_lines(self):
     """
-    Parses the bridge-stats-end and dirreq-stats-end lines with valid and
-    invalid data.
+    Uses valid and invalid data to tests lines of the form...
+    "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s)
     """
     
     for keyword in ('bridge-stats-end', 'dirreq-stats-end'):
-      end_attr = keyword.replace('-', '_')
+      end_attr = keyword.replace('-', '_').replace('dirreq', 'dir')
       interval_attr = end_attr[:-4] + "_interval"
       
       desc_text = _make_descriptor({keyword: "2012-05-03 12:07:50 (500 s)"})
@@ -166,48 +170,92 @@ class TestExtraInfoDescriptor(unittest.TestCase):
       self.assertEquals(datetime.datetime(2012, 5, 3, 12, 7, 50), getattr(desc, end_attr))
       self.assertEquals(500, getattr(desc, interval_attr))
       
-      test_entry = (
+      test_entries = (
         "",
+        "2012-05-03 ",
+        "2012-05-03",
         "2012-05-03 12:07:60 (500 s)",
         "2012-05-03 12:07:50 (500s)",
         "2012-05-03 12:07:50 (500 s",
         "2012-05-03 12:07:50 (500 )",
+      )
+      
+      for entry in test_entries:
+        desc_text = _make_descriptor({keyword: entry})
+        desc = self._expect_invalid_attr(desc_text)
+        self.assertEquals(None, getattr(desc, end_attr))
+        self.assertEquals(None, getattr(desc, interval_attr))
+  
+  def test_timestamp_interval_and_value_lines(self):
+    """
+    Uses valid and invalid data to tests lines of the form...
+    "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM...
+    """
+    
+    for keyword in ('read-history', 'write-history', 'dirreq-read-history', 'dirreq-write-history'):
+      base_attr = keyword.replace('-', '_').replace('dirreq', 'dir')
+      end_attr = base_attr + "_end"
+      interval_attr = base_attr + "_interval"
+      values_attr = base_attr + "_values"
+      
+      test_entries = (
+        ("", []),
+        (" ", []),
+        (" 50,11,5", [50, 11, 5]),
+      )
+      
+      for test_values, expected_values in test_entries:
+        desc_text = _make_descriptor({keyword: "2012-05-03 12:07:50 (500 s)%s" % test_values})
+        desc = ExtraInfoDescriptor(desc_text)
+        self.assertEquals(datetime.datetime(2012, 5, 3, 12, 7, 50), getattr(desc, end_attr))
+        self.assertEquals(500, getattr(desc, interval_attr))
+        self.assertEquals(expected_values, getattr(desc, values_attr))
+      
+      test_entries = (
+        "",
         "2012-05-03 ",
         "2012-05-03",
+        "2012-05-03 12:07:60 (500 s)",
+        "2012-05-03 12:07:50 (500s)",
+        "2012-05-03 12:07:50 (500 s",
+        "2012-05-03 12:07:50 (500 )",
+        "2012-05-03 12:07:50 (500 s)11",
       )
       
-      for entry in test_entry:
+      for entry in test_entries:
         desc_text = _make_descriptor({keyword: entry})
         desc = self._expect_invalid_attr(desc_text)
         self.assertEquals(None, getattr(desc, end_attr))
         self.assertEquals(None, getattr(desc, interval_attr))
+        self.assertEquals(None, getattr(desc, values_attr))
   
-  def test_bridge_ips(self):
+  def test_locale_mapping_lines(self):
     """
-    Parses both the bridge-ips and geoip-client-origins lines with valid and
-    invalid data.
+    Uses valid and invalid data to tests lines of the form...
+    "<keyword>" CC=N,CC=N,...
     """
     
-    # Testing both attributes since they contain the exact same data,
-    # geoip-client-origins was simply replaced by bridge-ips while adding an
-    # interval value for the period.
-    
-    for keyword in ('bridge-ips', 'geoip-client-origins'):
-      attr = keyword.replace('-', '_')
+    for keyword in ('dirreq-v2-ips', 'dirreq-v3-ips', 'geoip-client-origins', 'bridge-ips'):
+      attr = keyword.replace('-', '_').replace('dirreq', 'dir')
       
-      desc_text = _make_descriptor({keyword: "uk=5,de=3,jp=2"})
-      desc = ExtraInfoDescriptor(desc_text)
-      self.assertEquals({'uk': 5, 'de': 3, 'jp': 2}, getattr(desc, attr))
+      test_entries = (
+        ("", {}),
+        ("uk=5,de=3,jp=2", {'uk': 5, 'de': 3, 'jp': 2}),
+      )
       
-      test_entry = (
-        "",
+      for test_value, expected_value in test_entries:
+        desc_text = _make_descriptor({keyword: test_value})
+        desc = ExtraInfoDescriptor(desc_text)
+        self.assertEquals(expected_value, getattr(desc, attr))
+      
+      test_entries = (
         "uk=-4",
         "uki=4",
         "uk:4",
         "uk=4.de=3",
       )
       
-      for entry in test_entry:
+      for entry in test_entries:
         desc_text = _make_descriptor({keyword: entry})
         desc = self._expect_invalid_attr(desc_text, attr, {})
   





More information about the tor-commits mailing list