[tor-commits] [stem/master] Parse BandwidthFile body

atagar at torproject.org atagar at torproject.org
Mon Jan 21 01:52:11 UTC 2019


commit a9ad5ba07bc4de3e4368c9d9d814de62703c254d
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Jan 20 16:43:07 2019 -0800

    Parse BandwidthFile body
    
    Huh. That was easy. Our spec is pretty sparse on what our body includes,
    mandating that each line is a series of key=value pairs and includes a
    'node_id' but not much beyond that.
    
    Minimal specificity limits what our parser can provide, but also grants sbws
    flexibility and makes my work dead easy. Body content is vended to users as a
    mapping of relay fingerprints to measurement metadata without any additional
    processing (no mandatory fields, type casting, etc).
---
 stem/descriptor/bandwidth_file.py      | 47 ++++++++++++++++++++++++++++++----
 test/unit/descriptor/bandwidth_file.py | 36 ++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/stem/descriptor/bandwidth_file.py b/stem/descriptor/bandwidth_file.py
index 0b2b211a..9c87385c 100644
--- a/stem/descriptor/bandwidth_file.py
+++ b/stem/descriptor/bandwidth_file.py
@@ -19,7 +19,10 @@ import time
 
 import stem.util.str_tools
 
-from stem.descriptor import Descriptor
+from stem.descriptor import (
+  _mappings_for,
+  Descriptor,
+)
 
 HEADER_DIV = '====='
 
@@ -122,10 +125,46 @@ def _parse_timestamp(descriptor, entries):
     raise ValueError("First line should be a unix timestamp, but was '%s'" % first_line)
 
 
+def _parse_body(descriptor, entries):
+  # In version 1.0.0 the body is everything after the first line. Otherwise
+  # it's everything after the header's divider.
+
+  div = '\n' if descriptor.version == '1.0.0' else HEADER_DIV
+
+  if div in str(descriptor):
+    body = str(descriptor).split(div, 1)[1].strip()
+  else:
+    body = ''
+
+  measurements = {}
+
+  if body:
+    for line in body.split('\n'):
+      attr = dict(_mappings_for('measurement', line))
+
+      if 'node_id' not in attr:
+        raise ValueError("Every meaurement must include 'node_id': %s" % line)
+      elif attr['node_id'] in measurements:
+        # Relay is listed multiple times. This is a bug for the bandwidth
+        # authority that made this descriptor, but according to the spec
+        # should be ignored by parsers.
+
+        continue
+
+      fingerprint = attr['node_id'].lstrip('$')  # bwauths prefix fingerprints with '$'
+      measurements[fingerprint] = attr
+
+  descriptor.measurements = measurements
+
+
 class BandwidthFile(Descriptor):
   """
   Tor bandwidth authroity measurements.
 
+  :var dict measurements: **\*** mapping of relay fingerprints to their
+    bandwidth measurement metadata
+
+  :var dict header: **\*** header metadata
   :var datetime timestamp: **\*** time when these metrics were published
   :var str version: **\*** document format version
 
@@ -143,8 +182,6 @@ class BandwidthFile(Descriptor):
   :var int min_count: minimum eligible relays for results to be provided
   :var int min_percent: minimum measured percentage of the consensus
 
-  :var dict header: **\*** header metadata
-
   **\*** attribute is either required when we're parsed with validation or has
   a default value, others are left as **None** if undefined
   """
@@ -154,6 +191,7 @@ class BandwidthFile(Descriptor):
   ATTRIBUTES = {
     'timestamp': (None, _parse_timestamp),
     'header': ({}, _parse_header),
+    'measurements': ({}, _parse_body),
   }
 
   ATTRIBUTES.update(dict([(k, (None, _parse_header)) for k in HEADER_ATTR.keys()]))
@@ -211,8 +249,7 @@ class BandwidthFile(Descriptor):
   def __init__(self, raw_content, validate = False):
     super(BandwidthFile, self).__init__(raw_content, lazy_load = not validate)
 
-    self.content = []  # TODO: implement
-
     if validate:
       _parse_timestamp(self, None)
       _parse_header(self, None)
+      _parse_body(self, None)
diff --git a/test/unit/descriptor/bandwidth_file.py b/test/unit/descriptor/bandwidth_file.py
index e1f8ffb4..43af974b 100644
--- a/test/unit/descriptor/bandwidth_file.py
+++ b/test/unit/descriptor/bandwidth_file.py
@@ -16,6 +16,36 @@ try:
 except ImportError:
   from mock import Mock, patch
 
+EXPECTED_MEASUREMENT_1 = {
+  'scanner': '/scanner.1/scan-data/bws-0.0:0.8-done-2019-01-13-22:55:22',
+  'measured_at': '1547441722',
+  'pid_delta': '1.07534299311',
+  'updated_at': '1547441722',
+  'pid_error_sum': '3.23746667827',
+  'nick': 'baldr',
+  'node_id': '$D8B9CAA5B818DEFE80857F83FDABBB6429DCFCA0',
+  'pid_bw': '47625769',
+  'bw': '47600',
+  'pid_error': '3.23746667827',
+  'circ_fail': '0.0',
+}
+
+EXPECTED_MEASUREMENT_2 = {
+  'desc_bw_obs_last': '473188',
+  'success': '13',
+  'desc_bw_obs_mean': '581671',
+  'bw_median': '202438',
+  'nick': 'Teinetteiine',
+  'bw': '1',
+  'desc_bw_avg': '1024000',
+  'time': '2019-01-13T12:21:29',
+  'bw_mean': '184647',
+  'error_circ': '0',
+  'error_stream': '0',
+  'node_id': '$9C7E1AFDACC53228F6FB57B3A08C7D36240B8F6F',
+  'error_misc': '0',
+}
+
 EXPECTED_NEW_HEADER_CONTENT = """
 1410723598
 version=1.1.0
@@ -49,6 +79,9 @@ class TestBandwidthFile(unittest.TestCase):
     self.assertEqual(None, desc.min_count)
     self.assertEqual(None, desc.min_percent)
 
+    self.assertEqual(94, len(desc.measurements))
+    self.assertEqual(EXPECTED_MEASUREMENT_1, desc.measurements['D8B9CAA5B818DEFE80857F83FDABBB6429DCFCA0'])
+
   def test_format_v1_2(self):
     """
     Parse version 1.2 formatted files.
@@ -73,6 +106,9 @@ class TestBandwidthFile(unittest.TestCase):
     self.assertEqual(3908, desc.min_count)
     self.assertEqual(60, desc.min_percent)
 
+    self.assertEqual(81, len(desc.measurements))
+    self.assertEqual(EXPECTED_MEASUREMENT_2, desc.measurements['9C7E1AFDACC53228F6FB57B3A08C7D36240B8F6F'])
+
   @patch('time.time', Mock(return_value = 1410723598.276578))
   def test_minimal_bandwidth_file(self):
     """





More information about the tor-commits mailing list