[tor-commits] [sbws/master] new: v3bwfile: Add time to report half network

juga at torproject.org juga at torproject.org
Thu Mar 21 18:30:42 UTC 2019


commit 3db367b26eb584cce6f7e06a0cf1e98fd5771e71
Author: juga0 <juga at riseup.net>
Date:   Wed Mar 13 16:11:26 2019 +0000

    new: v3bwfile: Add time to report half network
    
    to the bandwidth file header.
    
    Closes: #28983.
---
 sbws/lib/v3bwfile.py            | 59 ++++++++++++++++++++++++++++++++++++++++-
 sbws/util/timestamp.py          |  4 +++
 tests/unit/lib/test_v3bwfile.py | 15 +++++++++++
 3 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py
index 048f1b3..61657b9 100644
--- a/sbws/lib/v3bwfile.py
+++ b/sbws/lib/v3bwfile.py
@@ -18,7 +18,7 @@ from sbws.globals import (SPEC_VERSION, BW_LINE_SIZE, SBWS_SCALE_CONSTANT,
 from sbws.lib.resultdump import ResultSuccess, _ResultType
 from sbws.util.filelock import DirectoryLock
 from sbws.util.timestamp import (now_isodt_str, unixts_to_isodt_str,
-                                 now_unixts)
+                                 now_unixts, isostr_to_dt_obj)
 from sbws.util.state import State
 
 log = logging.getLogger(__name__)
@@ -76,6 +76,9 @@ BW_HEADER_KEYVALUES_MONITOR = [
     'recent_measurement_exclusion_not_distanciated_count',
     'recent_measurement_exclusion_not_recent_count',
     'recent_measurement_exclusion_not_min_num_count',
+
+    # The time it took to report about half of the network.
+    'time_to_report_half_network',
 ]
 BANDWIDTH_HEADER_KEY_VALUES_INIT = \
     ['earliest_bandwidth', 'generator_started',
@@ -432,6 +435,59 @@ class V3BWHeader(object):
         [setattr(self, k, str(v)) for k, v in kwargs.items()
          if k in STATS_KEYVALUES]
 
+    def add_time_report_half_network(self):
+        """Add to the header the time it took to measure half of the network.
+
+        It is not the time the scanner actually takes to measure the whole
+        network, but the time to measure the ``number_eligible_relays`` that
+        are reported in the bandwidth file and directory authorities vote on.
+
+        This is calculated for half of the network, so that failed or
+        unreported relays do not affect the result too much.
+
+        For instance, if there are 6500 relays in the network, half of the
+        network would be 3250. And if there were 4000 eligible relays
+        measured in an interval of 3 days, the time to measure half of the
+        network would be 3 days * 3250 / 4000.
+
+        Since the elapsed time is calculated from the earliest and the
+        latest measurement and a relay might have more than 2 measurements,
+        this would give an estimate on how long it would take to measure
+        the network including all the valid measurements.
+
+        Also log an estimate of how long it would take with the current
+        number of relays included in the bandwidth file.
+        """
+        # NOTE: in future refactor do not convert attributes to str until
+        # writing to the file, so that they do not need to be converted back
+        # to do some calculations.
+        elapsed_time = (
+            (isostr_to_dt_obj(self.latest_bandwidth)
+             - isostr_to_dt_obj(self.earliest_bandwidth))
+            .total_seconds())
+
+        # These attributes were added later, and some tests that
+        # do not initialize them would fail otherwise.
+        eligible_relays = int(getattr(self, 'number_eligible_relays', 0))
+        consensus_relays = int(getattr(self, 'number_consensus_relays', 0))
+        if not(eligible_relays and consensus_relays):
+            return
+
+        half_network = consensus_relays / 2
+        # Calculate the time it would take to measure half of the network
+        if eligible_relays >= half_network:
+            time_half_network = round(
+                elapsed_time * half_network / eligible_relays
+            )
+            self.time_to_report_half_network = str(time_half_network)
+
+        # In any case, log an estimate of the time to measure all the network.
+        estimated_time = round(
+            elapsed_time * consensus_relays / eligible_relays
+        )
+        log.info("Estimated time to measure the network: %s hours.",
+                 round(estimated_time / 60 / 60))
+
 
 class V3BWLine(object):
     """
@@ -773,6 +829,7 @@ class V3BWFile(object):
             # log.debug(bw_lines[-1])
         # Not using the result for now, just warning
         cls.is_max_bw_diff_perc_reached(bw_lines, max_bw_diff_perc)
+        header.add_time_report_half_network()
         f = cls(header, bw_lines)
         return f
 
diff --git a/sbws/util/timestamp.py b/sbws/util/timestamp.py
index 00f3d1c..ff3efd8 100644
--- a/sbws/util/timestamp.py
+++ b/sbws/util/timestamp.py
@@ -14,6 +14,10 @@ def dt_obj_to_isodt_str(dt):
     return dt.replace(microsecond=0).isoformat()
 
 
+def isostr_to_dt_obj(isostr):
+    return datetime.strptime(isostr, "%Y-%m-%dT%H:%M:%S")
+
+
 def unixts_to_dt_obj(unixts):
     """
     Convert unix timestamp to naive datetime object in UTC time zone.
diff --git a/tests/unit/lib/test_v3bwfile.py b/tests/unit/lib/test_v3bwfile.py
index c4c6a20..a7c0946 100644
--- a/tests/unit/lib/test_v3bwfile.py
+++ b/tests/unit/lib/test_v3bwfile.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 """Test generation of bandwidth measurements document (v3bw)"""
 import json
+import logging
 import math
 import os.path
 
@@ -393,3 +394,17 @@ def test_update_progress(datadir, tmpdir):
     assert header.number_consensus_relays == '3'
     assert header.number_eligible_relays == '3'
     assert header.percent_eligible_relays == '100'
+
+
+def test_time_measure_half_network(caplog):
+    header = V3BWHeader(timestamp_l,
+                        file_created=file_created,
+                        generator_started=generator_started,
+                        earliest_bandwidth=earliest_bandwidth)
+    header.number_consensus_relays = '6500'
+    header.number_eligible_relays = '4000'
+    caplog.set_level(logging.INFO)
+    header.add_time_report_half_network()
+    assert header.time_to_report_half_network == '70200'  # 19.5h
+    expected_log = "Estimated time to measure the network: 39 hours."  # 19.5*2
+    assert caplog.records[-1].getMessage() == expected_log





More information about the tor-commits mailing list