commit 3db367b26eb584cce6f7e06a0cf1e98fd5771e71 Author: juga0 juga@riseup.net Date: Wed Mar 13 16:11:26 2019 +0000
new: v3bwfile: Add time to report half network
to the bandwidth file header.
Closes: #28983. --- sbws/lib/v3bwfile.py | 59 ++++++++++++++++++++++++++++++++++++++++- sbws/util/timestamp.py | 4 +++ tests/unit/lib/test_v3bwfile.py | 15 +++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py index 048f1b3..61657b9 100644 --- a/sbws/lib/v3bwfile.py +++ b/sbws/lib/v3bwfile.py @@ -18,7 +18,7 @@ from sbws.globals import (SPEC_VERSION, BW_LINE_SIZE, SBWS_SCALE_CONSTANT, from sbws.lib.resultdump import ResultSuccess, _ResultType from sbws.util.filelock import DirectoryLock from sbws.util.timestamp import (now_isodt_str, unixts_to_isodt_str, - now_unixts) + now_unixts, isostr_to_dt_obj) from sbws.util.state import State
log = logging.getLogger(__name__) @@ -76,6 +76,9 @@ BW_HEADER_KEYVALUES_MONITOR = [ 'recent_measurement_exclusion_not_distanciated_count', 'recent_measurement_exclusion_not_recent_count', 'recent_measurement_exclusion_not_min_num_count', + + # The time it took to report about half of the network. + 'time_to_report_half_network', ] BANDWIDTH_HEADER_KEY_VALUES_INIT = \ ['earliest_bandwidth', 'generator_started', @@ -432,6 +435,59 @@ class V3BWHeader(object): [setattr(self, k, str(v)) for k, v in kwargs.items() if k in STATS_KEYVALUES]
+    def add_time_report_half_network(self):
+        """Add to the header the time it took to measure half of the network.
+
+        It is not the time the scanner actually takes on measuring all the
+        network, but the ``number_eligible_relays`` that are reported in the
+        bandwidth file and directory authorities will vote on.
+
+        This is calculated for half of the network, so that failed or not
+        reported relays do not affect it too much.
+
+        For instance, if there are 6500 relays in the network, half of the
+        network would be 3250. And if there were 4000 eligible relays
+        measured in an interval of 3 days, the time to measure half of the
+        network would be 3 days * 3250 / 4000.
+
+        Since the elapsed time is calculated from the earliest and the
+        latest measurement and a relay might have more than 2 measurements,
+        this would give an estimate on how long it would take to measure
+        the network including all the valid measurements.
+
+        Also log an estimate of how long it would take with the current
+        number of relays included in the bandwidth file.
+        """
+        # NOTE: in future refactor do not convert attributes to str until
+        # writing to the file, so that they do not need to be converted back
+        # to do some calculations.
+        elapsed_time = (
+            (isostr_to_dt_obj(self.latest_bandwidth)
+             - isostr_to_dt_obj(self.earliest_bandwidth))
+            .total_seconds())
+
+        # These attributes were added later and some tests that
+        # do not initialize them would fail.
+        eligible_relays = int(getattr(self, 'number_eligible_relays', 0))
+        consensus_relays = int(getattr(self, 'number_consensus_relays', 0))
+        if not(eligible_relays and consensus_relays):
+            return
+
+        half_network = consensus_relays / 2
+        # Calculate the time it would take to measure half of the network
+        if eligible_relays >= half_network:
+            time_half_network = round(
+                elapsed_time * half_network / eligible_relays
+            )
+            self.time_to_report_half_network = str(time_half_network)
+
+        # In any case log an estimate of the time to measure all the network.
+        estimated_time = round(
+            elapsed_time * consensus_relays / eligible_relays
+        )
+        log.info("Estimated time to measure the network: %s hours.",
+                 round(estimated_time / 60 / 60))
+
class V3BWLine(object): """ @@ -773,6 +829,7 @@ class V3BWFile(object): # log.debug(bw_lines[-1]) # Not using the result for now, just warning cls.is_max_bw_diff_perc_reached(bw_lines, max_bw_diff_perc) + header.add_time_report_half_network() f = cls(header, bw_lines) return f
diff --git a/sbws/util/timestamp.py b/sbws/util/timestamp.py index 00f3d1c..ff3efd8 100644 --- a/sbws/util/timestamp.py +++ b/sbws/util/timestamp.py @@ -14,6 +14,10 @@ def dt_obj_to_isodt_str(dt): return dt.replace(microsecond=0).isoformat()
+def isostr_to_dt_obj(isostr): + return datetime.strptime(isostr, "%Y-%m-%dT%H:%M:%S") + + def unixts_to_dt_obj(unixts): """ Convert unix timestamp to naive datetime object in UTC time zone. diff --git a/tests/unit/lib/test_v3bwfile.py b/tests/unit/lib/test_v3bwfile.py index c4c6a20..a7c0946 100644 --- a/tests/unit/lib/test_v3bwfile.py +++ b/tests/unit/lib/test_v3bwfile.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- """Test generation of bandwidth measurements document (v3bw)""" import json +import logging import math import os.path
@@ -393,3 +394,17 @@ def test_update_progress(datadir, tmpdir): assert header.number_consensus_relays == '3' assert header.number_eligible_relays == '3' assert header.percent_eligible_relays == '100' + + +def test_time_measure_half_network(caplog): + header = V3BWHeader(timestamp_l, + file_created=file_created, + generator_started=generator_started, + earliest_bandwidth=earliest_bandwidth) + header.number_consensus_relays = '6500' + header.number_eligible_relays = '4000' + caplog.set_level(logging.INFO) + header.add_time_report_half_network() + assert header.time_to_report_half_network == '70200' # 19.5h + expected_log = "Estimated time to measure the network: 39 hours." # 19.5*2 + assert caplog.records[-1].getMessage() == expected_log
tor-commits@lists.torproject.org