[tor-commits] [sbws/maint-1.1] chg: relaylist: Count measurements with timestamps

juga at torproject.org juga at torproject.org
Tue Apr 14 13:53:19 UTC 2020


commit 89801c5331b80776cced97d9da3a2990516279b3
Author: juga0 <juga at riseup.net>
Date:   Sat Mar 21 14:01:15 2020 +0000

    chg: relaylist: Count measurements with timestamps
    
    in RelayList:
    - Rename recent_measurement_attempt_count to recent_measurement_attempt
      wherever the name does not refer to a count
    - Use the timestamps class to manage/count measurement attempts
---
 sbws/core/scanner.py             |  2 +-
 sbws/globals.py                  | 16 +++++++++++++++
 sbws/lib/relaylist.py            | 20 ++++++++++---------
 sbws/lib/v3bwfile.py             |  2 +-
 tests/unit/lib/test_relaylist.py | 42 ++++++++++++++++++++++++++++++++++++++++
 tests/unit/lib/test_v3bwfile.py  | 11 +++++++++++
 6 files changed, 82 insertions(+), 11 deletions(-)

diff --git a/sbws/core/scanner.py b/sbws/core/scanner.py
index 60c8ca4..06381b8 100644
--- a/sbws/core/scanner.py
+++ b/sbws/core/scanner.py
@@ -515,7 +515,7 @@ def main_loop(args, conf, controller, relay_list, circuit_builder, result_dump,
             # Don't start measuring a relay if sbws is stopping.
             if settings.end_event.is_set():
                 break
-            relay_list.increment_recent_measurement_attempt_count()
+            relay_list.increment_recent_measurement_attempt()
             target.increment_relay_recent_measurement_attempt_count()
             num_relays += 1
             # callback and callback_err must be non-blocking
diff --git a/sbws/globals.py b/sbws/globals.py
index a9dca10..01a147b 100644
--- a/sbws/globals.py
+++ b/sbws/globals.py
@@ -151,6 +151,22 @@ FACTOR_INCREMENT_DESTINATION_RETRY = 2
 # Constants to check health KeyValues in the bandwidth file
 PERIOD_DAYS = int(MEASUREMENTS_PERIOD / (24 * 60 * 60))
 MAX_RECENT_CONSENSUS_COUNT = PERIOD_DAYS * 24  # 120
+# XXX: This was only defined in `config.default.ini`; it should be read from
+# here.
+FRACTION_RELAYS = 0.05
+# A priority list currently takes more than 3h; ideally it should only take 1h.
+MIN_HOURS_PRIORITY_LIST = 1
+# As of 2020, there are fewer than 7000 relays.
+MAX_RELAYS = 8000
+# 120
+MAX_RECENT_PRIORITY_LIST_COUNT = int(
+    PERIOD_DAYS * 24 / MIN_HOURS_PRIORITY_LIST
+)
+MAX_RELAYS_PER_PRIORITY_LIST = int(MAX_RELAYS * FRACTION_RELAYS)  # 400
+# 48000
+MAX_RECENT_PRIORITY_RELAY_COUNT = (
+    MAX_RECENT_PRIORITY_LIST_COUNT * MAX_RELAYS_PER_PRIORITY_LIST
+)
 
 
 def fail_hard(*a, **kw):
diff --git a/sbws/lib/relaylist.py b/sbws/lib/relaylist.py
index 5ae1c2d..d5e5754 100644
--- a/sbws/lib/relaylist.py
+++ b/sbws/lib/relaylist.py
@@ -10,6 +10,7 @@ from threading import Lock
 
 from ..globals import (
     MAX_RECENT_CONSENSUS_COUNT,
+    MAX_RECENT_PRIORITY_RELAY_COUNT,
     MEASUREMENTS_PERIOD
 )
 from ..util import timestamp, timestamps
@@ -334,12 +335,10 @@ class RelayList:
         self._relays = []
         # The period of time for which the measurements are keep.
         self._measurements_period = measurements_period
-        self._state = state
-        # NOTE: blocking: writes to disk
-        if self._state:
-            if self._state.get('recent_measurement_attempt_count', None) \
-                    is None:
-                self._state['recent_measurement_attempt_count'] = 0
+        self._recent_measurement_attempt = timestamps.DateTimeSeq(
+            [], MAX_RECENT_PRIORITY_RELAY_COUNT, state,
+            "recent_measurement_attempt"
+        )
         self._refresh()
 
     def _need_refresh(self):
@@ -502,7 +501,7 @@ class RelayList:
         return [r for r in self.exits
                 if r.is_exit_not_bad_allowing_port(port)]
 
-    def increment_recent_measurement_attempt_count(self):
+    def increment_recent_measurement_attempt(self):
         """
         Increment the number of times that any relay has been queued to be
         measured.
@@ -512,5 +511,8 @@ class RelayList:
         It is read and stored in a ``state`` file.
         """
         # NOTE: blocking, writes to file!
-        if self._state:
-            self._state['recent_measurement_attempt_count'] += 1
+        self._recent_measurement_attempt.update()
+
+    @property
+    def recent_measurement_attempt_count(self):
+        return len(self._recent_measurement_attempt)
diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py
index 70304a9..f81ef82 100644
--- a/sbws/lib/v3bwfile.py
+++ b/sbws/lib/v3bwfile.py
@@ -470,7 +470,7 @@ class V3BWHeader(object):
         in the recent (by default 5) days from the state file.
         """
         state = State(state_fpath)
-        return state.get('recent_measurement_attempt_count', None)
+        return state.count('recent_measurement_attempt')
 
     @staticmethod
     def recent_priority_list_count_from_file(state_fpath):
diff --git a/tests/unit/lib/test_relaylist.py b/tests/unit/lib/test_relaylist.py
index 1b22425..ea13b7b 100644
--- a/tests/unit/lib/test_relaylist.py
+++ b/tests/unit/lib/test_relaylist.py
@@ -75,3 +75,45 @@ def test_init_relays(
     # The number of relays will be the number of relays in the cosensus plus
     # the added ones minus the removed ones.
     assert 6925 == 6505 + len(added_fps) - len(removed_fps)
+
+
+def test_increment_recent_measurement_attempt(args, conf, controller):
+    """Test that incrementing the measurement attempts does not go on forever
+
+    Instead, only attempts made in the last days (by default 5) are counted.
+    It also tests that the state file is updated correctly.
+    """
+    state = State(conf['paths']['state_fpath'])
+    # For this test it does not matter that the consensus timestamps or relays
+    # are not correct.
+    relay_list = RelayList(args, conf, controller=controller, state=state)
+    # The initial count is 0 and the state does not have that key.
+    assert 0 == relay_list.recent_measurement_attempt_count
+    assert not state.get("recent_measurement_attempt", None)
+
+    # Pretend that a measurement attempt is made.
+    with freeze_time("2020-02-29 10:00:00"):
+        relay_list.increment_recent_measurement_attempt()
+    assert 1 == relay_list.recent_measurement_attempt_count
+    assert [datetime(2020, 2, 29, 10, 0)] == state[
+        "recent_measurement_attempt"
+    ]
+
+    # And a second measurement attempt is made 4 days later.
+    with freeze_time("2020-03-04 10:00:00"):
+        relay_list.increment_recent_measurement_attempt()
+    assert 2 == relay_list.recent_measurement_attempt_count
+    assert 2 == len(state["recent_measurement_attempt"])
+
+    # And a third measurement attempt is made 5 days later.
+    with freeze_time("2020-03-05 10:00:00"):
+        relay_list.increment_recent_measurement_attempt()
+    assert 3 == relay_list.recent_measurement_attempt_count
+    assert 3 == len(state["recent_measurement_attempt"])
+
+    # And a fourth measurement attempt is made 6 days later. The first one is
+    # now removed and not counted.
+    with freeze_time("2020-03-06 10:00:00"):
+        relay_list.increment_recent_measurement_attempt()
+    assert 3 == relay_list.recent_measurement_attempt_count
+    assert 3 == len(state["recent_measurement_attempt"])
diff --git a/tests/unit/lib/test_v3bwfile.py b/tests/unit/lib/test_v3bwfile.py
index c9c0e93..2ce34ff 100644
--- a/tests/unit/lib/test_v3bwfile.py
+++ b/tests/unit/lib/test_v3bwfile.py
@@ -540,3 +540,14 @@ def test_recent_consensus_count(root_data_path, datadir):
     results = load_result_file(str(datadir.join("results.txt")))
     header = V3BWHeader.from_results(results, '', '', state_fpath)
     assert "1" == header.recent_consensus_count
+
+
+def test_recent_measurement_attempt_count(root_data_path, datadir):
+    state_fpath = os.path.join(root_data_path, '.sbws/state.dat')
+    assert 15 == V3BWHeader.recent_measurement_attempt_count_from_file(
+        state_fpath
+    )
+    # `results` does not matter here, using them to not have an empty list.
+    results = load_result_file(str(datadir.join("results.txt")))
+    header = V3BWHeader.from_results(results, '', '', state_fpath)
+    assert "15" == header.recent_measurement_attempt_count





More information about the tor-commits mailing list