commit 133e37ba47eadfbff545e03c87673bb99781db9b
Author: juga0 <juga(a)riseup.net>
Date: Tue Dec 18 08:57:02 2018 +0000
relayprioritizer: add argument to return only a fraction
By default this method does not return all the relays in the network,
but only a fraction of them. Add an argument that allows changing this
default behaviour so that all the relays in the network are returned.
---
sbws/lib/relayprioritizer.py | 50 +++++++++++++++++++++++++++++++-------------
1 file changed, 35 insertions(+), 15 deletions(-)
diff --git a/sbws/lib/relayprioritizer.py b/sbws/lib/relayprioritizer.py
index f17ebd4..963d08a 100644
--- a/sbws/lib/relayprioritizer.py
+++ b/sbws/lib/relayprioritizer.py
@@ -23,28 +23,45 @@ class RelayPrioritizer:
self.fraction_to_return = conf.getfloat(
'relayprioritizer', 'fraction_relays')
- def best_priority(self, prioritize_result_error=False):
- """
- Return a generator of a new list of relays ordereded by priority to
- be measured. The relays that were measured farther away in the past,
- get prioritized (lowest priority number, first in the generator).
+ def best_priority(self, prioritize_result_error=False,
+ return_fraction=True):
+ """Yields a new ordered list of relays to be measured next.
+ The relays that were measured farther away in the past,
+ get prioritized (lowest priority number, first in the list).
The relays that were measured more recently get lower priority (last in
- the generator, higher priority number).
+ the list, higher priority number).
- Optionally, the measurements that failed can be prioritized (be
- measured first).
- However, unstable relays that often fail to be measured, might fail
+ Optionally, the relays whose measurements failed can be prioritized
+ (first in the list).
+ However, unstable relays that often fail to be measured might fail
again and stable relays will get measured only when their measurements
become old enough.
The opposite might be more suitable: give lower priority to the relays
that are unstable, to don't spend time measuring relays that might fail
to be measured.
+ Optionally, return only a fraction of all the relays in the network.
+ Since there could be new relays in the network while measuring the
+ list of relays returned by this method, this method is run again
+ before all the relays in the network are measured.
+
+ .. note::
+
+ In a future refactor, instead of having a static fraction of relays
+ to be measured, this method could be called when it's known that
+ there are X new relays in the network.
+
Since measurements made before than X days ago (too old) are not
considered, and the initial list of past measurements is only filtered
when the scanner starts, it's needed to filter here again to discard
- those measurements that are too old.
+ those measurements.
+
+ :param bool prioritize_result_error: whether or not to prioritize
+ measurements that did not succeed.
+ :param bool return_fraction: whether to return only a fraction of the
+ relays seen in the network or return all.
+ :return: a generator of the new ordered list of relays to measure next.
"""
fn_tstart = Decimal(time.time())
@@ -82,14 +99,17 @@ class RelayPrioritizer:
# Sort the relays by their priority, with the smallest (best) priority
# relays at the front
relays = sorted(relays, key=lambda r: r.priority)
- cutoff = max(int(len(relays) * self.fraction_to_return),
- self.min_to_return)
+
fn_tstop = Decimal(time.time())
fn_tdelta = (fn_tstop - fn_tstart) * 1000
log.info('Spent %f msecs calculating relay best priority', fn_tdelta)
- # Finally, slowly return the relays to the caller (after removing the
- # priority member we polluted the variable with ...)
- for relay in relays[0:cutoff]:
+
+ # Return a fraction of relays in the network if return_fraction is
+ # True, otherwise return all.
+ cutoff = max(int(len(relays) * self.fraction_to_return),
+ self.min_to_return)
+ upper_limit = cutoff if return_fraction else len(relays)
+ for relay in relays[0:upper_limit]:
log.debug('Returning next relay %s with priority %f',
relay.nickname, relay.priority)
del(relay.priority)