[tor-commits] [sbws/master] Make sbws round to 3 significant figures in torflow rounding mode

Fri Nov 16 11:03:10 UTC 2018

commit 44a588eeb22bfe6291c3e897723e0524993da396
Author: teor <teor at torproject.org>
Date:   Wed Nov 14 20:30:40 2018 +1000

    Make sbws round to 3 significant figures in torflow rounding mode
    
    And add unit tests for rounding and rounding error.
    
    Bugfix on 27337 in sbws 1.0.
    
    Part of 28442.
---
 sbws/lib/v3bwfile.py            |  33 ++++++++++--
 tests/unit/lib/test_v3bwfile.py | 113 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 142 insertions(+), 4 deletions(-)

diff --git a/sbws/lib/v3bwfile.py b/sbws/lib/v3bwfile.py
index cae8418..f2077d9 100644
--- a/sbws/lib/v3bwfile.py
+++ b/sbws/lib/v3bwfile.py
@@ -4,6 +4,7 @@
 
 import copy
 import logging
+import math
 import os
 from itertools import combinations
 from statistics import median, mean
@@ -57,10 +58,36 @@ BW_KEYVALUES_INT = ['bw', 'rtt', 'success', 'error_stream',
 BW_KEYVALUES = BW_KEYVALUES_BASIC + BW_KEYVALUES_EXTRA
 
 
+def round_sig_dig(n, digits=TORFLOW_ROUND_DIG):
+    """Round n to 'digits' significant digits in front of the decimal point.
+       Results less than or equal to 1 are rounded to 1.
+       Returns an integer.
+
+       digits must be greater than 0.
+       n must be less than or equal to 2**73, to avoid floating point errors.
+       """
+    assert digits >= 1
+    if n <= 1:
+        return 1
+    digits = int(digits)
+    digits_in_n = int(math.log10(n)) + 1
+    round_digits = max(digits_in_n - digits, 0)
+    rounded_n = round(n, -round_digits)
+    return int(rounded_n)
+
+
 def kb_round_x_sig_dig(bw_bs, digits=TORFLOW_ROUND_DIG):
-    """Convert bw to KB and round to x most significat digits."""
-    bw_kb = bw_bs / 1000
-    return max(int(round(bw_kb, -digits)), 1)
+    """Convert bw_bs from bytes to kilobytes, and round the result to
+       'digits' significant digits.
+       Results less than or equal to 1 are rounded up to 1.
+       Returns an integer.
+
+       digits must be greater than 0.
+       bw_bs must be at most 2**82, to avoid floating point errors.
+       """
+    # avoid double-rounding by using floating-point
+    bw_kb = bw_bs / 1000.0
+    return round_sig_dig(bw_kb, digits=digits)
 
 
 def num_results_of_type(results, type_str):
diff --git a/tests/unit/lib/test_v3bwfile.py b/tests/unit/lib/test_v3bwfile.py
index a299d05..db82a6c 100644
--- a/tests/unit/lib/test_v3bwfile.py
+++ b/tests/unit/lib/test_v3bwfile.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 """Test generation of bandwidth measurements document (v3bw)"""
 import json
+import math
 import os.path
 
 from sbws import __version__ as version
@@ -9,7 +10,7 @@ from sbws.globals import (SPEC_VERSION, SBWS_SCALING, TORFLOW_SCALING,
 from sbws.lib.resultdump import Result, load_result_file, ResultSuccess
 from sbws.lib.v3bwfile import (V3BWHeader, V3BWLine, TERMINATOR, LINE_SEP,
                                KEYVALUE_SEP_V1, num_results_of_type,
-                               V3BWFile)
+                               V3BWFile, round_sig_dig)
 from sbws.util.timestamp import now_fname, now_isodt_str, now_unixts
 
 timestamp = 1523974147
@@ -90,6 +91,116 @@ def test_num_results_of_type(result_success, result_error_stream):
     assert num_results_of_type([result_error_stream], 'error-stream') == 1
 
 
+def assert_round_sig_dig_any_digits(n, result):
+    """Test that rounding n to any reasonable number of significant digits
+       produces result."""
+    max_digits_int64 = int(math.ceil(math.log10(2**64 - 1))) + 1
+    for d in range(1, max_digits_int64 + 1):
+        assert(round_sig_dig(n, digits=d) == result)
+
+
+def assert_round_sig_dig_any_digits_error(n, elp_fraction=0.5):
+    """Test that rounding n to any reasonable number of significant digits
+       gives a result within n * elp_fraction * 10.0 ** -(digits - 1) of n."""
+    max_digits_int64 = int(math.ceil(math.log10(2**64 - 1))) + 1
+    for d in range(1, max_digits_int64 + 1):
+        error_fraction = elp_fraction * (10.0 ** -(d - 1))
+        # use ceil rather than round, to work around floating-point inaccuracy
+        e = int(math.ceil(n * error_fraction))
+        assert(round_sig_dig(n, digits=d) >= n - e)
+        assert(round_sig_dig(n, digits=d) <= n + e)
+
+
+def test_round_sig_dig():
+    """Test rounding to a number of significant digits."""
+    # Expected values
+    assert(round_sig_dig(11, 1) == 10)
+    assert(round_sig_dig(11, 2) == 11)
+
+    assert(round_sig_dig(15, 1) == 20)
+    assert(round_sig_dig(15, 2) == 15)
+
+    assert(round_sig_dig(54, 1) == 50)
+    assert(round_sig_dig(54, 2) == 54)
+
+    assert(round_sig_dig(96, 1) == 100)
+    assert(round_sig_dig(96, 2) == 96)
+
+    assert(round_sig_dig(839, 1) == 800)
+    assert(round_sig_dig(839, 2) == 840)
+    assert(round_sig_dig(839, 3) == 839)
+
+    assert(round_sig_dig(5789, 1) == 6000)
+    assert(round_sig_dig(5789, 2) == 5800)
+    assert(round_sig_dig(5789, 3) == 5790)
+    assert(round_sig_dig(5789, 4) == 5789)
+
+    assert(round_sig_dig(24103, 1) == 20000)
+    assert(round_sig_dig(24103, 2) == 24000)
+    assert(round_sig_dig(24103, 3) == 24100)
+    assert(round_sig_dig(24103, 4) == 24100)
+    assert(round_sig_dig(24103, 5) == 24103)
+
+    # Floating-point values
+
+    # Must round based on fractions, must not double-round
+    assert(round_sig_dig(14, 1) == 10)
+    assert(round_sig_dig(14.0, 1) == 10)
+    assert(round_sig_dig(14.9, 1) == 10)
+    assert(round_sig_dig(15.0, 1) == 20)
+    assert(round_sig_dig(15.1, 1) == 20)
+
+    assert(round_sig_dig(14, 2) == 14)
+    assert(round_sig_dig(14.0, 2) == 14)
+    assert(round_sig_dig(14.9, 2) == 15)
+    assert(round_sig_dig(15.0, 2) == 15)
+    assert(round_sig_dig(15.1, 2) == 15)
+
+    # Must round to integer
+    assert(round_sig_dig(14, 3) == 14)
+    assert(round_sig_dig(14.0, 3) == 14)
+    assert(round_sig_dig(14.9, 3) == 15)
+    assert(round_sig_dig(15.0, 3) == 15)
+    assert(round_sig_dig(15.1, 3) == 15)
+
+    # Small integers
+    assert_round_sig_dig_any_digits(0, 1)
+    assert_round_sig_dig_any_digits(1, 1)
+    assert_round_sig_dig_any_digits(2, 2)
+    assert_round_sig_dig_any_digits(9, 9)
+    assert_round_sig_dig_any_digits(10, 10)
+
+    # Large values
+    assert_round_sig_dig_any_digits_error(2**30)
+    assert_round_sig_dig_any_digits_error(2**31)
+    assert_round_sig_dig_any_digits_error(2**32)
+
+    # the floating-point accuracy limit for this function is 2**73
+    # on some machines
+    assert_round_sig_dig_any_digits_error(2**62)
+    assert_round_sig_dig_any_digits_error(2**63)
+    assert_round_sig_dig_any_digits_error(2**64)
+
+    # Out of range values: must round to 1
+    assert_round_sig_dig_any_digits(-0.01, 1)
+    assert_round_sig_dig_any_digits(-1, 1)
+    assert_round_sig_dig_any_digits(-10.5, 1)
+    assert_round_sig_dig_any_digits(-(2**31), 1)
+
+    # test the transition points in the supported range
+    # testing the entire range up to 1 million takes 100s
+    for n in range(1, 20000):
+        assert_round_sig_dig_any_digits_error(n)
+
+    # use a step that is relatively prime to 10, to increase the chance of
+    # detecting errors
+    for n in range(90000, 200000, 9):
+        assert_round_sig_dig_any_digits_error(n)
+
+    for n in range(900000, 2000000, 99):
+        assert_round_sig_dig_any_digits_error(n)
+
+
 def test_v3bwline_from_results_file(datadir):
     lines = datadir.readlines('results.txt')
     d = dict()