commit 6d54bdbdcf076167c1b73bfb5bef9fd1c3921796 Author: teor teor2345@gmail.com Date: Thu Dec 25 20:30:18 2014 +1100
Handle edge cases in laplace functions
Avoid division by zero. Avoid taking the log of zero. Silence clang type conversion warnings using round and trunc. The existing values returned by the laplace functions do not change.
Add tests for laplace edge cases. These changes pass the existing unit tests without modification.
Related to HS stats in #13192. --- changes/laplace-edge-cases | 7 ++ src/common/util.c | 33 ++++++-- src/test/test_util.c | 201 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 226 insertions(+), 15 deletions(-)
diff --git a/changes/laplace-edge-cases b/changes/laplace-edge-cases new file mode 100644 index 0000000..f8a36e1 --- /dev/null +++ b/changes/laplace-edge-cases @@ -0,0 +1,7 @@ + o Minor bugfixes: + - Handle edge cases in the laplace functions: + * avoid division by zero + * avoid taking the log of zero + * silence clang type conversion warnings using round and trunc + - Add tests for laplace edge cases. + Related to HS stats in #13192. diff --git a/src/common/util.c b/src/common/util.c index f8d1b7b..52b3e04 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -536,13 +536,18 @@ int64_t sample_laplace_distribution(double mu, double b, double p) { double result; - tor_assert(p >= 0.0 && p < 1.0); + /* This is the "inverse cumulative distribution function" from: * http://en.wikipedia.org/wiki/Laplace_distribution */ + if (p == 0.0) { + /* Avoid taking log(0.0) == -INFINITY, as some processors or compiler + * options can cause the program to trap. */ + return INT64_MIN; + } + result = mu - b * (p > 0.5 ? 1.0 : -1.0) * tor_mathlog(1.0 - 2.0 * fabs(p - 0.5)); - if (result >= INT64_MAX) return INT64_MAX; else if (result <= INT64_MIN) @@ -551,18 +556,28 @@ sample_laplace_distribution(double mu, double b, double p) return (int64_t) result; }
-/** Add random noise between INT64_MIN and INT64_MAX coming from a - * Laplace distribution with mu = 0 and b = <b>delta_f</b>/<b>epsilon</b> - * to <b>signal</b> based on the provided <b>random</b> value in - * [0.0, 1.0[. */ +/** Add random noise between INT64_MIN and INT64_MAX coming from a Laplace + * distribution with mu = 0 and b = <b>delta_f</b>/<b>epsilon</b> to + * <b>signal</b> based on the provided <b>random</b> value in [0.0, 1.0[. + * The epislon value must be between ]0.0, 1.0]. delta_f must be greater + * than 0. */ int64_t add_laplace_noise(int64_t signal, double random, double delta_f, double epsilon) { - int64_t noise = sample_laplace_distribution( - 0.0, /* just add noise, no further signal */ - delta_f / epsilon, random); + int64_t noise; + + /* epsilon MUST be between ]0.0, 1.0] */ + tor_assert(epsilon > 0.0 && epsilon <= 1.0); + /* delta_f MUST be greater than 0. */ + tor_assert(delta_f > 0.0); + + /* Just add noise, no further signal */ + noise = sample_laplace_distribution(0.0, + delta_f / epsilon, + random);
+ /* Clip (signal + noise) to [INT64_MIN, INT64_MAX] */ if (noise > 0 && INT64_MAX - noise < signal) return INT64_MAX; else if (noise < 0 && INT64_MIN - noise > signal) diff --git a/src/test/test_util.c b/src/test/test_util.c index 6a4c3ec..940f9bd 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -4100,12 +4100,201 @@ test_util_laplace(void *arg) */ tt_i64_op(INT64_MIN + 20, ==, add_laplace_noise(20, 0.0, delta_f, epsilon)); - tt_i64_op(-60, ==, add_laplace_noise(20, 0.1, delta_f, epsilon)); - tt_i64_op(-14, ==, add_laplace_noise(20, 0.25, delta_f, epsilon)); - tt_i64_op(20, ==, add_laplace_noise(20, 0.5, delta_f, epsilon)); - tt_i64_op(54, ==, add_laplace_noise(20, 0.75, delta_f, epsilon)); - tt_i64_op(100, ==, add_laplace_noise(20, 0.9, delta_f, epsilon)); - tt_i64_op(215, ==, add_laplace_noise(20, 0.99, delta_f, epsilon)); + + tt_assert(-60 == add_laplace_noise(20, 0.1, delta_f, epsilon)); + tt_assert(-14 == add_laplace_noise(20, 0.25, delta_f, epsilon)); + tt_assert(20 == add_laplace_noise(20, 0.5, delta_f, epsilon)); + tt_assert(54 == add_laplace_noise(20, 0.75, delta_f, epsilon)); + tt_assert(100 == add_laplace_noise(20, 0.9, delta_f, epsilon)); + tt_assert(215 == add_laplace_noise(20, 0.99, delta_f, epsilon)); + + /* Test extreme values of signal with maximally negative values of noise + * 1.0000000000000002 is the smallest number > 1 + * 0.0000000000000002 is the double epsilon (error when calculating near 1) + * this is approximately 1/(2^52) + * per https://en.wikipedia.org/wiki/Double_precision + * (let's not descend into the world of subnormals) + * >>> laplace.ppf([0, 0.0000000000000002], loc = 0, scale = 1) + * array([ -inf, -35.45506713]) + */ + const double noscale_df = 1.0, noscale_eps = 1.0; + + tt_assert(INT64_MIN == + add_laplace_noise(0, 0.0, noscale_df, noscale_eps)); + + /* is it clipped to INT64_MIN? */ + tt_assert(INT64_MIN == + add_laplace_noise(-1, 0.0, noscale_df, noscale_eps)); + tt_assert(INT64_MIN == + add_laplace_noise(INT64_MIN, 0.0, + noscale_df, noscale_eps)); + /* ... even when scaled? */ + tt_assert(INT64_MIN == + add_laplace_noise(0, 0.0, delta_f, epsilon)); + tt_assert(INT64_MIN == + add_laplace_noise(0, 0.0, + INT64_MAX, 1)); + tt_assert(INT64_MIN == + add_laplace_noise(INT64_MIN, 0.0, + INT64_MAX, 1)); + + /* does it play nice with INT64_MAX? */ + tt_assert((INT64_MIN + INT64_MAX) == + add_laplace_noise(INT64_MAX, 0.0, + noscale_df, noscale_eps)); + + /* do near-zero fractional values work? */ + const double min_dbl_error = 0.0000000000000002; + + tt_assert(-35 == + add_laplace_noise(0, min_dbl_error, + noscale_df, noscale_eps)); + tt_assert(INT64_MIN == + add_laplace_noise(INT64_MIN, min_dbl_error, + noscale_df, noscale_eps)); + tt_assert((-35 + INT64_MAX) == + add_laplace_noise(INT64_MAX, min_dbl_error, + noscale_df, noscale_eps)); + /* ... even when scaled? */ + tt_assert(INT64_MIN == + add_laplace_noise(0, min_dbl_error, + INT64_MIN, -35)); + tt_assert(INT64_MIN == + add_laplace_noise(0, min_dbl_error, + INT64_MIN, -34)); + tt_assert(INT64_MAX == + add_laplace_noise(0, min_dbl_error, + INT64_MIN, 1)); + tt_assert((INT64_MIN + INT64_MAX) == + add_laplace_noise(INT64_MIN, min_dbl_error, + INT64_MIN, 1)); + tt_assert(INT64_MAX == + add_laplace_noise(INT64_MAX, min_dbl_error, + INT64_MIN, 1)); + + tt_assert(INT64_MAX == + add_laplace_noise(0, min_dbl_error, + INT64_MAX, -35)); + tt_assert(INT64_MAX == + add_laplace_noise(0, min_dbl_error, + INT64_MAX, -34)); + tt_assert(INT64_MIN == + add_laplace_noise(0, min_dbl_error, + INT64_MAX, 1)); + tt_assert((INT64_MAX + INT64_MIN) == + add_laplace_noise(INT64_MAX, min_dbl_error, + INT64_MAX, 1)); + tt_assert(INT64_MIN == + add_laplace_noise(INT64_MIN, min_dbl_error, + INT64_MAX, 1)); + + /* does it play nice with INT64_MAX? */ + tt_assert((INT64_MAX - 35) == + add_laplace_noise(INT64_MAX, min_dbl_error, + noscale_df, noscale_eps)); + + /* Test extreme values of signal with maximally positive values of noise + * 1.0000000000000002 is the smallest number > 1 + * 0.9999999999999998 is the greatest number < 1 by calculation + * per https://en.wikipedia.org/wiki/Double_precision + * >>> laplace.ppf([1.0, 0.9999999999999998], loc = 0, scale = 1) + * array([inf, 35.35050621]) + * but the function rejects p == 1.0, so we just use max_dbl_lt_one + */ + const double max_dbl_lt_one = 0.9999999999999998; + + /* do near-one fractional values work? */ + tt_assert(35 == + add_laplace_noise(0, max_dbl_lt_one, noscale_df, noscale_eps)); + + /* is it clipped to INT64_MAX? */ + tt_assert(INT64_MAX == + add_laplace_noise(INT64_MAX - 35, max_dbl_lt_one, + noscale_df, noscale_eps)); + tt_assert(INT64_MAX == + add_laplace_noise(INT64_MAX - 34, max_dbl_lt_one, + noscale_df, noscale_eps)); + tt_assert(INT64_MAX == + add_laplace_noise(INT64_MAX, max_dbl_lt_one, + noscale_df, noscale_eps)); + /* ... even when scaled? */ + tt_assert(INT64_MAX == + add_laplace_noise(INT64_MAX, max_dbl_lt_one, + delta_f, epsilon)); + tt_assert(INT64_MAX == + add_laplace_noise(0, max_dbl_lt_one, + INT64_MAX, 35)); + tt_assert(INT64_MAX == + add_laplace_noise(0, max_dbl_lt_one, + INT64_MAX, 34)); + tt_assert((INT64_MIN + INT64_MAX) == + add_laplace_noise(INT64_MIN, max_dbl_lt_one, + INT64_MAX, 1)); + tt_assert(INT64_MAX == + add_laplace_noise(INT64_MAX, max_dbl_lt_one, + INT64_MAX, 1)); + tt_assert((INT64_MAX + INT64_MIN) == + add_laplace_noise(INT64_MAX, max_dbl_lt_one, + INT64_MIN, 1)); + tt_assert(INT64_MIN == + add_laplace_noise(INT64_MIN, max_dbl_lt_one, + INT64_MIN, 1)); + + /* does it play nice with INT64_MIN? */ + tt_assert((INT64_MIN + 35) == + add_laplace_noise(INT64_MIN, max_dbl_lt_one, + noscale_df, noscale_eps)); + + /* Test extreme values of b = delta_f / epsilon + * >>> laplace.ppf([0.5], loc = 0, scale = 1) + * array([0. ]) + */ + + /* Make sure edge cases don't depend on architecture, + * optimisation level, or other compiler flags. + * XXXX Are these edge cases important enough to make consistent? */ + + /* b = positive zero, p yields positive zero */ + tt_assert(0.0 == + add_laplace_noise(0.0, 0.5, 0.0, 1.0)) + /* b = negative zero, p yields positive zero */ + tt_assert(0.0 == + add_laplace_noise(0.0, 0.5, 0.0, -1.0)) + /* b = positive infinity, p yields positive zero, result is -NaN -> -Inf */ + tt_assert(INT64_MIN == + add_laplace_noise(0.0, 0.5, 1.0, 0.0)) + /* b = negative infinity, p yields positive zero, result is -NaN -> -Inf */ + tt_assert(INT64_MIN == + add_laplace_noise(0.0, 0.5, -1.0, 0.0)) + /* b = positive NaN (rounded to -Inf), p yields positive zero, + * result is -NaN -> -Inf */ + tt_assert(INT64_MIN == + add_laplace_noise(0.0, 0.5, -0.0, -0.0)) + /* b = negative NaN (rounded to -Inf), p yields positive zero, + * result is -NaN -> -Inf*/ + tt_assert(INT64_MIN == + add_laplace_noise(0.0, 0.5, -0.0, 0.0)) + + /* b = positive zero, p yields negative infinity, result is -NaN -> -Inf */ + tt_assert(INT64_MIN == + add_laplace_noise(0.0, 0.0, 0.0, 1.0)) + /* b = negative zero, p yields negative infinity, result is -NaN -> -Inf */ + tt_assert(INT64_MIN == + add_laplace_noise(0.0, 0.0, 0.0, -1.0)) + /* b = positive infinity, p yields negative infinity */ + tt_assert(INT64_MIN == + add_laplace_noise(0.0, 0.0, 1.0, 0.0)) + /* b = negative infinity, p yields negative infinity */ + tt_assert(INT64_MAX == + add_laplace_noise(0.0, 0.0, -1.0, 0.0)) + /* b = positive NaN (rounded to -Inf), p yields negative infinity, + * result is -NaN -> -Inf */ + tt_assert(INT64_MIN == + add_laplace_noise(0.0, 0.0, -0.0, -0.0)) + /* b = negative NaN (rounded to -Inf), p yields negative infinity, + * result is NaN -> Inf */ + tt_assert(INT64_MAX == + add_laplace_noise(0.0, 0.0, -0.0, 0.0))
done: ;
tor-commits@lists.torproject.org