commit 0f8253bddbaae4e73fe2ff9ecf1c342e3f66b798 Author: Taylor R Campbell campbell+tor@mumble.net Date: Thu Jan 10 17:40:17 2019 +0000
Use the distribution abstraction as an abstraction. --- src/core/or/circuitpadding.c | 16 +-- src/lib/math/prob_distr.c | 229 ++++++++++++++++++++++++++++++------------- src/lib/math/prob_distr.h | 46 +++------ src/test/test_prob_distr.c | 22 +++-- 4 files changed, 196 insertions(+), 117 deletions(-)
diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index 6a39a7b37..a5d5d2455 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -545,7 +545,7 @@ circpad_distribution_sample(circpad_distribution_t dist) .a = dist.param1, .b = dist.param2, }; - return uniform_sample(&my_uniform.base); + return dist_sample(&my_uniform.base); } case CIRCPAD_DIST_LOGISTIC: { @@ -555,7 +555,7 @@ circpad_distribution_sample(circpad_distribution_t dist) .mu = dist.param1, .sigma = dist.param2, }; - return logistic_sample(&my_logistic.base); + return dist_sample(&my_logistic.base); } case CIRCPAD_DIST_LOG_LOGISTIC: { @@ -565,12 +565,16 @@ circpad_distribution_sample(circpad_distribution_t dist) .alpha = dist.param1, .beta = dist.param2, }; - return log_logistic_sample(&my_log_logistic.base); + return dist_sample(&my_log_logistic.base); } case CIRCPAD_DIST_GEOMETRIC: { /* param1 is 'p' (success probability) */ - return geometric_sample(dist.param1); + const struct geometric my_geometric = { + .base = DIST_BASE(&geometric_ops), + .p = dist.param1, + }; + return dist_sample(&my_geometric.base); } case CIRCPAD_DIST_WEIBULL: { @@ -580,7 +584,7 @@ circpad_distribution_sample(circpad_distribution_t dist) .k = dist.param1, .lambda = dist.param2, }; - return weibull_sample(&my_weibull.base); + return dist_sample(&my_weibull.base); } case CIRCPAD_DIST_PARETO: { @@ -591,7 +595,7 @@ circpad_distribution_sample(circpad_distribution_t dist) .sigma = dist.param1, .xi = dist.param2, }; - return genpareto_sample(&my_genpareto.base); + return dist_sample(&my_genpareto.base); } }
diff --git a/src/lib/math/prob_distr.c b/src/lib/math/prob_distr.c index e170d000f..4263ba207 100644 --- a/src/lib/math/prob_distr.c +++ b/src/lib/math/prob_distr.c @@ -1319,17 +1319,45 @@ sample_geometric(uint32_t s, double p0, double p) * (sample/cdf/sf/icdf/isf) as part of its dist_ops structure. */
-/** Functions for uniform distribution */ -const struct dist_ops uniform_ops = { - .name = "uniform", - .sample = uniform_sample, - .cdf = uniform_cdf, - .sf = uniform_sf, - .icdf = uniform_icdf, - .isf = uniform_isf, -}; +const char * +dist_name(const struct dist *dist) +{ + return dist->ops->name; +} + +double +dist_sample(const struct dist *dist) +{ + return dist->ops->sample(dist); +} + +double +dist_cdf(const struct dist *dist, double x) +{ + return dist->ops->cdf(dist, x); +} + +double +dist_sf(const struct dist *dist, double x) +{ + return dist->ops->sf(dist, x); +}
double +dist_icdf(const struct dist *dist, double p) +{ + return dist->ops->icdf(dist, p); +} + +double +dist_isf(const struct dist *dist, double p) +{ + return dist->ops->isf(dist, p); +} + +/** Functions for uniform distribution */ + +static double uniform_sample(const struct dist *dist) { const struct uniform *U = const_container_of(dist, struct uniform, @@ -1339,7 +1367,7 @@ uniform_sample(const struct dist *dist) return sample_uniform_interval(p0, U->a, U->b); }
-double +static double uniform_cdf(const struct dist *dist, double x) { const struct uniform *U = const_container_of(dist, struct uniform, @@ -1353,7 +1381,7 @@ uniform_cdf(const struct dist *dist, double x) return 1; }
-double +static double uniform_sf(const struct dist *dist, double x) { const struct uniform *U = const_container_of(dist, struct uniform, @@ -1367,7 +1395,7 @@ uniform_sf(const struct dist *dist, double x) return 1; }
-double +static double uniform_icdf(const struct dist *dist, double p) { const struct uniform *U = const_container_of(dist, struct uniform, @@ -1377,7 +1405,7 @@ uniform_icdf(const struct dist *dist, double p) return (p < 0.5 ? (U->a + w*p) : (U->b - w*(1 - p))); }
-double +static double uniform_isf(const struct dist *dist, double p) { const struct uniform *U = const_container_of(dist, struct uniform, @@ -1387,17 +1415,18 @@ uniform_isf(const struct dist *dist, double p) return (p < 0.5 ? (U->b - w*p) : (U->a + w*(1 - p))); }
-/** Functions for logistic distribution: */ -const struct dist_ops logistic_ops = { - .name = "logistic", - .sample = logistic_sample, - .cdf = logistic_cdf, - .sf = logistic_sf, - .icdf = logistic_icdf, - .isf = logistic_isf, +const struct dist_ops uniform_ops = { + .name = "uniform", + .sample = uniform_sample, + .cdf = uniform_cdf, + .sf = uniform_sf, + .icdf = uniform_icdf, + .isf = uniform_isf, };
-double +/** Functions for logistic distribution: */ + +static double logistic_sample(const struct dist *dist) { const struct logistic *L = const_container_of(dist, struct logistic, @@ -1409,7 +1438,7 @@ logistic_sample(const struct dist *dist) return sample_logistic_locscale(s, t, p0, L->mu, L->sigma); }
-double +static double logistic_cdf(const struct dist *dist, double x) { const struct logistic *L = const_container_of(dist, struct logistic, @@ -1418,7 +1447,7 @@ logistic_cdf(const struct dist *dist, double x) return cdf_logistic(x, L->mu, L->sigma); }
-double +static double logistic_sf(const struct dist *dist, double x) { const struct logistic *L = const_container_of(dist, struct logistic, @@ -1427,7 +1456,7 @@ logistic_sf(const struct dist *dist, double x) return sf_logistic(x, L->mu, L->sigma); }
-double +static double logistic_icdf(const struct dist *dist, double p) { const struct logistic *L = const_container_of(dist, struct logistic, @@ -1436,7 +1465,7 @@ logistic_icdf(const struct dist *dist, double p) return icdf_logistic(p, L->mu, L->sigma); }
-double +static double logistic_isf(const struct dist *dist, double p) { const struct logistic *L = const_container_of(dist, struct logistic, @@ -1445,17 +1474,18 @@ logistic_isf(const struct dist *dist, double p) return isf_logistic(p, L->mu, L->sigma); }
-/** Functions for log-logistic distribution: */ -const struct dist_ops log_logistic_ops = { - .name = "log logistic", - .sample = log_logistic_sample, - .cdf = log_logistic_cdf, - .sf = log_logistic_sf, - .icdf = log_logistic_icdf, - .isf = log_logistic_isf, +const struct dist_ops logistic_ops = { + .name = "logistic", + .sample = logistic_sample, + .cdf = logistic_cdf, + .sf = logistic_sf, + .icdf = logistic_icdf, + .isf = logistic_isf, };
-double +/** Functions for log-logistic distribution: */ + +static double log_logistic_sample(const struct dist *dist) { const struct log_logistic *LL = const_container_of(dist, struct @@ -1466,7 +1496,7 @@ log_logistic_sample(const struct dist *dist) return sample_log_logistic_scaleshape(s, p0, LL->alpha, LL->beta); }
-double +static double log_logistic_cdf(const struct dist *dist, double x) { const struct log_logistic *LL = const_container_of(dist, @@ -1475,7 +1505,7 @@ log_logistic_cdf(const struct dist *dist, double x) return cdf_log_logistic(x, LL->alpha, LL->beta); }
-double +static double log_logistic_sf(const struct dist *dist, double x) { const struct log_logistic *LL = const_container_of(dist, @@ -1484,7 +1514,7 @@ log_logistic_sf(const struct dist *dist, double x) return sf_log_logistic(x, LL->alpha, LL->beta); }
-double +static double log_logistic_icdf(const struct dist *dist, double p) { const struct log_logistic *LL = const_container_of(dist, @@ -1493,7 +1523,7 @@ log_logistic_icdf(const struct dist *dist, double p) return icdf_log_logistic(p, LL->alpha, LL->beta); }
-double +static double log_logistic_isf(const struct dist *dist, double p) { const struct log_logistic *LL = const_container_of(dist, @@ -1502,17 +1532,18 @@ log_logistic_isf(const struct dist *dist, double p) return isf_log_logistic(p, LL->alpha, LL->beta); }
-/** Functions for Weibull distribution */ -const struct dist_ops weibull_ops = { - .name = "Weibull", - .sample = weibull_sample, - .cdf = weibull_cdf, - .sf = weibull_sf, - .icdf = weibull_icdf, - .isf = weibull_isf, +const struct dist_ops log_logistic_ops = { + .name = "log logistic", + .sample = log_logistic_sample, + .cdf = log_logistic_cdf, + .sf = log_logistic_sf, + .icdf = log_logistic_icdf, + .isf = log_logistic_isf, };
-double +/** Functions for Weibull distribution */ + +static double weibull_sample(const struct dist *dist) { const struct weibull *W = const_container_of(dist, struct weibull, @@ -1523,7 +1554,7 @@ weibull_sample(const struct dist *dist) return sample_weibull(s, p0, W->lambda, W->k); }
-double +static double weibull_cdf(const struct dist *dist, double x) { const struct weibull *W = const_container_of(dist, struct weibull, @@ -1532,7 +1563,7 @@ weibull_cdf(const struct dist *dist, double x) return cdf_weibull(x, W->lambda, W->k); }
-double +static double weibull_sf(const struct dist *dist, double x) { const struct weibull *W = const_container_of(dist, struct weibull, @@ -1541,7 +1572,7 @@ weibull_sf(const struct dist *dist, double x) return sf_weibull(x, W->lambda, W->k); }
-double +static double weibull_icdf(const struct dist *dist, double p) { const struct weibull *W = const_container_of(dist, struct weibull, @@ -1550,7 +1581,7 @@ weibull_icdf(const struct dist *dist, double p) return icdf_weibull(p, W->lambda, W->k); }
-double +static double weibull_isf(const struct dist *dist, double p) { const struct weibull *W = const_container_of(dist, struct weibull, @@ -1559,17 +1590,18 @@ weibull_isf(const struct dist *dist, double p) return isf_weibull(p, W->lambda, W->k); }
-/** Functions for generalized Pareto distributions */ -const struct dist_ops genpareto_ops = { - .name = "generalized Pareto", - .sample = genpareto_sample, - .cdf = genpareto_cdf, - .sf = genpareto_sf, - .icdf = genpareto_icdf, - .isf = genpareto_isf, +const struct dist_ops weibull_ops = { + .name = "Weibull", + .sample = weibull_sample, + .cdf = weibull_cdf, + .sf = weibull_sf, + .icdf = weibull_icdf, + .isf = weibull_isf, };
-double +/** Functions for generalized Pareto distributions */ + +static double genpareto_sample(const struct dist *dist) { const struct genpareto *GP = const_container_of(dist, struct genpareto, @@ -1580,7 +1612,7 @@ genpareto_sample(const struct dist *dist) return sample_genpareto_locscale(s, p0, GP->mu, GP->sigma, GP->xi); }
-double +static double genpareto_cdf(const struct dist *dist, double x) { const struct genpareto *GP = const_container_of(dist, struct genpareto, @@ -1589,7 +1621,7 @@ genpareto_cdf(const struct dist *dist, double x) return cdf_genpareto(x, GP->mu, GP->sigma, GP->xi); }
-double +static double genpareto_sf(const struct dist *dist, double x) { const struct genpareto *GP = const_container_of(dist, struct genpareto, @@ -1598,7 +1630,7 @@ genpareto_sf(const struct dist *dist, double x) return sf_genpareto(x, GP->mu, GP->sigma, GP->xi); }
-double +static double genpareto_icdf(const struct dist *dist, double p) { const struct genpareto *GP = const_container_of(dist, struct genpareto, @@ -1607,7 +1639,7 @@ genpareto_icdf(const struct dist *dist, double p) return icdf_genpareto(p, GP->mu, GP->sigma, GP->xi); }
-double +static double genpareto_isf(const struct dist *dist, double p) { const struct genpareto *GP = const_container_of(dist, struct genpareto, @@ -1616,13 +1648,70 @@ genpareto_isf(const struct dist *dist, double p) return isf_genpareto(p, GP->mu, GP->sigma, GP->xi); }
-/* Deterministically sample from the geometric distribution with - * per-trial success probability p. */ -double -geometric_sample(double p) +const struct dist_ops genpareto_ops = { + .name = "generalized Pareto", + .sample = genpareto_sample, + .cdf = genpareto_cdf, + .sf = genpareto_sf, + .icdf = genpareto_icdf, + .isf = genpareto_isf, +}; + +/** Functions for geometric distribution on number of trials before success */ + +static double +geometric_sample(const struct dist *dist) { + const struct geometric *G = const_container_of(dist, struct geometric, base); uint32_t s = crypto_rand_u32(); double p0 = random_uniform_01(); - return sample_geometric(s, p0, p); + + return sample_geometric(s, p0, G->p); }
+static double +geometric_cdf(const struct dist *dist, double x) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + if (x < 1) + return 0; + /* 1 - (1 - p)^floor(x) = 1 - e^{floor(x) log(1 - p)} */ + return -expm1(floor(x)*log1p(-G->p)); +} + +static double +geometric_sf(const struct dist *dist, double x) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + if (x < 1) + return 0; + /* (1 - p)^floor(x) = e^{ceil(x) log(1 - p)} */ + return exp(floor(x)*log1p(-G->p)); +} + +static double +geometric_icdf(const struct dist *dist, double p) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + return log1p(-p)/log1p(-G->p); +} + +static double +geometric_isf(const struct dist *dist, double p) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + return log(p)/log1p(-G->p); +} + +const struct dist_ops geometric_ops = { + .name = "geometric (1-based)", + .sample = geometric_sample, + .cdf = geometric_cdf, + .sf = geometric_sf, + .icdf = geometric_icdf, + .isf = geometric_isf, +}; diff --git a/src/lib/math/prob_distr.h b/src/lib/math/prob_distr.h index c2fd6c74b..981fc2017 100644 --- a/src/lib/math/prob_distr.h +++ b/src/lib/math/prob_distr.h @@ -21,6 +21,13 @@ struct dist {
#define DIST_BASE(OPS) { .ops = (OPS) }
+const char *dist_name(const struct dist *); +double dist_sample(const struct dist *); +double dist_cdf(const struct dist *, double x); +double dist_sf(const struct dist *, double x); +double dist_icdf(const struct dist *, double p); +double dist_isf(const struct dist *, double p); + struct dist_ops { const char *name; double (*sample)(const struct dist *); @@ -30,9 +37,14 @@ struct dist_ops { double (*isf)(const struct dist *, double p); };
-/* Geometric distribution */ +/* Geometric distribution on positive number of trials before first success */
-double geometric_sample(double p); +struct geometric { + struct dist base; + double p; /* success probability */ +}; + +extern const struct dist_ops geometric_ops;
/* Pareto distribution */
@@ -43,12 +55,6 @@ struct genpareto { double xi; };
-double genpareto_sample(const struct dist *dist); -double genpareto_cdf(const struct dist *dist, double x); -double genpareto_sf(const struct dist *dist, double x); -double genpareto_icdf(const struct dist *dist, double p); -double genpareto_isf(const struct dist *dist, double p); - extern const struct dist_ops genpareto_ops;
/* Weibull distribution */ @@ -59,12 +65,6 @@ struct weibull { double k; };
-double weibull_sample(const struct dist *dist); -double weibull_cdf(const struct dist *dist, double x); -double weibull_sf(const struct dist *dist, double x); -double weibull_icdf(const struct dist *dist, double p); -double weibull_isf(const struct dist *dist, double p); - extern const struct dist_ops weibull_ops;
/* Log-logistic distribution */ @@ -75,12 +75,6 @@ struct log_logistic { double beta; };
-double log_logistic_sample(const struct dist *dist); -double log_logistic_cdf(const struct dist *dist, double x); -double log_logistic_sf(const struct dist *dist, double x); -double log_logistic_icdf(const struct dist *dist, double p); -double log_logistic_isf(const struct dist *dist, double p); - extern const struct dist_ops log_logistic_ops;
/* Logistic distribution */ @@ -91,12 +85,6 @@ struct logistic { double sigma; };
-double logistic_sample(const struct dist *dist); -double logistic_cdf(const struct dist *dist, double x); -double logistic_sf(const struct dist *dist, double x); -double logistic_icdf(const struct dist *dist, double p); -double logistic_isf(const struct dist *dist, double p); - extern const struct dist_ops logistic_ops;
/* Uniform distribution */ @@ -107,12 +95,6 @@ struct uniform { double b; };
-double uniform_sample(const struct dist *dist); -double uniform_cdf(const struct dist *dist, double x); -double uniform_sf(const struct dist *dist, double x); -double uniform_icdf(const struct dist *dist, double p); -double uniform_isf(const struct dist *dist, double p); - extern const struct dist_ops uniform_ops;
/** Only by unittests */ diff --git a/src/test/test_prob_distr.c b/src/test/test_prob_distr.c index ec4e943e9..fe3969518 100644 --- a/src/test/test_prob_distr.c +++ b/src/test/test_prob_distr.c @@ -942,6 +942,10 @@ psi_test(const size_t C[PSI_DF], const double logP[PSI_DF], size_t N) static bool test_stochastic_geometric_impl(double p) { + const struct geometric geometric = { + .base = DIST_BASE(&geometric_ops), + .p = p, + }; double logP[PSI_DF] = {0}; unsigned ntry = NTRIALS, npass = 0; unsigned i; @@ -958,7 +962,7 @@ test_stochastic_geometric_impl(double p) size_t C[PSI_DF] = {0};
for (j = 0; j < NSAMPLES; j++) { - double n_tmp = geometric_sample(p); + double n_tmp = dist_sample(&geometric.base);
/* Must be an integer. (XXX -Wfloat-equal) */ tor_assert(ceil(n_tmp) <= n_tmp && ceil(n_tmp) >= n_tmp); @@ -1006,10 +1010,10 @@ test_stochastic_geometric_impl(double p) static void bin_cdfs(const struct dist *dist, double lo, double hi, double *logP, size_t n) { -#define CDF(x) dist->ops->cdf(dist, x) -#define SF(x) dist->ops->sf(dist, x) +#define CDF(x) dist_cdf(dist, x) +#define SF(x) dist_sf(dist, x) const double w = (hi - lo)/(n - 2); - double halfway = dist->ops->icdf(dist, 0.5); + double halfway = dist_icdf(dist, 0.5); double x_0, x_1; size_t i; size_t n2 = ceil_to_size_t((halfway - lo)/w); @@ -1057,7 +1061,7 @@ bin_samples(const struct dist *dist, double lo, double hi, size_t *C, size_t n) size_t i;
for (i = 0; i < NSAMPLES; i++) { - double x = dist->ops->sample(dist); + double x = dist_sample(dist); size_t bin;
if (x < lo) @@ -1084,8 +1088,8 @@ test_psi_dist_sample(const struct dist *dist) { double logP[PSI_DF] = {0}; unsigned ntry = NTRIALS, npass = 0; - double lo = dist->ops->icdf(dist, 1/(double)(PSI_DF + 2)); - double hi = dist->ops->isf(dist, 1/(double)(PSI_DF + 2)); + double lo = dist_icdf(dist, 1/(double)(PSI_DF + 2)); + double hi = dist_isf(dist, 1/(double)(PSI_DF + 2));
/* Create the null hypothesis in logP */ bin_cdfs(dist, lo, hi, logP, PSI_DF); @@ -1102,10 +1106,10 @@ test_psi_dist_sample(const struct dist *dist)
/* Did we fail or succeed? */ if (npass >= NPASSES_MIN) { - /* printf("pass %s sampler\n", dist->ops->name);*/ + /* printf("pass %s sampler\n", dist_name(dist));*/ return true; } else { - printf("fail %s sampler\n", dist->ops->name); + printf("fail %s sampler\n", dist_name(dist)); return false; } }