[tor-commits] [tor/master] Completely ignore abandoned circs from circ timeout calc

asn at torproject.org asn at torproject.org
Fri Feb 19 11:21:15 UTC 2021


commit 37b21591502d080b5f8ba9c1d0b37bd226b7f183
Author: Mike Perry <mikeperry-git at torproject.org>
Date:   Tue Oct 20 10:50:27 2020 -0500

    Completely ignore abandoned circs from circ timeout calc
    
    This prevents the timeout curve from getting spread out as much, resulting in
    more accurate timeout values for quantiles from 60-80.
---
 src/core/or/circuitstats.c | 64 ++++++++++------------------------------------
 src/test/test.c            | 10 +-------
 2 files changed, 15 insertions(+), 59 deletions(-)

diff --git a/src/core/or/circuitstats.c b/src/core/or/circuitstats.c
index 2cde21fa1f..d4eb23c966 100644
--- a/src/core/or/circuitstats.c
+++ b/src/core/or/circuitstats.c
@@ -1009,43 +1009,6 @@ circuit_build_times_shuffle_and_store_array(circuit_build_times_t *cbt,
   }
 }
 
-/**
- * Filter old synthetic timeouts that were created before the
- * new right-censored Pareto calculation was deployed.
- *
- * Once all clients before 0.2.1.13-alpha are gone, this code
- * will be unused.
- */
-static int
-circuit_build_times_filter_timeouts(circuit_build_times_t *cbt)
-{
-  int num_filtered=0, i=0;
-  double timeout_rate = 0;
-  build_time_t max_timeout = 0;
-
-  timeout_rate = circuit_build_times_timeout_rate(cbt);
-  max_timeout = (build_time_t)cbt->close_ms;
-
-  for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
-    if (cbt->circuit_build_times[i] > max_timeout) {
-      build_time_t replaced = cbt->circuit_build_times[i];
-      num_filtered++;
-      cbt->circuit_build_times[i] = CBT_BUILD_ABANDONED;
-
-      log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced,
-               cbt->circuit_build_times[i]);
-    }
-  }
-
-  log_info(LD_CIRC,
-           "We had %d timeouts out of %d build times, "
-           "and filtered %d above the max of %u",
-          (int)(cbt->total_build_times*timeout_rate),
-          cbt->total_build_times, num_filtered, max_timeout);
-
-  return num_filtered;
-}
-
 /**
  * Load histogram from <b>state</b>, shuffling the resulting array
  * after we do so. Use this result to estimate parameters and
@@ -1171,10 +1134,6 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
 
   circuit_build_times_set_timeout(cbt);
 
-  if (!state->CircuitBuildAbandonedCount && cbt->total_build_times) {
-    circuit_build_times_filter_timeouts(cbt);
-  }
-
  done:
   tor_free(loaded_times);
   return err ? -1 : 0;
@@ -1215,14 +1174,15 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
 
     if (x[i] < cbt->Xm) {
       a += tor_mathlog(cbt->Xm);
+      n++;
     } else if (x[i] == CBT_BUILD_ABANDONED) {
       abandoned_count++;
     } else {
       a += tor_mathlog(x[i]);
       if (x[i] > max_time)
         max_time = x[i];
+      n++;
     }
-    n++;
   }
 
   /*
@@ -1231,11 +1191,11 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
    * performs this same check, and resets state if it hits it. If we
    * hit it at runtime, something serious has gone wrong.
    */
-  if (n!=cbt->total_build_times) {
+  if (n!=cbt->total_build_times-abandoned_count) {
     log_err(LD_CIRC, "Discrepancy in build times count: %d vs %d", n,
             cbt->total_build_times);
   }
-  tor_assert(n==cbt->total_build_times);
+  tor_assert_nonfatal(n==cbt->total_build_times-abandoned_count);
 
   if (max_time <= 0) {
     /* This can happen if Xm is actually the *maximum* value in the set.
@@ -1248,13 +1208,17 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
     return 0;
   }
 
-  a += abandoned_count*tor_mathlog(max_time);
-
+  /* This is the "Maximum Likelihood Estimator" for parameter alpha of a Pareto
+   * Distribution. See:
+   * https://en.wikipedia.org/wiki/Pareto_distribution#Estimation_of_parameters
+   *
+   * The division in the estimator is done with subtraction outside the ln(),
+   * with the sum occurring in the for loop above.
+   *
+   * This done is to avoid the precision issues of logs of small values.
+   */
   a -= n*tor_mathlog(cbt->Xm);
-  // Estimator comes from Eq #4 in:
-  // "Bayesian estimation based on trimmed samples from Pareto populations"
-  // by Arturo J. Fernández. We are right-censored only.
-  a = (n-abandoned_count)/a;
+  a = n/a;
 
   cbt->alpha = a;
 
diff --git a/src/test/test.c b/src/test/test.c
index 58b468775c..cd21a37409 100644
--- a/src/test/test.c
+++ b/src/test/test.c
@@ -384,7 +384,6 @@ test_circuit_timeout(void *arg)
   double timeout1, timeout2;
   or_state_t *state=NULL;
   int i, runs;
-  double close_ms;
   (void)arg;
 
   initialize_periodic_events();
@@ -406,18 +405,11 @@ test_circuit_timeout(void *arg)
   circuit_build_times_initial_alpha(&initial,
                                     CBT_DEFAULT_QUANTILE_CUTOFF/100.0,
                                     timeout0);
-  close_ms = MAX(circuit_build_times_calculate_timeout(&initial,
-                             CBT_DEFAULT_CLOSE_QUANTILE/100.0),
-                 CBT_DEFAULT_TIMEOUT_INITIAL_VALUE);
   do {
     for (i=0; i < CBT_DEFAULT_MIN_CIRCUITS_TO_OBSERVE; i++) {
       build_time_t sample = circuit_build_times_generate_sample(&initial,0,1);
 
-      if (sample > close_ms) {
-        circuit_build_times_add_time(&estimate, CBT_BUILD_ABANDONED);
-      } else {
-        circuit_build_times_add_time(&estimate, sample);
-      }
+      circuit_build_times_add_time(&estimate, sample);
     }
     circuit_build_times_update_alpha(&estimate);
     timeout1 = circuit_build_times_calculate_timeout(&estimate,





More information about the tor-commits mailing list