[tor-commits] [tor] 01/03: rephist: Introduce a fraction and period for overload onionskin

gitolite role git at cupani.torproject.org
Thu Apr 14 13:33:36 UTC 2022


This is an automated email from the git hooks/post-receive script.

dgoulet pushed a commit to branch main
in repository tor.

commit 7ce3edaee0eaf93f2f8fa60b859cf49118b9523b
Author: David Goulet <dgoulet at torproject.org>
AuthorDate: Wed Mar 2 13:20:07 2022 -0500

    rephist: Introduce a fraction and period for overload onionskin
    
    This code was heavily reused from the previous DNS timeout work done in
    ticket #40491 that was removed afterall from our code.
    
    Closes #40560
    
    Signed-off-by: David Goulet <dgoulet at torproject.org>
---
 src/feature/relay/onion_queue.c |   2 -
 src/feature/stats/rephist.c     | 208 +++++++++++++++++++++++++++++++++++-----
 src/feature/stats/rephist.h     |  10 ++
 3 files changed, 195 insertions(+), 25 deletions(-)

diff --git a/src/feature/relay/onion_queue.c b/src/feature/relay/onion_queue.c
index 85ec0dc74a..c4023509ad 100644
--- a/src/feature/relay/onion_queue.c
+++ b/src/feature/relay/onion_queue.c
@@ -166,8 +166,6 @@ onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin)
       RATELIM_INIT(WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL);
     if (onionskin->handshake_type == ONION_HANDSHAKE_TYPE_NTOR) {
       char *m;
-      /* Note this ntor onionskin drop as an overload */
-      rep_hist_note_overload(OVERLOAD_GENERAL);
       if ((m = rate_limit_log(&last_warned, approx_time()))) {
         log_warn(LD_GENERAL,
                  "Your computer is too slow to handle this many circuit "
diff --git a/src/feature/stats/rephist.c b/src/feature/stats/rephist.c
index 2bfa14d326..ae4420ae50 100644
--- a/src/feature/stats/rephist.c
+++ b/src/feature/stats/rephist.c
@@ -291,25 +291,6 @@ overload_general_dns_assessment(void)
     approx_time() + overload_dns_timeout_period_secs;
 }
 
-/** Called just before the consensus will be replaced. Update the consensus
- * parameters in case they changed. */
-void
-rep_hist_consensus_has_changed(const networkstatus_t *ns)
-{
-  overload_dns_timeout_fraction =
-    networkstatus_get_param(ns, "overload_dns_timeout_scale_percent",
-                            OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT,
-                            OVERLOAD_DNS_TIMEOUT_PERCENT_MIN,
-                            OVERLOAD_DNS_TIMEOUT_PERCENT_MAX) /
-    OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0;
-
-  overload_dns_timeout_period_secs =
-    networkstatus_get_param(ns, "overload_dns_timeout_period_secs",
-                            OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT,
-                            OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN,
-                            OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX);
-}
-
 /** Note a DNS error for the given given libevent DNS record type and error
  * code. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA.
  *
@@ -1913,12 +1894,144 @@ STATIC int onion_handshakes_requested[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
 STATIC int onion_handshakes_assigned[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
 /**@}*/
 
+/** Counters keeping the same stats as above but for the entire duration of the
+ * process (not reset). */
+static uint64_t stats_n_onionskin_assigned[MAX_ONION_STAT_TYPE+1] = {0};
+static uint64_t stats_n_onionskin_dropped[MAX_ONION_STAT_TYPE+1] = {0};
+
+/* We use a scale here so we can represent percentages with decimal points by
+ * scaling the value by this factor and so 0.5% becomes a value of 500.
+ * Default is 1% and thus min and max range is 0 to 100%. */
+#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE 1000.0
+#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT 1000
+#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_MIN 0
+#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_MAX 100000
+
+/** Consensus parameter: indicate what fraction of ntor onionskin drop over the
+ * total number of requests must be reached before we trigger a general
+ * overload signal.*/
+static double overload_onionskin_ntor_fraction =
+   OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT /
+   OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE / 100.0;
+
+/* Number of seconds for the assessment period. Default is 6 hours (21600) and
+ * the min max range is within a 32bit value. We align this period to the
+ * Heartbeat so the logs would match this period more or less. */
+#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT (60 * 60 * 6)
+#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MIN 0
+#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MAX INT32_MAX
+
+/** Consensus parameter: Period, in seconds, over which we count the number of
+ * ntor onionskins requests and how many were dropped. After that period, we
+ * assess if we trigger an overload or not. */
+static int32_t overload_onionskin_ntor_period_secs =
+  OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT;
+
+/** Structure containing information for an assessment period of the onionskin
+ * drop overload general signal.
+ *
+ * It is used to track, within a time period, how many requests we've gotten
+ * and how many were dropped. The overload general signal is decided from these
+ * depending on some consensus parameters. */
+typedef struct {
+  /** Total number of ntor onionskin requested for an assessment period. */
+  uint64_t n_ntor_requested;
+
+  /** Total number of dropped ntor onionskins for an assessment period. */
+  uint64_t n_ntor_dropped;
+
+  /** When is the next assessment time of the general overload for ntor
+   * onionskin drop. Once this time is reached, all stats are reset and this
+   * time is set to the next assessment time. */
+  time_t next_assessment_time;
+} overload_onionskin_assessment_t;
+
+/** Keep track of the onionskin requests for an assessment period. */
+static overload_onionskin_assessment_t overload_onionskin_assessment;
+
+/**
+ * We combine ntorv3 and ntor into the same stat, so we must
+ * use this function to covert the cell type to a stat index.
+ */
+static inline uint16_t
+onionskin_type_to_stat(uint16_t type)
+{
+  if (BUG(type > MAX_ONION_STAT_TYPE)) {
+    return MAX_ONION_STAT_TYPE; // use ntor if out of range
+  }
+
+  return type;
+}
+
+/** Assess our ntor handshake statistics and decide if we need to emit a
+ * general overload signal.
+ *
+ * Regardless of overloaded or not, if the assessment time period has passed,
+ * the stats are reset back to 0 and the assessment time period updated.
+ *
+ * This is called when a ntor handshake is _requested_ because we want to avoid
+ * to have an assymetric situation where requested counter is reset to 0 but
+ * then a drop happens leading to the drop counter being incremented while the
+ * requested counter is 0. */
+static void
+overload_general_onionskin_assessment(void)
+{
+  /* Initialize the time. Should be done once. */
+  if (overload_onionskin_assessment.next_assessment_time == 0) {
+    goto reset;
+  }
+
+  /* Not the time yet. */
+  if (overload_onionskin_assessment.next_assessment_time > approx_time()) {
+    goto done;
+  }
+
+  /* Make sure we have enough requests to be able to make a proper assessment.
+   * We want to avoid 1 single request/drop to trigger an overload as we want
+   * at least the number of requests to be above the scale of our fraction. */
+  if (overload_onionskin_assessment.n_ntor_requested <
+      OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE) {
+    goto done;
+  }
+
+  /* Lets see if we can signal a general overload. */
+  double fraction = (double) overload_onionskin_assessment.n_ntor_dropped /
+                    (double) overload_onionskin_assessment.n_ntor_requested;
+  if (fraction >= overload_onionskin_ntor_fraction) {
+    log_notice(LD_HIST, "General overload -> Ntor dropped (%" PRIu64 ") "
+               "fraction %.4f%% is above threshold of %.4f%%",
+               overload_onionskin_assessment.n_ntor_dropped,
+               fraction * 100.0,
+               overload_onionskin_ntor_fraction * 100.0);
+    rep_hist_note_overload(OVERLOAD_GENERAL);
+  }
+
+ reset:
+  /* Reset counters for the next period. */
+  overload_onionskin_assessment.n_ntor_dropped = 0;
+  overload_onionskin_assessment.n_ntor_requested = 0;
+  overload_onionskin_assessment.next_assessment_time =
+    approx_time() + overload_onionskin_ntor_period_secs;
+
+ done:
+  return;
+}
+
 /** A new onionskin (using the <b>type</b> handshake) has arrived. */
 void
 rep_hist_note_circuit_handshake_requested(uint16_t type)
 {
-  if (type <= MAX_ONION_HANDSHAKE_TYPE)
-    onion_handshakes_requested[type]++;
+  uint16_t stat = onionskin_type_to_stat(type);
+
+  onion_handshakes_requested[stat]++;
+
+  /* Only relays get to record requested onionskins. */
+  if (stat == ONION_HANDSHAKE_TYPE_NTOR) {
+    /* Assess if we've reached the overload general signal. */
+    overload_general_onionskin_assessment();
+
+    overload_onionskin_assessment.n_ntor_requested++;
+  }
 }
 
 /** We've sent an onionskin (using the <b>type</b> handshake) to a
@@ -1926,8 +2039,24 @@ rep_hist_note_circuit_handshake_requested(uint16_t type)
 void
 rep_hist_note_circuit_handshake_assigned(uint16_t type)
 {
-  if (type <= MAX_ONION_HANDSHAKE_TYPE)
-    onion_handshakes_assigned[type]++;
+  onion_handshakes_assigned[onionskin_type_to_stat(type)]++;
+  stats_n_onionskin_assigned[onionskin_type_to_stat(type)]++;
+}
+
+/** We've just drop an onionskin (using the <b>type</b> handshake) due to being
+ * overloaded. */
+void
+rep_hist_note_circuit_handshake_dropped(uint16_t type)
+{
+  uint16_t stat = onionskin_type_to_stat(type);
+
+  stats_n_onionskin_dropped[stat]++;
+
+  /* Only relays get to record requested onionskins. */
+  if (stat == ONION_HANDSHAKE_TYPE_NTOR) {
+    /* Note the dropped ntor in the overload assessment object. */
+    overload_onionskin_assessment.n_ntor_dropped++;
+  }
 }
 
 /** Get the circuit handshake value that is requested. */
@@ -2522,6 +2651,39 @@ rep_hist_free_all(void)
   tor_assert_nonfatal_once(rephist_total_num == 0);
 }
 
+/** Called just before the consensus will be replaced. Update the consensus
+ * parameters in case they changed. */
+void
+rep_hist_consensus_has_changed(const networkstatus_t *ns)
+{
+  overload_onionskin_ntor_fraction =
+    networkstatus_get_param(ns, "overload_onionskin_ntor_scale_percent",
+                            OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT,
+                            OVERLOAD_ONIONSKIN_NTOR_PERCENT_MIN,
+                            OVERLOAD_ONIONSKIN_NTOR_PERCENT_MAX) /
+    OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE / 100.0;
+
+  overload_onionskin_ntor_period_secs =
+    networkstatus_get_param(ns, "overload_onionskin_ntor_period_secs",
+                            OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT,
+                            OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MIN,
+                            OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MAX);
+
+  /** XXX: Unused parameters. */
+  overload_dns_timeout_fraction =
+    networkstatus_get_param(ns, "overload_dns_timeout_scale_percent",
+                            OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT,
+                            OVERLOAD_DNS_TIMEOUT_PERCENT_MIN,
+                            OVERLOAD_DNS_TIMEOUT_PERCENT_MAX) /
+    OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0;
+
+  overload_dns_timeout_period_secs =
+    networkstatus_get_param(ns, "overload_dns_timeout_period_secs",
+                            OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT,
+                            OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN,
+                            OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX);
+}
+
 #ifdef TOR_UNIT_TESTS
 /* only exists for unit tests: get HSv2 stats object */
 const hs_v2_stats_t *
diff --git a/src/feature/stats/rephist.h b/src/feature/stats/rephist.h
index 749b4996a8..891827c8e6 100644
--- a/src/feature/stats/rephist.h
+++ b/src/feature/stats/rephist.h
@@ -58,6 +58,7 @@ time_t rep_hist_desc_stats_write(time_t now);
 
 void rep_hist_note_circuit_handshake_requested(uint16_t type);
 void rep_hist_note_circuit_handshake_assigned(uint16_t type);
+void rep_hist_note_circuit_handshake_dropped(uint16_t type);
 void rep_hist_log_circuit_handshake_stats(time_t now);
 
 MOCK_DECL(int, rep_hist_get_circuit_handshake_requested, (uint16_t type));
@@ -79,8 +80,17 @@ void rep_hist_free_all(void);
 void rep_hist_note_negotiated_link_proto(unsigned link_proto,
                                          int started_here);
 void rep_hist_log_link_protocol_counts(void);
+
+uint64_t rep_hist_get_n_dns_error(int type, uint8_t error);
+uint64_t rep_hist_get_n_dns_request(int type);
+void rep_hist_note_dns_request(int type);
+void rep_hist_note_dns_error(int type, uint8_t error);
+
 void rep_hist_consensus_has_changed(const networkstatus_t *ns);
 
+/** We have 3 stat types: tap, fast, and ntor. The max type is ntor (2) */
+#define MAX_ONION_STAT_TYPE   ONION_HANDSHAKE_TYPE_NTOR
+
 extern uint64_t rephist_total_alloc;
 extern uint32_t rephist_total_num;
 #ifdef TOR_UNIT_TESTS

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the tor-commits mailing list