[tor-commits] [tor/master] Add two hidden-service related statistics.

nickm at torproject.org nickm at torproject.org
Fri Dec 19 15:39:27 UTC 2014


commit 14e83e626be84c4f311b8e8f0d80ca141675fa9e
Author: George Kadianakis <desnacked at riseup.net>
Date:   Tue Dec 2 12:20:35 2014 +0000

    Add two hidden-service related statistics.
    
    The two statistics are:
     1. number of RELAY cells observed on successfully established
        rendezvous circuits; and
     2. number of .onion addresses observed as hidden-service
        directory.
    
    Both statistics are accumulated over 24 hours, obfuscated by rounding
    up to the next multiple of a given number and adding random noise,
    and written to local file stats/hidserv-stats.
    
    Notably, no statistics will be gathered on clients or services, but
    only on relays.
---
 doc/tor.1.txt       |    5 ++
 src/or/command.c    |    9 +++
 src/or/config.c     |   13 ++++
 src/or/main.c       |    5 ++
 src/or/or.h         |    8 ++
 src/or/rendcommon.c |    7 ++
 src/or/rendmid.c    |    7 ++
 src/or/rephist.c    |  216 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/or/rephist.h    |    7 ++
 src/or/router.c     |    5 ++
 10 files changed, 282 insertions(+)

diff --git a/doc/tor.1.txt b/doc/tor.1.txt
index e876829..b09f322 100644
--- a/doc/tor.1.txt
+++ b/doc/tor.1.txt
@@ -1764,6 +1764,11 @@ is non-zero):
     When this option is enabled, Tor writes statistics on the bidirectional use
     of connections to disk every 24 hours. (Default: 0)
 
+[[HiddenServiceStatistics]] **HiddenServiceStatistics** **0**|**1**::
+    When this option is enabled, a Tor relay writes statistics on its role as
+    hidden-service directory, introduction point, or rendezvous point to disk
+    every 24 hours. (Default: 0)
+
 [[ExtraInfoStatistics]] **ExtraInfoStatistics** **0**|**1**::
     When this option is enabled, Tor includes previously gathered statistics in
     its extra-info documents that it uploads to the directory authorities.
diff --git a/src/or/command.c b/src/or/command.c
index 268c495..8e214bf 100644
--- a/src/or/command.c
+++ b/src/or/command.c
@@ -438,6 +438,7 @@ command_process_created_cell(cell_t *cell, channel_t *chan)
 static void
 command_process_relay_cell(cell_t *cell, channel_t *chan)
 {
+  const or_options_t *options = get_options();
   circuit_t *circ;
   int reason, direction;
 
@@ -511,6 +512,14 @@ command_process_relay_cell(cell_t *cell, channel_t *chan)
            direction==CELL_DIRECTION_OUT?"forward":"backward");
     circuit_mark_for_close(circ, -reason);
   }
+
+  /* If this is a cell in an RP circuit, count it as part of the
+     hidden service stats */
+  if (options->HiddenServiceStatistics &&
+      !CIRCUIT_IS_ORIGIN(circ) &&
+      TO_OR_CIRCUIT(circ)->circuit_carries_hs_traffic_stats) {
+    rep_hist_seen_new_rp_cell();
+  }
 }
 
 /** Process a 'destroy' <b>cell</b> that just arrived from
diff --git a/src/or/config.c b/src/or/config.c
index 28f1df0..9a6f952 100644
--- a/src/or/config.c
+++ b/src/or/config.c
@@ -268,6 +268,7 @@ static config_var_t option_vars_[] = {
   VAR("HiddenServicePort",   LINELIST_S, RendConfigLines,    NULL),
   VAR("HiddenServiceVersion",LINELIST_S, RendConfigLines,    NULL),
   VAR("HiddenServiceAuthorizeClient",LINELIST_S,RendConfigLines, NULL),
+  V(HiddenServiceStatistics,     BOOL,     "0"),
   V(HidServAuth,                 LINELIST, NULL),
   V(CloseHSClientCircuitsImmediatelyOnTimeout, BOOL, "0"),
   V(CloseHSServiceRendCircuitsImmediatelyOnTimeout, BOOL, "0"),
@@ -1714,6 +1715,7 @@ options_act(const or_options_t *old_options)
   if (options->CellStatistics || options->DirReqStatistics ||
       options->EntryStatistics || options->ExitPortStatistics ||
       options->ConnDirectionStatistics ||
+      options->HiddenServiceStatistics ||
       options->BridgeAuthoritativeDir) {
     time_t now = time(NULL);
     int print_notice = 0;
@@ -1722,6 +1724,7 @@ options_act(const or_options_t *old_options)
     if (!public_server_mode(options)) {
       options->CellStatistics = 0;
       options->EntryStatistics = 0;
+      options->HiddenServiceStatistics = 0;
       options->ExitPortStatistics = 0;
     }
 
@@ -1767,6 +1770,11 @@ options_act(const or_options_t *old_options)
         options->ConnDirectionStatistics) {
       rep_hist_conn_stats_init(now);
     }
+    if ((!old_options || !old_options->HiddenServiceStatistics) &&
+        options->HiddenServiceStatistics) {
+      log_info(LD_CONFIG, "Configured to measure hidden service statistics.");
+      rep_hist_hs_stats_init(now);
+    }
     if ((!old_options || !old_options->BridgeAuthoritativeDir) &&
         options->BridgeAuthoritativeDir) {
       rep_hist_desc_stats_init(now);
@@ -1778,6 +1786,8 @@ options_act(const or_options_t *old_options)
                  "data directory in 24 hours from now.");
   }
 
+  /* If we used to have statistics enabled but we just disabled them,
+     stop gathering them.  */
   if (old_options && old_options->CellStatistics &&
       !options->CellStatistics)
     rep_hist_buffer_stats_term();
@@ -1787,6 +1797,9 @@ options_act(const or_options_t *old_options)
   if (old_options && old_options->EntryStatistics &&
       !options->EntryStatistics)
     geoip_entry_stats_term();
+  if (old_options && old_options->HiddenServiceStatistics &&
+      !options->HiddenServiceStatistics)
+    rep_hist_hs_stats_term();
   if (old_options && old_options->ExitPortStatistics &&
       !options->ExitPortStatistics)
     rep_hist_exit_stats_term();
diff --git a/src/or/main.c b/src/or/main.c
index e78e9bf..160bfa0 100644
--- a/src/or/main.c
+++ b/src/or/main.c
@@ -1384,6 +1384,11 @@ run_scheduled_events(time_t now)
       if (next_write && next_write < next_time_to_write_stats_files)
         next_time_to_write_stats_files = next_write;
     }
+    if (options->HiddenServiceStatistics) {
+      time_t next_write = rep_hist_hs_stats_write(time_to_write_stats_files);
+      if (next_write && next_write < next_time_to_write_stats_files)
+        next_time_to_write_stats_files = next_write;
+    }
     if (options->ExitPortStatistics) {
       time_t next_write = rep_hist_exit_stats_write(time_to_write_stats_files);
       if (next_write && next_write < next_time_to_write_stats_files)
diff --git a/src/or/or.h b/src/or/or.h
index 0de3745..ee86697 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -3204,6 +3204,10 @@ typedef struct or_circuit_t {
   /** True iff this circuit was made with a CREATE_FAST cell. */
   unsigned int is_first_hop : 1;
 
+  /** If set, this circuit carries HS traffic. Consider it in any HS
+   *  statistics. */
+  unsigned int circuit_carries_hs_traffic_stats : 1;
+
   /** Number of cells that were removed from circuit queue; reset every
    * time when writing buffer stats to disk. */
   uint32_t processed_cells;
@@ -3961,6 +3965,10 @@ typedef struct {
   /** If true, the user wants us to collect statistics as entry node. */
   int EntryStatistics;
 
+  /** If true, the user wants us to collect statistics as hidden service
+   * directory, introduction point, or rendezvous point. */
+  int HiddenServiceStatistics;
+
   /** If true, include statistics file contents in extra-info documents. */
   int ExtraInfoStatistics;
 
diff --git a/src/or/rendcommon.c b/src/or/rendcommon.c
index df74b74..e779ecf 100644
--- a/src/or/rendcommon.c
+++ b/src/or/rendcommon.c
@@ -924,6 +924,7 @@ rend_cache_lookup_v2_desc_as_dir(const char *desc_id, const char **desc)
 rend_cache_store_status_t
 rend_cache_store_v2_desc_as_dir(const char *desc)
 {
+  const or_options_t *options = get_options();
   rend_service_descriptor_t *parsed;
   char desc_id[DIGEST_LEN];
   char *intro_content;
@@ -1003,6 +1004,12 @@ rend_cache_store_v2_desc_as_dir(const char *desc)
     log_info(LD_REND, "Successfully stored service descriptor with desc ID "
                       "'%s' and len %d.",
              safe_str(desc_id_base32), (int)encoded_size);
+
+    /* Statistics: Note down this potentially new HS. */
+    if (options->HiddenServiceStatistics) {
+      rep_hist_stored_maybe_new_hs(e->parsed->pk);
+    }
+
     number_stored++;
     goto advance;
   skip:
diff --git a/src/or/rendmid.c b/src/or/rendmid.c
index 6a701e7..1c56471 100644
--- a/src/or/rendmid.c
+++ b/src/or/rendmid.c
@@ -281,6 +281,7 @@ int
 rend_mid_rendezvous(or_circuit_t *circ, const uint8_t *request,
                     size_t request_len)
 {
+  const or_options_t *options = get_options();
   or_circuit_t *rend_circ;
   char hexid[9];
   int reason = END_CIRC_REASON_INTERNAL;
@@ -316,6 +317,12 @@ rend_mid_rendezvous(or_circuit_t *circ, const uint8_t *request,
     goto err;
   }
 
+  /* Statistics: Mark this circuit as an RP circuit so that we collect
+     stats from it. */
+  if (options->HiddenServiceStatistics) {
+    circ->circuit_carries_hs_traffic_stats = 1;
+  }
+
   /* Send the RENDEZVOUS2 cell to Alice. */
   if (relay_send_command_from_edge(0, TO_CIRCUIT(rend_circ),
                                    RELAY_COMMAND_RENDEZVOUS2,
diff --git a/src/or/rephist.c b/src/or/rephist.c
index f1e8827..a190fc8 100644
--- a/src/or/rephist.c
+++ b/src/or/rephist.c
@@ -2908,11 +2908,227 @@ rep_hist_log_circuit_handshake_stats(time_t now)
   memset(onion_handshakes_requested, 0, sizeof(onion_handshakes_requested));
 }
 
+/* Hidden service statistics section */
+
+/** Start of the current hidden service stats interval or 0 if we're
+ * not collecting hidden service statistics. */
+static time_t start_of_hs_stats_interval;
+
+/** Carries the various hidden service statistics, and any other
+ *  information needed. */
+typedef struct hs_stats_t {
+  /** How many relay cells have we seen as rendezvous points? */
+  int64_t rp_relay_cells_seen;
+
+  /** Set of unique public key digests we've seen this stat period
+   * (could also be implemented as sorted smartlist). */
+  digestmap_t *onions_seen_this_period;
+} hs_stats_t;
+
+/** Our statistics structure singleton. */
+static hs_stats_t *hs_stats = NULL;
+
+/** Allocate, initialize and return an hs_stats_t structure. */
+static hs_stats_t *
+hs_stats_new(void)
+{
+  hs_stats_t * hs_stats = tor_malloc_zero(sizeof(hs_stats_t));
+  hs_stats->onions_seen_this_period = digestmap_new();
+
+  return hs_stats;
+}
+
+/** Free an hs_stats_t structure. */
+static void
+hs_stats_free(hs_stats_t *hs_stats)
+{
+  if (!hs_stats) {
+    return;
+  }
+
+  digestmap_free(hs_stats->onions_seen_this_period, NULL);
+  tor_free(hs_stats);
+}
+
+/** Initialize hidden service statistics. */
+void
+rep_hist_hs_stats_init(time_t now)
+{
+  if (!hs_stats) {
+    hs_stats = hs_stats_new();
+  }
+
+  start_of_hs_stats_interval = now;
+}
+
+/** Clear history of hidden service statistics and set the measurement
+ * interval start to <b>now</b>. */
+static void
+rep_hist_reset_hs_stats(time_t now)
+{
+  if (!hs_stats) {
+    hs_stats = hs_stats_new();
+  }
+
+  hs_stats->rp_relay_cells_seen = 0;
+
+  digestmap_free(hs_stats->onions_seen_this_period, NULL);
+  hs_stats->onions_seen_this_period = digestmap_new();
+
+  start_of_hs_stats_interval = now;
+}
+
+/** Stop collecting hidden service stats in a way that we can re-start
+ * doing so in rep_hist_buffer_stats_init(). */
+void
+rep_hist_hs_stats_term(void)
+{
+  rep_hist_reset_hs_stats(0);
+}
+
+/** We saw a new HS relay cell, Count it! */
+void
+rep_hist_seen_new_rp_cell(void)
+{
+  if (!hs_stats) {
+    return; // We're not collecting stats
+  }
+
+  hs_stats->rp_relay_cells_seen++;
+}
+
+/** As HSDirs, we saw another hidden service with public key
+ *  <b>pubkey</b>. Check whether we have counted it before, if not
+ *  count it now! */
+void
+rep_hist_stored_maybe_new_hs(const crypto_pk_t *pubkey)
+{
+  char pubkey_hash[DIGEST_LEN];
+
+  if (!hs_stats) {
+    return; // We're not collecting stats
+  }
+
+  /* Get the digest of the pubkey which will be used to detect whether
+     we've seen this hidden service before or not.  */
+  if (crypto_pk_get_digest(pubkey, pubkey_hash) < 0) {
+    /*  This fail should not happen; key has been validated by
+        descriptor parsing code first. */
+    return;
+  }
+
+  /* Check if this is the first time we've seen this hidden
+     service. If it is, count it as new. */
+  if (!digestmap_get(hs_stats->onions_seen_this_period,
+                     pubkey_hash)) {
+    digestmap_set(hs_stats->onions_seen_this_period,
+                  pubkey_hash, (void*)(uintptr_t)1);
+  }
+}
+
+/* The number of cells that are supposed to be hidden from the adversary
+ * by adding noise from the Laplace distribution.  This value, divided by
+ * EPSILON, is Laplace parameter b. */
+#define REND_CELLS_DELTA_F 2048
+/* Security parameter for obfuscating number of cells with a value between
+ * 0 and 1.  Smaller values obfuscate observations more, but at the same
+ * time make statistics less usable. */
+#define REND_CELLS_EPSILON 0.3
+/* The number of cells that are supposed to be hidden from the adversary
+ * by rounding up to the next multiple of this number. */
+#define REND_CELLS_BIN_SIZE 1024
+/* The number of service identities that are supposed to be hidden from
+ * the adversary by adding noise from the Laplace distribution.  This
+ * value, divided by EPSILON, is Laplace parameter b. */
+#define ONIONS_SEEN_DELTA_F 8
+/* Security parameter for obfuscating number of service identities with a
+ * value between 0 and 1.  Smaller values obfuscate observations more, but
+ * at the same time make statistics less usable. */
+#define ONIONS_SEEN_EPSILON 0.3
+/* The number of service identities that are supposed to be hidden from
+ * the adversary by rounding up to the next multiple of this number. */
+#define ONIONS_SEEN_BIN_SIZE 8
+
+/** Allocate and return a string containing hidden service stats that
+ *  are meant to be placed in the extra-info descriptor. */
+static char *
+rep_hist_format_hs_stats(time_t now)
+{
+  char t[ISO_TIME_LEN+1];
+  char *hs_stats_string;
+  int64_t obfuscated_cells_seen;
+  int64_t obfuscated_onions_seen;
+
+  obfuscated_cells_seen = round_int64_to_next_multiple_of(
+                          hs_stats->rp_relay_cells_seen,
+                          REND_CELLS_BIN_SIZE);
+  obfuscated_cells_seen = add_laplace_noise(obfuscated_cells_seen,
+                          crypto_rand_double(),
+                          REND_CELLS_DELTA_F, REND_CELLS_EPSILON);
+  obfuscated_onions_seen = round_int64_to_next_multiple_of(digestmap_size(
+                           hs_stats->onions_seen_this_period),
+                           ONIONS_SEEN_BIN_SIZE);
+  obfuscated_onions_seen = add_laplace_noise(obfuscated_onions_seen,
+                           crypto_rand_double(), ONIONS_SEEN_DELTA_F,
+                           ONIONS_SEEN_EPSILON);
+
+  format_iso_time(t, now);
+  tor_asprintf(&hs_stats_string, "hidserv-stats-end %s (%d s)\n"
+               "hidserv-rend-relayed-cells "I64_FORMAT" delta_f=%d "
+                                           "epsilon=%.2f bin_size=%d\n"
+               "hidserv-dir-onions-seen "I64_FORMAT" delta_f=%d "
+                                        "epsilon=%.2f bin_size=%d\n",
+               t, (unsigned) (now - start_of_hs_stats_interval),
+               I64_PRINTF_ARG(obfuscated_cells_seen), REND_CELLS_DELTA_F,
+               REND_CELLS_EPSILON, REND_CELLS_BIN_SIZE,
+               I64_PRINTF_ARG(obfuscated_onions_seen),
+               ONIONS_SEEN_DELTA_F,
+               ONIONS_SEEN_EPSILON, ONIONS_SEEN_BIN_SIZE);
+
+  return hs_stats_string;
+}
+
+/** If 24 hours have passed since the beginning of the current HS
+ * stats period, write buffer stats to $DATADIR/stats/hidserv-stats
+ * (possibly overwriting an existing file) and reset counters.  Return
+ * when we would next want to write buffer stats or 0 if we never want to
+ * write. */
+time_t
+rep_hist_hs_stats_write(time_t now)
+{
+  char *str = NULL;
+
+  if (!start_of_hs_stats_interval) {
+    return 0; /* Not initialized. */
+  }
+
+  if (start_of_hs_stats_interval + WRITE_STATS_INTERVAL > now) {
+    goto done; /* Not ready to write */
+  }
+
+  /* Generate history string. */
+  str = rep_hist_format_hs_stats(now);
+
+  /* Reset HS history. */
+  rep_hist_reset_hs_stats(now);
+
+  /* Try to write to disk. */
+  if (!check_or_create_data_subdir("stats")) {
+    write_to_data_subdir("stats", "hidserv-stats", str,
+                         "hidden service stats");
+  }
+
+ done:
+  tor_free(str);
+  return start_of_hs_stats_interval + WRITE_STATS_INTERVAL;
+}
+
 /** Free all storage held by the OR/link history caches, by the
  * bandwidth history arrays, by the port history, or by statistics . */
 void
 rep_hist_free_all(void)
 {
+  hs_stats_free(hs_stats);
   digestmap_free(history_map, free_or_history);
   tor_free(read_array);
   tor_free(write_array);
diff --git a/src/or/rephist.h b/src/or/rephist.h
index d853fe2..8fd1599 100644
--- a/src/or/rephist.h
+++ b/src/or/rephist.h
@@ -99,6 +99,13 @@ void rep_hist_note_circuit_handshake_requested(uint16_t type);
 void rep_hist_note_circuit_handshake_assigned(uint16_t type);
 void rep_hist_log_circuit_handshake_stats(time_t now);
 
+void rep_hist_hs_stats_init(time_t now);
+void rep_hist_hs_stats_term(void);
+time_t rep_hist_hs_stats_write(time_t now);
+char *rep_hist_get_hs_stats_string(void);
+void rep_hist_seen_new_rp_cell(void);
+void rep_hist_stored_maybe_new_hs(const crypto_pk_t *pubkey);
+
 void rep_hist_free_all(void);
 
 #endif
diff --git a/src/or/router.c b/src/or/router.c
index 01838b4..3e60880 100644
--- a/src/or/router.c
+++ b/src/or/router.c
@@ -2654,6 +2654,11 @@ extrainfo_dump_to_string(char **s_out, extrainfo_t *extrainfo,
                         "dirreq-stats-end", now, &contents) > 0) {
       smartlist_add(chunks, contents);
     }
+    if (options->HiddenServiceStatistics &&
+        load_stats_file("stats"PATH_SEPARATOR"hidserv-stats",
+                        "hidserv-stats-end", now, &contents) > 0) {
+      smartlist_add(chunks, contents);
+    }
     if (options->EntryStatistics &&
         load_stats_file("stats"PATH_SEPARATOR"entry-stats",
                         "entry-stats-end", now, &contents) > 0) {





More information about the tor-commits mailing list