[or-cvs] [tor/maint-0.2.2 1/2] Report only the top 10 ports in exit-port stats.

nickm at torproject.org nickm at torproject.org
Mon Nov 29 20:25:32 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Tue, 23 Nov 2010 21:09:12 +0100
Subject: Report only the top 10 ports in exit-port stats.
Commit: 4fed43ab2e84eacac738bce6ae74d41b71eb614c

---
 changes/task2196 |    5 ++
 src/or/rephist.c |  122 ++++++++++++++++++++++++++++++++++--------------------
 src/test/test.c  |   17 ++++++++
 3 files changed, 99 insertions(+), 45 deletions(-)
 create mode 100644 changes/task2196

diff --git a/changes/task2196 b/changes/task2196
new file mode 100644
index 0000000..e629fcc
--- /dev/null
+++ b/changes/task2196
@@ -0,0 +1,5 @@
+  o Minor features:
+    - Report only the top 10 ports in exit-port stats in order not to
+      exceed the maximum extra-info descriptor length of 50 KB. Implements
+      task 2196.
+
diff --git a/src/or/rephist.c b/src/or/rephist.c
index 22b3ec5..cdb596b 100644
--- a/src/or/rephist.c
+++ b/src/or/rephist.c
@@ -1943,9 +1943,8 @@ dump_pk_ops(int severity)
 #define EXIT_STATS_ROUND_UP_STREAMS 4
 /** Number of TCP ports */
 #define EXIT_STATS_NUM_PORTS 65536
-/** Reciprocal of threshold (= 0.01%) of total bytes that a port needs to
- * see in order to be included in exit stats. */
-#define EXIT_STATS_THRESHOLD_RECIPROCAL 10000
+/** Top n ports that will be included in exit stats. */
+#define EXIT_STATS_TOP_N_PORTS 10
 
 /* The following data structures are arrays and no fancy smartlists or maps,
  * so that all write operations can be done in constant time. This comes at
@@ -1995,15 +1994,23 @@ rep_hist_exit_stats_term(void)
   tor_free(exit_streams);
 }
 
+/** Helper for qsort(): compare two ints as *x - *y (must not overflow). */
+static int
+_compare_int(const void *x, const void *y) {
+  return (*(int*)x - *(int*)y);
+}
+
 /** Return a newly allocated string containing the exit port statistics
  * until <b>now</b>, or NULL if we're not collecting exit stats. */
 char *
 rep_hist_format_exit_stats(time_t now)
 {
-  int i;
-  uint64_t total_bytes = 0, threshold_bytes, other_read = 0,
-           other_written = 0;
-  uint32_t other_streams = 0;
+  int i, j, top_elements = 0, cur_min_idx = 0, cur_port;
+  uint64_t top_bytes[EXIT_STATS_TOP_N_PORTS];
+  int top_ports[EXIT_STATS_TOP_N_PORTS];
+  uint64_t cur_bytes = 0, other_read = 0, other_written = 0,
+           total_read = 0, total_written = 0;
+  uint32_t total_streams = 0, other_streams = 0;
   char *buf;
   smartlist_t *written_strings, *read_strings, *streams_strings;
   char *written_string, *read_string, *streams_string;
@@ -2013,52 +2020,75 @@ rep_hist_format_exit_stats(time_t now)
   if (!start_of_exit_stats_interval)
     return NULL; /* Not initialized. */
 
-  /* Count total number of bytes, so that we can attribute observations
-   * below or equal to a threshold of 1 / EXIT_STATS_THRESHOLD_RECIPROCAL
-   * of all bytes to a special port 'other'. */
+  /* Go through all ports to find the n ports that saw most written and
+   * read bytes. */
   for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
-    total_bytes += exit_bytes_read[i];
-    total_bytes += exit_bytes_written[i];
+    total_read += exit_bytes_read[i];
+    total_written += exit_bytes_written[i];
+    total_streams += exit_streams[i];
+    cur_bytes = exit_bytes_read[i] + exit_bytes_written[i];
+    if (cur_bytes == 0) {
+      continue;
+    }
+    if (top_elements < EXIT_STATS_TOP_N_PORTS) {
+      top_bytes[top_elements] = cur_bytes;
+      top_ports[top_elements++] = i;
+    } else if (cur_bytes > top_bytes[cur_min_idx]) {
+      top_bytes[cur_min_idx] = cur_bytes;
+      top_ports[cur_min_idx] = i;
+    } else {
+      continue;
+    }
+    cur_min_idx = 0;
+    for (j = 1; j < top_elements; j++) {
+      if (top_bytes[j] < top_bytes[cur_min_idx]) {
+        cur_min_idx = j;
+      }
+    }
   }
-  threshold_bytes = total_bytes / EXIT_STATS_THRESHOLD_RECIPROCAL;
 
-  /* Add observations of all ports above the threshold to smartlists and
-   * join them to single strings. Also count bytes and streams of ports
-   * below or equal to the threshold. */
+  /* Add observations of top ports to smartlists. */
   written_strings = smartlist_create();
   read_strings = smartlist_create();
   streams_strings = smartlist_create();
-  for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
-    if (exit_bytes_read[i] + exit_bytes_written[i] > threshold_bytes) {
-      if (exit_bytes_written[i] > 0) {
-        uint64_t num = round_uint64_to_next_multiple_of(
-                       exit_bytes_written[i], EXIT_STATS_ROUND_UP_BYTES);
-        num /= 1024;
-        buf = NULL;
-        tor_asprintf(&buf, "%d="U64_FORMAT, i, U64_PRINTF_ARG(num));
-        smartlist_add(written_strings, buf);
-      }
-      if (exit_bytes_read[i] > 0) {
-        uint64_t num = round_uint64_to_next_multiple_of(
-                       exit_bytes_read[i], EXIT_STATS_ROUND_UP_BYTES);
-        num /= 1024;
-        buf = NULL;
-        tor_asprintf(&buf, "%d="U64_FORMAT, i, U64_PRINTF_ARG(num));
-        smartlist_add(read_strings, buf);
-      }
-      if (exit_streams[i] > 0) {
-        uint32_t num = round_uint32_to_next_multiple_of(exit_streams[i],
-                       EXIT_STATS_ROUND_UP_STREAMS);
-        buf = NULL;
-        tor_asprintf(&buf, "%d=%u", i, num);
-        smartlist_add(streams_strings, buf);
-      }
-    } else {
-      other_read += exit_bytes_read[i];
-      other_written += exit_bytes_written[i];
-      other_streams += exit_streams[i];
+  other_read = total_read;
+  other_written = total_written;
+  other_streams = total_streams;
+  qsort(top_ports, top_elements, sizeof(int), _compare_int);
+  for (j = 0; j < top_elements; j++) {
+    cur_port = top_ports[j];
+    if (exit_bytes_written[cur_port] > 0) {
+      uint64_t num = round_uint64_to_next_multiple_of(
+                     exit_bytes_written[cur_port],
+                     EXIT_STATS_ROUND_UP_BYTES);
+      num /= 1024;
+      buf = NULL;
+      tor_asprintf(&buf, "%d="U64_FORMAT, cur_port, U64_PRINTF_ARG(num));
+      smartlist_add(written_strings, buf);
+      other_written -= exit_bytes_written[cur_port];
+    }
+    if (exit_bytes_read[cur_port] > 0) {
+      uint64_t num = round_uint64_to_next_multiple_of(
+                     exit_bytes_read[cur_port],
+                     EXIT_STATS_ROUND_UP_BYTES);
+      num /= 1024;
+      buf = NULL;
+      tor_asprintf(&buf, "%d="U64_FORMAT, cur_port, U64_PRINTF_ARG(num));
+      smartlist_add(read_strings, buf);
+      other_read -= exit_bytes_read[cur_port];
+    }
+    if (exit_streams[cur_port] > 0) {
+      uint32_t num = round_uint32_to_next_multiple_of(
+                     exit_streams[cur_port],
+                     EXIT_STATS_ROUND_UP_STREAMS);
+      buf = NULL;
+      tor_asprintf(&buf, "%d=%u", cur_port, num);
+      smartlist_add(streams_strings, buf);
+      other_streams -= exit_streams[cur_port];
     }
   }
+
+  /* Add observations of other ports in a single element. */
   other_written = round_uint64_to_next_multiple_of(other_written,
                   EXIT_STATS_ROUND_UP_BYTES);
   other_written /= 1024;
@@ -2076,6 +2106,8 @@ rep_hist_format_exit_stats(time_t now)
   buf = NULL;
   tor_asprintf(&buf, "other=%u", other_streams);
   smartlist_add(streams_strings, buf);
+
+  /* Join all observations in single strings. */
   written_string = smartlist_join_strings(written_strings, ",", 0, NULL);
   read_string = smartlist_join_strings(read_strings, ",", 0, NULL);
   streams_string = smartlist_join_strings(streams_strings, ",", 0, NULL);
diff --git a/src/test/test.c b/src/test/test.c
index 8d8c46f..8782ef5 100644
--- a/src/test/test.c
+++ b/src/test/test.c
@@ -1081,6 +1081,7 @@ test_stats(void)
 {
   time_t now = 1281533250; /* 2010-08-11 13:27:30 UTC */
   char *s = NULL;
+  int i;
 
   /* We shouldn't collect exit stats without initializing them. */
   rep_hist_note_exit_stream_opened(80);
@@ -1103,6 +1104,22 @@ test_stats(void)
              "exit-streams-opened 80=4,443=4,other=0\n", s);
   tor_free(s);
 
+  /* Add a few bytes on 10 more ports and ensure that only the top 10
+   * ports are contained in the history string. */
+  for (i = 50; i < 60; i++) {
+    rep_hist_note_exit_bytes(i, i, i);
+    rep_hist_note_exit_stream_opened(i);
+  }
+  s = rep_hist_format_exit_stats(now + 86400);
+  test_streq("exit-stats-end 2010-08-12 13:27:30 (86400 s)\n"
+             "exit-kibibytes-written 52=1,53=1,54=1,55=1,56=1,57=1,58=1,"
+             "59=1,80=1,443=1,other=1\n"
+             "exit-kibibytes-read 52=1,53=1,54=1,55=1,56=1,57=1,58=1,"
+             "59=1,80=10,443=20,other=1\n"
+             "exit-streams-opened 52=4,53=4,54=4,55=4,56=4,57=4,58=4,"
+             "59=4,80=4,443=4,other=4\n", s);
+  tor_free(s);
+
   /* Stop collecting stats, add some bytes, and ensure we don't generate
    * a history string. */
   rep_hist_exit_stats_term();
-- 
1.7.1




More information about the tor-commits mailing list