[or-cvs] r14802: New code to implement proposal for local geoip stats. Only e (in tor/trunk: . doc src/common src/or)

nickm at seul.org nickm at seul.org
Thu May 29 02:29:36 UTC 2008


Author: nickm
Date: 2008-05-28 22:29:35 -0400 (Wed, 28 May 2008)
New Revision: 14802

Modified:
   tor/trunk/ChangeLog
   tor/trunk/configure.in
   tor/trunk/doc/TODO
   tor/trunk/src/common/util.c
   tor/trunk/src/or/config.c
   tor/trunk/src/or/connection_or.c
   tor/trunk/src/or/directory.c
   tor/trunk/src/or/geoip.c
   tor/trunk/src/or/main.c
   tor/trunk/src/or/or.h
   tor/trunk/src/or/router.c
   tor/trunk/src/or/test.c
Log:
New code to implement proposal for local geoip stats. Only enabled with --enable-geoip-stats passed to configure.

Modified: tor/trunk/ChangeLog
===================================================================
--- tor/trunk/ChangeLog	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/ChangeLog	2008-05-29 02:29:35 UTC (rev 14802)
@@ -104,6 +104,9 @@
       Robert Hogan. Fixes the first part of bug 681.
     - Make bridge authorities never serve extrainfo docs.
     - Allow comments in geoip file.
+    - New configure/torrc options (--enable-geoip-stats,
+      DirRecordUsageByCountry) to record how many IPs we've served directory
+      info to in each country code.
 
   o Minor features (security):
     - Reject requests for reverse-dns lookup of names in a private

Modified: tor/trunk/configure.in
===================================================================
--- tor/trunk/configure.in	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/configure.in	2008-05-29 02:29:35 UTC (rev 14802)
@@ -87,6 +87,13 @@
      ;;
 esac
 
+AC_ARG_ENABLE(geoip-stats,
+     AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics))
+
+if test "$enable_geoip_stats" = "yes"; then
+  AC_DEFINE(ENABLE_GEOIP_STATS, 1, [Defined if we try to collect per-country statistics])
+fi
+
 AC_ARG_ENABLE(gcc-warnings,
      AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings))
 

Modified: tor/trunk/doc/TODO
===================================================================
--- tor/trunk/doc/TODO	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/doc/TODO	2008-05-29 02:29:35 UTC (rev 14802)
@@ -289,10 +289,10 @@
       too much.
   o teach geoip_parse_entry() to skip over lines that start with #, so we
     can put a little note at the top of the geoip file to say what it is.
-N d we should have an off-by-default way for relays to dump geoip data to
+  . we should have an off-by-default way for relays to dump geoip data to
     a file in their data directory, for measurement purposes.
-    - Basic implementation
-    - Include probability-of-selection
+    o Basic implementation
+N   - Include probability-of-selection
 R d let bridges set relaybandwidthrate as low as 5kb
 R - bug: if we launch using bridges, and then stop using bridges, we
     still have our bridges in our entryguards section, and may use them.

Modified: tor/trunk/src/common/util.c
===================================================================
--- tor/trunk/src/common/util.c	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/common/util.c	2008-05-29 02:29:35 UTC (rev 14802)
@@ -1559,7 +1559,6 @@
   tor_assert((open_flags & (O_BINARY|O_TEXT)) != 0);
 #endif
   new_file->fd = -1;
-  tempname_len = strlen(fname)+16;
   tor_assert(tempname_len > strlen(fname)); /*check for overflow*/
   new_file->filename = tor_strdup(fname);
   if (open_flags & O_APPEND) {
@@ -1577,8 +1576,7 @@
     new_file->rename_on_close = 1;
   }
 
-  if ((new_file->fd = open(open_name, open_flags, mode))
-      < 0) {
+  if ((new_file->fd = open(open_name, open_flags, mode)) < 0) {
     log(LOG_WARN, LD_FS, "Couldn't open \"%s\" (%s) for writing: %s",
         open_name, fname, strerror(errno));
     goto err;

Modified: tor/trunk/src/or/config.c
===================================================================
--- tor/trunk/src/or/config.c	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/config.c	2008-05-29 02:29:35 UTC (rev 14802)
@@ -179,6 +179,9 @@
   V(DirPolicy,                   LINELIST, NULL),
   V(DirPort,                     UINT,     "0"),
   OBSOLETE("DirPostPeriod"),
+#ifdef ENABLE_GEOIP_STATS
+  V(DirRecordUsageByCountry,     BOOL,     "0"),
+#endif
   VAR("DirServer",               LINELIST, DirServers, NULL),
   V(DNSPort,                     UINT,     "0"),
   V(DNSListenAddress,            LINELIST, NULL),

Modified: tor/trunk/src/or/connection_or.c
===================================================================
--- tor/trunk/src/or/connection_or.c	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/connection_or.c	2008-05-29 02:29:35 UTC (rev 14802)
@@ -901,7 +901,7 @@
   } else {
     /* only report it to the geoip module if it's not a known router */
     if (!router_get_by_digest(conn->identity_digest))
-      geoip_note_client_seen(TO_CONN(conn)->addr, now);
+      geoip_note_client_seen(GEOIP_CLIENT_CONNECT, TO_CONN(conn)->addr, now);
   }
   if (conn->handshake_state) {
     or_handshake_state_free(conn->handshake_state);

Modified: tor/trunk/src/or/directory.c
===================================================================
--- tor/trunk/src/or/directory.c	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/directory.c	2008-05-29 02:29:35 UTC (rev 14802)
@@ -2484,6 +2484,26 @@
       goto done;
     }
 
+#ifdef ENABLE_GEOIP_STATS
+    {
+      geoip_client_action_t act =
+        is_v3 ? GEOIP_CLIENT_NETWORKSTATUS : GEOIP_CLIENT_NETWORKSTATUS_V2;
+      uint32_t addr = conn->_base.addr;
+
+      if (conn->_base.linked_conn) {
+        connection_t *c = conn->_base.linked_conn;
+        if (c->type == CONN_TYPE_EXIT) {
+          circuit_t *circ = TO_EDGE_CONN(c)->on_circuit;
+          if (! CIRCUIT_IS_ORIGIN(circ)) {
+            or_connection_t *orconn = TO_OR_CIRCUIT(circ)->p_conn;
+            addr = orconn->_base.addr;
+          }
+        }
+      }
+      geoip_note_client_seen(act, addr, time(NULL));
+    }
+#endif
+
     // note_request(request_type,dlen);
     (void) request_type;
     write_http_response_header(conn, -1, compressed,

Modified: tor/trunk/src/or/geoip.c
===================================================================
--- tor/trunk/src/or/geoip.c	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/geoip.c	2008-05-29 02:29:35 UTC (rev 14802)
@@ -131,7 +131,7 @@
  *   "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
  * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
  * integers, and CC is a country code.
- * 
+ *
  * It also recognizes, and skips over, blank lines and lines that start
  * with '#' (comments).
  */
@@ -208,9 +208,12 @@
 typedef struct clientmap_entry_t {
   HT_ENTRY(clientmap_entry_t) node;
   uint32_t ipaddr;
-  time_t last_seen;
+  time_t last_seen; /* The last 2 bits of this value hold the client
+                     * operation. */
 } clientmap_entry_t;
 
+#define ACTION_MASK 3
+
 /** Map from client IP address to last time seen. */
 static HT_HEAD(clientmap, clientmap_entry_t) client_history =
      HT_INITIALIZER();
@@ -238,12 +241,28 @@
 /** Note that we've seen a client connect from the IP <b>addr</b> (host order)
  * at time <b>now</b>. Ignored by all but bridges. */
 void
-geoip_note_client_seen(uint32_t addr, time_t now)
+geoip_note_client_seen(geoip_client_action_t action,
+                       uint32_t addr, time_t now)
 {
   or_options_t *options = get_options();
   clientmap_entry_t lookup, *ent;
-  if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
+  if (action == GEOIP_CLIENT_CONNECT) {
+    if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
+      return;
+  } else {
+#ifndef ENABLE_GEOIP_STATS
     return;
+#else
+    if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
+        !options->DirRecordUsageByCountry)
+      return;
+#endif
+  }
+
+  /* We use the low 2 bits of the time to encode the action. Since we're
+   * potentially remembering lots of clients, we don't want to make
+   * clientmap_entry_t larger than it has to be. */
+  now = (now & ~ACTION_MASK) | (((int)action) & ACTION_MASK);
   lookup.ipaddr = addr;
   ent = HT_FIND(clientmap, &client_history, &lookup);
   if (ent) {
@@ -328,7 +347,7 @@
  * that country, and cc is a lowercased country code.  Returns NULL if we don't
  * want to export geoip data yet. */
 char *
-geoip_get_client_history(time_t now)
+geoip_get_client_history(time_t now, geoip_client_action_t action)
 {
   char *result = NULL;
   if (!geoip_is_loaded())
@@ -343,7 +362,10 @@
     unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
     unsigned total = 0;
     HT_FOREACH(ent, clientmap, &client_history) {
-      int country = geoip_get_country_by_ip((*ent)->ipaddr);
+      int country;
+      if (((*ent)->last_seen & ACTION_MASK) != action)
+        continue;
+      country = geoip_get_country_by_ip((*ent)->ipaddr);
       if (country < 0)
         continue;
       tor_assert(0 <= country && country < n_countries);
@@ -404,6 +426,44 @@
   return result;
 }
 
+/** Write the per-country client summaries for v2 and v3 networkstatus
+ * requests to the "geoip-stats" file in the data directory.  Does nothing
+ * unless built with ENABLE_GEOIP_STATS, or if either summary is unavailable. */
+void
+dump_geoip_stats(void)
+{
+#ifdef ENABLE_GEOIP_STATS
+  time_t now = time(NULL);
+  char *filename = get_datadir_fname("geoip-stats");
+  char *data_v2 = NULL, *data_v3 = NULL;
+  char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
+  open_file_t *open_file = NULL;
+  FILE *out;
+
+  data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
+  data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
+  format_iso_time(since, geoip_get_history_start());
+  format_iso_time(written, now);
+  if (!data_v2 || !data_v3)
+    goto done;
+  out = start_writing_to_stdio_file(filename, 0, 0600, &open_file);
+  if (!out)
+    goto done;
+  if (fprintf(out, "written %s\nstarted-at %s\nns %s\nns-v2 %s\n",
+              written, since, data_v3, data_v2) < 0)
+    goto done;
+
+  finish_writing_to_file(open_file);
+  open_file = NULL;
+ done:
+  if (open_file)
+    abort_writing_to_file(open_file);
+  tor_free(filename);
+  tor_free(data_v2);
+  tor_free(data_v3);
+#endif
+}
+
 /** Helper used to implement GETINFO ip-to-country/... controller command. */
 int
 getinfo_helper_geoip(control_connection_t *control_conn,

Modified: tor/trunk/src/or/main.c
===================================================================
--- tor/trunk/src/or/main.c	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/main.c	2008-05-29 02:29:35 UTC (rev 14802)
@@ -832,6 +832,7 @@
   static time_t time_to_clean_caches = 0;
   static time_t time_to_recheck_bandwidth = 0;
   static time_t time_to_check_for_expired_networkstatus = 0;
+  static time_t time_to_dump_geoip_stats = 0;
   or_options_t *options = get_options();
   int i;
   int have_dir_info;
@@ -958,6 +959,12 @@
     time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
   }
 
+  if (time_to_dump_geoip_stats < now) {
+#define DUMP_GEOIP_STATS_INTERVAL (60*60) /* one hour, in seconds */
+    time_to_dump_geoip_stats = now + DUMP_GEOIP_STATS_INTERVAL;
+    dump_geoip_stats();
+  }
+
   /** 2. Periodically, we consider getting a new directory, getting a
    * new running-routers list, and/or force-uploading our descriptor
    * (if we've passed our internal checks). */

Modified: tor/trunk/src/or/or.h
===================================================================
--- tor/trunk/src/or/or.h	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/or.h	2008-05-29 02:29:35 UTC (rev 14802)
@@ -2358,6 +2358,10 @@
    * count of how many client addresses have contacted us so that we can help
    * the bridge authority guess which countries have blocked access to us. */
   int BridgeRecordUsageByCountry;
+#ifdef ENABLE_GEOIP_STATS
+  int DirRecordUsageByCountry;
+#endif
+
   /** Optionally, a file with GeoIP data. */
   char *GeoIPFile;
 
@@ -3294,13 +3298,27 @@
 int geoip_get_n_countries(void);
 const char *geoip_get_country_name(int num);
 int geoip_is_loaded(void);
-void geoip_note_client_seen(uint32_t addr, time_t now);
+/** Indicates an action that we might be noting geoip statistics on.
+ * Note that if we're noticing CONNECT, we're a bridge, and if we're noticing
+ * the others, we're not.
+ */
+typedef enum {
+  /** We've noticed a connection as a bridge relay. */
+  GEOIP_CLIENT_CONNECT = 0,
+  /** We've served a v3 networkstatus consensus as a directory server. */
+  GEOIP_CLIENT_NETWORKSTATUS = 1,
+  /** We've served a v2 networkstatus consensus as a directory server. */
+  GEOIP_CLIENT_NETWORKSTATUS_V2 = 2,
+} geoip_client_action_t;
+void geoip_note_client_seen(geoip_client_action_t action,
+                            uint32_t addr, time_t now);
 void geoip_remove_old_clients(time_t cutoff);
 time_t geoip_get_history_start(void);
-char *geoip_get_client_history(time_t now);
+char *geoip_get_client_history(time_t now, geoip_client_action_t action);
 int getinfo_helper_geoip(control_connection_t *control_conn,
                          const char *question, char **answer);
 void geoip_free_all(void);
+void dump_geoip_stats(void);
 
 /********************************* hibernate.c **********************/
 

Modified: tor/trunk/src/or/router.c
===================================================================
--- tor/trunk/src/or/router.c	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/router.c	2008-05-29 02:29:35 UTC (rev 14802)
@@ -1830,7 +1830,7 @@
       geoip_remove_old_clients(now-48*60*60);
       last_purged_at = now;
     }
-    geoip_summary = geoip_get_client_history(time(NULL));
+    geoip_summary = geoip_get_client_history(time(NULL), GEOIP_CLIENT_CONNECT);
     if (geoip_summary) {
       char geoip_start[ISO_TIME_LEN+1];
       format_iso_time(geoip_start, geoip_get_history_start());

Modified: tor/trunk/src/or/test.c
===================================================================
--- tor/trunk/src/or/test.c	2008-05-29 01:45:29 UTC (rev 14801)
+++ tor/trunk/src/or/test.c	2008-05-29 02:29:35 UTC (rev 14802)
@@ -3908,28 +3908,28 @@
   get_options()->BridgeRecordUsageByCountry = 1;
   /* Put 9 observations in AB... */
   for (i=32; i < 40; ++i)
-    geoip_note_client_seen(i, now);
-  geoip_note_client_seen(225, now);
+    geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now);
+  geoip_note_client_seen(GEOIP_CLIENT_CONNECT, 225, now);
   /* and 3 observations in XY, several times. */
   for (j=0; j < 10; ++j)
     for (i=52; i < 55; ++i)
-      geoip_note_client_seen(i, now-3600);
+      geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
   /* and 17 observations in ZZ... */
   for (i=110; i < 127; ++i)
-    geoip_note_client_seen(i, now-7200);
-  s = geoip_get_client_history(now+5*24*60*60);
+    geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-7200);
+  s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
   test_assert(s);
   test_streq("zz=24,ab=16", s);
   tor_free(s);
 
   /* Now clear out all the zz observations. */
   geoip_remove_old_clients(now-6000);
-  s = geoip_get_client_history(now+5*24*60*60);
+  s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
   test_assert(! s); /* There are only 12 observations left.  Not enough to
                        build an answer.  Add 4 more in XY... */
   for (i=55; i < 59; ++i)
-    geoip_note_client_seen(i, now-3600);
-  s = geoip_get_client_history(now+5*24*60*60);
+    geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
+  s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
   test_assert(s);
   test_streq("ab=16", s);
   tor_free(s);



More information about the tor-commits mailing list