[or-cvs] r12845: First wodge of geoip code so bridges can figure out which co (in tor/trunk: . doc src/or)

nickm at seul.org nickm at seul.org
Mon Dec 17 22:44:12 UTC 2007


Author: nickm
Date: 2007-12-17 17:44:11 -0500 (Mon, 17 Dec 2007)
New Revision: 12845

Added:
   tor/trunk/src/or/geoip.c
Modified:
   tor/trunk/
   tor/trunk/doc/TODO
   tor/trunk/src/or/Makefile.am
   tor/trunk/src/or/config.c
   tor/trunk/src/or/connection_or.c
   tor/trunk/src/or/main.c
   tor/trunk/src/or/or.h
   tor/trunk/src/or/rephist.c
Log:
 r15530 at tombo:  nickm | 2007-12-17 16:54:03 -0500
 First wodge of geoip code so bridges can figure out which countries are blocking them.



Property changes on: tor/trunk
___________________________________________________________________
 svk:merge ticket from /tor/trunk [r15530] on d9e39d38-0f13-419c-a857-e10a0ce2aa0c

Modified: tor/trunk/doc/TODO
===================================================================
--- tor/trunk/doc/TODO	2007-12-17 22:41:26 UTC (rev 12844)
+++ tor/trunk/doc/TODO	2007-12-17 22:44:11 UTC (rev 12845)
@@ -28,20 +28,32 @@
   - mirror tor downloads on (via) tor dir caches
 R   . spec
     d deploy
-  - geoip caching and publishing for bridges
+  . geoip caching and publishing for bridges
 R   . spec
-?   - deploy
+    - Implement
+      . Code to load a geoip file from disk
+        o Truncated format
+        - Full format.
+        o Actually invoke
+      o Code to store a GEOIP file in memory.
+      o Code to remember client IPs.
+      . Code to generate history lines
+        - Make history lines match spec.
+      - Controller interface
+      - Track consecutive time up, not time since last-forgotten IP.
+      - Add log lines.
+    - Tests
     d let Vidalia use the geoip data too rather than doing its own
       anonymized queries
-  - bridge address disbursal strategies
+  o bridge address disbursal strategies
     o get the cached-descriptors* to bridges at moria
-    - parse out bridge addresses from cached-descriptors*
+    o parse out bridge addresses from cached-descriptors*
       (or parse them out before Tonga sends them)
       (or get Tonga's Tor to write them out better in the first place)
-N   * answer by IP/timestamp
-      - run a little web server on moria?
-N   d answer by answering email to bridges at torproject
-      - keep track of which addresses have been answered already
+    o answer by IP/timestamp
+      o run a little web server on moria?
+    o answer by answering email to bridges at torproject
+      o keep track of which addresses have been answered already
 R - bridge communities
     - spec
     - deploy

Modified: tor/trunk/src/or/Makefile.am
===================================================================
--- tor/trunk/src/or/Makefile.am	2007-12-17 22:41:26 UTC (rev 12844)
+++ tor/trunk/src/or/Makefile.am	2007-12-17 22:44:11 UTC (rev 12845)
@@ -16,7 +16,7 @@
 	circuituse.c command.c config.c \
 	connection.c connection_edge.c connection_or.c control.c \
 	cpuworker.c directory.c dirserv.c dirvote.c \
-	dns.c dnsserv.c hibernate.c main.c $(tor_platform_source) \
+	dns.c dnsserv.c geoip.c hibernate.c main.c $(tor_platform_source) \
 	networkstatus.c \
 	onion.c policies.c relay.c rendcommon.c rendclient.c rendmid.c \
 	rendservice.c rephist.c router.c routerlist.c routerparse.c \
@@ -38,7 +38,7 @@
 	circuituse.c command.c config.c \
 	connection.c connection_edge.c connection_or.c control.c \
 	cpuworker.c directory.c dirserv.c dirvote.c \
-	dns.c dnsserv.c hibernate.c main.c $(tor_platform_source) \
+	dns.c dnsserv.c geoip.c hibernate.c main.c $(tor_platform_source) \
 	networkstatus.c \
 	onion.c policies.c relay.c rendcommon.c rendclient.c rendmid.c \
 	rendservice.c rephist.c router.c routerlist.c routerparse.c \

Modified: tor/trunk/src/or/config.c
===================================================================
--- tor/trunk/src/or/config.c	2007-12-17 22:41:26 UTC (rev 12844)
+++ tor/trunk/src/or/config.c	2007-12-17 22:44:11 UTC (rev 12845)
@@ -150,6 +150,7 @@
   V(BandwidthRate,               MEMUNIT,  "5 MB"),
   V(BridgeAuthoritativeDir,      BOOL,     "0"),
   VAR("Bridge",                  LINELIST, Bridges,    NULL),
+  V(BridgeRecordUsageByCountry,  BOOL,     "1"),
   V(BridgeRelay,                 BOOL,     "0"),
   V(CircuitBuildTimeout,         INTERVAL, "1 minute"),
   V(CircuitIdleTimeout,          INTERVAL, "1 hour"),
@@ -191,6 +192,7 @@
   V(FetchServerDescriptors,      BOOL,     "1"),
   V(FetchHidServDescriptors,     BOOL,     "1"),
   V(FetchUselessDescriptors,     BOOL,     "0"),
+  V(GEOIPFile,                   STRING,   NULL),
   V(Group,                       STRING,   NULL),
   V(HardwareAccel,               BOOL,     "0"),
   V(HashedControlPassword,       LINELIST, NULL),
@@ -1214,6 +1216,12 @@
       init_keys();
   }
 
+  /* Maybe load geoip file */
+  if (options->GEOIPFile &&
+      ((!old_options || !opt_streq(old_options->GEOIPFile, options->GEOIPFile))
+       || !geoip_is_loaded())) {
+    geoip_load_file(options->GEOIPFile);
+  }
   /* Check if we need to parse and add the EntryNodes config option. */
   if (options->EntryNodes &&
       (!old_options ||

Modified: tor/trunk/src/or/connection_or.c
===================================================================
--- tor/trunk/src/or/connection_or.c	2007-12-17 22:41:26 UTC (rev 12844)
+++ tor/trunk/src/or/connection_or.c	2007-12-17 22:44:11 UTC (rev 12845)
@@ -893,17 +893,20 @@
 connection_or_set_state_open(or_connection_t *conn)
 {
   int started_here = connection_or_nonopen_was_started_here(conn);
+  time_t now = time(NULL);
   conn->_base.state = OR_CONN_STATE_OPEN;
   control_event_or_conn_status(conn, OR_CONN_EVENT_CONNECTED, 0);
 
   if (started_here) {
-    rep_hist_note_connect_succeeded(conn->identity_digest, time(NULL));
-    if (entry_guard_register_connect_status(conn->identity_digest, 1,
-                                            time(NULL)) < 0) {
+    rep_hist_note_connect_succeeded(conn->identity_digest, now);
+    if (entry_guard_register_connect_status(conn->identity_digest,
+                                            1, now) < 0) {
       /* pending circs get closed in circuit_about_to_close_connection() */
       return -1;
     }
     router_set_status(conn->identity_digest, 1);
+  } else {
+    geoip_note_client_seen(TO_CONN(conn)->addr, now);
   }
   if (conn->handshake_state) {
     or_handshake_state_free(conn->handshake_state);

Added: tor/trunk/src/or/geoip.c
===================================================================
--- tor/trunk/src/or/geoip.c	                        (rev 0)
+++ tor/trunk/src/or/geoip.c	2007-12-17 22:44:11 UTC (rev 12845)
@@ -0,0 +1,275 @@
+/* Copyright (c) 2007, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+/* $Id$ */
+const char geoip_c_id[] = /* version-control id string for this file */
+  "$Id$";
+
+#define GEOIP_PRIVATE
+#include "or.h"
+#include "ht.h"
+
+/** DOCDOC this whole file */
+
+typedef struct geoip_entry_t { /* one address range and its country */
+  uint32_t ip_low; /* lowest address in the range (inclusive in lookups) */
+  uint32_t ip_high; /* highest address in the range (inclusive in lookups) */
+  int country; /* index into geoip_countries */
+} geoip_entry_t;
+
+static smartlist_t *geoip_countries = NULL; /* lowercased country codes */
+static strmap_t *country_idxplus1_by_lc_code = NULL; /* code -> index+1 */
+static smartlist_t *geoip_entries = NULL; /* geoip_entry_t*; sorted on load */
+
+/** Add a range <b>low</b>..<b>high</b> belonging to <b>country</b>, first
+ * registering the country code if it is new.  The map stores index+1. */
+void
+geoip_add_entry(uint32_t low, uint32_t high, const char *country)
+{
+  uintptr_t idx;
+  geoip_entry_t *ent = tor_malloc_zero(sizeof(geoip_entry_t));
+  void *_idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
+  if (!_idxplus1) {
+    char *c = tor_strdup(country);
+    tor_strlower(c);
+    smartlist_add(geoip_countries, c);
+    idx = smartlist_len(geoip_countries) - 1; /* c is now the last element */
+    strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
+  } else {
+    idx = ((uintptr_t)_idxplus1)-1;
+  }
+  ent->ip_low = low;
+  ent->ip_high = high;
+  ent->country = (int)idx;
+  smartlist_add(geoip_entries, ent);
+}
+
+/** Helper for smartlist_sort(): order two geoip_entry_t pointers by
+ * ascending ip_low.  ip_high does not take part in the comparison. */
+static int
+_geoip_compare_entries(const void **_a, const void **_b)
+{
+  const geoip_entry_t *lhs = *_a;
+  const geoip_entry_t *rhs = *_b;
+  if (lhs->ip_low == rhs->ip_low)
+    return 0;
+  return (lhs->ip_low < rhs->ip_low) ? -1 : 1;
+}
+
+/** Helper for smartlist_bsearch(): compare the uint32_t address that
+ * <b>_key</b> points to against the range in *<b>_member</b>.  Return -1 if
+ * the address is below the range, 1 if it is above, and 0 if inside. */
+static int
+_geoip_compare_key_to_entry(const void *_key, const void **_member)
+{
+  const geoip_entry_t *range = *_member;
+  const uint32_t ip = *(uint32_t *)_key;
+  if (ip < range->ip_low)
+    return -1;
+  return (ip > range->ip_high) ? 1 : 0;
+}
+
+/** Load "low,high,CC" ranges from <b>filename</b>; 0 if opened, else -1. */
+int
+geoip_load_file(const char *filename)
+{
+  FILE *f;
+  char line[512];
+  geoip_free_all(); /* discard whatever was loaded before */
+  if (!(f = fopen(filename, "r"))) {
+    log_warn(LD_GENERAL, "Failed to open GEOIP file %s.", filename);
+    return -1;
+  }
+  geoip_countries = smartlist_create();
+  geoip_entries = smartlist_create();
+  country_idxplus1_by_lc_code = strmap_new();
+  /* Parse line by line: fscanf() would never consume an unparseable line. */
+  while (fgets(line, sizeof(line), f)) {
+    unsigned int low, high;
+    char b[3];
+    if (sscanf(line, "%u,%u,%2s", &low, &high, b) == 3)
+      geoip_add_entry(low, high, b);
+  }
+  fclose(f); /*XXXX020 abort and return -1 if */
+  smartlist_sort(geoip_entries, _geoip_compare_entries);
+  return 0;
+}
+
+/** Return the country index of the range containing <b>ipaddr</b>, or -1
+ * if no range contains it or no GeoIP data is loaded. */
+int
+geoip_get_country_by_ip(uint32_t ipaddr)
+{
+  geoip_entry_t *hit = !geoip_entries ? NULL :
+    smartlist_bsearch(geoip_entries, &ipaddr, _geoip_compare_key_to_entry);
+  return hit ? hit->country : -1;
+}
+
+int
+geoip_get_n_countries(void)
+{ /* Number of distinct country codes loaded; 0 when nothing is loaded. */
+  return geoip_countries ? smartlist_len(geoip_countries) : 0;
+}
+
+/** Return the code for country index <b>num</b>, or "??" if out of range. */
+const char *
+geoip_get_country_name(int num)
+{
+  if (!geoip_countries || num < 0 || num >= smartlist_len(geoip_countries))
+    return "??";
+  return smartlist_get(geoip_countries, num);
+}
+
+int
+geoip_is_loaded(void)
+{ /* True only when both the country list and the range list exist. */
+  return !(geoip_countries == NULL || geoip_entries == NULL);
+}
+
+/** A client address we have seen, and the time we most recently saw it. */
+typedef struct clientmap_entry_t {
+  HT_ENTRY(clientmap_entry_t) node; /* hash-table linkage */
+  uint32_t ipaddr; /* client address; the only field hashed and compared */
+  time_t last_seen; /* latest 'now' given to geoip_note_client_seen() */
+} clientmap_entry_t;
+
+static HT_HEAD(clientmap, clientmap_entry_t) client_history =
+     HT_INITIALIZER(); /* every client address currently remembered */
+static time_t client_history_starts = 0; /* start of the period covered */
+
+static INLINE unsigned
+clientmap_entry_hash(const clientmap_entry_t *a)
+{ /* Hash a client entry by its IP address alone. */
+  return ht_improve_hash((unsigned) a->ipaddr);
+}
+static INLINE int
+clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b)
+{ /* Two entries are equal exactly when their IP addresses match. */
+  return a->ipaddr == b->ipaddr;
+}
+
+HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
+             clientmap_entries_eq); /* table keyed on ipaddr via the above */
+HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
+            clientmap_entries_eq, 0.6, malloc, realloc, free);
+
+/** Note that a client connected from <b>addr</b> at time <b>now</b>.  Does
+ * nothing unless BridgeRelay and BridgeRecordUsageByCountry are both set. */
+void
+geoip_note_client_seen(uint32_t addr, time_t now)
+{
+  or_options_t *options = get_options();
+  clientmap_entry_t key, *seen;
+  if (!options->BridgeRelay || !options->BridgeRecordUsageByCountry)
+    return;
+  key.ipaddr = addr;
+  seen = HT_FIND(clientmap, &client_history, &key);
+  if (!seen) {
+    /* First sighting of this address: add a fresh entry for it. */
+    seen = tor_malloc_zero(sizeof(clientmap_entry_t));
+    seen->ipaddr = addr;
+    HT_INSERT(clientmap, &client_history, seen);
+  }
+  seen->last_seen = now;
+  if (client_history_starts == 0)
+    client_history_starts = now;
+}
+
+/** Helper for clientmap_HT_FOREACH_FN(): free <b>ent</b> and return 1 if it
+ * was last seen before the time_t at <b>_cutoff</b>; otherwise return 0. */
+static int
+_remove_old_client_helper(struct clientmap_entry_t *ent, void *_cutoff)
+{
+  const time_t *oldest_kept = _cutoff;
+  if (ent->last_seen >= *oldest_kept)
+    return 0;
+  tor_free(ent);
+  return 1;
+}
+
+/** Run _remove_old_client_helper() over every remembered client to discard
+ * those last seen before <b>cutoff</b>; the history then starts no earlier. */
+void
+geoip_remove_old_clients(time_t cutoff)
+{
+  clientmap_HT_FOREACH_FN(&client_history, _remove_old_client_helper, &cutoff);
+  if (cutoff > client_history_starts)
+    client_history_starts = cutoff;
+}
+
+#define MIN_IPS_TO_NOTE_COUNTRY 8 /* countries with fewer IPs are omitted */
+#define MIN_IPS_TO_NOTE_ANYTHING 16 /* with fewer located IPs, report NULL */
+#define IP_GRANULARITY 8 /* reported counts are rounded down to a multiple */
+
+/** Return a comma-separated "cc=N" summary of remembered clients per
+ * country, built by smartlist_join_strings() (presumably caller-freed), or
+ * NULL if no GeoIP data is loaded, the history does not yet reach back more
+ * than 12 hours, or too few clients map to a known country. */
+char *
+geoip_get_client_history(time_t now)
+{
+  char *summary = NULL;
+  char piece[32];
+  smartlist_t *pieces = NULL;
+  int n_countries, idx;
+  clientmap_entry_t **client;
+  unsigned *per_country;
+  unsigned n_located = 0;
+  if (!geoip_is_loaded() || client_history_starts >= (now - 12*60*60))
+    return NULL;
+  n_countries = geoip_get_n_countries();
+  per_country = tor_malloc_zero(sizeof(unsigned)*n_countries);
+  /* Tally the remembered clients whose address maps to a known country. */
+  HT_FOREACH(client, clientmap, &client_history) {
+    int country = geoip_get_country_by_ip((*client)->ipaddr);
+    if (country >= 0) {
+      tor_assert(0 <= country && country < n_countries);
+      ++per_country[country];
+      ++n_located;
+    }
+  }
+  if (n_located >= MIN_IPS_TO_NOTE_ANYTHING) {
+    pieces = smartlist_create();
+    for (idx = 0; idx < n_countries; ++idx) {
+      unsigned n = per_country[idx];
+      if (n < MIN_IPS_TO_NOTE_COUNTRY)
+        continue;
+      n -= n % IP_GRANULARITY; /* report only a rounded-down count */
+      tor_snprintf(piece, sizeof(piece), "%s=%u",
+                   geoip_get_country_name(idx), n);
+      smartlist_add(pieces, tor_strdup(piece));
+    }
+    summary = smartlist_join_strings(pieces, ",", 0, NULL);
+    SMARTLIST_FOREACH(pieces, char *, cp, tor_free(cp));
+    smartlist_free(pieces);
+  }
+  tor_free(per_country);
+  return summary;
+}
+
+
+/** Release the GeoIP database and forget every remembered client. */
+void
+geoip_free_all(void)
+{
+  clientmap_entry_t **pos = HT_START(clientmap, &client_history);
+  if (geoip_countries) {
+    SMARTLIST_FOREACH(geoip_countries, char *, code, tor_free(code));
+    smartlist_free(geoip_countries);
+    geoip_countries = NULL;
+  }
+  if (country_idxplus1_by_lc_code) {
+    strmap_free(country_idxplus1_by_lc_code, NULL);
+    country_idxplus1_by_lc_code = NULL;
+  }
+  if (geoip_entries) {
+    SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, e, tor_free(e));
+    smartlist_free(geoip_entries);
+    geoip_entries = NULL;
+  }
+  while (pos != NULL) { /* step past each entry first, then free it */
+    clientmap_entry_t *victim = *pos;
+    pos = HT_NEXT_RMV(clientmap, &client_history, pos);
+    tor_free(victim);
+  }
+  HT_CLEAR(clientmap, &client_history);
+}

Modified: tor/trunk/src/or/main.c
===================================================================
--- tor/trunk/src/or/main.c	2007-12-17 22:41:26 UTC (rev 12844)
+++ tor/trunk/src/or/main.c	2007-12-17 22:44:11 UTC (rev 12845)
@@ -1803,6 +1803,7 @@
   if (!postfork) {
     evdns_shutdown(1);
   }
+  geoip_free_all();
   dirvote_free_all();
   routerlist_free_all();
   networkstatus_free_all();

Modified: tor/trunk/src/or/or.h
===================================================================
--- tor/trunk/src/or/or.h	2007-12-17 22:41:26 UTC (rev 12844)
+++ tor/trunk/src/or/or.h	2007-12-17 22:44:11 UTC (rev 12845)
@@ -2315,6 +2315,10 @@
   /** DOCDOC here and in tor.1 */
   int LearnAuthorityAddrFromCerts;
 
+  /** DOCDOC here and in tor.1 */
+  int BridgeRecordUsageByCountry;
+  char *GEOIPFile;
+
 } or_options_t;
 
 /** Persistent state for an onion router, as saved to disk. */
@@ -3192,6 +3196,21 @@
 void dnsserv_reject_request(edge_connection_t *conn);
 void dnsserv_launch_request(const char *name, int is_reverse);
 
+/********************************* geoip.c **************************/
+
+#ifdef GEOIP_PRIVATE
+void geoip_add_entry(uint32_t low, uint32_t high, const char *country);
+#endif
+int geoip_load_file(const char *filename);
+int geoip_get_country_by_ip(uint32_t ipaddr);
+int geoip_get_n_countries(void);
+const char *geoip_get_country_name(int num);
+int geoip_is_loaded(void);
+void geoip_note_client_seen(uint32_t addr, time_t now);
+void geoip_remove_old_clients(time_t cutoff);
+char *geoip_get_client_history(time_t now);
+void geoip_free_all(void);
+
 /********************************* hibernate.c **********************/
 
 int accounting_parse_options(or_options_t *options, int validate_only);

Modified: tor/trunk/src/or/rephist.c
===================================================================
--- tor/trunk/src/or/rephist.c	2007-12-17 22:41:26 UTC (rev 12844)
+++ tor/trunk/src/or/rephist.c	2007-12-17 22:44:11 UTC (rev 12845)
@@ -13,6 +13,7 @@
  **/
 
 #include "or.h"
+#include "ht.h"
 
 static void bw_arrays_init(void);
 static void predicted_ports_init(void);



More information about the tor-commits mailing list