[or-cvs] r15097: Improved code for counting clients by country: support recor (in tor/trunk: . src/common src/or)

nickm at seul.org nickm at seul.org
Tue Jun 10 18:08:57 UTC 2008


Author: nickm
Date: 2008-06-10 14:08:56 -0400 (Tue, 10 Jun 2008)
New Revision: 15097

Modified:
   tor/trunk/
   tor/trunk/ChangeLog
   tor/trunk/src/common/util.c
   tor/trunk/src/or/geoip.c
   tor/trunk/src/or/or.h
Log:
 r16127 at tombo:  nickm | 2008-06-10 14:03:01 -0400
 Improved code for counting clients by country: support recording by number of directory status requests in addition to number of IPs seen.



Property changes on: tor/trunk
___________________________________________________________________
 svk:merge ticket from /tor/trunk [r16127] on 49666b30-7950-49c5-bedf-9dc8f3168102

Modified: tor/trunk/ChangeLog
===================================================================
--- tor/trunk/ChangeLog	2008-06-10 13:49:57 UTC (rev 15096)
+++ tor/trunk/ChangeLog	2008-06-10 18:08:56 UTC (rev 15097)
@@ -107,7 +107,8 @@
     - Allow comments in geoip file.
     - New configure/torrc options (--enable-geoip-stats,
       DirRecordUsageByCountry) to record how many IPs we've served directory
-      info to in each country code.
+      info to in each country code, and how many status documents total
+      we've sent to each country code.
     - Never use OpenSSL compression: it wastes RAM and CPU trying to
       compress cells, which are basically all encrypted, compressed, or
       both.

Modified: tor/trunk/src/common/util.c
===================================================================
--- tor/trunk/src/common/util.c	2008-06-10 13:49:57 UTC (rev 15096)
+++ tor/trunk/src/common/util.c	2008-06-10 18:08:56 UTC (rev 15097)
@@ -1604,8 +1604,8 @@
     return file_data->stdio_file;
   tor_assert(file_data->fd >= 0);
   if (!(file_data->stdio_file = fdopen(file_data->fd, "a"))) {
-    log_warn(LD_FS, "Couldn't fdopen \"%s\": %s", file_data->filename,
-             strerror(errno));
+    log_warn(LD_FS, "Couldn't fdopen \"%s\" [%d]: %s", file_data->filename,
+             file_data->fd, strerror(errno));
   }
   return file_data->stdio_file;
 }
@@ -1619,8 +1619,10 @@
   FILE *res;
   if (start_writing_to_file(fname, open_flags, mode, data_out)<0)
     return NULL;
-  if (!(res = fdopen_file(*data_out)))
+  if (!(res = fdopen_file(*data_out))) {
     abort_writing_to_file(*data_out);
+    *data_out = NULL;
+  }
   return res;
 }
 

Modified: tor/trunk/src/or/geoip.c
===================================================================
--- tor/trunk/src/or/geoip.c	2008-06-10 13:49:57 UTC (rev 15096)
+++ tor/trunk/src/or/geoip.c	2008-06-10 18:08:56 UTC (rev 15097)
@@ -23,7 +23,17 @@
   intptr_t country; /**< An index into geoip_countries */
 } geoip_entry_t;
 
-/** A list of lowercased two-letter country codes. */
+/** Number of request-count periods kept per country; seconds per period. */
+#define REQUEST_HIST_LEN 3
+#define REQUEST_HIST_PERIOD (8*60*60)
+
+typedef struct geoip_country_t {
+  char countrycode[3]; /**< Lowercased country code, NUL-terminated. */
+  uint32_t n_v2_ns_requests[REQUEST_HIST_LEN]; /**< Per-period v2 requests. */
+  uint32_t n_v3_ns_requests[REQUEST_HIST_LEN]; /**< Per-period v3 requests. */
+} geoip_country_t;
+
+/** A list of geoip_country_t */
 static smartlist_t *geoip_countries = NULL;
 /** A map from lowercased country codes to their position in geoip_countries.
  * The index is encoded in the pointer, and 1 is added so that NULL can mean
@@ -48,15 +58,19 @@
   _idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
 
   if (!_idxplus1) {
-    char *c = tor_strdup(country);
-    tor_strlower(c);
+    geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
+    strlcpy(c->countrycode, country, sizeof(c->countrycode));
+    tor_strlower(c->countrycode);
     smartlist_add(geoip_countries, c);
     idx = smartlist_len(geoip_countries) - 1;
     strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
   } else {
     idx = ((uintptr_t)_idxplus1)-1;
   }
-  tor_assert(!strcasecmp(smartlist_get(geoip_countries, idx), country));
+  {
+    geoip_country_t *c = smartlist_get(geoip_countries, idx);
+    tor_assert(!strcasecmp(c->countrycode, country));
+  }
   ent = tor_malloc_zero(sizeof(geoip_entry_t));
   ent->ip_low = low;
   ent->ip_high = high;
@@ -198,9 +212,10 @@
 const char *
 geoip_get_country_name(int num)
 {
-  if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries))
-    return smartlist_get(geoip_countries, num);
-  else
+  if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
+    geoip_country_t *c = smartlist_get(geoip_countries, num);
+    return c->countrycode;
+  } else
     return "??";
 }
 
@@ -226,9 +241,13 @@
 /** Map from client IP address to last time seen. */
 static HT_HEAD(clientmap, clientmap_entry_t) client_history =
      HT_INITIALIZER();
-/** Time at which we started tracking client history. */
+/** Time at which we started tracking client IP history. */
 static time_t client_history_starts = 0;
 
+/** Start time of the request-counting period currently being filled. */
+static time_t current_request_period_starts = 0;
+static int n_old_request_periods = 0; /**< Rolled-over periods (capped). */
+
 /** Hashtable helper: compute a hash of a clientmap_entry_t. */
 static INLINE unsigned
 clientmap_entry_hash(const clientmap_entry_t *a)
@@ -268,8 +287,23 @@
 #endif
   }
 
+  /* DOCDOC */
+  while (current_request_period_starts + REQUEST_HIST_PERIOD >= now) {
+    SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
+        memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1],
+                sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
+        memmove(&c->n_v3_ns_requests[0], &c->n_v3_ns_requests[1],
+                sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
+        c->n_v2_ns_requests[REQUEST_HIST_LEN-1] = 0;
+        c->n_v3_ns_requests[REQUEST_HIST_LEN-1] = 0;
+      });
+    current_request_period_starts += REQUEST_HIST_PERIOD;
+    if (n_old_request_periods < REQUEST_HIST_PERIOD-1)
+      ++n_old_request_periods;
+  }
+
   /* We use the low 3 bits of the time to encode the action. Since we're
-   * potentially remembering times of clients, we don't want to make
+   * potentially remembering tons of clients, we don't want to make
    * clientmap_entry_t larger than it has to be. */
   now = (now & ~ACTION_MASK) | (((int)action) & ACTION_MASK);
   lookup.ipaddr = addr;
@@ -282,8 +316,23 @@
     ent->last_seen = now;
     HT_INSERT(clientmap, &client_history, ent);
   }
-  if (!client_history_starts)
+
+  if (action == GEOIP_CLIENT_NETWORKSTATUS ||
+      action == GEOIP_CLIENT_NETWORKSTATUS_V2) {
+    int country_idx = geoip_get_country_by_ip(addr);
+    if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) {
+      geoip_country_t *country = smartlist_get(geoip_countries, country_idx);
+      if (action == GEOIP_CLIENT_NETWORKSTATUS)
+        ++country->n_v3_ns_requests[REQUEST_HIST_LEN-1];
+      else
+        ++country->n_v2_ns_requests[REQUEST_HIST_LEN-1];
+    }
+  }
+
+  if (!client_history_starts) {
     client_history_starts = now;
+    current_request_period_starts = now;
+  }
 }
 
 /** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
@@ -350,6 +399,9 @@
     return strcmp(a->country, b->country);
 }
 
+/** Seconds of client history required before we report any statistics. */
+#define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
+
 /** Return a newly allocated comma-separated string containing entries for all
  * the countries from which we've seen enough clients connect. The entry
  * format is cc=num where num is the number of IPs we've seen connecting from
@@ -361,7 +413,7 @@
   char *result = NULL;
   if (!geoip_is_loaded())
     return NULL;
-  if (client_history_starts < (now - 12*60*60)) {
+  if (client_history_starts < (now - GEOIP_MIN_OBSERVATION_TIME)) {
     char buf[32];
     smartlist_t *chunks = NULL;
     smartlist_t *entries = NULL;
@@ -435,11 +487,43 @@
   return result;
 }
 
+/** Return a new comma-separated "cc=total" request-count string, or NULL. */
+char *
+geoip_get_request_history(time_t now, geoip_client_action_t action)
+{
+  smartlist_t *entries;
+  char *result;
+  if (!geoip_countries ||
+      client_history_starts >= (now - GEOIP_MIN_OBSERVATION_TIME))
+    return NULL; /* Too little history (or no country list) to report. */
+  if (action != GEOIP_CLIENT_NETWORKSTATUS &&
+      action != GEOIP_CLIENT_NETWORKSTATUS_V2)
+    return NULL; /* Only network-status requests are counted. */
+  entries = smartlist_create();
+  SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
+      const uint32_t *n = (action == GEOIP_CLIENT_NETWORKSTATUS)
+        ? c->n_v3_ns_requests : c->n_v2_ns_requests;
+      uint32_t tot = 0;
+      int i;
+      char buf[32];
+      for (i=0; i < REQUEST_HIST_LEN; ++i)
+        tot += n[i]; /* Sum over every period in the history. */
+      tor_snprintf(buf, sizeof(buf), "%s=%u", c->countrycode, (unsigned)tot);
+      smartlist_add(entries, tor_strdup(buf));
+  });
+  smartlist_sort_strings(entries);
+  result = smartlist_join_strings(entries, ",", 0, NULL);
+  SMARTLIST_FOREACH(entries, char *, cp, tor_free(cp));
+  smartlist_free(entries); /* Strings are freed above; free the list too. */
+  return result;
+}
+
 void
 dump_geoip_stats(void)
 {
 #ifdef ENABLE_GEOIP_STATS
   time_t now = time(NULL);
+  time_t request_start;
   char *filename = get_datadir_fname("geoip-stats");
   char *data_v2 = NULL, *data_v3 = NULL;
   char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
@@ -450,15 +534,27 @@
   data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
   format_iso_time(since, geoip_get_history_start());
   format_iso_time(written, now);
-  if (!data_v2 || !data_v3)
-    goto done;
-  out = start_writing_to_stdio_file(filename, 0, 0600, &open_file);
+  out = start_writing_to_stdio_file(filename, OPEN_FLAGS_REPLACE,
+                                    0600, &open_file);
   if (!out)
     goto done;
-  if (fprintf(out, "written %s\nstarted-at %s\nns %s\nns-v2%s\n",
-              written, since, data_v3, data_v2) < 0)
+  if (fprintf(out, "written %s\nstarted-at %s\nns-ips %s\nns-v2-ips%s\n",
+              written, since,
+              data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
     goto done;
+  tor_free(data_v2);
+  tor_free(data_v3);
 
+  request_start = current_request_period_starts -
+    (n_old_request_periods * REQUEST_HIST_PERIOD);
+  format_iso_time(since, request_start);
+  data_v2 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
+  data_v3 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS);
+  if (fprintf(out, "requests-start %s\nn-ns-reqs %s\nn-v2-ns_reqs%s\n",
+              since,
+              data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
+    goto done;
+
   finish_writing_to_file(open_file);
   open_file = NULL;
  done:
@@ -495,7 +591,7 @@
 clear_geoip_db(void)
 {
   if (geoip_countries) {
-    SMARTLIST_FOREACH(geoip_countries, char *, cp, tor_free(cp));
+    SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
     smartlist_free(geoip_countries);
   }
   if (country_idxplus1_by_lc_code)

Modified: tor/trunk/src/or/or.h
===================================================================
--- tor/trunk/src/or/or.h	2008-06-10 13:49:57 UTC (rev 15096)
+++ tor/trunk/src/or/or.h	2008-06-10 18:08:56 UTC (rev 15097)
@@ -3342,6 +3342,7 @@
 void geoip_remove_old_clients(time_t cutoff);
 time_t geoip_get_history_start(void);
 char *geoip_get_client_history(time_t now, geoip_client_action_t action);
+char *geoip_get_request_history(time_t now, geoip_client_action_t action);
 int getinfo_helper_geoip(control_connection_t *control_conn,
                          const char *question, char **answer);
 void geoip_free_all(void);



More information about the tor-commits mailing list