[tor-commits] [metrics-web/master] Add GeoIP database to generate fancy relay visualizations.

karsten at torproject.org karsten at torproject.org
Thu Apr 7 13:53:46 UTC 2011


commit 4c34249760470099475e9830a3b8255b2aa5f3a3
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Apr 4 21:09:40 2011 +0200

    Add GeoIP database to generate fancy relay visualizations.
---
 build.xml                                          |    9 ++
 db/tordir.sql                                      |   50 ++++++++++
 rserve/csv.R                                       |   12 +++
 .../ernie/cron/GeoipDatabaseImporter.java          |   95 ++++++++++++++++++++
 src/org/torproject/ernie/web/CsvServlet.java       |    1 +
 5 files changed, 167 insertions(+), 0 deletions(-)

diff --git a/build.xml b/build.xml
index bd06daf..4b4a7e9 100644
--- a/build.xml
+++ b/build.xml
@@ -42,6 +42,15 @@
     </javac>
   </target>
 
+  <!-- Import GeoIP database. -->
+  <target name="geoipdb" depends="compile">
+    <java fork="true"
+          maxmemory="1024m"
+          classname="org.torproject.ernie.cron.GeoipDatabaseImporter">
+      <classpath refid="classpath"/>
+    </java>
+  </target>
+
   <!-- Prepare data for being displayed on the website. -->
   <target name="run" depends="compile">
     <java fork="true"
diff --git a/db/tordir.sql b/db/tordir.sql
index c47d19a..d5238ea 100644
--- a/db/tordir.sql
+++ b/db/tordir.sql
@@ -239,6 +239,56 @@ CREATE TABLE updates (
     date DATE
 );
 
+-- GeoIP database that helps resolve IP addresses to country codes,
+-- latitudes, and longitudes.
+CREATE TABLE geoipdb (
+  id SERIAL,
+  ipstart INET,
+  ipend INET,
+  country CHARACTER(2) NOT NULL,
+  latitude NUMERIC(7, 4) NOT NULL,
+  longitude NUMERIC(7, 4) NOT NULL
+);
+
+-- Indexes to speed up looking up IP addresses in the GeoIP database.
+CREATE INDEX geoip_ipstart ON geoipdb (ipstart);
+CREATE INDEX geoip_ipend ON geoipdb (ipend);
+
+-- Result type for GeoIP lookups that encapsulates the country code,
+-- latitude, and longitude.
+CREATE TYPE geoip_result AS (country CHARACTER(2),
+  latitude NUMERIC(7, 4), longitude NUMERIC(7, 4));
+
+-- GeoIP database lookup function.
+CREATE OR REPLACE FUNCTION geoip_lookup (CHARACTER VARYING)
+RETURNS geoip_result AS $$
+  SELECT country, latitude, longitude FROM geoipdb
+  WHERE INET($1)
+  BETWEEN geoipdb.ipstart AND geoipdb.ipend LIMIT 1;
+$$ LANGUAGE SQL;
+
+-- View that contains the relays of the first known consensuses of all
+-- months in the database.
+CREATE OR REPLACE VIEW relays_monthly_snapshots AS
+SELECT status.validafter, status.fingerprint, status.nickname,
+    status.address, (status.geoip).country, (status.geoip).latitude,
+    (status.geoip).longitude, status.isexit, status.isfast,
+    status.isguard, status.isstable, status.version, status.ports,
+    descriptor.bandwidthavg, descriptor.bandwidthburst,
+    descriptor.bandwidthobserved
+FROM (
+  SELECT *, geoip_lookup(address) AS geoip
+  FROM statusentry
+  WHERE validafter IN (
+    SELECT MIN(validafter)
+    FROM consensus
+    GROUP BY DATE_TRUNC('month', validafter)
+  )
+) AS status
+LEFT JOIN descriptor
+ON status.descriptor = descriptor.descriptor
+ORDER BY validafter, fingerprint;
+
 CREATE LANGUAGE plpgsql;
 
 -- FUNCTION refresh_relay_statuses_per_day()
diff --git a/rserve/csv.R b/rserve/csv.R
index 44138ac..e68971c 100644
--- a/rserve/csv.R
+++ b/rserve/csv.R
@@ -267,3 +267,15 @@ export_connbidirect <- function(path) {
       quote = FALSE, row.names = FALSE)
 }
 
+export_relays_monthly_snapshots <- function(path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  q <- paste("SELECT * FROM relays_monthly_snapshots")
+  rs <- dbSendQuery(con, q)
+  c <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  write.csv(format(c, trim = TRUE, scientific = FALSE), path,
+      quote = FALSE, row.names = FALSE)
+}
+
diff --git a/src/org/torproject/ernie/cron/GeoipDatabaseImporter.java b/src/org/torproject/ernie/cron/GeoipDatabaseImporter.java
new file mode 100644
index 0000000..afe5245
--- /dev/null
+++ b/src/org/torproject/ernie/cron/GeoipDatabaseImporter.java
@@ -0,0 +1,95 @@
+/* Copyright 2011 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.cron;
+
+import java.io.*;
+import java.sql.*;
+import java.util.*;
+import java.util.logging.*;
+
+/**
+ * Import a Maxmind GeoLite City database to resolve resolve IP addresses
+ * to country codes, latitudes, and longitudes.
+ */
+public class GeoipDatabaseImporter {
+  public static void main(String[] args) throws IOException,
+      SQLException {
+
+    /* Check if the GeoIP database files are in place. */
+    File locationsFile = new File("GeoLiteCity-Location.csv"),
+        blocksFile = new File("GeoLiteCity-Blocks.csv");
+    if (!locationsFile.exists() || !blocksFile.exists()) {
+      System.out.println("Could not find GeoLiteCity-Location.csv and/or "
+          + "GeoLiteCity-Blocks.csv in the working directory! Exiting!");
+      System.exit(1);
+    }
+
+    /* Initialize configuration to learn JDBC string. */
+    Configuration config = new Configuration();
+    String jdbcString = config.getRelayDescriptorDatabaseJDBC();
+
+    /* Connect to database. */
+    Connection c = DriverManager.getConnection(jdbcString);
+
+    /* Start by reading location information to memory. */
+    BufferedReader br = new BufferedReader(new FileReader(locationsFile));
+    String line;
+    Map<Integer, String> locations = new HashMap<Integer, String>();
+    while ((line = br.readLine()) != null) {
+      if (line.startsWith("Copyright") || line.startsWith("locId")) {
+        continue;
+      }
+      String[] parts = line.split(",");
+      int locId = Integer.parseInt(parts[0]);
+      String country = parts[1].replaceAll("\"", "");
+      String latitude = parts[5];
+      String longitude = parts[6];
+      locations.put(locId, country + "," + latitude + "," + longitude);
+    }
+    br.close();
+
+    /* Parse block information and add it to the database together with
+     * the location information. */
+    PreparedStatement ps = c.prepareStatement("INSERT INTO geoipdb "
+        + "(ipstart, ipend, country, latitude, longitude) VALUES "
+        + "(?, ?, ?, ?, ?)");
+    Statement s = c.createStatement();
+    s.execute("DELETE FROM geoipdb");
+    /* TODO The import takes 30+ minutes.  Perform the import in a single
+     * transaction, or requests will return strange results in these 30+
+     * minutes. */
+    br = new BufferedReader(new FileReader(blocksFile));
+    while ((line = br.readLine()) != null) {
+      if (line.startsWith("Copyright") ||
+          line.startsWith("startIpNum")) {
+        continue;
+      }
+      String[] parts = line.replaceAll("\"", "").split(",");
+      long startIpNum = Long.parseLong(parts[0]);
+      String startIp = "" + startIpNum / 256 / 256 / 256 + "."
+          + startIpNum / 256 / 256 % 256 + "." + startIpNum / 256 % 256
+          + "." + startIpNum % 256;
+      long endIpNum = Long.parseLong(parts[1]);
+      String endIp = "" + endIpNum / 256 / 256 / 256 + "."
+          + endIpNum / 256 / 256 % 256 + "." + endIpNum / 256 % 256 + "."
+          + endIpNum % 256;
+      int locId = Integer.parseInt(parts[2]);
+      if (!locations.containsKey(locId)) {
+        System.out.println("Cannot find locId=" + locId
+            + " in locations file!");
+        continue;
+      }
+      String[] locationParts = locations.get(locId).split(",");
+      String country = locationParts[0];
+      double latitude = Double.parseDouble(locationParts[1]);
+      double longitude = Double.parseDouble(locationParts[2]);
+      ps.setObject(1, startIp, Types.OTHER);
+      ps.setObject(2, endIp, Types.OTHER);
+      ps.setString(3, country);
+      ps.setDouble(4, latitude);
+      ps.setDouble(5, longitude);
+      ps.execute();
+    }
+  }
+}
+
diff --git a/src/org/torproject/ernie/web/CsvServlet.java b/src/org/torproject/ernie/web/CsvServlet.java
index 33c098f..c68a2d0 100644
--- a/src/org/torproject/ernie/web/CsvServlet.java
+++ b/src/org/torproject/ernie/web/CsvServlet.java
@@ -40,6 +40,7 @@ public class CsvServlet extends HttpServlet {
     this.availableCsvFiles.add("monthly-users-peak");
     this.availableCsvFiles.add("networksize");
     this.availableCsvFiles.add("platforms");
+    this.availableCsvFiles.add("relays-monthly-snapshots");
     this.availableCsvFiles.add("relayflags");
     this.availableCsvFiles.add("relayflags-hour");
     this.availableCsvFiles.add("torperf");



More information about the tor-commits mailing list