commit 4c34249760470099475e9830a3b8255b2aa5f3a3
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Mon Apr 4 21:09:40 2011 +0200
Add GeoIP database to generate fancy relay visualizations.
---
build.xml | 9 ++
db/tordir.sql | 50 ++++++++++
rserve/csv.R | 12 +++
.../ernie/cron/GeoipDatabaseImporter.java | 95 ++++++++++++++++++++
src/org/torproject/ernie/web/CsvServlet.java | 1 +
5 files changed, 167 insertions(+), 0 deletions(-)
diff --git a/build.xml b/build.xml
index bd06daf..4b4a7e9 100644
--- a/build.xml
+++ b/build.xml
@@ -42,6 +42,15 @@
</javac>
</target>
+ <!-- Import GeoIP database. -->
+ <target name="geoipdb" depends="compile">
+ <java fork="true"
+ maxmemory="1024m"
+ classname="org.torproject.ernie.cron.GeoipDatabaseImporter">
+ <classpath refid="classpath"/>
+ </java>
+ </target>
+
<!-- Prepare data for being displayed on the website. -->
<target name="run" depends="compile">
<java fork="true"
diff --git a/db/tordir.sql b/db/tordir.sql
index c47d19a..d5238ea 100644
--- a/db/tordir.sql
+++ b/db/tordir.sql
@@ -239,6 +239,56 @@ CREATE TABLE updates (
date DATE
);
+-- GeoIP database that helps resolve IP addresses to country codes,
+-- latitudes, and longitudes.
+CREATE TABLE geoipdb (
+ id SERIAL,
+ ipstart INET,
+ ipend INET,
+ country CHARACTER(2) NOT NULL,
+ latitude NUMERIC(7, 4) NOT NULL,
+ longitude NUMERIC(7, 4) NOT NULL
+);
+
+-- Indexes to speed up looking up IP addresses in the GeoIP database.
+CREATE INDEX geoip_ipstart ON geoipdb (ipstart);
+CREATE INDEX geoip_ipend ON geoipdb (ipend);
+
+-- Result type for GeoIP lookups that encapsulates the country code,
+-- latitude, and longitude.
+CREATE TYPE geoip_result AS (country CHARACTER(2),
+ latitude NUMERIC(7, 4), longitude NUMERIC(7, 4));
+
+-- GeoIP database lookup function.
+CREATE OR REPLACE FUNCTION geoip_lookup (CHARACTER VARYING)
+RETURNS geoip_result AS $$
+ SELECT country, latitude, longitude FROM geoipdb
+ WHERE INET($1)
+ BETWEEN geoipdb.ipstart AND geoipdb.ipend LIMIT 1;
+$$ LANGUAGE SQL;
+
+-- View that contains the relays of the first known consensuses of all
+-- months in the database.
+CREATE OR REPLACE VIEW relays_monthly_snapshots AS
+SELECT status.validafter, status.fingerprint, status.nickname,
+ status.address, (status.geoip).country, (status.geoip).latitude,
+ (status.geoip).longitude, status.isexit, status.isfast,
+ status.isguard, status.isstable, status.version, status.ports,
+ descriptor.bandwidthavg, descriptor.bandwidthburst,
+ descriptor.bandwidthobserved
+FROM (
+ SELECT *, geoip_lookup(address) AS geoip
+ FROM statusentry
+ WHERE validafter IN (
+ SELECT MIN(validafter)
+ FROM consensus
+ GROUP BY DATE_TRUNC('month', validafter)
+ )
+) AS status
+LEFT JOIN descriptor
+ON status.descriptor = descriptor.descriptor
+ORDER BY validafter, fingerprint;
+
CREATE LANGUAGE plpgsql;
-- FUNCTION refresh_relay_statuses_per_day()
diff --git a/rserve/csv.R b/rserve/csv.R
index 44138ac..e68971c 100644
--- a/rserve/csv.R
+++ b/rserve/csv.R
@@ -267,3 +267,15 @@ export_connbidirect <- function(path) {
quote = FALSE, row.names = FALSE)
}
+export_relays_monthly_snapshots <- function(path) {
+ drv <- dbDriver("PostgreSQL")
+ con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+ q <- paste("SELECT * FROM relays_monthly_snapshots")
+ rs <- dbSendQuery(con, q)
+ c <- fetch(rs, n = -1)
+ dbDisconnect(con)
+ dbUnloadDriver(drv)
+ write.csv(format(c, trim = TRUE, scientific = FALSE), path,
+ quote = FALSE, row.names = FALSE)
+}
+
diff --git a/src/org/torproject/ernie/cron/GeoipDatabaseImporter.java b/src/org/torproject/ernie/cron/GeoipDatabaseImporter.java
new file mode 100644
index 0000000..afe5245
--- /dev/null
+++ b/src/org/torproject/ernie/cron/GeoipDatabaseImporter.java
@@ -0,0 +1,95 @@
+/* Copyright 2011 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.cron;
+
+import java.io.*;
+import java.sql.*;
+import java.util.*;
+import java.util.logging.*;
+
+/**
+ * Import a Maxmind GeoLite City database to resolve resolve IP addresses
+ * to country codes, latitudes, and longitudes.
+ */
+public class GeoipDatabaseImporter {
+ public static void main(String[] args) throws IOException,
+ SQLException {
+
+ /* Check if the GeoIP database files are in place. */
+ File locationsFile = new File("GeoLiteCity-Location.csv"),
+ blocksFile = new File("GeoLiteCity-Blocks.csv");
+ if (!locationsFile.exists() || !blocksFile.exists()) {
+ System.out.println("Could not find GeoLiteCity-Location.csv and/or "
+ + "GeoLiteCity-Blocks.csv in the working directory! Exiting!");
+ System.exit(1);
+ }
+
+ /* Initialize configuration to learn JDBC string. */
+ Configuration config = new Configuration();
+ String jdbcString = config.getRelayDescriptorDatabaseJDBC();
+
+ /* Connect to database. */
+ Connection c = DriverManager.getConnection(jdbcString);
+
+ /* Start by reading location information to memory. */
+ BufferedReader br = new BufferedReader(new FileReader(locationsFile));
+ String line;
+ Map<Integer, String> locations = new HashMap<Integer, String>();
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("Copyright") || line.startsWith("locId")) {
+ continue;
+ }
+ String[] parts = line.split(",");
+ int locId = Integer.parseInt(parts[0]);
+ String country = parts[1].replaceAll("\"", "");
+ String latitude = parts[5];
+ String longitude = parts[6];
+ locations.put(locId, country + "," + latitude + "," + longitude);
+ }
+ br.close();
+
+ /* Parse block information and add it to the database together with
+ * the location information. */
+ PreparedStatement ps = c.prepareStatement("INSERT INTO geoipdb "
+ + "(ipstart, ipend, country, latitude, longitude) VALUES "
+ + "(?, ?, ?, ?, ?)");
+ Statement s = c.createStatement();
+ s.execute("DELETE FROM geoipdb");
+ /* TODO The import takes 30+ minutes. Perform the import in a single
+ * transaction, or requests will return strange results in these 30+
+ * minutes. */
+ br = new BufferedReader(new FileReader(blocksFile));
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("Copyright") ||
+ line.startsWith("startIpNum")) {
+ continue;
+ }
+ String[] parts = line.replaceAll("\"", "").split(",");
+ long startIpNum = Long.parseLong(parts[0]);
+ String startIp = "" + startIpNum / 256 / 256 / 256 + "."
+ + startIpNum / 256 / 256 % 256 + "." + startIpNum / 256 % 256
+ + "." + startIpNum % 256;
+ long endIpNum = Long.parseLong(parts[1]);
+ String endIp = "" + endIpNum / 256 / 256 / 256 + "."
+ + endIpNum / 256 / 256 % 256 + "." + endIpNum / 256 % 256 + "."
+ + endIpNum % 256;
+ int locId = Integer.parseInt(parts[2]);
+ if (!locations.containsKey(locId)) {
+ System.out.println("Cannot find locId=" + locId
+ + " in locations file!");
+ continue;
+ }
+ String[] locationParts = locations.get(locId).split(",");
+ String country = locationParts[0];
+ double latitude = Double.parseDouble(locationParts[1]);
+ double longitude = Double.parseDouble(locationParts[2]);
+ ps.setObject(1, startIp, Types.OTHER);
+ ps.setObject(2, endIp, Types.OTHER);
+ ps.setString(3, country);
+ ps.setDouble(4, latitude);
+ ps.setDouble(5, longitude);
+ ps.execute();
+ }
+ }
+}
+
diff --git a/src/org/torproject/ernie/web/CsvServlet.java b/src/org/torproject/ernie/web/CsvServlet.java
index 33c098f..c68a2d0 100644
--- a/src/org/torproject/ernie/web/CsvServlet.java
+++ b/src/org/torproject/ernie/web/CsvServlet.java
@@ -40,6 +40,7 @@ public class CsvServlet extends HttpServlet {
this.availableCsvFiles.add("monthly-users-peak");
this.availableCsvFiles.add("networksize");
this.availableCsvFiles.add("platforms");
+ this.availableCsvFiles.add("relays-monthly-snapshots");
this.availableCsvFiles.add("relayflags");
this.availableCsvFiles.add("relayflags-hour");
this.availableCsvFiles.add("torperf");