commit 4c34249760470099475e9830a3b8255b2aa5f3a3 Author: Karsten Loesing karsten.loesing@gmx.net Date: Mon Apr 4 21:09:40 2011 +0200
Add GeoIP database to generate fancy relay visualizations. --- build.xml | 9 ++ db/tordir.sql | 50 ++++++++++ rserve/csv.R | 12 +++ .../ernie/cron/GeoipDatabaseImporter.java | 95 ++++++++++++++++++++ src/org/torproject/ernie/web/CsvServlet.java | 1 + 5 files changed, 167 insertions(+), 0 deletions(-)
diff --git a/build.xml b/build.xml index bd06daf..4b4a7e9 100644 --- a/build.xml +++ b/build.xml @@ -42,6 +42,15 @@ </javac> </target>
+ <!-- Import GeoIP database. --> + <target name="geoipdb" depends="compile"> + <java fork="true" + maxmemory="1024m" + classname="org.torproject.ernie.cron.GeoipDatabaseImporter"> + <classpath refid="classpath"/> + </java> + </target> + <!-- Prepare data for being displayed on the website. --> <target name="run" depends="compile"> <java fork="true" diff --git a/db/tordir.sql b/db/tordir.sql index c47d19a..d5238ea 100644 --- a/db/tordir.sql +++ b/db/tordir.sql @@ -239,6 +239,56 @@ CREATE TABLE updates ( date DATE );
+-- GeoIP database that helps resolve IP addresses to country codes, +-- latitudes, and longitudes. +CREATE TABLE geoipdb ( + id SERIAL, + ipstart INET, + ipend INET, + country CHARACTER(2) NOT NULL, + latitude NUMERIC(7, 4) NOT NULL, + longitude NUMERIC(7, 4) NOT NULL +); + +-- Indexes to speed up looking up IP addresses in the GeoIP database. +CREATE INDEX geoip_ipstart ON geoipdb (ipstart); +CREATE INDEX geoip_ipend ON geoipdb (ipend); + +-- Result type for GeoIP lookups that encapsulates the country code, +-- latitude, and longitude. +CREATE TYPE geoip_result AS (country CHARACTER(2), + latitude NUMERIC(7, 4), longitude NUMERIC(7, 4)); + +-- GeoIP database lookup function. +CREATE OR REPLACE FUNCTION geoip_lookup (CHARACTER VARYING) +RETURNS geoip_result AS $$ + SELECT country, latitude, longitude FROM geoipdb + WHERE INET($1) + BETWEEN geoipdb.ipstart AND geoipdb.ipend LIMIT 1; +$$ LANGUAGE SQL; + +-- View that contains the relays of the first known consensuses of all +-- months in the database. +CREATE OR REPLACE VIEW relays_monthly_snapshots AS +SELECT status.validafter, status.fingerprint, status.nickname, + status.address, (status.geoip).country, (status.geoip).latitude, + (status.geoip).longitude, status.isexit, status.isfast, + status.isguard, status.isstable, status.version, status.ports, + descriptor.bandwidthavg, descriptor.bandwidthburst, + descriptor.bandwidthobserved +FROM ( + SELECT *, geoip_lookup(address) AS geoip + FROM statusentry + WHERE validafter IN ( + SELECT MIN(validafter) + FROM consensus + GROUP BY DATE_TRUNC('month', validafter) + ) +) AS status +LEFT JOIN descriptor +ON status.descriptor = descriptor.descriptor +ORDER BY validafter, fingerprint; + CREATE LANGUAGE plpgsql;
-- FUNCTION refresh_relay_statuses_per_day() diff --git a/rserve/csv.R b/rserve/csv.R index 44138ac..e68971c 100644 --- a/rserve/csv.R +++ b/rserve/csv.R @@ -267,3 +267,15 @@ export_connbidirect <- function(path) { quote = FALSE, row.names = FALSE) }
+export_relays_monthly_snapshots <- function(path) { + drv <- dbDriver("PostgreSQL") + con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) + q <- paste("SELECT * FROM relays_monthly_snapshots") + rs <- dbSendQuery(con, q) + c <- fetch(rs, n = -1) + dbDisconnect(con) + dbUnloadDriver(drv) + write.csv(format(c, trim = TRUE, scientific = FALSE), path, + quote = FALSE, row.names = FALSE) +} + diff --git a/src/org/torproject/ernie/cron/GeoipDatabaseImporter.java b/src/org/torproject/ernie/cron/GeoipDatabaseImporter.java new file mode 100644 index 0000000..afe5245 --- /dev/null +++ b/src/org/torproject/ernie/cron/GeoipDatabaseImporter.java @@ -0,0 +1,95 @@ +/* Copyright 2011 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.cron; + +import java.io.*; +import java.sql.*; +import java.util.*; +import java.util.logging.*; + +/** + * Import a Maxmind GeoLite City database to resolve resolve IP addresses + * to country codes, latitudes, and longitudes. + */ +public class GeoipDatabaseImporter { + public static void main(String[] args) throws IOException, + SQLException { + + /* Check if the GeoIP database files are in place. */ + File locationsFile = new File("GeoLiteCity-Location.csv"), + blocksFile = new File("GeoLiteCity-Blocks.csv"); + if (!locationsFile.exists() || !blocksFile.exists()) { + System.out.println("Could not find GeoLiteCity-Location.csv and/or " + + "GeoLiteCity-Blocks.csv in the working directory! Exiting!"); + System.exit(1); + } + + /* Initialize configuration to learn JDBC string. */ + Configuration config = new Configuration(); + String jdbcString = config.getRelayDescriptorDatabaseJDBC(); + + /* Connect to database. */ + Connection c = DriverManager.getConnection(jdbcString); + + /* Start by reading location information to memory. */ + BufferedReader br = new BufferedReader(new FileReader(locationsFile)); + String line; + Map<Integer, String> locations = new HashMap<Integer, String>(); + while ((line = br.readLine()) != null) { + if (line.startsWith("Copyright") || line.startsWith("locId")) { + continue; + } + String[] parts = line.split(","); + int locId = Integer.parseInt(parts[0]); + String country = parts[1].replaceAll(""", ""); + String latitude = parts[5]; + String longitude = parts[6]; + locations.put(locId, country + "," + latitude + "," + longitude); + } + br.close(); + + /* Parse block information and add it to the database together with + * the location information. */ + PreparedStatement ps = c.prepareStatement("INSERT INTO geoipdb " + + "(ipstart, ipend, country, latitude, longitude) VALUES " + + "(?, ?, ?, ?, ?)"); + Statement s = c.createStatement(); + s.execute("DELETE FROM geoipdb"); + /* TODO The import takes 30+ minutes. Perform the import in a single + * transaction, or requests will return strange results in these 30+ + * minutes. */ + br = new BufferedReader(new FileReader(blocksFile)); + while ((line = br.readLine()) != null) { + if (line.startsWith("Copyright") || + line.startsWith("startIpNum")) { + continue; + } + String[] parts = line.replaceAll(""", "").split(","); + long startIpNum = Long.parseLong(parts[0]); + String startIp = "" + startIpNum / 256 / 256 / 256 + "." + + startIpNum / 256 / 256 % 256 + "." + startIpNum / 256 % 256 + + "." + startIpNum % 256; + long endIpNum = Long.parseLong(parts[1]); + String endIp = "" + endIpNum / 256 / 256 / 256 + "." + + endIpNum / 256 / 256 % 256 + "." + endIpNum / 256 % 256 + "." + + endIpNum % 256; + int locId = Integer.parseInt(parts[2]); + if (!locations.containsKey(locId)) { + System.out.println("Cannot find locId=" + locId + + " in locations file!"); + continue; + } + String[] locationParts = locations.get(locId).split(","); + String country = locationParts[0]; + double latitude = Double.parseDouble(locationParts[1]); + double longitude = Double.parseDouble(locationParts[2]); + ps.setObject(1, startIp, Types.OTHER); + ps.setObject(2, endIp, Types.OTHER); + ps.setString(3, country); + ps.setDouble(4, latitude); + ps.setDouble(5, longitude); + ps.execute(); + } + } +} + diff --git a/src/org/torproject/ernie/web/CsvServlet.java b/src/org/torproject/ernie/web/CsvServlet.java index 33c098f..c68a2d0 100644 --- a/src/org/torproject/ernie/web/CsvServlet.java +++ b/src/org/torproject/ernie/web/CsvServlet.java @@ -40,6 +40,7 @@ public class CsvServlet extends HttpServlet { this.availableCsvFiles.add("monthly-users-peak"); this.availableCsvFiles.add("networksize"); this.availableCsvFiles.add("platforms"); + this.availableCsvFiles.add("relays-monthly-snapshots"); this.availableCsvFiles.add("relayflags"); this.availableCsvFiles.add("relayflags-hour"); this.availableCsvFiles.add("torperf");
tor-commits@lists.torproject.org