[tor-commits] [metrics-web/master] Remove unused GeoIP file from database.

karsten at torproject.org karsten at torproject.org
Wed Jan 22 16:09:29 UTC 2014


commit a66c38fce620d6128eb845ff36a5db0de228f56a
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue Jan 21 15:46:51 2014 +0100

    Remove unused GeoIP file from database.
---
 README                                             |   27 +----
 build.xml                                          |    9 --
 db/tordir.sql                                      |   72 --------------
 .../ernie/cron/network/GeoipDatabaseImporter.java  |  105 --------------------
 4 files changed, 4 insertions(+), 209 deletions(-)

diff --git a/README b/README
index ddf51a1..205e8d4 100644
--- a/README
+++ b/README
@@ -185,26 +185,7 @@ Add a crontab entry for the database importer to run once per hour:
 15 * * * * cd /srv/metrics-web/ && ./run.sh
 
 
-1.5. Importing GeoIP information
-================================
-
-Some of the graphs require GeoIP information to resolve IP addresses to
-country codes.  This information is provided in MaxMind's GeoLite City
-database available at http://www.maxmind.com/app/geolitecity.
-
-Download and extract the two files GeoLiteCity-Location.csv and
-GeoLiteCity-Blocks.csv to /srv/metrics-web/.
-
-Import the two files into the metrics database.
-
-$ ant geoipdb
-
-Note that there is no easy way to update the GeoIP information in the
-metrics database yet.  The only way to do so is to manually delete and
-recreate the database table and import the new GeoIP database.
-
-
-1.6. Pre-calculating relay statistics
+1.5. Pre-calculating relay statistics
 =====================================
 
 The relay graphs on the metrics website rely on pre-calculated statistics
@@ -220,7 +201,7 @@ If the metrics database gets updated automatically, write a script and add
 a crontab entry for pre-calculating statistics every 6 or 12 hours.
 
 
-1.7. Generating network status information
+1.6. Generating network status information
 ==========================================
 
 The metrics database importer can analyze the most recently parsed network
@@ -237,7 +218,7 @@ WriteConsensusHealth 1
 WriteNagiosStatusFile 1
 
 
-1.8. Importing sanitized bridge descriptors
+1.7. Importing sanitized bridge descriptors
 ===========================================
 
 The metrics database can store aggregate statistics about running bridges
@@ -267,7 +248,7 @@ Run the database import:
 $ ./run.sh
 
 
-1.9. Importing Torperf performance data
+1.8. Importing Torperf performance data
 =======================================
 
 Torperf measures the performance of the Tor network as users experience
diff --git a/build.xml b/build.xml
index bbf0a12..b2e4c28 100644
--- a/build.xml
+++ b/build.xml
@@ -52,15 +52,6 @@
     </javac>
   </target>
 
-  <!-- Import GeoIP database. -->
-  <target name="geoipdb" depends="compile">
-    <java fork="true"
-          maxmemory="1024m"
-          classname="org.torproject.ernie.cron.network.GeoipDatabaseImporter">
-      <classpath refid="classpath"/>
-    </java>
-  </target>
-
   <!-- Import descriptors into the ExoneraTor database. -->
   <target name="exonerator" depends="compile">
     <java fork="true"
diff --git a/db/tordir.sql b/db/tordir.sql
index 2a8533d..cd2ed6a 100644
--- a/db/tordir.sql
+++ b/db/tordir.sql
@@ -286,34 +286,6 @@ CREATE TABLE updates (
     date DATE
 );
 
--- GeoIP database that helps resolve IP addresses to country codes,
--- latitudes, and longitudes.
-CREATE TABLE geoipdb (
-  id SERIAL,
-  ipstart INET,
-  ipend INET,
-  country CHARACTER(2) NOT NULL,
-  latitude NUMERIC(7, 4) NOT NULL,
-  longitude NUMERIC(7, 4) NOT NULL
-);
-
--- Indexes to speed up looking up IP addresses in the GeoIP database.
-CREATE INDEX geoip_ipstart ON geoipdb (ipstart);
-CREATE INDEX geoip_ipend ON geoipdb (ipend);
-
--- Result type for GeoIP lookups that encapsulates the country code,
--- latitude, and longitude.
-CREATE TYPE geoip_result AS (country CHARACTER(2),
-  latitude NUMERIC(7, 4), longitude NUMERIC(7, 4));
-
--- GeoIP database lookup function.
-CREATE OR REPLACE FUNCTION geoip_lookup (CHARACTER VARYING)
-RETURNS geoip_result AS $$
-  SELECT country, latitude, longitude FROM geoipdb
-  WHERE INET($1)
-  BETWEEN geoipdb.ipstart AND geoipdb.ipend LIMIT 1;
-$$ LANGUAGE SQL;
-
 -- FUNCTION refresh_relay_statuses_per_day()
 -- Updates helper table which is used to refresh the aggregate tables.
 CREATE OR REPLACE FUNCTION refresh_relay_statuses_per_day()
@@ -454,48 +426,6 @@ CREATE OR REPLACE FUNCTION refresh_network_size() RETURNS INTEGER AS $$
     END;
 $$ LANGUAGE plpgsql;
 
--- FUNCTION refresh_relay_countries()
-CREATE OR REPLACE FUNCTION refresh_relay_countries() RETURNS INTEGER AS $$
-    DECLARE
-        min_date TIMESTAMP WITHOUT TIME ZONE;
-        max_date TIMESTAMP WITHOUT TIME ZONE;
-    BEGIN
-
-    min_date := (SELECT MIN(date) FROM updates);
-    max_date := (SELECT MAX(date) + 1 FROM updates);
-
-    DELETE FROM relay_countries
-    WHERE date IN (SELECT date FROM updates);
-
-    EXECUTE '
-    INSERT INTO relay_countries
-    (date, country, relays)
-    SELECT date, country, relays / count AS relays
-    FROM (
-        SELECT date,
-               COALESCE(lower((geoip_lookup(address)).country), ''zz'')
-                 AS country,
-               SUM(relays) AS relays
-        FROM (
-            SELECT DATE(validafter) AS date,
-                   fingerprint,
-                   address,
-                   COUNT(*) AS relays
-            FROM statusentry
-            WHERE isrunning = TRUE
-                  AND validafter >= ''' || min_date || '''
-                  AND validafter < ''' || max_date || '''
-                  AND DATE(validafter) IN (SELECT date FROM updates)
-            GROUP BY 1, 2, 3
-            ) c
-        GROUP BY 1, 2
-        ) b
-    NATURAL JOIN relay_statuses_per_day';
-
-    RETURN 1;
-    END;
-$$ LANGUAGE plpgsql;
-
 -- FUNCTION refresh_relay_platforms()
 CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$
     DECLARE
@@ -930,8 +860,6 @@ CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
     PERFORM refresh_relay_statuses_per_day();
     RAISE NOTICE '% Refreshing network size.', timeofday();
     PERFORM refresh_network_size();
-    RAISE NOTICE '% Refreshing relays by country.', timeofday();
-    PERFORM refresh_relay_countries();
     RAISE NOTICE '% Refreshing relay platforms.', timeofday();
     PERFORM refresh_relay_platforms();
     RAISE NOTICE '% Refreshing relay versions.', timeofday();
diff --git a/src/org/torproject/ernie/cron/network/GeoipDatabaseImporter.java b/src/org/torproject/ernie/cron/network/GeoipDatabaseImporter.java
deleted file mode 100644
index 75e29ac..0000000
--- a/src/org/torproject/ernie/cron/network/GeoipDatabaseImporter.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/* Copyright 2011, 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.cron.network;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.sql.Types;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.torproject.ernie.cron.Configuration;
-
-/**
- * Import a Maxmind GeoLite City database to resolve resolve IP addresses
- * to country codes, latitudes, and longitudes.
- */
-public class GeoipDatabaseImporter {
-  public static void main(String[] args) throws IOException,
-      SQLException {
-
-    /* Check if the GeoIP database files are in place. */
-    File locationsFile = new File("GeoLiteCity-Location.csv"),
-        blocksFile = new File("GeoLiteCity-Blocks.csv");
-    if (!locationsFile.exists() || !blocksFile.exists()) {
-      System.out.println("Could not find GeoLiteCity-Location.csv and/or "
-          + "GeoLiteCity-Blocks.csv in the working directory! Exiting!");
-      System.exit(1);
-    }
-
-    /* Initialize configuration to learn JDBC string. */
-    Configuration config = new Configuration();
-    String jdbcString = config.getRelayDescriptorDatabaseJDBC();
-
-    /* Connect to database. */
-    Connection c = DriverManager.getConnection(jdbcString);
-
-    /* Start by reading location information to memory. */
-    BufferedReader br = new BufferedReader(new FileReader(locationsFile));
-    String line;
-    Map<Integer, String> locations = new HashMap<Integer, String>();
-    while ((line = br.readLine()) != null) {
-      if (line.startsWith("Copyright") || line.startsWith("locId")) {
-        continue;
-      }
-      String[] parts = line.split(",");
-      int locId = Integer.parseInt(parts[0]);
-      String country = parts[1].replaceAll("\"", "");
-      String latitude = parts[5];
-      String longitude = parts[6];
-      locations.put(locId, country + "," + latitude + "," + longitude);
-    }
-    br.close();
-
-    /* Parse block information and add it to the database together with
-     * the location information. */
-    PreparedStatement ps = c.prepareStatement("INSERT INTO geoipdb "
-        + "(ipstart, ipend, country, latitude, longitude) VALUES "
-        + "(?, ?, ?, ?, ?)");
-    Statement s = c.createStatement();
-    s.execute("DELETE FROM geoipdb");
-    /* TODO The import takes 30+ minutes.  Perform the import in a single
-     * transaction, or requests will return strange results in these 30+
-     * minutes. */
-    br = new BufferedReader(new FileReader(blocksFile));
-    while ((line = br.readLine()) != null) {
-      if (line.startsWith("Copyright") ||
-          line.startsWith("startIpNum")) {
-        continue;
-      }
-      String[] parts = line.replaceAll("\"", "").split(",");
-      long startIpNum = Long.parseLong(parts[0]);
-      String startIp = "" + startIpNum / 256 / 256 / 256 + "."
-          + startIpNum / 256 / 256 % 256 + "." + startIpNum / 256 % 256
-          + "." + startIpNum % 256;
-      long endIpNum = Long.parseLong(parts[1]);
-      String endIp = "" + endIpNum / 256 / 256 / 256 + "."
-          + endIpNum / 256 / 256 % 256 + "." + endIpNum / 256 % 256 + "."
-          + endIpNum % 256;
-      int locId = Integer.parseInt(parts[2]);
-      if (!locations.containsKey(locId)) {
-        System.out.println("Cannot find locId=" + locId
-            + " in locations file!");
-        continue;
-      }
-      String[] locationParts = locations.get(locId).split(",");
-      String country = locationParts[0];
-      double latitude = Double.parseDouble(locationParts[1]);
-      double longitude = Double.parseDouble(locationParts[2]);
-      ps.setObject(1, startIp, Types.OTHER);
-      ps.setObject(2, endIp, Types.OTHER);
-      ps.setString(3, country);
-      ps.setDouble(4, latitude);
-      ps.setDouble(5, longitude);
-      ps.execute();
-    }
-  }
-}
-





More information about the tor-commits mailing list