[or-cvs] [ernie/master] Add GeoIP database manager in preparation for sanitizing bridge descriptors.

karsten at torproject.org karsten at torproject.org
Tue Mar 23 17:56:49 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Tue, 23 Mar 2010 18:54:04 +0100
Subject: Add GeoIP database manager in preparation for sanitizing bridge descriptors.
Commit: f9562a646295d6873409ebe5570cc028f7fd521d

---
 config                        |   10 +
 src/Configuration.java        |   20 +++
 src/GeoIPDatabaseManager.java |  377 +++++++++++++++++++++++++++++++++++++++++
 src/Main.java                 |   10 +
 4 files changed, 417 insertions(+), 0 deletions(-)
 create mode 100644 src/GeoIPDatabaseManager.java

diff --git a/config b/config
index 18c5ed6..00f6388 100644
--- a/config
+++ b/config
@@ -80,3 +80,13 @@
 ## Download exit list and store it to disk
 #DownloadExitList 0
 
+## Import local Maxmind GeoIP databases
+#ImportGeoIPDatabases 1
+
+## Download (commercial) Maxmind GeoIP database for sanitizing bridge
+## descriptors
+#DownloadGeoIPDatabase 0
+
+## Maxmind license key for downloading commercial GeoIP databases
+#MaxmindLicenseKey NA
+
diff --git a/src/Configuration.java b/src/Configuration.java
index cf8c7d1..0412040 100644
--- a/src/Configuration.java
+++ b/src/Configuration.java
@@ -35,6 +35,9 @@ public class Configuration {
   private String getTorStatsUrl = "http://gettor.torproject.org:8080/"
       + "~gettor/gettor_stats.txt";
   private boolean downloadExitList = false;
+  private boolean importGeoIPDatabases = true;
+  private boolean downloadGeoIPDatabase = false;
+  private String maxmindLicenseKey = "";
   public Configuration() {
     Logger logger = Logger.getLogger(Configuration.class.getName());
     File configFile = new File("config");
@@ -123,6 +126,14 @@ public class Configuration {
         } else if (line.startsWith("DownloadExitList")) {
           this.downloadExitList = Integer.parseInt(
               line.split(" ")[1]) != 0;
+        } else if (line.startsWith("ImportGeoIPDatabases")) {
+          this.importGeoIPDatabases = Integer.parseInt(
+              line.split(" ")[1]) != 0;
+        } else if (line.startsWith("DownloadGeoIPDatabase")) {
+          this.downloadGeoIPDatabase = Integer.parseInt(
+              line.split(" ")[1]) != 0;
+        } else if (line.startsWith("MaxmindLicenseKey")) {
+          this.maxmindLicenseKey = line.split(" ")[1];
         } else {
           logger.severe("Configuration file contains unrecognized "
               + "configuration key in line '" + line + "'! Exiting!");
@@ -208,5 +219,14 @@ public class Configuration {
   public boolean getDownloadExitList() {
     return this.downloadExitList;
   }
+  public boolean getImportGeoIPDatabases() { // ImportGeoIPDatabases option
+    return this.importGeoIPDatabases; // true unless the config says 0
+  }
+  public boolean getDownloadGeoIPDatabase() { // DownloadGeoIPDatabase option
+    return this.downloadGeoIPDatabase; // false unless config is non-zero
+  }
+  public String getMaxmindLicenseKey() { // MaxmindLicenseKey option
+    return this.maxmindLicenseKey; // empty string unless configured
+  }
 }
 
diff --git a/src/GeoIPDatabaseManager.java b/src/GeoIPDatabaseManager.java
new file mode 100644
index 0000000..7438003
--- /dev/null
+++ b/src/GeoIPDatabaseManager.java
@@ -0,0 +1,377 @@
+import java.io.*;
+import java.net.*;
+import java.text.*;
+import java.util.*;
+import java.util.logging.*;
+import java.util.zip.*;
+
+/**
+ * Maintains multiple versions of GeoIP databases to resolve IP addresses
+ * to country codes using the most recent database at a given time.
+ * Supports importing CSV-formatted databases from disk and downloading
+ * the most recent commercial Maxmind GeoIP database from their server
+ * using a license key.
+ */
+public class GeoIPDatabaseManager {
+
+  /**
+   * Database entry of the combined GeoIP database consisting of start IP
+   * address, end IP address, and countries of all contained database
+   * versions. Entries are mutable: importing another database version
+   * may change the bounds of an entry and appends to its country list.
+   */
+  private static class DatabaseEntry {
+    
+    /**
+     * Start IP address in decimal form (inclusive). Entries are stored
+     * in the combined database under this value as key.
+     */
+    long fromIP;
+
+    /**
+     * End IP address in decimal form (inclusive).
+     */
+    long toIP;
+
+    /**
+     * Countries of all contained database versions as a list of country
+     * codes in which every code, including the first one, is preceded by
+     * a comma (e.g., ",US,DE"). The position of a code corresponds to
+     * the position of its database version in the list of all database
+     * versions; "ZZ" is used for versions that do not assign a country
+     * to this address range.
+     */
+    String countries;
+  }
+
+  /**
+   * Mapping from an IP address in decimal form to a database entry. The
+   * key of an entry is its start IP address, so the map is sorted by
+   * the start of the address ranges.
+   */
+  private SortedMap<Long, DatabaseEntry> combinedDatabase;
+
+  /**
+   * Has the combined database been modified from importing database
+   * versions from disk? The combined database is only written to disk
+   * if this flag is set.
+   */
+  private boolean combinedDatabaseModified;
+
+  /**
+   * File holding the combined GeoIP database.
+   */
+  private File combinedDatabaseFile;
+
+  /**
+   * List of dates representing the GeoIP database versions, in the order
+   * in which the versions were imported. Dates are the eight characters
+   * taken from the database file names (GeoIP-108_xxxxxxxx.csv), and
+   * the position of a date in this list equals the position of the
+   * corresponding country code in the country list of each entry.
+   */
+  private List<String> allDatabases;
+
+  /**
+   * Timestamp when we last downloaded the GeoIP database from the Maxmind
+   * servers, formatted as yyyy-MM-dd HH:mm:ss in UTC, or null if no
+   * download attempt is known.
+   */
+  private String lastDownloadedTime;
+
+  /**
+   * Logger for this class.
+   */
+  private Logger logger;
+
+  /**
+   * Initializes this class by reading in the database versions known so
+   * far from <code>stats/geoip-database</code>, if that file exists.
+   * Lines of that file that cannot be parsed are skipped with a warning
+   * instead of aborting the initialization.
+   */
+  public GeoIPDatabaseManager() {
+
+    /* Initialize instance variables. */
+    this.combinedDatabaseFile = new File("stats/geoip-database");
+    this.combinedDatabase = new TreeMap<Long, DatabaseEntry>();
+    this.allDatabases = new ArrayList<String>();
+    this.combinedDatabaseModified = false;
+
+    /* Initialize logger. */
+    this.logger = Logger.getLogger(GeoIPDatabaseManager.class.getName());
+
+    /* Read in combined GeoIP database. */
+    if (!this.combinedDatabaseFile.exists()) {
+      return;
+    }
+    BufferedReader br = null;
+    try {
+      this.logger.fine("Reading in "
+          + this.combinedDatabaseFile.getAbsolutePath() + "...");
+      br = new BufferedReader(new FileReader(
+          this.combinedDatabaseFile));
+      String line = null;
+      while ((line = br.readLine()) != null) {
+        if (line.startsWith("lastDownload")) {
+          /* An unset timestamp may have been written as the literal
+           * string "null"; treat that like a missing timestamp. */
+          String time = line.substring("lastDownload".length()).trim();
+          if (time.length() > 0 && !time.equals("null")) {
+            this.lastDownloadedTime = time;
+          }
+        } else if (line.startsWith("beginIpNum,endIpNum")) {
+          String[] parts = line.split(",");
+          for (int i = 2; i < parts.length; i++) {
+            this.allDatabases.add(parts[i]);
+          }
+        } else {
+          /* Data line: start IP, end IP, and then one country code per
+           * database version. The country list is stored including the
+           * comma that precedes its first code. */
+          int secondComma = line.indexOf(",", line.indexOf(",") + 1);
+          if (secondComma < 0) {
+            this.logger.warning("Skipping unparseable line '" + line
+                + "' in "
+                + this.combinedDatabaseFile.getAbsolutePath() + ".");
+            continue;
+          }
+          try {
+            String[] parts = line.split(",");
+            DatabaseEntry e = new DatabaseEntry();
+            e.fromIP = Long.parseLong(parts[0]);
+            e.toIP = Long.parseLong(parts[1]);
+            e.countries = line.substring(secondComma);
+            this.combinedDatabase.put(e.fromIP, e);
+          } catch (NumberFormatException ex) {
+            this.logger.log(Level.WARNING, "Skipping line '" + line
+                + "' with non-numeric IP address in "
+                + this.combinedDatabaseFile.getAbsolutePath() + ".", ex);
+          }
+        }
+      }
+      this.logger.fine("Finished reading in "
+          + this.combinedDatabaseFile.getAbsolutePath() + ".");
+    } catch (IOException e) {
+      this.logger.log(Level.WARNING, "Failed to read in "
+          + this.combinedDatabaseFile.getAbsolutePath() + "!", e);
+    } finally {
+      /* Release the file handle even if reading failed half-way. */
+      if (br != null) {
+        try {
+          br.close();
+        } catch (IOException e) {
+          this.logger.log(Level.FINER, "Failed to close "
+              + this.combinedDatabaseFile.getAbsolutePath() + ".", e);
+        }
+      }
+    }
+  }
+
+  /**
+   * Downloads today's commercial Maxmind GeoIP database, if such a
+   * database exists, and writes it to disk. This method should be called
+   * before importing GeoIP databases from disk if the new database should
+   * be included in the combined database.
+   */
+  public void downloadGeoIPDatabase(String licenseKey) {
+    if (licenseKey == null || licenseKey.length() < 1) {
+      logger.warning("Missing or invalid license key for downloading "
+          + "GeoIP database!");
+      return;
+    }
+
+    /* Find out when we tried downloading the last time to avoid making
+     * too many download attempts. */
+    SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    long now = System.currentTimeMillis();
+    if (this.lastDownloadedTime != null) {
+      long lastDownloaded = -1;
+      try {
+        lastDownloaded = dateTimeFormat.parse(this.lastDownloadedTime).
+            getTime();
+      } catch (ParseException e) {
+        logger.log(Level.WARNING, "Could not parse last downloaded "
+            + "time '" + this.lastDownloadedTime + "'. Ignoring.");
+      }
+      if (lastDownloaded + 8L * 60L * 60L * 1000L > now) {
+        logger.finer("Last GeoIP database download not more than 8 "
+            + "hours in the past. Not downloading.");
+        return;
+      }
+    }
+
+    /* Download GeoIP database. */
+    try {
+      logger.fine("Downloading GeoIP database...");
+      this.lastDownloadedTime = dateTimeFormat.format(now);
+      SimpleDateFormat urlDateFormat = new SimpleDateFormat("yyyyMMdd");
+      urlDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+      String date = urlDateFormat.format(now);
+      String url = "http://www.maxmind.com/app/download_new"
+          + "?edition_id=108&date=" + date + "&suffix=zip&license_key="
+          + licenseKey;
+      URL u = new URL(url);
+      HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+      huc.setRequestMethod("GET");
+      huc.connect();
+      int response = huc.getResponseCode();
+      if (response != 200) {
+        logger.fine("Could not download GeoIP database. Response code "
+            + response);
+        return;
+      }
+      BufferedInputStream bis = new BufferedInputStream(
+          huc.getInputStream());
+      ZipInputStream zis = new ZipInputStream(bis);
+      BufferedInputStream bis2 = new BufferedInputStream(zis);
+      ZipEntry entry = null;
+      while ((entry = zis.getNextEntry()) != null) {
+        if (!entry.isDirectory() &&
+            entry.getName().endsWith("GeoIP-108.csv")) {
+          String filename = "geoipdb/GeoIP-108_" + date + ".csv";
+          OutputStream out = new BufferedOutputStream(
+              new FileOutputStream(filename));
+          byte[] buffer = new byte[1024];
+          int len;
+          while ((len = bis2.read(buffer)) >= 0) {
+            out.write(buffer, 0, len);
+          }
+          out.close();
+        }
+      }
+      zis.close();
+    } catch (IOException e) {
+      this.logger.log(Level.WARNING, "Could not download GeoIP database. "
+          + "Exiting.", e);
+      return;
+    }
+  }
+
+  /**
+   * Imports the GeoIP databases from <code>directory</code> to include
+   * them in the combined GeoIP database. Only files named
+   * <code>GeoIP-108_xxxxxxxx.csv</code> (with an eight-character date)
+   * whose date is not yet contained in the combined database are read.
+   * Lines with fewer than five comma-separated fields or with
+   * non-numeric IP addresses in the third and fourth field are skipped
+   * with a warning.
+   *
+   * @param directory Path of the directory containing the database
+   *     files; nothing happens if it does not exist or cannot be listed.
+   */
+  public void importGeoIPDatabaseFromDisk(String directory) {
+    File databaseDirectory = new File(directory);
+    /* listFiles() returns null if the path is not a directory or cannot
+     * be read, so check the result in addition to the existence. */
+    File[] databaseFiles = databaseDirectory.exists()
+        ? databaseDirectory.listFiles() : null;
+    if (databaseFiles == null) {
+      return;
+    }
+    for (File databaseFile : databaseFiles) {
+      String filename = databaseFile.getName();
+      if (!filename.startsWith("GeoIP-108_") ||
+          filename.length() != "GeoIP-108_xxxxxxxx.csv".length() ||
+          !filename.endsWith(".csv")) {
+        continue;
+      }
+      String date = filename.substring("GeoIP-108_".length(),
+          "GeoIP-108_xxxxxxxx".length());
+      if (allDatabases.contains(date)) {
+        continue;
+      }
+      this.combinedDatabaseModified = true;
+      this.logger.fine("Reading in " + filename);
+      /* Ranges that are new in this version have no country in any of
+       * the previously imported versions. */
+      StringBuilder emptyCountries = new StringBuilder();
+      for (int i = 0; i < this.allDatabases.size(); i++) {
+        emptyCountries.append(",ZZ");
+      }
+      String emptyCountryString = emptyCountries.toString();
+      BufferedReader br = null;
+      try {
+        br = new BufferedReader(new FileReader(databaseFile));
+        String line = null;
+        while ((line = br.readLine()) != null) {
+          if (line.startsWith("Copyright") ||
+              line.startsWith("\"begin")) {
+            continue;
+          }
+          String[] parts = line.replaceAll("\"", "").split(",");
+          /* Validate the line before touching the combined database, so
+           * that skipping it cannot leave a half-applied range. */
+          if (parts.length < 5) {
+            this.logger.warning("Skipping unparseable line '" + line
+                + "' in " + databaseFile.getAbsolutePath() + ".");
+            continue;
+          }
+          long fromIP;
+          long toIP;
+          try {
+            fromIP = Long.parseLong(parts[2]);
+            toIP = Long.parseLong(parts[3]);
+          } catch (NumberFormatException ex) {
+            this.logger.log(Level.WARNING, "Skipping line '" + line
+                + "' with non-numeric IP address in "
+                + databaseFile.getAbsolutePath() + ".", ex);
+            continue;
+          }
+          String countryCode = parts[4];
+          /* Select the existing entries that may overlap with the new
+           * range: all entries starting at or before toIP, beginning
+           * with the last entry that starts at or before fromIP. */
+          SortedMap<Long, DatabaseEntry> submap =
+              combinedDatabase.headMap(toIP + 1L);
+          if (!submap.headMap(fromIP + 1L).isEmpty()) {
+            submap = submap.tailMap(submap.headMap(fromIP + 1L).
+                lastKey());
+          }
+          /* Entries are collected here and only put into the map after
+           * the iteration over the submap view has finished. */
+          Set<DatabaseEntry> newEntries = new HashSet<DatabaseEntry>();
+          for (DatabaseEntry e : submap.values()) {
+            while (fromIP <= toIP && fromIP <= e.toIP &&
+                toIP >= e.fromIP) {
+              if (fromIP < e.fromIP) {
+                // duplicate entry: new entry fromIP-e.fromIP, set fromIP
+                // to e.fromIP
+                DatabaseEntry e1 = new DatabaseEntry();
+                e1.fromIP = fromIP;
+                e1.toIP = e.fromIP - 1L;
+                e1.countries = e.countries;
+                newEntries.add(e1);
+                fromIP = e.fromIP;
+              } else if (fromIP > e.fromIP) {
+                // split off existing entry; don't add yet
+                DatabaseEntry e1 = new DatabaseEntry();
+                e1.fromIP = e.fromIP;
+                e1.toIP = fromIP - 1L;
+                e1.countries = e.countries;
+                newEntries.add(e1);
+                e.fromIP = fromIP;
+                newEntries.add(e);
+              } else if (toIP < e.toIP) {
+                // split and add to first half
+                DatabaseEntry e1 = new DatabaseEntry();
+                e1.fromIP = toIP + 1L;
+                e1.toIP = e.toIP;
+                e1.countries = e.countries;
+                newEntries.add(e1);
+                e.toIP = e1.fromIP - 1L;
+                e.countries += "," + countryCode;
+                fromIP = toIP + 1L;
+              } else if (toIP >= e.toIP) {
+                // add to this entry and done, right?
+                e.countries += "," + countryCode;
+                fromIP = toIP + 1L;
+              }
+            }
+          }
+          /* Whatever is left of the new range is not covered by any
+           * existing entry. */
+          if (fromIP <= toIP) {
+            DatabaseEntry entry = new DatabaseEntry();
+            entry.fromIP = fromIP;
+            entry.toIP = toIP;
+            entry.countries = emptyCountryString + "," + countryCode;
+            newEntries.add(entry);
+            fromIP = toIP + 1L;
+          }
+          for (DatabaseEntry e : newEntries) {
+            this.combinedDatabase.put(e.fromIP, e);
+          }
+        }
+        this.allDatabases.add(date);
+        /* Pad entries that the new version did not touch, so that every
+         * entry has one country code per database version. */
+        for (DatabaseEntry e : this.combinedDatabase.values()) {
+          if (e.countries.substring(1).split(",").length <
+              this.allDatabases.size()) {
+            e.countries += ",ZZ";
+          }
+        }
+      } catch (IOException e) {
+        this.logger.log(Level.WARNING, "Could not import GeoIP database "
+            + "from file " + databaseFile.getAbsolutePath()
+            + ". This might leave us with an inconsistent state!", e);
+      } finally {
+        /* Release the file handle even if importing failed half-way. */
+        if (br != null) {
+          try {
+            br.close();
+          } catch (IOException e) {
+            this.logger.log(Level.FINER, "Could not close "
+                + databaseFile.getAbsolutePath() + ".", e);
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Writes the combined GeoIP database to disk if it has been modified
+   * by importing database versions from disk. The file consists of an
+   * optional <code>lastDownload</code> line, a header line listing the
+   * dates of all database versions, and one line per IP address range.
+   */
+  public void writeCombinedDatabase() {
+    if (!combinedDatabaseModified) {
+      return;
+    }
+    BufferedWriter bw = null;
+    try {
+      this.logger.fine("Writing "
+          + this.combinedDatabaseFile.getAbsolutePath() + "...");
+      /* Make sure the target directory exists before writing. */
+      File parent = this.combinedDatabaseFile.getParentFile();
+      if (parent != null && !parent.exists()) {
+        parent.mkdirs();
+      }
+      bw = new BufferedWriter(new FileWriter(
+          this.combinedDatabaseFile));
+      /* Omit the timestamp line if we never attempted a download;
+       * otherwise the literal string "null" would be written and read
+       * back as a timestamp. */
+      if (this.lastDownloadedTime != null) {
+        bw.write("lastDownload " + this.lastDownloadedTime + "\n");
+      }
+      bw.write("beginIpNum,endIpNum");
+      for (String d : allDatabases) {
+        bw.write("," + d);
+      }
+      bw.write("\n");
+      for (DatabaseEntry e : this.combinedDatabase.values()) {
+        /* The country list already starts with a comma. */
+        bw.write(e.fromIP + "," + e.toIP + e.countries + "\n");
+      }
+      /* Close inside the try block so that a failing flush is reported
+       * as a write failure. */
+      bw.close();
+      bw = null;
+      this.logger.fine("Finished writing "
+          + this.combinedDatabaseFile.getAbsolutePath() + ".");
+    } catch (IOException e) {
+      this.logger.log(Level.WARNING, "Failed to write "
+          + this.combinedDatabaseFile.getAbsolutePath() + "!", e);
+    } finally {
+      if (bw != null) {
+        try {
+          bw.close();
+        } catch (IOException ignored) {
+          /* The write failure has already been logged above. */
+        }
+      }
+    }
+  }
+
+  /**
+   * Returns the uppercase two-letter country code that was assigned to
+   * <code>ipAddress</code> (in dotted notation) in the most recent
+   * commercial Maxmind GeoIP database published at least 1 day before
+   * <code>date</code> (in the format yyyy-MM-dd).
+   *
+   * @param ipAddress IPv4 address in dotted notation.
+   * @param date Date in the format yyyy-MM-dd.
+   * @return The country code, or "ZZ" if the address is not contained in
+   *     any address range or if no database version was published
+   *     before <code>date</code>.
+   */
+  public String getCountryForIP(String ipAddress, String date) {
+    String[] parts = ipAddress.split("\\.");
+    long ipNum = Long.parseLong(parts[0]) * 256 * 256 * 256 +
+        Long.parseLong(parts[1]) * 256 * 256 +
+        Long.parseLong(parts[2]) * 256 + Long.parseLong(parts[3]);
+
+    /* Find the entry with the greatest start address that is not
+     * greater than the requested address. */
+    SortedMap<Long, DatabaseEntry> candidates =
+        this.combinedDatabase.headMap(ipNum + 1L);
+    if (candidates.isEmpty()) {
+      return "ZZ";
+    }
+    DatabaseEntry entry = candidates.get(candidates.lastKey());
+    /* The address may lie in a gap behind that entry, in which case no
+     * database version assigns a country to it. */
+    if (ipNum > entry.toIP) {
+      return "ZZ";
+    }
+
+    /* Pick the most recent database version published before date.
+     * Versions are sorted here because the list of all database
+     * versions is kept in import order. */
+    String dateShort = date.substring(0, 4) + date.substring(5, 7)
+        + date.substring(8, 10);
+    SortedSet<String> subset = new TreeSet<String>(this.allDatabases).
+        headSet(dateShort);
+    if (subset.isEmpty()) {
+      return "ZZ";
+    }
+    int index = allDatabases.indexOf(subset.last());
+    /* Skip the leading comma of the country list before splitting. */
+    String[] countryCodes = entry.countries.substring(1).split(",");
+    return index < countryCodes.length ? countryCodes[index] : "ZZ";
+  }
+}
diff --git a/src/Main.java b/src/Main.java
index 0990ca1..08112f6 100644
--- a/src/Main.java
+++ b/src/Main.java
@@ -95,6 +95,16 @@ public class Main {
       aw = null;
     }
 
+    // Import/download GeoIP databases
+    GeoIPDatabaseManager gd = new GeoIPDatabaseManager();
+    if (config.getDownloadGeoIPDatabase()) {
+      gd.downloadGeoIPDatabase(config.getMaxmindLicenseKey());
+    }
+    if (config.getImportGeoIPDatabases()) {
+      gd.importGeoIPDatabaseFromDisk("geoipdb/");
+      gd.writeCombinedDatabase();
+    }
+
     // Prepare bridge descriptor parser
     BridgeDescriptorParser bdp = config.getWriteConsensusStats() &&
         config.getWriteBridgeStats() ? new BridgeDescriptorParser(
-- 
1.6.5



More information about the tor-commits mailing list