[tor-commits] [metrics-tasks/master] Change combined database file header (#6471).

karsten at torproject.org karsten at torproject.org
Tue Nov 6 15:33:21 UTC 2012


commit d84ccdd2a0ad3d3f7e0aed2c65dad3b5e7ad81d0
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Nov 5 20:57:06 2012 -0500

    Change combined database file header (#6471).
    
    File header now contains database dates and file names.
---
 .../src/org/torproject/task6471/DatabaseImpl.java  |    9 ++--
 .../torproject/task6471/DatabaseImporterImpl.java  |   55 ++++++++------------
 .../task6471/DatabasePerformanceExample.java       |    2 +-
 3 files changed, 27 insertions(+), 39 deletions(-)

diff --git a/task-6471/java/src/org/torproject/task6471/DatabaseImpl.java b/task-6471/java/src/org/torproject/task6471/DatabaseImpl.java
index 364f0c7..0338376 100644
--- a/task-6471/java/src/org/torproject/task6471/DatabaseImpl.java
+++ b/task-6471/java/src/org/torproject/task6471/DatabaseImpl.java
@@ -71,7 +71,7 @@ public class DatabaseImpl implements Database {
   protected SortedSet<Integer> databaseDates = new TreeSet<Integer>();
 
   /**
-   * Database file names.
+   * Database dates and file names, formatted as yyyymmdd!filename.
    */
   protected SortedSet<String> databaseFileNames = new TreeSet<String>();
 
@@ -243,11 +243,10 @@ public class DatabaseImpl implements Database {
         if (line.startsWith("!")) {
 
           /* First read file header containing database dates. */
-          String databaseFileName = line.substring(1);
-          String databaseDateString =
-              databaseFileName.substring(databaseFileName.length() - 8);
+          String[] parts = line.substring(1).split("!");
+          this.databaseFileNames.add(line.substring(1));
+          String databaseDateString = parts[0];
           int dbDate = convertDateStringToNumber(databaseDateString);
-          this.databaseFileNames.add(databaseFileName);
           this.databaseDates.add(dbDate);
         } else {
 
diff --git a/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java b/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
index 2a6c203..0f7df77 100644
--- a/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
+++ b/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
@@ -6,7 +6,6 @@ import java.io.File;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
-import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -17,7 +16,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.Stack;
-import java.util.TimeZone;
 import java.util.TreeMap;
 
 public class DatabaseImporterImpl extends DatabaseImpl
@@ -55,12 +53,6 @@ public class DatabaseImporterImpl extends DatabaseImpl
     }
     Collections.sort(allFiles, Collections.reverseOrder());
     for (File file : allFiles) {
-      String databaseFileName = file.getName();
-      if (this.databaseFileNames.contains(databaseFileName)) {
-        /* We already imported this file while loading combined databases
-         * from disk. */
-        continue;
-      }
       if (!this.importRegionalRegistryStatsFile(file)) {
         allImportsSuccessful = false;
       }
@@ -124,8 +116,8 @@ public class DatabaseImporterImpl extends DatabaseImpl
     int databaseDate = convertDateStringToNumber(databaseDateString);
     long startAddress = convertAddressStringToNumber(startAddressString);
     long endAddress = startAddress + addresses - 1L;
-    this.addRange(databaseFileName, databaseDate, startAddress,
-        endAddress, code);
+    this.addDatabase(databaseFileName, databaseDate);
+    this.addRange(databaseDate, startAddress, endAddress, code);
   }
 
   public boolean importGeoLiteCityFileOrDirectory(String path) {
@@ -178,13 +170,11 @@ public class DatabaseImporterImpl extends DatabaseImpl
 
   boolean importGeoLiteCityBlocksAndLocationFiles(File blocksFile,
       File locationFile) {
-    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
-    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     long lastModifiedMillis = blocksFile.lastModified();
-    String databaseFileName = blocksFile.getName() + " "
-        + locationFile.getName() + " "
-        + dateFormat.format(lastModifiedMillis);
+    String databaseFileName = blocksFile.getName() + "+"
+        + locationFile.getName();
     int databaseDate = (int) (lastModifiedMillis / 86400000);
+    this.addDatabase(databaseFileName, databaseDate);
     try {
       /* Parse location file first and remember country codes for given
        * locations. */
@@ -221,8 +211,7 @@ public class DatabaseImporterImpl extends DatabaseImpl
           break;
         }
         String code = locations.get(location);
-        this.addRange(databaseFileName, databaseDate, startAddress,
-            endAddress, code);
+        this.addRange(databaseDate, startAddress, endAddress, code);
       }
       br.close();
     } catch (IOException e) {
@@ -257,12 +246,10 @@ public class DatabaseImporterImpl extends DatabaseImpl
   }
 
   private boolean importGeoIPASNum2File(File file) {
-    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
-    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     long lastModifiedMillis = file.lastModified();
-    String databaseFileName = file.getName() + " "
-        + dateFormat.format(lastModifiedMillis);
+    String databaseFileName = file.getName();
     int databaseDate = (int) (lastModifiedMillis / 86400000);
+    this.addDatabase(databaseFileName, databaseDate);
     try {
       BufferedReader br = new BufferedReader(new FileReader(file));
       String line;
@@ -275,8 +262,7 @@ public class DatabaseImporterImpl extends DatabaseImpl
           /* Don't import illegal range. */
           continue;
         }
-        this.addRange(databaseFileName, databaseDate, startAddress,
-            endAddress, code);
+        this.addRange(databaseDate, startAddress, endAddress, code);
       }
       br.close();
       this.repairTree();
@@ -292,6 +278,17 @@ public class DatabaseImporterImpl extends DatabaseImpl
   private int rangeImports = 0, rangeImportsKeyLookups = 0;
 
   /**
+   * Add new database date and file name if we didn't know them yet. */
+  void addDatabase(String databaseFileName, int databaseDate) {
+    if (!this.databaseDates.contains(databaseDate)) {
+      this.databaseDates.add(databaseDate);
+      this.addedDatabaseDate = databaseDate;
+    }
+    this.databaseFileNames.add(convertDateNumberToString(databaseDate)
+        + "!" + databaseFileName);
+  }
+
+  /**
    * Add a single address and date range to the tree, which may require
    * splitting up existing ranges.
    *
@@ -300,18 +297,10 @@ public class DatabaseImporterImpl extends DatabaseImpl
    * is called prior to any lookupAddress() calls.  No further checks are
    * performed that the tree is repaired before looking up an address.
    */
-  void addRange(String databaseFileName, int databaseDate,
-      long startAddress, long endAddress, String code) {
+  void addRange(int databaseDate, long startAddress, long endAddress,
+      String code) {
     this.rangeImports++;
 
-    /* Add new database date and file name if we didn't know them yet,
-     * and note that we need to repair the tree after importing. */
-    if (!this.databaseDates.contains(databaseDate)) {
-      this.databaseDates.add(databaseDate);
-      this.addedDatabaseDate = databaseDate;
-    }
-    this.databaseFileNames.add(databaseFileName);
-
     /* We might have to split existing ranges or the new range before
      * adding it to the tree, and we might have to remove existing ranges.
      * We shouldn't mess with the tree directly while iterating  over it,
diff --git a/task-6471/java/src/org/torproject/task6471/DatabasePerformanceExample.java b/task-6471/java/src/org/torproject/task6471/DatabasePerformanceExample.java
index 5f8573a..bdfb140 100644
--- a/task-6471/java/src/org/torproject/task6471/DatabasePerformanceExample.java
+++ b/task-6471/java/src/org/torproject/task6471/DatabasePerformanceExample.java
@@ -19,7 +19,7 @@ import java.util.TreeSet;
 public class DatabasePerformanceExample {
   public static void main(String[] args) throws IOException {
 
-    File testCasesCsvFile = new File("test-cases.csv");
+    File testCasesCsvFile = new File("test-cases-2007-10-2012-09.csv");
     if (!testCasesCsvFile.exists()) {
       System.out.print("Generating test cases... ");
       long startMillis = System.currentTimeMillis();





More information about the tor-commits mailing list