commit d200e5911e850748c87c5f160519b9c61b95adbd Author: Karsten Loesing <karsten.loesing@gmx.net> Date: Mon Nov 5 23:10:53 2012 -0500
Take database dates from directory names (#6471).
Last modified times of .csv files do not always match publication dates. --- .../org/torproject/task6471/ConvertExample.java | 4 +- .../org/torproject/task6471/DatabaseImporter.java | 8 +++--- .../torproject/task6471/DatabaseImporterImpl.java | 21 ++++++++++++++----- 3 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/task-6471/java/src/org/torproject/task6471/ConvertExample.java b/task-6471/java/src/org/torproject/task6471/ConvertExample.java index c96047c..c4ef4c6 100644 --- a/task-6471/java/src/org/torproject/task6471/ConvertExample.java +++ b/task-6471/java/src/org/torproject/task6471/ConvertExample.java @@ -11,7 +11,7 @@ public class ConvertExample {
System.out.print("Saving combined ASN database to disk... "); startMillis = endMillis; - combinedDatabase.saveCombinedDatabases("asn-2012-07-2012-10.csv"); + combinedDatabase.saveCombinedDatabases("asn-2005-09-2012-11.csv"); endMillis = System.currentTimeMillis(); System.out.println((endMillis - startMillis) + " millis."); startMillis = endMillis; @@ -25,7 +25,7 @@ public class ConvertExample {
System.out.print("Saving combined city database to disk... "); startMillis = endMillis; - combinedDatabase.saveCombinedDatabases("city-2012-07-2012-10.csv"); + combinedDatabase.saveCombinedDatabases("city-2009-06-2012-10.csv"); endMillis = System.currentTimeMillis(); System.out.println((endMillis - startMillis) + " millis."); startMillis = endMillis; diff --git a/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java b/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java index 330ecea..0d4ac92 100644 --- a/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java +++ b/task-6471/java/src/org/torproject/task6471/DatabaseImporter.java @@ -32,8 +32,8 @@ public interface DatabaseImporter extends Database { * address ranges and block numbers, and GeoLiteCity-Location.csv * contains country codes for block numbers, among other things. Only * the range start and end addresses and the country code are imported. - * The database date is taken from the file modification time of the - * GeoLiteCity-Blocks.csv file. + * The database date is taken from the directory name containing blocks + * and location file. * * A typical entry from the GeoLiteCity-Blocks.csv file is: * ""3758093312","3758094335","108612"" @@ -46,8 +46,8 @@ public interface DatabaseImporter extends Database { /** * Import the contents of one or more Maxmind GeoIPASNum2.csv databases. * Only the range start and end addresses and the AS number are - * imported. The database date is taken from the file modification - * time. + * imported. The database date is taken from the directory name which + * is expected to be yyyy-mm/, e.g., 2012-11/GeoIPASNum2.csv. 
* * A typical entry from such a database file is: * "3758063616,3758079999,"AS9381 Wharf T&T Ltd."" diff --git a/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java b/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java index 407d8ca..4d15827 100644 --- a/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java +++ b/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java @@ -170,10 +170,12 @@ public class DatabaseImporterImpl extends DatabaseImpl
boolean importGeoLiteCityBlocksAndLocationFiles(File blocksFile, File locationFile) { - long lastModifiedMillis = blocksFile.lastModified(); String databaseFileName = blocksFile.getName() + "+" + locationFile.getName(); - int databaseDate = (int) (lastModifiedMillis / 86400000); + String databaseDateString = + blocksFile.getParentFile().getName().substring( + "GeoLiteCity_".length()); + int databaseDate = convertDateStringToNumber(databaseDateString); this.addDatabase(databaseFileName, databaseDate); try { /* Parse location file first and remember country codes for given @@ -262,17 +264,24 @@ public class DatabaseImporterImpl extends DatabaseImpl }
private boolean importGeoIPASNum2File(File file) { - long lastModifiedMillis = file.lastModified(); String databaseFileName = file.getName(); - int databaseDate = (int) (lastModifiedMillis / 86400000); + String databaseDateString = + file.getParentFile().getName().replaceAll("-", "") + "01"; + int databaseDate = convertDateStringToNumber(databaseDateString); this.addDatabase(databaseFileName, databaseDate); try { BufferedReader br = new BufferedReader(new FileReader(file)); String line; while ((line = br.readLine()) != null) { String[] parts = line.split(","); - long startAddress = Long.parseLong(parts[0]), - endAddress = Long.parseLong(parts[1]); + try { + Long.parseLong(parts[0].trim()); + Long.parseLong(parts[1].trim()); + } catch (NumberFormatException e) { + System.err.println(file.getAbsolutePath() + " '" + line + "'"); + } + long startAddress = Long.parseLong(parts[0].trim()), + endAddress = Long.parseLong(parts[1].trim()); String code = parts[2].split(" ")[0].replaceAll("\"", ""); if (!code.startsWith("AS")) { /* Don't import illegal range. */
tor-commits@lists.torproject.org