commit 2adef6754cc637da8555d7c5d4592c1ae32289fa
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Feb 11 09:38:00 2015 +0100
Adapt to MaxMind's new GeoLite2 City format.
MaxMind has changed their file format for GeoLite2 City database files.
They split up blocks into IPv4 and IPv6 addresses, and they now support
different locales. We need to update our parsing code to read their new
formats. At the same time, we drop support for the old format, because the
code that parses it would otherwise be dead.
---
.../torproject/onionoo/updater/LookupService.java | 103 ++++++------
.../org/torproject/onionoo/LookupServiceTest.java | 174 ++++++++++----------
2 files changed, 143 insertions(+), 134 deletions(-)
diff --git a/src/main/java/org/torproject/onionoo/updater/LookupService.java b/src/main/java/org/torproject/onionoo/updater/LookupService.java
index 22560b2..85e29f4 100644
--- a/src/main/java/org/torproject/onionoo/updater/LookupService.java
+++ b/src/main/java/org/torproject/onionoo/updater/LookupService.java
@@ -27,8 +27,8 @@ public class LookupService {
LookupService.class);
private File geoipDir;
- private File geoLite2CityBlocksCsvFile;
- private File geoLite2CityLocationsCsvFile;
+ private File geoLite2CityBlocksIPv4CsvFile;
+ private File geoLite2CityLocationsEnCsvFile;
private File geoIPASNum2CsvFile;
private boolean hasAllFiles = false;
public LookupService(File geoipDir) {
@@ -38,16 +38,16 @@ public class LookupService {
/* Make sure we have all required .csv files. */
private void findRequiredCsvFiles() {
- this.geoLite2CityBlocksCsvFile = new File(this.geoipDir,
- "GeoLite2-City-Blocks.csv");
- if (!this.geoLite2CityBlocksCsvFile.exists()) {
- log.error("No GeoLite2-City-Blocks.csv file in geoip/.");
+ this.geoLite2CityBlocksIPv4CsvFile = new File(this.geoipDir,
+ "GeoLite2-City-Blocks-IPv4.csv");
+ if (!this.geoLite2CityBlocksIPv4CsvFile.exists()) {
+ log.error("No GeoLite2-City-Blocks-IPv4.csv file in geoip/.");
return;
}
- this.geoLite2CityLocationsCsvFile = new File(this.geoipDir,
- "GeoLite2-City-Locations.csv");
- if (!this.geoLite2CityLocationsCsvFile.exists()) {
- log.error("No GeoLite2-City-Locations.csv file in "
+ this.geoLite2CityLocationsEnCsvFile = new File(this.geoipDir,
+ "GeoLite2-City-Locations-en.csv");
+ if (!this.geoLite2CityLocationsEnCsvFile.exists()) {
+ log.error("No GeoLite2-City-Locations-en.csv file in "
+ "geoip/.");
return;
}
@@ -115,59 +115,61 @@ public class LookupService {
SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>(
addressStringNumbers.values());
BufferedReader br = new BufferedReader(new InputStreamReader(
- new FileInputStream(geoLite2CityBlocksCsvFile), "ISO-8859-1"));
+ new FileInputStream(this.geoLite2CityBlocksIPv4CsvFile),
+ "ISO-8859-1"));
String line = br.readLine();
while ((line = br.readLine()) != null) {
- if (!line.startsWith("::ffff:")) {
- /* TODO Make this less hacky and IPv6-ready at some point. */
- continue;
- }
- String[] parts = line.replaceAll("\"", "").split(",", 10);
- if (parts.length != 10) {
+ String[] parts = line.split(",", 9);
+ if (parts.length != 9) {
log.error("Illegal line '" + line + "' in "
- + geoLite2CityBlocksCsvFile.getAbsolutePath() + ".");
+ + this.geoLite2CityBlocksIPv4CsvFile.getAbsolutePath()
+ + ".");
br.close();
return lookupResults;
}
try {
- String startAddressString = parts[0].substring(7); /* ::ffff: */
+ String[] networkAddressAndMask = parts[0].split("/");
+ String startAddressString = networkAddressAndMask[0];
long startIpNum = this.parseAddressString(startAddressString);
if (startIpNum < 0L) {
- log.error("Illegal IP address in '" + line
- + "' in " + geoLite2CityBlocksCsvFile.getAbsolutePath()
+ log.error("Illegal IP address in '" + line + "' in "
+ + this.geoLite2CityBlocksIPv4CsvFile.getAbsolutePath()
+ ".");
br.close();
return lookupResults;
}
- int networkMaskLength = Integer.parseInt(parts[1]);
- if (networkMaskLength < 96 || networkMaskLength > 128) {
- log.error("Illegal network mask in '" + line
- + "' in " + geoLite2CityBlocksCsvFile.getAbsolutePath()
+ int networkMaskLength = networkAddressAndMask.length < 2 ? 0
+ : Integer.parseInt(networkAddressAndMask[1]);
+ if (networkMaskLength < 8 || networkMaskLength > 32) {
+ log.error("Missing or illegal network mask in '" + line
+ + "' in "
+ + this.geoLite2CityBlocksIPv4CsvFile.getAbsolutePath()
+ ".");
br.close();
return lookupResults;
}
- if (parts[2].length() == 0 && parts[3].length() == 0) {
+ if (parts[1].length() == 0 && parts[2].length() == 0) {
continue;
}
- long endIpNum = startIpNum + (1 << (128 - networkMaskLength))
+ long endIpNum = startIpNum + (1 << (32 - networkMaskLength))
- 1;
for (long addressNumber : sortedAddressNumbers.
tailSet(startIpNum).headSet(endIpNum + 1L)) {
- String blockString = parts[2].length() > 0 ? parts[2] :
- parts[3];
+ String blockString = parts[1].length() > 0 ? parts[1] :
+ parts[2];
long blockNumber = Long.parseLong(blockString);
addressNumberBlocks.put(addressNumber, blockNumber);
- if (parts[6].length() > 0 && parts[7].length() > 0) {
+ if (parts[7].length() > 0 && parts[8].length() > 0) {
addressNumberLatLong.put(addressNumber,
- new Float[] { Float.parseFloat(parts[6]),
- Float.parseFloat(parts[7]) });
+ new Float[] { Float.parseFloat(parts[7]),
+ Float.parseFloat(parts[8]) });
}
}
} catch (NumberFormatException e) {
- log.error("Number format exception while parsing line "
- + "'" + line + "' in "
- + geoLite2CityBlocksCsvFile.getAbsolutePath() + ".");
+ log.error("Number format exception while parsing line '" + line
+ + "' in "
+ + this.geoLite2CityBlocksIPv4CsvFile.getAbsolutePath()
+ + ".");
br.close();
return lookupResults;
}
@@ -175,7 +177,7 @@ public class LookupService {
br.close();
} catch (IOException e) {
log.error("I/O exception while reading "
- + geoLite2CityBlocksCsvFile.getAbsolutePath() + ".");
+ + this.geoLite2CityBlocksIPv4CsvFile.getAbsolutePath() + ".");
return lookupResults;
}
@@ -185,17 +187,19 @@ public class LookupService {
Set<Long> blockNumbers = new HashSet<Long>(
addressNumberBlocks.values());
BufferedReader br = new BufferedReader(new InputStreamReader(
- new FileInputStream(geoLite2CityLocationsCsvFile),
+ new FileInputStream(this.geoLite2CityLocationsEnCsvFile),
"ISO-8859-1"));
String line = br.readLine();
while ((line = br.readLine()) != null) {
- String[] parts = line.replaceAll("\"", "").split(",", 10);
- if (parts.length != 10) {
+ String[] parts = line.replaceAll("\"", "").split(",", 13);
+ if (parts.length != 13) {
log.error("Illegal line '" + line + "' in "
- + geoLite2CityLocationsCsvFile.getAbsolutePath() + ".");
+ + this.geoLite2CityLocationsEnCsvFile.getAbsolutePath()
+ + ".");
br.close();
return lookupResults;
}
+
try {
long locId = Long.parseLong(parts[0]);
if (blockNumbers.contains(locId)) {
@@ -204,7 +208,8 @@ public class LookupService {
} catch (NumberFormatException e) {
log.error("Number format exception while parsing line "
+ "'" + line + "' in "
- + geoLite2CityLocationsCsvFile.getAbsolutePath() + ".");
+ + this.geoLite2CityLocationsEnCsvFile.getAbsolutePath()
+ + ".");
br.close();
return lookupResults;
}
@@ -212,7 +217,7 @@ public class LookupService {
br.close();
} catch (IOException e) {
log.error("I/O exception while reading "
- + geoLite2CityLocationsCsvFile.getAbsolutePath() + ".");
+ + this.geoLite2CityLocationsEnCsvFile.getAbsolutePath() + ".");
return lookupResults;
}
@@ -301,15 +306,15 @@ public class LookupService {
if (blockLocations.containsKey(blockNumber)) {
String[] parts = blockLocations.get(blockNumber).
replaceAll("\"", "").split(",", -1);
- lookupResult.setCountryCode(parts[3].toLowerCase());
- if (parts[4].length() > 0) {
- lookupResult.setCountryName(parts[4]);
- }
- if (parts[6].length() > 0) {
- lookupResult.setRegionName(parts[6]);
+ lookupResult.setCountryCode(parts[4].toLowerCase());
+ if (parts[5].length() > 0) {
+ lookupResult.setCountryName(parts[5]);
}
if (parts[7].length() > 0) {
- lookupResult.setCityName(parts[7]);
+ lookupResult.setRegionName(parts[7]);
+ }
+ if (parts[10].length() > 0) {
+ lookupResult.setCityName(parts[10]);
}
}
}
diff --git a/src/test/java/org/torproject/onionoo/LookupServiceTest.java b/src/test/java/org/torproject/onionoo/LookupServiceTest.java
index 052b4c0..008b21c 100644
--- a/src/test/java/org/torproject/onionoo/LookupServiceTest.java
+++ b/src/test/java/org/torproject/onionoo/LookupServiceTest.java
@@ -27,8 +27,8 @@ import org.torproject.onionoo.updater.LookupService;
public class LookupServiceTest {
- private List<String> geoLite2CityBlocksLines,
- geoLite2CityLocationsLines, geoipASNum2Lines;
+ private List<String> geoLite2CityBlocksIPv4Lines,
+ geoLite2CityLocationsEnLines, geoipASNum2Lines;
private LookupService lookupService;
@@ -37,27 +37,28 @@ public class LookupServiceTest {
private SortedMap<String, LookupResult> lookupResults;
private void populateLines() {
- this.geoLite2CityBlocksLines = new ArrayList<String>();
- this.geoLite2CityBlocksLines.add("network_start_ip,"
- + "network_mask_length,geoname_id,registered_country_geoname_id,"
- + "represented_country_geoname_id,postal_code,latitude,longitude,"
- + "is_anonymous_proxy,is_satellite_provider");
- this.geoLite2CityBlocksLines.add("::ffff:8.8.9.0,120,6252001,6252001,"
- + ",,38.0000,-97.0000,0,0");
- this.geoLite2CityBlocksLines.add("::ffff:8.8.8.0,120,5375480,6252001,"
- + ",94043,37.3860,-122.0838,0,0");
- this.geoLite2CityBlocksLines.add("::ffff:8.8.7.0,120,6252001,6252001,"
- + ",,38.0000,-97.0000,0,0");
- this.geoLite2CityLocationsLines = new ArrayList<String>();
- this.geoLite2CityLocationsLines.add("geoname_id,continent_code,"
- + "continent_name,country_iso_code,country_name,"
- + "subdivision_iso_code,subdivision_name,city_name,metro_code,"
- + "time_zone");
- this.geoLite2CityLocationsLines.add("6252001,NA,\"North America\",US,"
- + "\"United States\",,,,,");
- this.geoLite2CityLocationsLines.add("5375480,NA,\"North America\",US,"
- + "\"United States\",CA,California,\"Mountain View\",807,"
- + "America/Los_Angeles");
+ this.geoLite2CityBlocksIPv4Lines = new ArrayList<String>();
+ this.geoLite2CityBlocksIPv4Lines.add("network,geoname_id,"
+ + "registered_country_geoname_id,represented_country_geoname_id,"
+ + "is_anonymous_proxy,is_satellite_provider,postal_code,latitude,"
+ + "longitude");
+ this.geoLite2CityBlocksIPv4Lines.add("8.8.0.0/21,6252001,6252001,,0,"
+ + "0,,38.0000,-97.0000");
+ this.geoLite2CityBlocksIPv4Lines.add("8.8.8.0/24,5375480,6252001,,0,"
+ + "0,94035,37.3860,-122.0838");
+ this.geoLite2CityBlocksIPv4Lines.add("8.8.9.0/24,6252001,6252001,,0,"
+ + "0,,38.0000,-97.0000");
+ this.geoLite2CityLocationsEnLines = new ArrayList<String>();
+ this.geoLite2CityLocationsEnLines.add("geoname_id,locale_code,"
+ + "continent_code,continent_name,country_iso_code,country_name,"
+ + "subdivision_1_iso_code,subdivision_1_name,"
+ + "subdivision_2_iso_code,subdivision_2_name,city_name,"
+ + "metro_code,time_zone");
+ this.geoLite2CityLocationsEnLines.add("6252001,en,NA,"
+ + "\"North America\",US,\"United States\",,,,,,,");
+ this.geoLite2CityLocationsEnLines.add("5375480,en,NA,"
+ + "\"North America\",US,\"United States\",CA,California,,,"
+ + "\"Mountain View\",807,America/Los_Angeles");
this.geoipASNum2Lines = new ArrayList<String>();
this.geoipASNum2Lines.add("134743296,134744063,\"AS3356 Level 3 "
+ "Communications\"");
@@ -69,10 +70,10 @@ public class LookupServiceTest {
private void writeCsvFiles() {
try {
- this.writeCsvFile(this.geoLite2CityBlocksLines,
- "GeoLite2-City-Blocks.csv");
- this.writeCsvFile(this.geoLite2CityLocationsLines,
- "GeoLite2-City-Locations.csv");
+ this.writeCsvFile(this.geoLite2CityBlocksIPv4Lines,
+ "GeoLite2-City-Blocks-IPv4.csv");
+ this.writeCsvFile(this.geoLite2CityLocationsEnLines,
+ "GeoLite2-City-Locations-en.csv");
this.writeCsvFile(this.geoipASNum2Lines, "GeoIPASNum2.csv");
} catch (IOException e) {
throw new RuntimeException(e);
@@ -105,10 +106,10 @@ public class LookupServiceTest {
this.addressStrings.add(addressString);
this.populateLines();
if (geoLite2CityBlocksLines != null) {
- this.geoLite2CityBlocksLines = geoLite2CityBlocksLines;
+ this.geoLite2CityBlocksIPv4Lines = geoLite2CityBlocksLines;
}
if (geoLite2CityLocationsLines != null) {
- this.geoLite2CityLocationsLines = geoLite2CityLocationsLines;
+ this.geoLite2CityLocationsEnLines = geoLite2CityLocationsLines;
}
if (geoipASNum2Lines != null) {
this.geoipASNum2Lines = geoipASNum2Lines;
@@ -247,96 +248,99 @@ public class LookupServiceTest {
@Test()
public void testLookupNoCorrespondingLocation() {
- List<String> geoLite2CityLocationsLines = new ArrayList<String>();
- geoLite2CityLocationsLines.add("geoname_id,continent_code,"
- + "continent_name,country_iso_code,country_name,"
- + "subdivision_iso_code,subdivision_name,city_name,metro_code,"
- + "time_zone");
- geoLite2CityLocationsLines.add("6252001,NA,\"North America\",US,"
- + "\"United States\",,,,,");
- this.assertLookupResult(null, geoLite2CityLocationsLines, null,
+ List<String> geoLite2CityLocationsEnLines = new ArrayList<String>();
+ geoLite2CityLocationsEnLines.add("geoname_id,locale_code,"
+ + "continent_code,continent_name,country_iso_code,country_name,"
+ + "subdivision_1_iso_code,subdivision_1_name,"
+ + "subdivision_2_iso_code,subdivision_2_name,city_name,"
+ + "metro_code,time_zone");
+ geoLite2CityLocationsEnLines.add("6252001,en,NA,"
+ + "\"North America\",US,\"United States\",,,,,,,");
+ this.assertLookupResult(null, geoLite2CityLocationsEnLines, null,
"8.8.8.8", null, null, null, null, 37.3860f, -122.0838f,
"AS15169", "Google Inc.");
}
@Test()
public void testLookupBlocksStartNotANumber() {
- List<String> geoLite2CityBlocksLines = new ArrayList<String>();
- geoLite2CityBlocksLines.add("network_start_ip,"
- + "network_mask_length,geoname_id,registered_country_geoname_id,"
- + "represented_country_geoname_id,postal_code,latitude,longitude,"
- + "is_anonymous_proxy,is_satellite_provider");
- geoLite2CityBlocksLines.add("::ffff:one,120,5375480,6252001,,94043,"
- + "37.3860,-122.0838,0,0");
+ List<String> geoLite2CityBlocksIPv4Lines = new ArrayList<String>();
+ geoLite2CityBlocksIPv4Lines.add("network,geoname_id,"
+ + "registered_country_geoname_id,represented_country_geoname_id,"
+ + "is_anonymous_proxy,is_satellite_provider,postal_code,latitude,"
+ + "longitude");
+ geoLite2CityBlocksIPv4Lines.add("one/24,5375480,6252001,,0,"
+ + "0,94035,37.3860,-122.0838");
this.assertLookupResult(
- geoLite2CityBlocksLines, null, null,
+ geoLite2CityBlocksIPv4Lines, null, null,
"8.8.8.8", null, null, null, null, null, null, null, null);
}
@Test()
public void testLookupBlocksLocationX() {
- List<String> geoLite2CityBlocksLines = new ArrayList<String>();
- geoLite2CityBlocksLines.add("network_start_ip,"
- + "network_mask_length,geoname_id,registered_country_geoname_id,"
- + "represented_country_geoname_id,postal_code,latitude,longitude,"
- + "is_anonymous_proxy,is_satellite_provider");
- geoLite2CityBlocksLines.add("::ffff:8.8.8.0,120,X,X,,94043,37.3860,"
- + "-122.0838,0,0");
- this.assertLookupResult(geoLite2CityBlocksLines, null, null,
+ List<String> geoLite2CityBlocksIPv4Lines = new ArrayList<String>();
+ geoLite2CityBlocksIPv4Lines.add("network,geoname_id,"
+ + "registered_country_geoname_id,represented_country_geoname_id,"
+ + "is_anonymous_proxy,is_satellite_provider,postal_code,latitude,"
+ + "longitude");
+ geoLite2CityBlocksIPv4Lines.add("8.8.8.0/24,X,X,,0,0,94035,37.3860,"
+ + "-122.0838");
+ this.assertLookupResult(geoLite2CityBlocksIPv4Lines, null, null,
"8.8.8.8", null, null, null, null, null, null, null, null);
}
@Test()
public void testLookupBlocksLocationEmpty() {
- List<String> geoLite2CityBlocksLines = new ArrayList<String>();
- geoLite2CityBlocksLines.add("network_start_ip,"
- + "network_mask_length,geoname_id,registered_country_geoname_id,"
- + "represented_country_geoname_id,postal_code,latitude,longitude,"
- + "is_anonymous_proxy,is_satellite_provider");
- geoLite2CityBlocksLines.add("::ffff:8.8.8.0,120,,,,,,,1,0");
- this.assertLookupResult(geoLite2CityBlocksLines, null, null,
+ List<String> geoLite2CityBlocksIPv4Lines = new ArrayList<String>();
+ geoLite2CityBlocksIPv4Lines.add("network,geoname_id,"
+ + "registered_country_geoname_id,represented_country_geoname_id,"
+ + "is_anonymous_proxy,is_satellite_provider,postal_code,latitude,"
+ + "longitude");
+ geoLite2CityBlocksIPv4Lines.add("8.8.8.0/24,,,,0,0,,,");
+ this.assertLookupResult(geoLite2CityBlocksIPv4Lines, null, null,
"8.8.8.8", null, null, null, null, null, null, "AS15169",
"Google Inc.");
}
@Test()
public void testLookupBlocksTooFewFields() {
- List<String> geoLite2CityBlocksLines = new ArrayList<String>();
- geoLite2CityBlocksLines.add("network_start_ip,"
- + "network_mask_length,geoname_id,registered_country_geoname_id,"
- + "represented_country_geoname_id,postal_code,latitude,longitude,"
- + "is_anonymous_proxy,is_satellite_provider");
- geoLite2CityBlocksLines.add("::ffff:8.8.8.0,120,5375480,6252001,"
- + ",94043,37.3860,-122.0838,0");
- this.assertLookupResult(geoLite2CityBlocksLines, null, null,
+ List<String> geoLite2CityBlocksIPv4Lines = new ArrayList<String>();
+ geoLite2CityBlocksIPv4Lines.add("network,geoname_id,"
+ + "registered_country_geoname_id,represented_country_geoname_id,"
+ + "is_anonymous_proxy,is_satellite_provider,postal_code,latitude,"
+ + "longitude");
+ geoLite2CityBlocksIPv4Lines.add("8.8.8.0/24,5375480,6252001,,0,"
+ + "0,94035,37.3860");
+ this.assertLookupResult(geoLite2CityBlocksIPv4Lines, null, null,
"8.8.8.8", null, null, null, null, null, null, null, null);
}
@Test()
public void testLookupLocationLocIdNotANumber() {
- List<String> geoLite2CityLocationsLines = new ArrayList<String>();
- geoLite2CityLocationsLines = new ArrayList<String>();
- geoLite2CityLocationsLines.add("geoname_id,continent_code,"
- + "continent_name,country_iso_code,country_name,"
- + "subdivision_iso_code,subdivision_name,city_name,metro_code,"
- + "time_zone");
- geoLite2CityLocationsLines.add("threetwoonenineone,NA,"
- + "\"North America\",US,\"United States\",CA,California,"
+ List<String> geoLite2CityLocationsEnLines = new ArrayList<String>();
+ geoLite2CityLocationsEnLines.add("geoname_id,locale_code,"
+ + "continent_code,continent_name,country_iso_code,country_name,"
+ + "subdivision_1_iso_code,subdivision_1_name,"
+ + "subdivision_2_iso_code,subdivision_2_name,city_name,"
+ + "metro_code,time_zone");
+ geoLite2CityLocationsEnLines.add("threetwoonenineone,en,NA,"
+ + "\"North America\",US,\"United States\",CA,California,,,"
+ "\"Mountain View\",807,America/Los_Angeles");
- this.assertLookupResult(null, geoLite2CityLocationsLines, null,
+ this.assertLookupResult(null, geoLite2CityLocationsEnLines, null,
"8.8.8.8", null, null, null, null, null, null, null, null);
}
@Test()
public void testLookupLocationTooFewFields() {
- List<String> geoLite2CityLocationsLines = new ArrayList<String>();
- geoLite2CityLocationsLines.add("geoname_id,continent_code,"
- + "continent_name,country_iso_code,country_name,"
- + "subdivision_iso_code,subdivision_name,city_name,metro_code,"
- + "time_zone");
- geoLite2CityLocationsLines.add("5375480,NA,\"North America\",US,"
- + "\"United States\",CA,California,\"Mountain View\",807");
- this.assertLookupResult(null, geoLite2CityLocationsLines, null,
+ List<String> geoLite2CityLocationsEnLines = new ArrayList<String>();
+ geoLite2CityLocationsEnLines.add("geoname_id,locale_code,"
+ + "continent_code,continent_name,country_iso_code,country_name,"
+ + "subdivision_1_iso_code,subdivision_1_name,"
+ + "subdivision_2_iso_code,subdivision_2_name,city_name,"
+ + "metro_code,time_zone");
+ geoLite2CityLocationsEnLines.add("threetwoonenineone,en,NA,"
+ + "\"North America\",US,\"United States\",CA,California,,,"
+ + "\"Mountain View\",807");
+ this.assertLookupResult(null, geoLite2CityLocationsEnLines, null,
"8.8.8.8", null, null, null, null, null, null, null, null);
}