commit 9748712da484111087b7268e2246829178f2a612
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date:   Wed Jun 19 13:39:22 2013 +0200

    Extract GeoIP lookup code and test it.
---
 src/org/torproject/onionoo/CurrentNodes.java       |  360 +-------------
 src/org/torproject/onionoo/LookupService.java      |  396 ++++++++++++++++
 src/org/torproject/onionoo/Main.java               |    6 +-
 test/org/torproject/onionoo/LookupServiceTest.java |  501 ++++++++++++++++++++
 4 files changed, 923 insertions(+), 340 deletions(-)

diff --git a/src/org/torproject/onionoo/CurrentNodes.java b/src/org/torproject/onionoo/CurrentNodes.java index 64db6a1..9e27f5b 100644 --- a/src/org/torproject/onionoo/CurrentNodes.java +++ b/src/org/torproject/onionoo/CurrentNodes.java @@ -2,16 +2,11 @@ * See LICENSE for licensing information */ package org.torproject.onionoo;
-import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Scanner; @@ -21,12 +16,12 @@ import java.util.SortedSet; import java.util.TimeZone; import java.util.TreeMap; import java.util.TreeSet; -import java.util.regex.Pattern;
import org.torproject.descriptor.BridgeNetworkStatus; import org.torproject.descriptor.Descriptor; import org.torproject.descriptor.NetworkStatusEntry; import org.torproject.descriptor.RelayNetworkStatusConsensus; +import org.torproject.onionoo.LookupService.LookupResult;
/* Store relays and bridges that have been running in the past seven * days. */ @@ -34,17 +29,20 @@ public class CurrentNodes {
private DescriptorSource descriptorSource;
+ private LookupService lookupService; + private DocumentStore documentStore;
/* Initialize an instance for the back-end that is read-only and doesn't * support parsing new descriptor contents. */ public CurrentNodes(DocumentStore documentStore) { - this(null, documentStore); + this(null, null, documentStore); }
public CurrentNodes(DescriptorSource descriptorSource, - DocumentStore documentStore) { + LookupService lookupService, DocumentStore documentStore) { this.descriptorSource = descriptorSource; + this.lookupService = lookupService; this.documentStore = documentStore; }
@@ -411,345 +409,29 @@ public class CurrentNodes { }
public void lookUpCitiesAndASes() { - - /* Make sure we have all required .csv files. */ - // TODO Make paths configurable or allow passing file contents as - // strings in order to facilitate testing. - // TODO Move look-up code to new LookupService class that is - // initialized with geoip files, receives a sorted set of addresses, - // performs lookups, and returns results to CurrentNodes. - File[] geoLiteCityBlocksCsvFiles = new File[] { - new File("geoip/Manual-GeoLiteCity-Blocks.csv"), - new File("geoip/Automatic-GeoLiteCity-Blocks.csv"), - new File("geoip/GeoLiteCity-Blocks.csv") - }; - File geoLiteCityBlocksCsvFile = null; - for (File file : geoLiteCityBlocksCsvFiles) { - if (file.exists()) { - geoLiteCityBlocksCsvFile = file; - break; - } - } - if (geoLiteCityBlocksCsvFile == null) { - System.err.println("No *GeoLiteCity-Blocks.csv file in geoip/."); - return; - } - File geoLiteCityLocationCsvFile = - new File("geoip/GeoLiteCity-Location.csv"); - if (!geoLiteCityLocationCsvFile.exists()) { - System.err.println("No GeoLiteCity-Location.csv file in geoip/."); - return; - } - File iso3166CsvFile = new File("geoip/iso3166.csv"); - if (!iso3166CsvFile.exists()) { - System.err.println("No iso3166.csv file in geoip/."); - return; - } - File regionCsvFile = new File("geoip/region.csv"); - if (!regionCsvFile.exists()) { - System.err.println("No region.csv file in geoip/."); - return; - } - File geoIPASNum2CsvFile = new File("geoip/GeoIPASNum2.csv"); - if (!geoIPASNum2CsvFile.exists()) { - System.err.println("No GeoIPASNum2.csv file in geoip/."); - return; - } - - /* Obtain a map from relay IP address strings to numbers. 
*/ - Map<String, Long> addressStringNumbers = new HashMap<String, Long>(); - Pattern ipv4Pattern = Pattern.compile("^[0-9\.]{7,15}$"); + SortedSet<String> addressStrings = new TreeSet<String>(); for (Node relay : this.knownRelays.values()) { - String addressString = relay.getAddress(); - long addressNumber = -1L; - if (ipv4Pattern.matcher(addressString).matches()) { - String[] parts = addressString.split("\.", 4); - if (parts.length == 4) { - addressNumber = 0L; - for (int i = 0; i < 4; i++) { - addressNumber *= 256L; - int octetValue = -1; - try { - octetValue = Integer.parseInt(parts[i]); - } catch (NumberFormatException e) { - } - if (octetValue < 0 || octetValue > 255) { - addressNumber = -1L; - break; - } - addressNumber += octetValue; - } - } - } - if (addressNumber >= 0L) { - addressStringNumbers.put(addressString, addressNumber); - } + addressStrings.add(relay.getAddress()); } - if (addressStringNumbers.isEmpty()) { + if (addressStrings.isEmpty()) { System.err.println("No relay IP addresses to resolve to cities or " + "ASN."); return; } - - /* Obtain a map from IP address numbers to blocks. 
*/ - Map<Long, Long> addressNumberBlocks = new HashMap<Long, Long>(); - try { - SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>( - addressStringNumbers.values()); - long firstAddressNumber = sortedAddressNumbers.first(); - BufferedReader br = new BufferedReader(new FileReader( - geoLiteCityBlocksCsvFile)); - String line; - long previousStartIpNum = -1L; - while ((line = br.readLine()) != null) { - if (!line.startsWith(""")) { - continue; - } - String[] parts = line.replaceAll(""", "").split(",", 3); - if (parts.length != 3) { - System.err.println("Illegal line '" + line + "' in " - + geoLiteCityBlocksCsvFile.getAbsolutePath() + "."); - br.close(); - return; - } - try { - long startIpNum = Long.parseLong(parts[0]); - if (startIpNum <= previousStartIpNum) { - System.err.println("Line '" + line + "' not sorted in " - + geoLiteCityBlocksCsvFile.getAbsolutePath() + "."); - br.close(); - return; - } - previousStartIpNum = startIpNum; - while (firstAddressNumber < startIpNum && - firstAddressNumber != -1L) { - sortedAddressNumbers.remove(firstAddressNumber); - if (sortedAddressNumbers.isEmpty()) { - firstAddressNumber = -1L; - } else { - firstAddressNumber = sortedAddressNumbers.first(); - } - } - long endIpNum = Long.parseLong(parts[1]); - while (firstAddressNumber <= endIpNum && - firstAddressNumber != -1L) { - long blockNumber = Long.parseLong(parts[2]); - addressNumberBlocks.put(firstAddressNumber, blockNumber); - sortedAddressNumbers.remove(firstAddressNumber); - if (sortedAddressNumbers.isEmpty()) { - firstAddressNumber = -1L; - } else { - firstAddressNumber = sortedAddressNumbers.first(); - } - } - if (firstAddressNumber == -1L) { - break; - } - } - catch (NumberFormatException e) { - System.err.println("Number format exception while parsing line " - + "'" + line + "' in " - + geoLiteCityBlocksCsvFile.getAbsolutePath() + "."); - br.close(); - return; - } - } - br.close(); - } catch (IOException e) { - System.err.println("I/O exception while reading " - + 
geoLiteCityBlocksCsvFile.getAbsolutePath() + "."); - return; - } - - /* Obtain a map from relevant blocks to location lines. */ - Map<Long, String> blockLocations = new HashMap<Long, String>(); - try { - Set<Long> blockNumbers = new HashSet<Long>( - addressNumberBlocks.values()); - BufferedReader br = new BufferedReader(new FileReader( - geoLiteCityLocationCsvFile)); - String line; - while ((line = br.readLine()) != null) { - if (line.startsWith("C") || line.startsWith("l")) { - continue; - } - String[] parts = line.replaceAll(""", "").split(",", 9); - if (parts.length != 9) { - System.err.println("Illegal line '" + line + "' in " - + geoLiteCityLocationCsvFile.getAbsolutePath() + "."); - br.close(); - return; - } - try { - long locId = Long.parseLong(parts[0]); - if (blockNumbers.contains(locId)) { - blockLocations.put(locId, line); - } - } - catch (NumberFormatException e) { - System.err.println("Number format exception while parsing line " - + "'" + line + "' in " - + geoLiteCityLocationCsvFile.getAbsolutePath() + "."); - br.close(); - return; - } - } - br.close(); - } catch (IOException e) { - System.err.println("I/O exception while reading " - + geoLiteCityLocationCsvFile.getAbsolutePath() + "."); - return; - } - - /* Read country names to memory. */ - Map<String, String> countryNames = new HashMap<String, String>(); - try { - BufferedReader br = new BufferedReader(new FileReader( - iso3166CsvFile)); - String line; - while ((line = br.readLine()) != null) { - String[] parts = line.replaceAll(""", "").split(",", 2); - if (parts.length != 2) { - System.err.println("Illegal line '" + line + "' in " - + iso3166CsvFile.getAbsolutePath() + "."); - br.close(); - return; - } - countryNames.put(parts[0].toLowerCase(), parts[1]); - } - br.close(); - } catch (IOException e) { - System.err.println("I/O exception while reading " - + iso3166CsvFile.getAbsolutePath() + "."); - return; - } - - /* Read region names to memory. 
*/ - Map<String, String> regionNames = new HashMap<String, String>(); - try { - BufferedReader br = new BufferedReader(new FileReader( - regionCsvFile)); - String line; - while ((line = br.readLine()) != null) { - String[] parts = line.replaceAll(""", "").split(",", 3); - if (parts.length != 3) { - System.err.println("Illegal line '" + line + "' in " - + regionCsvFile.getAbsolutePath() + "."); - br.close(); - return; - } - regionNames.put(parts[0].toLowerCase() + "," - + parts[1].toLowerCase(), parts[2]); - } - br.close(); - } catch (IOException e) { - System.err.println("I/O exception while reading " - + regionCsvFile.getAbsolutePath() + "."); - return; - } - - /* Obtain a map from IP address numbers to ASN. */ - Map<Long, String> addressNumberASN = new HashMap<Long, String>(); - try { - SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>( - addressStringNumbers.values()); - long firstAddressNumber = sortedAddressNumbers.first(); - BufferedReader br = new BufferedReader(new FileReader( - geoIPASNum2CsvFile)); - String line; - long previousStartIpNum = -1L; - while ((line = br.readLine()) != null) { - String[] parts = line.replaceAll(""", "").split(",", 3); - if (parts.length != 3) { - System.err.println("Illegal line '" + line + "' in " - + geoIPASNum2CsvFile.getAbsolutePath() + "."); - br.close(); - return; - } - try { - long startIpNum = Long.parseLong(parts[0]); - if (startIpNum <= previousStartIpNum) { - System.err.println("Line '" + line + "' not sorted in " - + geoIPASNum2CsvFile.getAbsolutePath() + "."); - br.close(); - return; - } - previousStartIpNum = startIpNum; - while (firstAddressNumber < startIpNum && - firstAddressNumber != -1L) { - sortedAddressNumbers.remove(firstAddressNumber); - if (sortedAddressNumbers.isEmpty()) { - firstAddressNumber = -1L; - } else { - firstAddressNumber = sortedAddressNumbers.first(); - } - } - long endIpNum = Long.parseLong(parts[1]); - while (firstAddressNumber <= endIpNum && - firstAddressNumber != -1L) { - if 
(parts[2].startsWith("AS") && - parts[2].split(" ", 2).length == 2) { - addressNumberASN.put(firstAddressNumber, parts[2]); - } - sortedAddressNumbers.remove(firstAddressNumber); - if (sortedAddressNumbers.isEmpty()) { - firstAddressNumber = -1L; - } else { - firstAddressNumber = sortedAddressNumbers.first(); - } - } - if (firstAddressNumber == -1L) { - break; - } - } - catch (NumberFormatException e) { - System.err.println("Number format exception while parsing line " - + "'" + line + "' in " - + geoIPASNum2CsvFile.getAbsolutePath() + "."); - br.close(); - return; - } - } - br.close(); - } catch (IOException e) { - System.err.println("I/O exception while reading " - + geoIPASNum2CsvFile.getAbsolutePath() + "."); - return; - } - - /* Finally, set relays' city and ASN information. */ + SortedMap<String, LookupResult> lookupResults = + this.lookupService.lookup(addressStrings); for (Node relay : knownRelays.values()) { String addressString = relay.getAddress(); - if (addressStringNumbers.containsKey(addressString)) { - long addressNumber = addressStringNumbers.get(addressString); - if (addressNumberBlocks.containsKey(addressNumber)) { - long blockNumber = addressNumberBlocks.get(addressNumber); - if (blockLocations.containsKey(blockNumber)) { - String[] parts = blockLocations.get(blockNumber). 
- replaceAll(""", "").split(",", -1); - String countryCode = parts[1].toLowerCase(); - relay.setCountryCode(countryCode); - if (countryNames.containsKey(countryCode)) { - relay.setCountryName(countryNames.get(countryCode)); - } - String regionCode = countryCode + "," - + parts[2].toLowerCase(); - if (regionNames.containsKey(regionCode)) { - relay.setRegionName(regionNames.get(regionCode)); - } - if (parts[3].length() > 0) { - relay.setCityName(parts[3]); - } - relay.setLatitude(parts[5]); - relay.setLongitude(parts[6]); - } - } - if (addressNumberASN.containsKey(addressNumber)) { - String[] parts = addressNumberASN.get(addressNumber).split(" ", - 2); - relay.setASNumber(parts[0]); - relay.setASName(parts[1]); - } + if (lookupResults.containsKey(addressString)) { + LookupResult lookupResult = lookupResults.get(addressString); + relay.setCountryCode(lookupResult.countryCode); + relay.setCountryName(lookupResult.countryName); + relay.setRegionName(lookupResult.regionName); + relay.setCityName(lookupResult.cityName); + relay.setLatitude(lookupResult.latitude); + relay.setLongitude(lookupResult.longitude); + relay.setASNumber(lookupResult.aSNumber); + relay.setASName(lookupResult.aSName); } } } diff --git a/src/org/torproject/onionoo/LookupService.java b/src/org/torproject/onionoo/LookupService.java new file mode 100644 index 0000000..bf3131e --- /dev/null +++ b/src/org/torproject/onionoo/LookupService.java @@ -0,0 +1,396 @@ +/* Copyright 2013 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.onionoo; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.regex.Pattern; + +public class LookupService { + + File geoipDir; + File 
geoLiteCityBlocksCsvFile; + File geoLiteCityLocationCsvFile; + File iso3166CsvFile; + File regionCsvFile; + File geoIPASNum2CsvFile; + private boolean hasAllFiles = false; + public LookupService(File geoipDir) { + this.geoipDir = geoipDir; + this.findRequiredCsvFiles(); + } + + /* Make sure we have all required .csv files. */ + private void findRequiredCsvFiles() { + File[] geoLiteCityBlocksCsvFiles = new File[] { + new File(this.geoipDir, "Manual-GeoLiteCity-Blocks.csv"), + new File(this.geoipDir, "Automatic-GeoLiteCity-Blocks.csv"), + new File(this.geoipDir, "GeoLiteCity-Blocks.csv") }; + for (File file : geoLiteCityBlocksCsvFiles) { + if (file.exists()) { + this.geoLiteCityBlocksCsvFile = file; + break; + } + } + if (this.geoLiteCityBlocksCsvFile == null) { + System.err.println("No *GeoLiteCity-Blocks.csv file in geoip/."); + return; + } + this.geoLiteCityLocationCsvFile = new File(this.geoipDir, + "GeoLiteCity-Location.csv"); + if (!this.geoLiteCityLocationCsvFile.exists()) { + System.err.println("No GeoLiteCity-Location.csv file in geoip/."); + return; + } + this.iso3166CsvFile = new File(this.geoipDir, "iso3166.csv"); + if (!this.iso3166CsvFile.exists()) { + System.err.println("No iso3166.csv file in geoip/."); + return; + } + this.regionCsvFile = new File(this.geoipDir, "region.csv"); + if (!this.regionCsvFile.exists()) { + System.err.println("No region.csv file in geoip/."); + return; + } + this.geoIPASNum2CsvFile = new File(this.geoipDir, "GeoIPASNum2.csv"); + if (!this.geoIPASNum2CsvFile.exists()) { + System.err.println("No GeoIPASNum2.csv file in geoip/."); + return; + } + this.hasAllFiles = true; + } + + public SortedMap<String, LookupResult> lookup( + SortedSet<String> addressStrings) { + + SortedMap<String, LookupResult> lookupResults = + new TreeMap<String, LookupResult>(); + + if (!this.hasAllFiles) { + return lookupResults; + } + + /* Obtain a map from relay IP address strings to numbers. 
*/ + Map<String, Long> addressStringNumbers = new HashMap<String, Long>(); + Pattern ipv4Pattern = Pattern.compile("^[0-9\.]{7,15}$"); + for (String addressString : addressStrings) { + long addressNumber = -1L; + if (ipv4Pattern.matcher(addressString).matches()) { + String[] parts = addressString.split("\.", 4); + if (parts.length == 4) { + addressNumber = 0L; + for (int i = 0; i < 4; i++) { + addressNumber *= 256L; + int octetValue = -1; + try { + octetValue = Integer.parseInt(parts[i]); + } catch (NumberFormatException e) { + } + if (octetValue < 0 || octetValue > 255) { + addressNumber = -1L; + break; + } + addressNumber += octetValue; + } + } + } + if (addressNumber >= 0L) { + addressStringNumbers.put(addressString, addressNumber); + } + } + if (addressStringNumbers.isEmpty()) { + return lookupResults; + } + + /* Obtain a map from IP address numbers to blocks. */ + Map<Long, Long> addressNumberBlocks = new HashMap<Long, Long>(); + try { + SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>( + addressStringNumbers.values()); + long firstAddressNumber = sortedAddressNumbers.first(); + BufferedReader br = new BufferedReader(new FileReader( + geoLiteCityBlocksCsvFile)); + String line; + long previousStartIpNum = -1L; + while ((line = br.readLine()) != null) { + if (!line.startsWith(""")) { + continue; + } + String[] parts = line.replaceAll(""", "").split(",", 3); + if (parts.length != 3) { + System.err.println("Illegal line '" + line + "' in " + + geoLiteCityBlocksCsvFile.getAbsolutePath() + "."); + br.close(); + return lookupResults; + } + try { + long startIpNum = Long.parseLong(parts[0]); + if (startIpNum <= previousStartIpNum) { + System.err.println("Line '" + line + "' not sorted in " + + geoLiteCityBlocksCsvFile.getAbsolutePath() + "."); + br.close(); + return lookupResults; + } + previousStartIpNum = startIpNum; + while (firstAddressNumber < startIpNum && + firstAddressNumber != -1L) { + sortedAddressNumbers.remove(firstAddressNumber); + if 
(sortedAddressNumbers.isEmpty()) { + firstAddressNumber = -1L; + } else { + firstAddressNumber = sortedAddressNumbers.first(); + } + } + long endIpNum = Long.parseLong(parts[1]); + while (firstAddressNumber <= endIpNum && + firstAddressNumber != -1L) { + long blockNumber = Long.parseLong(parts[2]); + addressNumberBlocks.put(firstAddressNumber, blockNumber); + sortedAddressNumbers.remove(firstAddressNumber); + if (sortedAddressNumbers.isEmpty()) { + firstAddressNumber = -1L; + } else { + firstAddressNumber = sortedAddressNumbers.first(); + } + } + if (firstAddressNumber == -1L) { + break; + } + } + catch (NumberFormatException e) { + System.err.println("Number format exception while parsing line " + + "'" + line + "' in " + + geoLiteCityBlocksCsvFile.getAbsolutePath() + "."); + br.close(); + return lookupResults; + } + } + br.close(); + } catch (IOException e) { + System.err.println("I/O exception while reading " + + geoLiteCityBlocksCsvFile.getAbsolutePath() + "."); + return lookupResults; + } + + /* Obtain a map from relevant blocks to location lines. 
*/ + Map<Long, String> blockLocations = new HashMap<Long, String>(); + try { + Set<Long> blockNumbers = new HashSet<Long>( + addressNumberBlocks.values()); + BufferedReader br = new BufferedReader(new FileReader( + geoLiteCityLocationCsvFile)); + String line; + while ((line = br.readLine()) != null) { + if (line.startsWith("C") || line.startsWith("l")) { + continue; + } + String[] parts = line.replaceAll(""", "").split(",", 9); + if (parts.length != 9) { + System.err.println("Illegal line '" + line + "' in " + + geoLiteCityLocationCsvFile.getAbsolutePath() + "."); + br.close(); + return lookupResults; + } + try { + long locId = Long.parseLong(parts[0]); + if (blockNumbers.contains(locId)) { + blockLocations.put(locId, line); + } + } + catch (NumberFormatException e) { + System.err.println("Number format exception while parsing line " + + "'" + line + "' in " + + geoLiteCityLocationCsvFile.getAbsolutePath() + "."); + br.close(); + return lookupResults; + } + } + br.close(); + } catch (IOException e) { + System.err.println("I/O exception while reading " + + geoLiteCityLocationCsvFile.getAbsolutePath() + "."); + return lookupResults; + } + + /* Read country names to memory. */ + Map<String, String> countryNames = new HashMap<String, String>(); + try { + BufferedReader br = new BufferedReader(new FileReader( + iso3166CsvFile)); + String line; + while ((line = br.readLine()) != null) { + String[] parts = line.replaceAll(""", "").split(",", 2); + if (parts.length != 2) { + System.err.println("Illegal line '" + line + "' in " + + iso3166CsvFile.getAbsolutePath() + "."); + br.close(); + return lookupResults; + } + countryNames.put(parts[0].toLowerCase(), parts[1]); + } + br.close(); + } catch (IOException e) { + System.err.println("I/O exception while reading " + + iso3166CsvFile.getAbsolutePath() + "."); + return lookupResults; + } + + /* Read region names to memory. 
*/ + Map<String, String> regionNames = new HashMap<String, String>(); + try { + BufferedReader br = new BufferedReader(new FileReader( + regionCsvFile)); + String line; + while ((line = br.readLine()) != null) { + String[] parts = line.replaceAll(""", "").split(",", 3); + if (parts.length != 3) { + System.err.println("Illegal line '" + line + "' in " + + regionCsvFile.getAbsolutePath() + "."); + br.close(); + return lookupResults; + } + regionNames.put(parts[0].toLowerCase() + "," + + parts[1].toLowerCase(), parts[2]); + } + br.close(); + } catch (IOException e) { + System.err.println("I/O exception while reading " + + regionCsvFile.getAbsolutePath() + "."); + return lookupResults; + } + + /* Obtain a map from IP address numbers to ASN. */ + Map<Long, String> addressNumberASN = new HashMap<Long, String>(); + try { + SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>( + addressStringNumbers.values()); + long firstAddressNumber = sortedAddressNumbers.first(); + BufferedReader br = new BufferedReader(new FileReader( + geoIPASNum2CsvFile)); + String line; + long previousStartIpNum = -1L; + while ((line = br.readLine()) != null) { + String[] parts = line.replaceAll(""", "").split(",", 3); + if (parts.length != 3) { + System.err.println("Illegal line '" + line + "' in " + + geoIPASNum2CsvFile.getAbsolutePath() + "."); + br.close(); + return lookupResults; + } + try { + long startIpNum = Long.parseLong(parts[0]); + if (startIpNum <= previousStartIpNum) { + System.err.println("Line '" + line + "' not sorted in " + + geoIPASNum2CsvFile.getAbsolutePath() + "."); + br.close(); + return lookupResults; + } + previousStartIpNum = startIpNum; + while (firstAddressNumber < startIpNum && + firstAddressNumber != -1L) { + sortedAddressNumbers.remove(firstAddressNumber); + if (sortedAddressNumbers.isEmpty()) { + firstAddressNumber = -1L; + } else { + firstAddressNumber = sortedAddressNumbers.first(); + } + } + long endIpNum = Long.parseLong(parts[1]); + while (firstAddressNumber 
<= endIpNum && + firstAddressNumber != -1L) { + if (parts[2].startsWith("AS") && + parts[2].split(" ", 2).length == 2) { + addressNumberASN.put(firstAddressNumber, parts[2]); + } + sortedAddressNumbers.remove(firstAddressNumber); + if (sortedAddressNumbers.isEmpty()) { + firstAddressNumber = -1L; + } else { + firstAddressNumber = sortedAddressNumbers.first(); + } + } + if (firstAddressNumber == -1L) { + break; + } + } + catch (NumberFormatException e) { + System.err.println("Number format exception while parsing line " + + "'" + line + "' in " + + geoIPASNum2CsvFile.getAbsolutePath() + "."); + br.close(); + return lookupResults; + } + } + br.close(); + } catch (IOException e) { + System.err.println("I/O exception while reading " + + geoIPASNum2CsvFile.getAbsolutePath() + "."); + return lookupResults; + } + + /* Finally, put together lookup results. */ + for (String addressString : addressStrings) { + if (!addressStringNumbers.containsKey(addressString)) { + continue; + } + long addressNumber = addressStringNumbers.get(addressString); + if (!addressNumberBlocks.containsKey(addressNumber) && + !addressNumberASN.containsKey(addressNumber)) { + continue; + } + LookupResult lookupResult = new LookupResult(); + if (addressNumberBlocks.containsKey(addressNumber)) { + long blockNumber = addressNumberBlocks.get(addressNumber); + if (blockLocations.containsKey(blockNumber)) { + String[] parts = blockLocations.get(blockNumber). 
+ replaceAll(""", "").split(",", -1); + String countryCode = parts[1].toLowerCase(); + lookupResult.countryCode = countryCode; + if (countryNames.containsKey(countryCode)) { + lookupResult.countryName = countryNames.get(countryCode); + } + String regionCode = countryCode + "," + parts[2].toLowerCase(); + if (regionNames.containsKey(regionCode)) { + lookupResult.regionName = regionNames.get(regionCode); + } + if (parts[3].length() > 0) { + lookupResult.cityName = parts[3]; + } + lookupResult.latitude = parts[5]; + lookupResult.longitude = parts[6]; + } + } + if (addressNumberASN.containsKey(addressNumber)) { + String[] parts = addressNumberASN.get(addressNumber).split(" ", + 2); + lookupResult.aSNumber = parts[0]; + lookupResult.aSName = parts[1]; + } + lookupResults.put(addressString, lookupResult); + } + + return lookupResults; + } + + class LookupResult { + String countryCode; + String countryName; + String regionName; + String cityName; + String latitude; + String longitude; + String aSNumber; + String aSName; + } +} diff --git a/src/org/torproject/onionoo/Main.java b/src/org/torproject/onionoo/Main.java index f636b5e..3086054 100644 --- a/src/org/torproject/onionoo/Main.java +++ b/src/org/torproject/onionoo/Main.java @@ -19,8 +19,12 @@ public class Main { new File("out")); printStatusTime("Initialized document store");
+ printStatus("Initializing lookup service."); + LookupService ls = new LookupService(new File("geoip")); + printStatusTime("Initialized Geoip lookup service"); + printStatus("Updating internal node list."); - CurrentNodes cn = new CurrentNodes(dso, ds); + CurrentNodes cn = new CurrentNodes(dso, ls, ds); cn.readStatusSummary(); printStatusTime("Read status summary"); cn.readRelayNetworkConsensuses(); diff --git a/test/org/torproject/onionoo/LookupServiceTest.java b/test/org/torproject/onionoo/LookupServiceTest.java new file mode 100644 index 0000000..42df61f --- /dev/null +++ b/test/org/torproject/onionoo/LookupServiceTest.java @@ -0,0 +1,501 @@ +/* Copyright 2013 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.onionoo; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.torproject.onionoo.LookupService.LookupResult; + +public class LookupServiceTest { + + private static class LookupServiceTestHelper { + + private File tempGeoipDir; + + private List<String> manualGeoLiteCityBlocksLines, + automaticGeoLiteCityBlocksLines, geoLiteCityBlocksLines, + geoLiteCityLocationLines, iso3166Lines, regionLines, + geoipASNum2Lines; + + private LookupService lookupService; + + private SortedSet<String> addressStrings; + + private SortedMap<String, LookupResult> lookupResults; + + private LookupServiceTestHelper(File tempGeoipDir, + SortedSet<String> addressStrings) { + this.tempGeoipDir = tempGeoipDir; + this.addressStrings = addressStrings; + } + + private void 
populateLines() { + this.manualGeoLiteCityBlocksLines = new ArrayList<String>(); + this.manualGeoLiteCityBlocksLines.add( + "Copyright (c) 2011 MaxMind Inc. All Rights Reserved."); + this.manualGeoLiteCityBlocksLines.add("startIpNum,endIpNum,locId"); + this.manualGeoLiteCityBlocksLines.add(""134739200","134744063"," + + ""223""); + this.manualGeoLiteCityBlocksLines.add(""134744064","134744319"," + + ""32191""); + this.manualGeoLiteCityBlocksLines.add(""134744320","134751743"," + + ""223""); + this.geoLiteCityLocationLines = new ArrayList<String>(); + this.geoLiteCityLocationLines.add("Copyright (c) 2012 MaxMind " + + "LLC. All Rights Reserved."); + this.geoLiteCityLocationLines.add("locId,country,region,city," + + "postalCode,latitude,longitude,metroCode,areaCode"); + this.geoLiteCityLocationLines.add("223,"US","","",""," + + "38.0000,-97.0000,,"); + this.geoLiteCityLocationLines.add("32191,"US","CA"," + + ""Mountain View","",37.3860,-122.0838,807,650"); + this.iso3166Lines = new ArrayList<String>(); + this.iso3166Lines.add("US,"United States""); + this.regionLines = new ArrayList<String>(); + this.regionLines.add("US,CA,"California""); + this.geoipASNum2Lines = new ArrayList<String>(); + this.geoipASNum2Lines.add("134743296,134744063,"AS3356 Level 3 " + + "Communications""); + this.geoipASNum2Lines.add("134744064,134744319,"AS15169 Google " + + "Inc.""); + this.geoipASNum2Lines.add("134744320,134750463,"AS3356 Level 3 " + + "Communications""); + } + + private void writeCsvFiles() { + try { + this.writeCsvFile(this.manualGeoLiteCityBlocksLines, + "Manual-GeoLiteCity-Blocks.csv"); + this.writeCsvFile(this.automaticGeoLiteCityBlocksLines, + "Automatic-GeoLiteCity-Blocks.csv"); + this.writeCsvFile(this.geoLiteCityBlocksLines, + "GeoLiteCity-Blocks.csv"); + this.writeCsvFile(this.geoLiteCityLocationLines, + "GeoLiteCity-Location.csv"); + this.writeCsvFile(this.iso3166Lines, "iso3166.csv"); + this.writeCsvFile(this.regionLines, "region.csv"); + 
this.writeCsvFile(this.geoipASNum2Lines, "GeoIPASNum2.csv"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private void writeCsvFile(List<String> lines, String fileName) + throws IOException { + if (lines != null && !lines.isEmpty()) { + BufferedWriter bw = new BufferedWriter(new FileWriter( + new File(this.tempGeoipDir, fileName))); + for (String line : lines) { + bw.write(line + "\n"); + } + bw.close(); + } + } + + private void performLookups() { + this.lookupService = new LookupService(this.tempGeoipDir); + this.lookupResults = this.lookupService.lookup(this.addressStrings); + } + + private static void assertLookupResult(File tempGeoipDir, + List<String> manualGeoLiteCityBlocksLines, + List<String> automaticGeoLiteCityBlocksLines, + List<String> geoLiteCityBlocksLines, + List<String> geoLiteCityLocationLines, List<String> iso3166Lines, + List<String> regionLines, List<String> geoipASNum2Lines, + String addressString, String countryCode, String countryName, + String regionName, String cityName, String latitude, + String longitude, String aSNumber, String aSName) { + SortedSet<String> addressStrings = new TreeSet<String>(); + addressStrings.add(addressString); + LookupServiceTestHelper helper = new LookupServiceTestHelper( + tempGeoipDir, addressStrings); + helper.populateLines(); + if (manualGeoLiteCityBlocksLines != null) { + helper.manualGeoLiteCityBlocksLines = + manualGeoLiteCityBlocksLines; + } + if (automaticGeoLiteCityBlocksLines != null) { + helper.automaticGeoLiteCityBlocksLines = + automaticGeoLiteCityBlocksLines; + } + if (geoLiteCityBlocksLines != null) { + helper.geoLiteCityBlocksLines = geoLiteCityBlocksLines; + } + if (geoLiteCityLocationLines != null) { + helper.geoLiteCityLocationLines = geoLiteCityLocationLines; + } + if (iso3166Lines != null) { + helper.iso3166Lines = iso3166Lines; + } + if (regionLines != null) { + helper.regionLines = regionLines; + } + if (geoipASNum2Lines != null) { + helper.geoipASNum2Lines = 
geoipASNum2Lines; + } + helper.writeCsvFiles(); + /* Disable log messages printed to System.err. */ + System.setErr(new PrintStream(new OutputStream() { + public void write(int b) { + } + })); + helper.performLookups(); + if (countryCode == null) { + assertTrue(!helper.lookupResults.containsKey(addressString) || + helper.lookupResults.get(addressString).countryCode == null); + } else { + assertEquals(countryCode, + helper.lookupResults.get(addressString).countryCode); + } + if (countryName == null) { + assertTrue(!helper.lookupResults.containsKey(addressString) || + helper.lookupResults.get(addressString).countryName == null); + } else { + assertEquals(countryName, + helper.lookupResults.get(addressString).countryName); + } + if (regionName == null) { + assertTrue(!helper.lookupResults.containsKey(addressString) || + helper.lookupResults.get(addressString).regionName == null); + } else { + assertEquals(regionName, + helper.lookupResults.get(addressString).regionName); + } + if (cityName == null) { + assertTrue(!helper.lookupResults.containsKey(addressString) || + helper.lookupResults.get(addressString).cityName == null); + } else { + assertEquals(cityName, + helper.lookupResults.get(addressString).cityName); + } + if (latitude == null) { + assertTrue(!helper.lookupResults.containsKey(addressString) || + helper.lookupResults.get(addressString).latitude == null); + } else { + assertEquals(latitude, + helper.lookupResults.get(addressString).latitude); + } + if (longitude == null) { + assertTrue(!helper.lookupResults.containsKey(addressString) || + helper.lookupResults.get(addressString).longitude == null); + } else { + assertEquals(longitude, + helper.lookupResults.get(addressString).longitude); + } + if (aSNumber == null) { + assertTrue(!helper.lookupResults.containsKey(addressString) || + helper.lookupResults.get(addressString).aSNumber == null); + } else { + assertEquals(aSNumber, + helper.lookupResults.get(addressString).aSNumber); + } + if (aSName == null) { + 
assertTrue(!helper.lookupResults.containsKey(addressString) || + helper.lookupResults.get(addressString).aSName == null); + } else { + assertEquals(aSName, + helper.lookupResults.get(addressString).aSName); + } + } + } + + @Rule + public TemporaryFolder tempFolder = new TemporaryFolder(); + + private File tempGeoipDir; + + @Before + public void createTempGeoipDir() throws IOException { + this.tempGeoipDir = this.tempFolder.newFolder("geoip"); + } + + @Test() + public void testLookup8888() { + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, null, null, "8.8.8.8", "us", + "United States", "California", "Mountain View", "37.3860", + "-122.0838", "AS15169", "Google Inc."); + } + + @Test() + public void testLookup8880() { + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, null, null, "8.8.8.0", "us", + "United States", "California", "Mountain View", "37.3860", + "-122.0838", "AS15169", "Google Inc."); + } + + @Test() + public void testLookup888255() { + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, null, null, "8.8.8.255", "us", + "United States", "California", "Mountain View", "37.3860", + "-122.0838", "AS15169", "Google Inc."); + } + + @Test() + public void testLookup888256() { + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, null, null, "8.8.8.256", null, null, null, + null, null, null, null, null); + } + + @Test() + public void testLookup888Minus1() { + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, null, null, "8.8.8.-1", null, null, null, + null, null, null, null, null); + } + + @Test() + public void testLookup000() { + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, null, null, "0.0.0.0", null, null, null, + null, null, null, null, null); + } + + @Test() + public void 
testLookupNoBlocksLines() { + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, + new ArrayList<String>(), null, null, null, null, null, null, + "8.8.8.8", null, null, null, null, null, null, null, null); + } + + @Test() + public void testLookupNoLocationLines() { + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, new ArrayList<String>(), null, null, null, "8.8.8.8", + null, null, null, null, null, null, null, null); + } + + @Test() + public void testLookupNoIso3166Lines() { + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, new ArrayList<String>(), null, null, "8.8.8.8", + null, null, null, null, null, null, null, null); + } + + @Test() + public void testLookupNoRegionLines() { + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, new ArrayList<String>(), null, "8.8.8.8", + null, null, null, null, null, null, null, null); + } + + @Test() + public void testLookupNoGeoipASNum2Lines() { + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, null, new ArrayList<String>(), "8.8.8.8", + null, null, null, null, null, null, null, null); + } + + @Test() + public void testLookupNoCorrespondingLocation() { + List<String> geoLiteCityLocationLines = new ArrayList<String>(); + geoLiteCityLocationLines.add("Copyright (c) 2012 MaxMind LLC. 
All " + + "Rights Reserved."); + geoLiteCityLocationLines.add("locId,country,region,city,postalCode," + + "latitude,longitude,metroCode,areaCode"); + geoLiteCityLocationLines.add("223,"US","","","",38.0000," + + "-97.0000,,"); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, geoLiteCityLocationLines, null, null, null, "8.8.8.8", + null, null, null, null, null, null, "AS15169", "Google Inc."); + } + + @Test() + public void testLookupNoCorrespondingCountryName() { + List<String> iso3166Lines = new ArrayList<String>(); + iso3166Lines.add("UY,"Uruguay""); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, iso3166Lines, null, null, "8.8.8.8", "us", + null, "California", "Mountain View", "37.3860", "-122.0838", + "AS15169", "Google Inc."); + } + + @Test() + public void testLookupNoCorrespondingRegionName() { + List<String> regionLines = new ArrayList<String>(); + regionLines.add("US,CO,"Colorado""); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, regionLines, null, "8.8.8.8", "us", + "United States", null, "Mountain View", "37.3860", "-122.0838", + "AS15169", "Google Inc."); + } + + @Test() + public void testLookupBlocksEndBeforeStart() { + List<String> manualGeoLiteCityBlocksLines = new ArrayList<String>(); + manualGeoLiteCityBlocksLines.add("Copyright (c) 2011 MaxMind Inc. 
" + + "All Rights Reserved."); + manualGeoLiteCityBlocksLines.add("startIpNum,endIpNum,locId"); + manualGeoLiteCityBlocksLines.add(""134739200","134744063"," + + ""223""); + manualGeoLiteCityBlocksLines.add(""134744319","134744064"," + + ""32191""); + manualGeoLiteCityBlocksLines.add(""134744320","134751743"," + + ""223""); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, + manualGeoLiteCityBlocksLines, null, null, null, null, null, null, + "8.8.8.8", null, null, null, null, null, null, "AS15169", + "Google Inc."); + } + + @Test() + public void testLookupBlocksStartNotANumber() { + List<String> manualGeoLiteCityBlocksLines = new ArrayList<String>(); + manualGeoLiteCityBlocksLines.add("Copyright (c) 2011 MaxMind Inc. " + + "All Rights Reserved."); + manualGeoLiteCityBlocksLines.add("startIpNum,endIpNum,locId"); + manualGeoLiteCityBlocksLines.add(""one","134744319"," + + ""32191""); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, + manualGeoLiteCityBlocksLines, null, null, null, null, null, null, + "8.8.8.8", null, null, null, null, null, null, null, null); + } + + @Test() + public void testLookupBlocksStartTooLarge() { + List<String> manualGeoLiteCityBlocksLines = new ArrayList<String>(); + manualGeoLiteCityBlocksLines.add("Copyright (c) 2011 MaxMind Inc. " + + "All Rights Reserved."); + manualGeoLiteCityBlocksLines.add("startIpNum,endIpNum,locId"); + manualGeoLiteCityBlocksLines.add(""1" + + String.valueOf(Long.MAX_VALUE) + "","134744319","32191""); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, + manualGeoLiteCityBlocksLines, null, null, null, null, null, null, + "8.8.8.8", null, null, null, null, null, null, null, null); + } + + @Test() + public void testLookupBlocksLocationX() { + List<String> manualGeoLiteCityBlocksLines = new ArrayList<String>(); + manualGeoLiteCityBlocksLines.add("Copyright (c) 2011 MaxMind Inc. 
" + + "All Rights Reserved."); + manualGeoLiteCityBlocksLines.add("startIpNum,endIpNum,locId"); + manualGeoLiteCityBlocksLines.add(""134744064","134744319","X""); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, + manualGeoLiteCityBlocksLines, null, null, null, null, null, null, + "8.8.8.8", null, null, null, null, null, null, null, null); + } + + @Test() + public void testLookupBlocksTooFewFields() { + List<String> manualGeoLiteCityBlocksLines = new ArrayList<String>(); + manualGeoLiteCityBlocksLines.add("Copyright (c) 2011 MaxMind Inc. " + + "All Rights Reserved."); + manualGeoLiteCityBlocksLines.add("startIpNum,endIpNum,locId"); + manualGeoLiteCityBlocksLines.add(""134744064","134744319""); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, + manualGeoLiteCityBlocksLines, null, null, null, null, null, null, + "8.8.8.8", null, null, null, null, null, null, null, null); + } + + @Test() + public void testLookupLocationLocIdNotANumber() { + List<String> geoLiteCityLocationLines = new ArrayList<String>(); + geoLiteCityLocationLines.add("Copyright (c) 2012 MaxMind LLC. All " + + "Rights Reserved."); + geoLiteCityLocationLines.add("locId,country,region,city,postalCode," + + "latitude,longitude,metroCode,areaCode"); + geoLiteCityLocationLines.add("threetwoonenineone,"US","CA"," + + ""Mountain View","",37.3860,-122.0838,807,650"); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, geoLiteCityLocationLines, null, null, null, "8.8.8.8", + null, null, null, null, null, null, null, null); + } + + @Test() + public void testLookupLocationTooFewFields() { + List<String> geoLiteCityLocationLines = new ArrayList<String>(); + geoLiteCityLocationLines.add("Copyright (c) 2012 MaxMind LLC. 
All " + + "Rights Reserved."); + geoLiteCityLocationLines.add("locId,country,region,city,postalCode," + + "latitude,longitude,metroCode,areaCode"); + geoLiteCityLocationLines.add("32191,"US","CA","Mountain View"," + + """,37.3860,-122.0838,807"); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, geoLiteCityLocationLines, null, null, null, "8.8.8.8", + null, null, null, null, null, null, null, null); + } + + @Test() + public void testLookupIso3166TooFewFields() { + List<String> iso3166Lines = new ArrayList<String>(); + iso3166Lines.add("US"); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, iso3166Lines, null, null, "8.8.8.8", null, null, + null, null, null, null, null, null); + } + + @Test() + public void testLookupRegionTooFewFields() { + List<String> regionLines = new ArrayList<String>(); + regionLines.add("US,CA"); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, regionLines, null, "8.8.8.8", null, null, + null, null, null, null, null, null); + } + + @Test() + public void testLookupGeoipASNum2EndBeforeStart() { + List<String> geoipASNum2Lines = new ArrayList<String>(); + geoipASNum2Lines.add("134743296,134744063,"AS3356 Level 3 " + + "Communications""); + geoipASNum2Lines.add("134744319,134744064,"AS15169 Google Inc.""); + geoipASNum2Lines.add("134744320,134750463,"AS3356 Level 3 " + + "Communications""); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, null, geoipASNum2Lines, "8.8.8.8", "us", + "United States", "California", "Mountain View", "37.3860", + "-122.0838", null, null); + } + + @Test() + public void testLookupGeoipASNum2StartNotANumber() { + List<String> geoipASNum2Lines = new ArrayList<String>(); + geoipASNum2Lines.add("one,134744319,"AS15169 Google Inc.""); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, null, geoipASNum2Lines, 
"8.8.8.8", null, + null, null, null, null, null, null, null); + } + + @Test() + public void testLookupGeoipASNum2StartTooLarge() { + List<String> geoipASNum2Lines = new ArrayList<String>(); + geoipASNum2Lines.add("1" + String.valueOf(Long.MAX_VALUE) + + ",134744319,"AS15169 Google Inc.""); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, null, geoipASNum2Lines, "8.8.8.8", null, + null, null, null, null, null, null, null); + } + + @Test() + public void testLookupGeoipASNum2TooFewFields() { + List<String> geoipASNum2Lines = new ArrayList<String>(); + geoipASNum2Lines.add("134744064,134744319"); + LookupServiceTestHelper.assertLookupResult(this.tempGeoipDir, null, + null, null, null, null, null, geoipASNum2Lines, "8.8.8.8", null, + null, null, null, null, null, null, null); + } +} +
tor-commits@lists.torproject.org