
commit cbba7ec70bf5cd6896cab57088164d4d90977e71 Author: Karsten Loesing <karsten.loesing@gmx.net> Date: Wed Aug 14 18:44:28 2013 +0200 MaxMind's GeoIP files use ISO-8859-1, not UTF-8. Without specifying a file encoding, Java uses the system default, which may be UTF-8 on Linux systems. But MaxMind's files all use ISO-8859-1. The effect is that our details files may contain incorrect lines like: "as_name":"Servi\uFFFDos de Comunica\uFFFD\uFFFDo S.A.", which should be: "as_name":"Servi\u00E7os de Comunica\u00E7\u00E3o S.A.", --- src/org/torproject/onionoo/LookupService.java | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/org/torproject/onionoo/LookupService.java b/src/org/torproject/onionoo/LookupService.java index 3791443..6fc2bf6 100644 --- a/src/org/torproject/onionoo/LookupService.java +++ b/src/org/torproject/onionoo/LookupService.java @@ -4,8 +4,9 @@ package org.torproject.onionoo; import java.io.BufferedReader; import java.io.File; -import java.io.FileReader; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStreamReader; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -118,8 +119,8 @@ public class LookupService { SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>( addressStringNumbers.values()); long firstAddressNumber = sortedAddressNumbers.first(); - BufferedReader br = new BufferedReader(new FileReader( - geoLiteCityBlocksCsvFile)); + BufferedReader br = new BufferedReader(new InputStreamReader( + new FileInputStream(geoLiteCityBlocksCsvFile), "ISO-8859-1")); String line; long previousStartIpNum = -1L; while ((line = br.readLine()) != null) { @@ -187,8 +188,8 @@ public class LookupService { try { Set<Long> blockNumbers = new HashSet<Long>( addressNumberBlocks.values()); - BufferedReader br = new BufferedReader(new FileReader( - geoLiteCityLocationCsvFile)); + BufferedReader br = new BufferedReader(new InputStreamReader( + new 
FileInputStream(geoLiteCityLocationCsvFile), "ISO-8859-1")); String line; while ((line = br.readLine()) != null) { if (line.startsWith("C") || line.startsWith("l")) { @@ -225,8 +226,8 @@ public class LookupService { /* Read country names to memory. */ Map<String, String> countryNames = new HashMap<String, String>(); try { - BufferedReader br = new BufferedReader(new FileReader( - iso3166CsvFile)); + BufferedReader br = new BufferedReader(new InputStreamReader( + new FileInputStream(iso3166CsvFile), "ISO-8859-1")); String line; while ((line = br.readLine()) != null) { String[] parts = line.replaceAll("\"", "").split(",", 2); @@ -248,8 +249,8 @@ public class LookupService { /* Read region names to memory. */ Map<String, String> regionNames = new HashMap<String, String>(); try { - BufferedReader br = new BufferedReader(new FileReader( - regionCsvFile)); + BufferedReader br = new BufferedReader(new InputStreamReader( + new FileInputStream(regionCsvFile), "ISO-8859-1")); String line; while ((line = br.readLine()) != null) { String[] parts = line.replaceAll("\"", "").split(",", 3); @@ -275,8 +276,8 @@ public class LookupService { SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>( addressStringNumbers.values()); long firstAddressNumber = sortedAddressNumbers.first(); - BufferedReader br = new BufferedReader(new FileReader( - geoIPASNum2CsvFile)); + BufferedReader br = new BufferedReader(new InputStreamReader( + new FileInputStream(geoIPASNum2CsvFile), "ISO-8859-1")); String line; long previousStartIpNum = -1L; while ((line = br.readLine()) != null) {