commit cbba7ec70bf5cd6896cab57088164d4d90977e71
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Aug 14 18:44:28 2013 +0200
MaxMind's GeoIP files use ISO-8859-1, not UTF-8.
Without specifying a file encoding, Java uses the system default, which
may be UTF-8 on Linux systems. But MaxMind's files all use ISO-8859-1.
The effect is that our details files may contain incorrect lines like:
"as_name":"Servi\uFFFDos de Comunica\uFFFD\uFFFDo S.A.",
which should be:
"as_name":"Servi\u00E7os de Comunica\u00E7\u00E3o S.A.",
---
src/org/torproject/onionoo/LookupService.java | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/src/org/torproject/onionoo/LookupService.java b/src/org/torproject/onionoo/LookupService.java
index 3791443..6fc2bf6 100644
--- a/src/org/torproject/onionoo/LookupService.java
+++ b/src/org/torproject/onionoo/LookupService.java
@@ -4,8 +4,9 @@ package org.torproject.onionoo;
import java.io.BufferedReader;
import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
import java.io.IOException;
+import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@@ -118,8 +119,8 @@ public class LookupService {
SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>(
addressStringNumbers.values());
long firstAddressNumber = sortedAddressNumbers.first();
- BufferedReader br = new BufferedReader(new FileReader(
- geoLiteCityBlocksCsvFile));
+ BufferedReader br = new BufferedReader(new InputStreamReader(
+ new FileInputStream(geoLiteCityBlocksCsvFile), "ISO-8859-1"));
String line;
long previousStartIpNum = -1L;
while ((line = br.readLine()) != null) {
@@ -187,8 +188,8 @@ public class LookupService {
try {
Set<Long> blockNumbers = new HashSet<Long>(
addressNumberBlocks.values());
- BufferedReader br = new BufferedReader(new FileReader(
- geoLiteCityLocationCsvFile));
+ BufferedReader br = new BufferedReader(new InputStreamReader(
+ new FileInputStream(geoLiteCityLocationCsvFile), "ISO-8859-1"));
String line;
while ((line = br.readLine()) != null) {
if (line.startsWith("C") || line.startsWith("l")) {
@@ -225,8 +226,8 @@ public class LookupService {
/* Read country names to memory. */
Map<String, String> countryNames = new HashMap<String, String>();
try {
- BufferedReader br = new BufferedReader(new FileReader(
- iso3166CsvFile));
+ BufferedReader br = new BufferedReader(new InputStreamReader(
+ new FileInputStream(iso3166CsvFile), "ISO-8859-1"));
String line;
while ((line = br.readLine()) != null) {
String[] parts = line.replaceAll("\"", "").split(",", 2);
@@ -248,8 +249,8 @@ public class LookupService {
/* Read region names to memory. */
Map<String, String> regionNames = new HashMap<String, String>();
try {
- BufferedReader br = new BufferedReader(new FileReader(
- regionCsvFile));
+ BufferedReader br = new BufferedReader(new InputStreamReader(
+ new FileInputStream(regionCsvFile), "ISO-8859-1"));
String line;
while ((line = br.readLine()) != null) {
String[] parts = line.replaceAll("\"", "").split(",", 3);
@@ -275,8 +276,8 @@ public class LookupService {
SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>(
addressStringNumbers.values());
long firstAddressNumber = sortedAddressNumbers.first();
- BufferedReader br = new BufferedReader(new FileReader(
- geoIPASNum2CsvFile));
+ BufferedReader br = new BufferedReader(new InputStreamReader(
+ new FileInputStream(geoIPASNum2CsvFile), "ISO-8859-1"));
String line;
long previousStartIpNum = -1L;
while ((line = br.readLine()) != null) {