[tor-commits] [onionoo/master] MaxMind's GeoIP files use ISO-8859-1, not UTF-8.

karsten at torproject.org karsten at torproject.org
Wed Aug 14 18:23:28 UTC 2013


commit cbba7ec70bf5cd6896cab57088164d4d90977e71
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Aug 14 18:44:28 2013 +0200

    MaxMind's GeoIP files use ISO-8859-1, not UTF-8.
    
    Without specifying a file encoding, Java uses the system default, which
    may be UTF-8 on Linux systems.  But MaxMind's files all use ISO-8859-1.
    The effect is that our details files may contain incorrect lines like:
    
      "as_name":"Servi\uFFFDos de Comunica\uFFFD\uFFFDo S.A.",
    
    which should be:
    
      "as_name":"Servi\u00E7os de Comunica\u00E7\u00E3o S.A.",
---
 src/org/torproject/onionoo/LookupService.java |   23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/org/torproject/onionoo/LookupService.java b/src/org/torproject/onionoo/LookupService.java
index 3791443..6fc2bf6 100644
--- a/src/org/torproject/onionoo/LookupService.java
+++ b/src/org/torproject/onionoo/LookupService.java
@@ -4,8 +4,9 @@ package org.torproject.onionoo;
 
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
@@ -118,8 +119,8 @@ public class LookupService {
       SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>(
           addressStringNumbers.values());
       long firstAddressNumber = sortedAddressNumbers.first();
-      BufferedReader br = new BufferedReader(new FileReader(
-          geoLiteCityBlocksCsvFile));
+      BufferedReader br = new BufferedReader(new InputStreamReader(
+          new FileInputStream(geoLiteCityBlocksCsvFile), "ISO-8859-1"));
       String line;
       long previousStartIpNum = -1L;
       while ((line = br.readLine()) != null) {
@@ -187,8 +188,8 @@ public class LookupService {
     try {
       Set<Long> blockNumbers = new HashSet<Long>(
           addressNumberBlocks.values());
-      BufferedReader br = new BufferedReader(new FileReader(
-          geoLiteCityLocationCsvFile));
+      BufferedReader br = new BufferedReader(new InputStreamReader(
+          new FileInputStream(geoLiteCityLocationCsvFile), "ISO-8859-1"));
       String line;
       while ((line = br.readLine()) != null) {
         if (line.startsWith("C") || line.startsWith("l")) {
@@ -225,8 +226,8 @@ public class LookupService {
     /* Read country names to memory. */
     Map<String, String> countryNames = new HashMap<String, String>();
     try {
-      BufferedReader br = new BufferedReader(new FileReader(
-          iso3166CsvFile));
+      BufferedReader br = new BufferedReader(new InputStreamReader(
+          new FileInputStream(iso3166CsvFile), "ISO-8859-1"));
       String line;
       while ((line = br.readLine()) != null) {
         String[] parts = line.replaceAll("\"", "").split(",", 2);
@@ -248,8 +249,8 @@ public class LookupService {
     /* Read region names to memory. */
     Map<String, String> regionNames = new HashMap<String, String>();
     try {
-      BufferedReader br = new BufferedReader(new FileReader(
-          regionCsvFile));
+      BufferedReader br = new BufferedReader(new InputStreamReader(
+          new FileInputStream(regionCsvFile), "ISO-8859-1"));
       String line;
       while ((line = br.readLine()) != null) {
         String[] parts = line.replaceAll("\"", "").split(",", 3);
@@ -275,8 +276,8 @@ public class LookupService {
       SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>(
           addressStringNumbers.values());
       long firstAddressNumber = sortedAddressNumbers.first();
-      BufferedReader br = new BufferedReader(new FileReader(
-          geoIPASNum2CsvFile));
+      BufferedReader br = new BufferedReader(new InputStreamReader(
+          new FileInputStream(geoIPASNum2CsvFile), "ISO-8859-1"));
       String line;
       long previousStartIpNum = -1L;
       while ((line = br.readLine()) != null) {



More information about the tor-commits mailing list