[tor-commits] [metrics-lib/master] Always use UTF-8 as charset.

karsten at torproject.org karsten at torproject.org
Fri Jun 23 08:38:22 UTC 2017


commit 2f6c9baa68466b50ac869a68daa3dfd4334f57b4
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Jun 22 15:15:36 2017 +0200

    Always use UTF-8 as charset.
    
    Implements #21932.
---
 CHANGELOG.md                                               |  4 ++++
 .../org/torproject/descriptor/impl/DescriptorImpl.java     | 14 +++++++-------
 .../org/torproject/descriptor/impl/TorperfResultImpl.java  |  9 +++++----
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fd39b96..394d4eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changes in version 2.0.0 - 2017-06-??
 
+ * Major changes
+   - Always use UTF-8 as charset rather than using the platform's
+     default charset.
+
  * Minor changes
    - Replace custom ImplementationNotAccessibleException thrown by
      DescriptorSourceFactory with generic RuntimeException.
diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java
index e0f94f0..acbee50 100644
--- a/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java
@@ -8,6 +8,7 @@ import org.torproject.descriptor.DescriptorParseException;
 
 import java.io.ByteArrayInputStream;
 import java.io.File;
+import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
@@ -96,8 +97,8 @@ public abstract class DescriptorImpl implements Descriptor {
   }
 
   /**
-   * Returns a new {@link Scanner} for parsing the full raw descriptor starting
-   * using the platform's default charset.
+   * Returns a new {@link Scanner} for parsing the full raw descriptor using
+   * UTF-8 as charset.
    *
    * @return Scanner for the full raw descriptor bytes.
    */
@@ -107,17 +108,16 @@ public abstract class DescriptorImpl implements Descriptor {
 
   /**
    * Returns a new {@link Scanner} for parsing the raw descriptor starting at
-   * byte <code>offset</code> containing <code>length</code> bytes using the
-   * platform's default charset.
+   * byte <code>offset</code> containing <code>length</code> bytes using UTF-8
+   * as charset.
    *
    * @param offset The index of the first byte to parse.
    * @param length The number of bytes to parse.
    * @return Scanner for the given raw descriptor bytes.
    */
   protected Scanner newScanner(int offset, int length) {
-    /* XXX21932 */
-    return new Scanner(new ByteArrayInputStream(this.rawDescriptorBytes, offset,
-        length));
+    return new Scanner(new InputStreamReader(new ByteArrayInputStream(
+        this.rawDescriptorBytes, offset, length), StandardCharsets.UTF_8));
   }
 
   /**
diff --git a/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java b/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java
index ea9eb4b..1ff0fe3 100644
--- a/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java
@@ -8,6 +8,7 @@ import org.torproject.descriptor.DescriptorParseException;
 import org.torproject.descriptor.TorperfResult;
 
 import java.io.File;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
@@ -27,8 +28,8 @@ public class TorperfResultImpl extends DescriptorImpl
       throw new DescriptorParseException("Descriptor is empty.");
     }
     List<Descriptor> parsedDescriptors = new ArrayList<>();
-    /* XXX21932 */
-    String descriptorString = new String(rawDescriptorBytes);
+    String descriptorString = new String(rawDescriptorBytes,
+        StandardCharsets.UTF_8);
     Scanner scanner = new Scanner(descriptorString).useDelimiter("\r?\n");
     String typeAnnotation = "";
     while (scanner.hasNext()) {
@@ -48,8 +49,8 @@ public class TorperfResultImpl extends DescriptorImpl
       } else {
         /* XXX21932 */
         parsedDescriptors.add(new TorperfResultImpl(
-            (typeAnnotation + line).getBytes(), descriptorFile,
-            failUnrecognizedDescriptorLines));
+            (typeAnnotation + line).getBytes(StandardCharsets.UTF_8),
+            descriptorFile, failUnrecognizedDescriptorLines));
         typeAnnotation = "";
       }
     }



More information about the tor-commits mailing list