commit 2f6c9baa68466b50ac869a68daa3dfd4334f57b4
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Jun 22 15:15:36 2017 +0200
Always use UTF-8 as charset.
Implements #21932.
---
CHANGELOG.md | 4 ++++
.../org/torproject/descriptor/impl/DescriptorImpl.java | 14 +++++++-------
.../org/torproject/descriptor/impl/TorperfResultImpl.java | 9 +++++----
3 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fd39b96..394d4eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
# Changes in version 2.0.0 - 2017-06-??
+ * Major changes
+ - Always use UTF-8 as charset rather than using the platform's
+ default charset.
+
* Minor changes
- Replace custom ImplementationNotAccessibleException thrown by
DescriptorSourceFactory with generic RuntimeException.
diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java
index e0f94f0..acbee50 100644
--- a/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java
@@ -8,6 +8,7 @@ import org.torproject.descriptor.DescriptorParseException;
import java.io.ByteArrayInputStream;
import java.io.File;
+import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
@@ -96,8 +97,8 @@ public abstract class DescriptorImpl implements Descriptor {
}
/**
- * Returns a new {@link Scanner} for parsing the full raw descriptor starting
- * using the platform's default charset.
+ * Returns a new {@link Scanner} for parsing the full raw descriptor using
+ * UTF-8 as charset.
*
* @return Scanner for the full raw descriptor bytes.
*/
@@ -107,17 +108,16 @@ public abstract class DescriptorImpl implements Descriptor {
/**
* Returns a new {@link Scanner} for parsing the raw descriptor starting at
- * byte <code>offset</code> containing <code>length</code> bytes using the
- * platform's default charset.
+ * byte <code>offset</code> containing <code>length</code> bytes using UTF-8
+ * as charset.
*
* @param offset The index of the first byte to parse.
* @param length The number of bytes to parse.
* @return Scanner for the given raw descriptor bytes.
*/
protected Scanner newScanner(int offset, int length) {
- /* XXX21932 */
- return new Scanner(new ByteArrayInputStream(this.rawDescriptorBytes, offset,
- length));
+ return new Scanner(new InputStreamReader(new ByteArrayInputStream(
+ this.rawDescriptorBytes, offset, length), StandardCharsets.UTF_8));
}
/**
diff --git a/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java b/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java
index ea9eb4b..1ff0fe3 100644
--- a/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java
@@ -8,6 +8,7 @@ import org.torproject.descriptor.DescriptorParseException;
import org.torproject.descriptor.TorperfResult;
import java.io.File;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
@@ -27,8 +28,8 @@ public class TorperfResultImpl extends DescriptorImpl
throw new DescriptorParseException("Descriptor is empty.");
}
List<Descriptor> parsedDescriptors = new ArrayList<>();
- /* XXX21932 */
- String descriptorString = new String(rawDescriptorBytes);
+ String descriptorString = new String(rawDescriptorBytes,
+ StandardCharsets.UTF_8);
Scanner scanner = new Scanner(descriptorString).useDelimiter("\r?\n");
String typeAnnotation = "";
while (scanner.hasNext()) {
@@ -48,8 +49,8 @@ public class TorperfResultImpl extends DescriptorImpl
} else {
/* XXX21932 */
parsedDescriptors.add(new TorperfResultImpl(
- (typeAnnotation + line).getBytes(), descriptorFile,
- failUnrecognizedDescriptorLines));
+ (typeAnnotation + line).getBytes(StandardCharsets.UTF_8),
+ descriptorFile, failUnrecognizedDescriptorLines));
typeAnnotation = "";
}
}