commit 2f6c9baa68466b50ac869a68daa3dfd4334f57b4
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date:   Thu Jun 22 15:15:36 2017 +0200
Always use UTF-8 as charset.
Implements #21932.
---
 CHANGELOG.md                                              |  4 ++++
 .../org/torproject/descriptor/impl/DescriptorImpl.java    | 14 +++++++-------
 .../org/torproject/descriptor/impl/TorperfResultImpl.java |  9 +++++----
 3 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md index fd39b96..394d4eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changes in version 2.0.0 - 2017-06-??
+ * Major changes + - Always use UTF-8 as charset rather than using the platform's + default charset. + * Minor changes - Replace custom ImplementationNotAccessibleException thrown by DescriptorSourceFactory with generic RuntimeException. diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java index e0f94f0..acbee50 100644 --- a/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java +++ b/src/main/java/org/torproject/descriptor/impl/DescriptorImpl.java @@ -8,6 +8,7 @@ import org.torproject.descriptor.DescriptorParseException;
import java.io.ByteArrayInputStream; import java.io.File; +import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -96,8 +97,8 @@ public abstract class DescriptorImpl implements Descriptor { }
/** - * Returns a new {@link Scanner} for parsing the full raw descriptor starting - * using the platform's default charset. + * Returns a new {@link Scanner} for parsing the full raw descriptor using + * UTF-8 as charset. * * @return Scanner for the full raw descriptor bytes. */ @@ -107,17 +108,16 @@ public abstract class DescriptorImpl implements Descriptor {
/** * Returns a new {@link Scanner} for parsing the raw descriptor starting at - * byte <code>offset</code> containing <code>length</code> bytes using the - * platform's default charset. + * byte <code>offset</code> containing <code>length</code> bytes using UTF-8 + * as charset. * * @param offset The index of the first byte to parse. * @param length The number of bytes to parse. * @return Scanner for the given raw descriptor bytes. */ protected Scanner newScanner(int offset, int length) { - /* XXX21932 */ - return new Scanner(new ByteArrayInputStream(this.rawDescriptorBytes, offset, - length)); + return new Scanner(new InputStreamReader(new ByteArrayInputStream( + this.rawDescriptorBytes, offset, length), StandardCharsets.UTF_8)); }
/** diff --git a/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java b/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java index ea9eb4b..1ff0fe3 100644 --- a/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java +++ b/src/main/java/org/torproject/descriptor/impl/TorperfResultImpl.java @@ -8,6 +8,7 @@ import org.torproject.descriptor.DescriptorParseException; import org.torproject.descriptor.TorperfResult;
import java.io.File; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -27,8 +28,8 @@ public class TorperfResultImpl extends DescriptorImpl throw new DescriptorParseException("Descriptor is empty."); } List<Descriptor> parsedDescriptors = new ArrayList<>(); - /* XXX21932 */ - String descriptorString = new String(rawDescriptorBytes); + String descriptorString = new String(rawDescriptorBytes, + StandardCharsets.UTF_8); Scanner scanner = new Scanner(descriptorString).useDelimiter("\r?\n"); String typeAnnotation = ""; while (scanner.hasNext()) { @@ -48,8 +49,8 @@ public class TorperfResultImpl extends DescriptorImpl } else { /* XXX21932 */ parsedDescriptors.add(new TorperfResultImpl( - (typeAnnotation + line).getBytes(), descriptorFile, - failUnrecognizedDescriptorLines)); + (typeAnnotation + line).getBytes(StandardCharsets.UTF_8), + descriptorFile, failUnrecognizedDescriptorLines)); typeAnnotation = ""; } }