commit 532ef3479a576733934edb45f588f2a074061f62 Author: iwakeh iwakeh@torproject.org Date: Thu Mar 15 13:58:17 2018 +0000
Only unescape valid UTF.
Add a utility method that un-escapes only valid UTF escape sequences, and supply a test as well as test data for this issue.
Fixes task-22594. --- CHANGELOG.md | 8 ++++ .../org/torproject/onionoo/docs/DocumentStore.java | 4 +- .../torproject/onionoo/server/ResponseBuilder.java | 5 +-- .../torproject/onionoo/util/FormattingUtils.java | 34 +++++++++++++++++ .../onionoo/util/FormattingUtilsTest.java | 43 ++++++++++++++++++++++ src/test/resources/lines-for-escape-tests.txt | 16 ++++++++ 6 files changed, 104 insertions(+), 6 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fe389b..3a3c468 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# Changes in version 5.1-1.12.0 - 2018-??-?? + + * Minor changes + - Don't attempt to un-escape character sequences in contact lines + (like "\uk") that only happen to start like escaped utf-8 characters + (like "\u0055"). + + # Changes in version 5.1-1.11.0 - 2018-03-14
* Medium changes diff --git a/src/main/java/org/torproject/onionoo/docs/DocumentStore.java b/src/main/java/org/torproject/onionoo/docs/DocumentStore.java index 4622a34..f1f3803 100644 --- a/src/main/java/org/torproject/onionoo/docs/DocumentStore.java +++ b/src/main/java/org/torproject/onionoo/docs/DocumentStore.java @@ -9,7 +9,6 @@ import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonParseException;
-import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory;
@@ -318,8 +317,7 @@ public class DocumentStore { * objects are escaped JSON, e.g., \u00F2. When Gson serializes * this string, it escapes the \ to \\, hence writes \\u00F2. We * need to undo this and change \\u00F2 back to \u00F2. */ - documentString = StringUtils.replace(gson.toJson(document), - "\\u", "\u"); + documentString = FormattingUtils.replaceValidUtf(gson.toJson(document)); /* Existing details statuses don't contain opening and closing curly * brackets, so we should remove them from new details statuses, * too. */ diff --git a/src/main/java/org/torproject/onionoo/server/ResponseBuilder.java b/src/main/java/org/torproject/onionoo/server/ResponseBuilder.java index bb36a2c..e2bdf82 100644 --- a/src/main/java/org/torproject/onionoo/server/ResponseBuilder.java +++ b/src/main/java/org/torproject/onionoo/server/ResponseBuilder.java @@ -12,12 +12,11 @@ import org.torproject.onionoo.docs.DocumentStoreFactory; import org.torproject.onionoo.docs.SummaryDocument; import org.torproject.onionoo.docs.UptimeDocument; import org.torproject.onionoo.docs.WeightsDocument; +import org.torproject.onionoo.util.FormattingUtils;
import com.google.gson.Gson; import com.google.gson.GsonBuilder;
-import org.apache.commons.lang3.StringUtils; - import org.slf4j.Logger; import org.slf4j.LoggerFactory;
@@ -348,7 +347,7 @@ public class ResponseBuilder { /* Whenever we provide Gson with a string containing an escaped * non-ASCII character like \u00F2, it escapes the \ to \\, which * we need to undo before including the string in a response. */ - return StringUtils.replace(gson.toJson(dd), "\\u", "\u"); + return FormattingUtils.replaceValidUtf(gson.toJson(dd)); } else { // TODO We should probably log that we didn't find a details // document that we expected to exist. diff --git a/src/main/java/org/torproject/onionoo/util/FormattingUtils.java b/src/main/java/org/torproject/onionoo/util/FormattingUtils.java index 7ed1377..3d16f5a 100644 --- a/src/main/java/org/torproject/onionoo/util/FormattingUtils.java +++ b/src/main/java/org/torproject/onionoo/util/FormattingUtils.java @@ -3,8 +3,18 @@
package org.torproject.onionoo.util;
+import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** Static helper methods for string processing etc. */ public class FormattingUtils {
+ private static Logger log = LoggerFactory.getLogger( + FormattingUtils.class); + private FormattingUtils() { }
@@ -35,5 +45,29 @@ public class FormattingUtils { public static String formatDecimalNumber(long decimalNumber) { return String.format("%,d", decimalNumber); } + + private static Pattern escapePattern = Pattern.compile( + "(\\{4}u[0-9a-fA-F]{4})"); + + /** De-escape only valid UTF and leave anything else escaped. */ + public static String replaceValidUtf(String text) { + if (null == text || text.isEmpty()) { + return text; + } + try { + StringBuffer sb = new StringBuffer(); + Matcher mat = escapePattern.matcher(text); + while (mat.find()) { + String unescaped = mat.group(1); + mat.appendReplacement(sb, unescaped); + } + mat.appendTail(sb); + return sb.toString(); + } catch (Throwable ex) { + log.debug("Couldn't process input '{}'.", text, ex); + return text; + } + } + }
diff --git a/src/test/java/org/torproject/onionoo/util/FormattingUtilsTest.java b/src/test/java/org/torproject/onionoo/util/FormattingUtilsTest.java new file mode 100644 index 0000000..8744696 --- /dev/null +++ b/src/test/java/org/torproject/onionoo/util/FormattingUtilsTest.java @@ -0,0 +1,43 @@ +package org.torproject.onionoo.util; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameter; +import org.junit.runners.Parameterized.Parameters; + +import java.io.File; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +@RunWith(Parameterized.class) +public class FormattingUtilsTest { + + /** Provide test data. */ + @Parameters + public static Collection<String[]> data() throws Exception { + List<String> lines = Files.readAllLines((new File(ClassLoader + .getSystemResource("lines-for-escape-tests.txt").toURI())) + .toPath()); + List<String[]> testData = new ArrayList<>(); + for (int i = 0; i < lines.size(); i += 2) { + testData.add(new String[]{lines.get(i), lines.get(i + 1)}); + } + return testData; + } + + @Parameter(0) + public String in; + + @Parameter(1) + public String out; + + @Test + public void testReplaceUtf() { + assertEquals(out, new String(FormattingUtils.replaceValidUtf(in))); + } +} diff --git a/src/test/resources/lines-for-escape-tests.txt b/src/test/resources/lines-for-escape-tests.txt new file mode 100644 index 0000000..4fb5895 --- /dev/null +++ b/src/test/resources/lines-for-escape-tests.txt @@ -0,0 +1,16 @@ + + +abc +abc +\\u +\\u +Haha/\\@/\\live/\\./\\co/\\./\\uk +Haha/\\@/\\live/\\./\\co/\\./\\uk +\\u20ac +\u20ac +\\u0024 +\u0024 +some \\u20ac other string \\u0024 to unescape +some \u20ac other string \u0024 to unescape +abcd efg\\u0024xyz\\uxxxx +abcd efg\u0024xyz\\uxxxx