commit 947ce706bb46c5d7e5c4cd2b0716a2458b4af86b Author: iwakeh iwakeh@torproject.org Date: Fri Apr 6 09:53:35 2018 +0000
Provide UTF-8 characters instead of escape strings.
Also adapt tests to check the new functionality. Implements task-25740. --- CHANGELOG.md | 6 +++++ .../torproject/onionoo/util/FormattingUtils.java | 6 +++-- src/test/resources/lines-for-escape-tests.txt | 26 +++++++++++++--------- 3 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md index b903acc..692a417 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# Changes in version 6.0-1.13.0 - 2018-04-?? + + * Minor changes + - Turn valid utf-8 escape sequences into utf-8 characters. + + # Changes in version 5.2-1.12.0 - 2018-04-06
* Medium changes diff --git a/src/main/java/org/torproject/onionoo/util/FormattingUtils.java b/src/main/java/org/torproject/onionoo/util/FormattingUtils.java index 3d16f5a..558477d 100644 --- a/src/main/java/org/torproject/onionoo/util/FormattingUtils.java +++ b/src/main/java/org/torproject/onionoo/util/FormattingUtils.java @@ -3,6 +3,8 @@
package org.torproject.onionoo.util;
+import static org.apache.commons.lang3.StringEscapeUtils.unescapeJava; + import org.slf4j.Logger; import org.slf4j.LoggerFactory;
@@ -58,8 +60,8 @@ public class FormattingUtils { StringBuffer sb = new StringBuffer(); Matcher mat = escapePattern.matcher(text); while (mat.find()) { - String unescaped = mat.group(1); - mat.appendReplacement(sb, unescaped); + String unescaped = mat.group(1).substring(1); + mat.appendReplacement(sb, unescapeJava(unescaped)); } mat.appendTail(sb); return sb.toString(); diff --git a/src/test/resources/lines-for-escape-tests.txt b/src/test/resources/lines-for-escape-tests.txt index 4fb5895..b08b723 100644 --- a/src/test/resources/lines-for-escape-tests.txt +++ b/src/test/resources/lines-for-escape-tests.txt @@ -2,15 +2,21 @@
abc abc -\\u -\\u +\\u00bb \\u \\u00ab +» \\u « Haha/\\@/\\live/\\./\\co/\\./\\uk Haha/\\@/\\live/\\./\\co/\\./\\uk -\\u20ac -\u20ac -\\u0024 -\u0024 -some \\u20ac other string \\u0024 to unescape -some \u20ac other string \u0024 to unescape -abcd efg\\u0024xyz\\uxxxx -abcd efg\u0024xyz\\uxxxx +Euro \\u20ac +Euro € +Dollar \\u0024 +Dollar $ +Pound \\u00a3 +Pound £ +Yen \\u00a5 +Yen ¥ +\\u00a1some \\u00dE other string \\u00e7 to unescape! +¡some Þ other string ç to unescape! +abcd efg cent \\u00a2xyz\\uxxxx +abcd efg cent ¢xyz\\uxxxx +Telef\\u00F4nica +Telefônica