[tor-commits] [bridgedb/develop] Minor cleanup for htmlify_string() from commits faf48983 and ccb3b8d1.

isis at torproject.org isis at torproject.org
Thu Jun 4 08:03:19 UTC 2015


commit 0b62526335ed532fee5b2b1b3cba4ea04424333a
Author: Isis Lovecruft <isis at torproject.org>
Date:   Thu Jun 4 02:58:01 2015 +0000

    Minor cleanup for htmlify_string() from commits faf48983 and ccb3b8d1.
    
    First, we don't need the bridgedb.util.htmlify_string() function, since
    Mako provides syntax for "piping" content about to be written to the
    page through filters [0] and it even provides a HTML-escaping function
    (by default, it uses markupsafe.escape).
    
    Second, we do actually need to remove control characters (at the very
    least, but since technically we're parsing unicode… removing all
    escape/control characters isn't actually feasible) from bridge lines.
    
     * REMOVE bridgedb.util.htmlify_string().
     * CHANGE lib/bridgedb/templates/bridges.html to use Mako's builtin text
       filtering syntax and functions for replacing HTML token characters
       with their corresponding HTML special characters.
     * ADD bridgedb.util.replaceControlChars().
     * ADD unittests to ensure that everything is escaped properly.
    
    [0]: http://docs.makotemplates.org/en/latest/filtering.html
---
 lib/bridgedb/HTTPServer.py           |   12 ++--
 lib/bridgedb/templates/bridges.html  |    8 ++-
 lib/bridgedb/test/test_HTTPServer.py |  109 ++++++++++++++++++++++++++++++++--
 lib/bridgedb/util.py                 |   50 +++++++++-------
 4 files changed, 145 insertions(+), 34 deletions(-)

diff --git a/lib/bridgedb/HTTPServer.py b/lib/bridgedb/HTTPServer.py
index c969908..beff4bd 100644
--- a/lib/bridgedb/HTTPServer.py
+++ b/lib/bridgedb/HTTPServer.py
@@ -56,7 +56,7 @@ from bridgedb.qrcodes import generateQR
 from bridgedb.safelog import logSafely
 from bridgedb.schedule import Unscheduled
 from bridgedb.schedule import ScheduledInterval
-from bridgedb.util import htmlify_string
+from bridgedb.util import replaceControlChars
 
 
 TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), 'templates')
@@ -739,12 +739,11 @@ class WebResourceBridges(resource.Resource):
                                                        self.nBridgesToGive,
                                                        countryCode,
                                                        bridgeFilterRules=rules)
-            bridgeLines = "".join("%s\n" % b.getConfigLine(
+            bridgeLines = [replaceControlChars(b.getConfigLine(
                 includeFingerprint=self.includeFingerprints,
                 addressClass=addressClass,
                 transport=transport,
-                request=bridgedb.Dist.uniformMap(ip)
-                ) for b in bridges)
+                request=bridgedb.Dist.uniformMap(ip))) for b in bridges]
 
         answer = self.renderAnswer(request, bridgeLines, format)
         return answer
@@ -772,7 +771,7 @@ class WebResourceBridges(resource.Resource):
         if format == 'plain':
             request.setHeader("Content-Type", "text/plain")
             try:
-                rendered = bytes(bridgeLines)
+                rendered = bytes('\n'.join(bridgeLines))
             except Exception as err:
                 rendered = replaceErrorPage(err)
         else:
@@ -790,8 +789,7 @@ class WebResourceBridges(resource.Resource):
                                            rtl=rtl,
                                            lang=langs[0],
                                            answer=bridgeLines,
-                                           qrcode=qrcode,
-                                           htmlify_string=htmlify_string)
+                                           qrcode=qrcode)
             except Exception as err:
                 rendered = replaceErrorPage(err)
 
diff --git a/lib/bridgedb/templates/bridges.html b/lib/bridgedb/templates/bridges.html
index 0ecf1d4..b5ac544 100644
--- a/lib/bridgedb/templates/bridges.html
+++ b/lib/bridgedb/templates/bridges.html
@@ -1,8 +1,7 @@
 ## -*- coding: utf-8 -*-
 
 <%inherit file="base.html"/>
-<%page args="strings, rtl=False, lang='en', answer=0, qrcode=0,
-    htmlify_string=None, **kwargs"/>
+<%page args="strings, rtl=False, lang='en', answer=0, qrcode=0, **kwargs"/>
 
   </div>
 </div>
@@ -67,7 +66,10 @@
       <div class="row" id="bridgesrow1">
         <div class="col col-lg-12">
           <div class="bridge-lines" id="bridgelines">
-${htmlify_string(answer)}
+## See http://docs.makotemplates.org/en/latest/filtering.html
+% for bridgeline in answer:
+${bridgeline | h,trim}<br />
+% endfor
           </div>
         </div>
       </div>
diff --git a/lib/bridgedb/test/test_HTTPServer.py b/lib/bridgedb/test/test_HTTPServer.py
index 68616bf..d597c55 100644
--- a/lib/bridgedb/test/test_HTTPServer.py
+++ b/lib/bridgedb/test/test_HTTPServer.py
@@ -422,6 +422,8 @@ class DummyBridge(object):
     """A mock :class:`bridgedb.Bridges.Bridge` which only supports a mocked
     ``getConfigLine`` method."""
 
+    ptArgs = {}
+
     def _randORPort(self): return random.randint(9001, 9999)
     def _randPTPort(self): return random.randint(6001, 6666)
     def _returnFour(self): return random.randint(2**24, 2**32-1)
@@ -457,16 +459,32 @@ class DummyBridge(object):
             line.append(str(transport))
         line.append("%s:%s" % (self.ip, self.orport))
         if includeFingerprint is True: line.append(self.fingerprint)
+        if self.ptArgs:
+            line.append(','.join(['='.join(x) for x in self.ptArgs.items()]))
         bridgeLine = " ".join([item for item in line])
         #print "Created config line: %r" % bridgeLine
         return bridgeLine
 
 
+class DummyMaliciousBridge(DummyBridge):
+    """A mock :class:`bridgedb.Bridges.Bridge` which only supports a mocked
+    ``getConfigLine`` method and which maliciously insert an additional fake
+    bridgeline and some javascript into its PT arguments.
+    """
+    ptArgs = {
+        "eww": "\rBridge 1.2.3.4:1234",
+        "bad": "\nBridge 6.6.6.6:6666 0123456789abcdef0123456789abcdef01234567",
+        "evil": "<script>alert('fuuuu');</script>",
+    }
+
+
 class DummyIPBasedDistributor(object):
     """A mocked :class:`bridgedb.Dist.IPBasedDistributor` which is used to test
     :class:`bridgedb.HTTPServer.WebResourceBridges.
     """
 
+    _bridge_class = DummyBridge
+
     def _dumbAreaMapper(ip): return ip
 
     def __init__(self, areaMapper=None, nClusters=None, key=None,
@@ -486,7 +504,7 @@ class DummyIPBasedDistributor(object):
         """Needed because it's called in
         :meth:`WebResourceBridges.getBridgesForIP`.
         """
-        return [DummyBridge() for _ in xrange(N)]
+        return [self._bridge_class() for _ in xrange(N)]
 
 
 class DummyRequest(requesthelper.DummyRequest):
@@ -520,11 +538,19 @@ class WebResourceBridgesTests(unittest.TestCase):
         self.dist = DummyIPBasedDistributor()
         self.sched = ScheduledInterval(1, 'hour')
         self.nBridgesPerRequest = 2
+
+    def useBenignBridges(self):
+        self.dist._bridge_class = DummyBridge
         self.bridgesResource = HTTPServer.WebResourceBridges(
-            self.dist, self.sched, N=2,
-            #useForwardedHeader=True,
+            self.dist, self.sched, N=self.nBridgesPerRequest,
             includeFingerprints=True)
+        self.root.putChild(self.pagename, self.bridgesResource)
 
+    def useMaliciousBridges(self):
+        self.dist._bridge_class = DummyMaliciousBridge
+        self.bridgesResource = HTTPServer.WebResourceBridges(
+            self.dist, self.sched, N=self.nBridgesPerRequest,
+            includeFingerprints=True)
         self.root.putChild(self.pagename, self.bridgesResource)
 
     def parseBridgesFromHTMLPage(self, page):
@@ -550,8 +576,76 @@ class WebResourceBridgesTests(unittest.TestCase):
 
         return bridges
 
+    def test_render_GET_malicious_newlines(self):
+        """Test rendering a request when the some of the bridges returned have
+        malicious (HTML, Javascript, etc., in their) PT arguments.
+        """
+        self.useMaliciousBridges()
+
+        request = DummyRequest([self.pagename])
+        request.method = b'GET'
+        request.getClientIP = lambda: '1.1.1.1'
+
+        page = self.bridgesResource.render(request)
+        self.assertTrue(
+            'bad=Bridge 6.6.6.6:6666 0123456789abcdef0123456789abcdef01234567' in str(page),
+            "Newlines in bridge lines should be removed.")
+
+    def test_render_GET_malicious_returnchar(self):
+        """Test rendering a request when the some of the bridges returned have
+        malicious (HTML, Javascript, etc., in their) PT arguments.
+        """
+        self.useMaliciousBridges()
+
+        request = DummyRequest([self.pagename])
+        request.method = b'GET'
+        request.getClientIP = lambda: '1.1.1.1'
+
+        page = self.bridgesResource.render(request)
+        self.assertTrue(
+            'eww=Bridge 1.2.3.4:1234' in str(page),
+            "Return characters in bridge lines should be removed.")
+
+    def test_render_GET_malicious_javascript(self):
+        """Test rendering a request when the some of the bridges returned have
+        malicious (HTML, Javascript, etc., in their) PT arguments.
+        """
+        self.useMaliciousBridges()
+
+        request = DummyRequest([self.pagename])
+        request.method = b'GET'
+        request.getClientIP = lambda: '1.1.1.1'
+
+        page = self.bridgesResource.render(request)
+        self.assertTrue(
+            "evil=<script>alert('fuuuu');</script>" in str(page),
+            ("The characters &, <, >, ', and \" in bridge lines should be "
+             "replaced with their corresponding HTML special characters."))
+
+    def test_renderAnswer_GET_textplain_malicious(self):
+        """If the request format specifies 'plain', we should return content
+        with mimetype 'text/plain' and ASCII control characters replaced.
+        """
+        self.useMaliciousBridges()
+
+        request = DummyRequest([self.pagename])
+        request.args.update({'format': ['plain']})
+        request.getClientIP = lambda: '4.4.4.4'
+        request.method = b'GET'
+
+        page = self.bridgesResource.render(request)
+        self.assertTrue("html" not in str(page))
+        self.assertTrue(
+            'eww=Bridge 1.2.3.4:1234' in str(page),
+            "Return characters in bridge lines should be removed.")
+        self.assertTrue(
+            'bad=Bridge 6.6.6.6:6666' in str(page),
+            "Newlines in bridge lines should be removed.")
+
     def test_render_GET_vanilla(self):
         """Test rendering a request for normal, vanilla bridges."""
+        self.useBenignBridges()
+
         request = DummyRequest([self.pagename])
         request.method = b'GET'
         request.getClientIP = lambda: '1.1.1.1'
@@ -577,6 +671,8 @@ class WebResourceBridgesTests(unittest.TestCase):
         """The client's IP address should be obtainable from the
         'X-Forwarded-For' header in the request.
         """
+        self.useBenignBridges()
+
         self.bridgesResource.useForwardedHeader = True
         request = DummyRequest([self.pagename])
         request.method = b'GET'
@@ -594,6 +690,8 @@ class WebResourceBridgesTests(unittest.TestCase):
 
     def test_render_GET_RTLlang(self):
         """Test rendering a request for plain bridges in Arabic."""
+        self.useBenignBridges()
+
         request = DummyRequest([b"bridges?transport=obfs3"])
         request.method = b'GET'
         request.getClientIP = lambda: '3.3.3.3'
@@ -614,6 +712,8 @@ class WebResourceBridgesTests(unittest.TestCase):
 
     def test_render_GET_RTLlang_obfs3(self):
         """Test rendering a request for obfs3 bridges in Farsi."""
+        self.useBenignBridges()
+
         request = DummyRequest([b"bridges?transport=obfs3"])
         request.method = b'GET'
         request.getClientIP = lambda: '3.3.3.3'
@@ -643,11 +743,12 @@ class WebResourceBridgesTests(unittest.TestCase):
             self.assertGreater(int(port), 0)
             self.assertLessEqual(int(port), 65535)
 
-
     def test_renderAnswer_textplain(self):
         """If the request format specifies 'plain', we should return content
         with mimetype 'text/plain'.
         """
+        self.useBenignBridges()
+
         request = DummyRequest([self.pagename])
         request.args.update({'format': ['plain']})
         request.getClientIP = lambda: '4.4.4.4'
diff --git a/lib/bridgedb/util.py b/lib/bridgedb/util.py
index 580f31f..7f906d7 100644
--- a/lib/bridgedb/util.py
+++ b/lib/bridgedb/util.py
@@ -180,26 +180,6 @@ def levenshteinDistance(s1, s2, len1=None, len2=None,
     memo[key] = distance
     return distance
 
-htmlify_string_map = {
-    '<': '<',
-    '>': '>',
-    '&': '&',
-    '"': '"',
-    "'": ''',
-    '\n': '<br/>'
-    }
-def htmlify_string(s):
-    """Encode HTML special characters, and newlines, in s.
-
-    >>> htmlify_string('<script>alert("badthink");</script>')
-    '<script>alert("badthink");</script>'
-    >>> htmlify_string('bridge 1\nbridge 2')
-    'bridge 1<br/>bridge 2'
-
-    :param str s: The string to encode.
-    """
-    return ''.join(map((lambda ch: htmlify_string_map.get(ch, ch)), s))
-
 def isascii(s):
     """Return True if there are no non-ASCII characters in s, False otherwise.
 
@@ -235,6 +215,36 @@ def isascii_noncontrol(s):
     """
     return all(map((lambda ch: 32 <= ord(ch) < 127), s))
 
+def replaceControlChars(text, replacement=None, encoding="utf-8"):
+    """Remove ASCII control characters [0-31, 92, 127].
+
+    >>> replaceControlChars('foo\n bar\\ baz\r \t\0quux\n')
+    'foo bar baz quux'
+    >>> replaceControlChars("\bI wonder if I'm outside the quotes now")
+    "I wonder if I'm outside the quotes now"
+
+    :param str text: Some text to remove ASCII control characters from.
+    :param int replacement: If given, the **replacement** should be an integer
+        representing the decimal representation of the byte to replace
+        occurences of ASCII control characters with. For example, if they
+        should be replaced with the character ``'a'``, then ``97`` should be
+        used as the **replacement**, because ``ord('a') == 97``.
+    :param str encoding: The encoding of the **text**.
+    :rtype: str
+    :returns: The sanitized **text**.
+    """
+    escaped = bytearray()
+
+    for byte in bytearray(text, encoding):
+        if byte in range(0, 32) + [92, 127]:
+            if replacement:
+                byte = replacement
+            else:
+                continue
+        escaped += bytearray([byte])
+
+    return str(escaped)
+
 
 class JustifiedLogFormatter(logging.Formatter):
     """A logging formatter which pretty prints thread and calling function



More information about the tor-commits mailing list