commit 0b62526335ed532fee5b2b1b3cba4ea04424333a Author: Isis Lovecruft isis@torproject.org Date: Thu Jun 4 02:58:01 2015 +0000
Minor cleanup for htmlify_string() from commits faf48983 and ccb3b8d1.
First, we don't need the bridgedb.util.htmlify_string() function, since Mako provides syntax for "piping" content about to be written to the page through filters [0] and it even provides a HTML-escaping function (by default, it uses markupsafe.escape).
Second, we do actually need to remove control characters (at the very least, but since technically we're parsing unicode… removing all escape/control characters isn't actually feasible) from bridge lines.
* REMOVE bridgedb.util.htmlify_string(). * CHANGE lib/bridgedb/templates/bridges.html to use Mako's builtin text filtering syntax and functions for replacing HTML token characters with their corresponding HTML special characters. * ADD bridgedb.util.replaceControlChars(). * ADD unittests to ensure that everything is escaped properly.
[0]: http://docs.makotemplates.org/en/latest/filtering.html --- lib/bridgedb/HTTPServer.py | 12 ++-- lib/bridgedb/templates/bridges.html | 8 ++- lib/bridgedb/test/test_HTTPServer.py | 109 ++++++++++++++++++++++++++++++++-- lib/bridgedb/util.py | 50 +++++++++------- 4 files changed, 145 insertions(+), 34 deletions(-)
diff --git a/lib/bridgedb/HTTPServer.py b/lib/bridgedb/HTTPServer.py index c969908..beff4bd 100644 --- a/lib/bridgedb/HTTPServer.py +++ b/lib/bridgedb/HTTPServer.py @@ -56,7 +56,7 @@ from bridgedb.qrcodes import generateQR from bridgedb.safelog import logSafely from bridgedb.schedule import Unscheduled from bridgedb.schedule import ScheduledInterval -from bridgedb.util import htmlify_string +from bridgedb.util import replaceControlChars
TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), 'templates') @@ -739,12 +739,11 @@ class WebResourceBridges(resource.Resource): self.nBridgesToGive, countryCode, bridgeFilterRules=rules) - bridgeLines = "".join("%s\n" % b.getConfigLine( + bridgeLines = [replaceControlChars(b.getConfigLine( includeFingerprint=self.includeFingerprints, addressClass=addressClass, transport=transport, - request=bridgedb.Dist.uniformMap(ip) - ) for b in bridges) + request=bridgedb.Dist.uniformMap(ip))) for b in bridges]
answer = self.renderAnswer(request, bridgeLines, format) return answer @@ -772,7 +771,7 @@ class WebResourceBridges(resource.Resource): if format == 'plain': request.setHeader("Content-Type", "text/plain") try: - rendered = bytes(bridgeLines) + rendered = bytes('\n'.join(bridgeLines)) except Exception as err: rendered = replaceErrorPage(err) else: @@ -790,8 +789,7 @@ class WebResourceBridges(resource.Resource): rtl=rtl, lang=langs[0], answer=bridgeLines, - qrcode=qrcode, - htmlify_string=htmlify_string) + qrcode=qrcode) except Exception as err: rendered = replaceErrorPage(err)
diff --git a/lib/bridgedb/templates/bridges.html b/lib/bridgedb/templates/bridges.html index 0ecf1d4..b5ac544 100644 --- a/lib/bridgedb/templates/bridges.html +++ b/lib/bridgedb/templates/bridges.html @@ -1,8 +1,7 @@ ## -*- coding: utf-8 -*-
<%inherit file="base.html"/> -<%page args="strings, rtl=False, lang='en', answer=0, qrcode=0, - htmlify_string=None, **kwargs"/> +<%page args="strings, rtl=False, lang='en', answer=0, qrcode=0, **kwargs"/>
</div> </div> @@ -67,7 +66,10 @@ <div class="row" id="bridgesrow1"> <div class="col col-lg-12"> <div class="bridge-lines" id="bridgelines"> -${htmlify_string(answer)} +## See http://docs.makotemplates.org/en/latest/filtering.html +% for bridgeline in answer: +${bridgeline | h,trim}<br /> +% endfor </div> </div> </div> diff --git a/lib/bridgedb/test/test_HTTPServer.py b/lib/bridgedb/test/test_HTTPServer.py index 68616bf..d597c55 100644 --- a/lib/bridgedb/test/test_HTTPServer.py +++ b/lib/bridgedb/test/test_HTTPServer.py @@ -422,6 +422,8 @@ class DummyBridge(object): """A mock :class:`bridgedb.Bridges.Bridge` which only supports a mocked ``getConfigLine`` method."""
+ ptArgs = {} + def _randORPort(self): return random.randint(9001, 9999) def _randPTPort(self): return random.randint(6001, 6666) def _returnFour(self): return random.randint(2**24, 2**32-1) @@ -457,16 +459,32 @@ class DummyBridge(object): line.append(str(transport)) line.append("%s:%s" % (self.ip, self.orport)) if includeFingerprint is True: line.append(self.fingerprint) + if self.ptArgs: + line.append(','.join(['='.join(x) for x in self.ptArgs.items()])) bridgeLine = " ".join([item for item in line]) #print "Created config line: %r" % bridgeLine return bridgeLine
+class DummyMaliciousBridge(DummyBridge): + """A mock :class:`bridgedb.Bridges.Bridge` which only supports a mocked + ``getConfigLine`` method and which maliciously insert an additional fake + bridgeline and some javascript into its PT arguments. + """ + ptArgs = { + "eww": "\rBridge 1.2.3.4:1234", + "bad": "\nBridge 6.6.6.6:6666 0123456789abcdef0123456789abcdef01234567", + "evil": "<script>alert('fuuuu');</script>", + } + + class DummyIPBasedDistributor(object): """A mocked :class:`bridgedb.Dist.IPBasedDistributor` which is used to test :class:`bridgedb.HTTPServer.WebResourceBridges. """
+ _bridge_class = DummyBridge + def _dumbAreaMapper(ip): return ip
def __init__(self, areaMapper=None, nClusters=None, key=None, @@ -486,7 +504,7 @@ class DummyIPBasedDistributor(object): """Needed because it's called in :meth:`WebResourceBridges.getBridgesForIP`. """ - return [DummyBridge() for _ in xrange(N)] + return [self._bridge_class() for _ in xrange(N)]
class DummyRequest(requesthelper.DummyRequest): @@ -520,11 +538,19 @@ class WebResourceBridgesTests(unittest.TestCase): self.dist = DummyIPBasedDistributor() self.sched = ScheduledInterval(1, 'hour') self.nBridgesPerRequest = 2 + + def useBenignBridges(self): + self.dist._bridge_class = DummyBridge self.bridgesResource = HTTPServer.WebResourceBridges( - self.dist, self.sched, N=2, - #useForwardedHeader=True, + self.dist, self.sched, N=self.nBridgesPerRequest, includeFingerprints=True) + self.root.putChild(self.pagename, self.bridgesResource)
+ def useMaliciousBridges(self): + self.dist._bridge_class = DummyMaliciousBridge + self.bridgesResource = HTTPServer.WebResourceBridges( + self.dist, self.sched, N=self.nBridgesPerRequest, + includeFingerprints=True) self.root.putChild(self.pagename, self.bridgesResource)
def parseBridgesFromHTMLPage(self, page): @@ -550,8 +576,76 @@ class WebResourceBridgesTests(unittest.TestCase):
return bridges
+ def test_render_GET_malicious_newlines(self): + """Test rendering a request when the some of the bridges returned have + malicious (HTML, Javascript, etc., in their) PT arguments. + """ + self.useMaliciousBridges() + + request = DummyRequest([self.pagename]) + request.method = b'GET' + request.getClientIP = lambda: '1.1.1.1' + + page = self.bridgesResource.render(request) + self.assertTrue( + 'bad=Bridge 6.6.6.6:6666 0123456789abcdef0123456789abcdef01234567' in str(page), + "Newlines in bridge lines should be removed.") + + def test_render_GET_malicious_returnchar(self): + """Test rendering a request when the some of the bridges returned have + malicious (HTML, Javascript, etc., in their) PT arguments. + """ + self.useMaliciousBridges() + + request = DummyRequest([self.pagename]) + request.method = b'GET' + request.getClientIP = lambda: '1.1.1.1' + + page = self.bridgesResource.render(request) + self.assertTrue( + 'eww=Bridge 1.2.3.4:1234' in str(page), + "Return characters in bridge lines should be removed.") + + def test_render_GET_malicious_javascript(self): + """Test rendering a request when the some of the bridges returned have + malicious (HTML, Javascript, etc., in their) PT arguments. + """ + self.useMaliciousBridges() + + request = DummyRequest([self.pagename]) + request.method = b'GET' + request.getClientIP = lambda: '1.1.1.1' + + page = self.bridgesResource.render(request) + self.assertTrue( + "evil=<script>alert('fuuuu');</script>" in str(page), + ("The characters &, <, >, ', and " in bridge lines should be " + "replaced with their corresponding HTML special characters.")) + + def test_renderAnswer_GET_textplain_malicious(self): + """If the request format specifies 'plain', we should return content + with mimetype 'text/plain' and ASCII control characters replaced. + """ + self.useMaliciousBridges() + + request = DummyRequest([self.pagename]) + request.args.update({'format': ['plain']}) + request.getClientIP = lambda: '4.4.4.4' + request.method = b'GET' + + page = self.bridgesResource.render(request) + self.assertTrue("html" not in str(page)) + self.assertTrue( + 'eww=Bridge 1.2.3.4:1234' in str(page), + "Return characters in bridge lines should be removed.") + self.assertTrue( + 'bad=Bridge 6.6.6.6:6666' in str(page), + "Newlines in bridge lines should be removed.") + def test_render_GET_vanilla(self): """Test rendering a request for normal, vanilla bridges.""" + self.useBenignBridges() + request = DummyRequest([self.pagename]) request.method = b'GET' request.getClientIP = lambda: '1.1.1.1' @@ -577,6 +671,8 @@ class WebResourceBridgesTests(unittest.TestCase): """The client's IP address should be obtainable from the 'X-Forwarded-For' header in the request. """ + self.useBenignBridges() + self.bridgesResource.useForwardedHeader = True request = DummyRequest([self.pagename]) request.method = b'GET' @@ -594,6 +690,8 @@ class WebResourceBridgesTests(unittest.TestCase):
def test_render_GET_RTLlang(self): """Test rendering a request for plain bridges in Arabic.""" + self.useBenignBridges() + request = DummyRequest([b"bridges?transport=obfs3"]) request.method = b'GET' request.getClientIP = lambda: '3.3.3.3' @@ -614,6 +712,8 @@ class WebResourceBridgesTests(unittest.TestCase):
def test_render_GET_RTLlang_obfs3(self): """Test rendering a request for obfs3 bridges in Farsi.""" + self.useBenignBridges() + request = DummyRequest([b"bridges?transport=obfs3"]) request.method = b'GET' request.getClientIP = lambda: '3.3.3.3' @@ -643,11 +743,12 @@ class WebResourceBridgesTests(unittest.TestCase): self.assertGreater(int(port), 0) self.assertLessEqual(int(port), 65535)
- def test_renderAnswer_textplain(self): """If the request format specifies 'plain', we should return content with mimetype 'text/plain'. """ + self.useBenignBridges() + request = DummyRequest([self.pagename]) request.args.update({'format': ['plain']}) request.getClientIP = lambda: '4.4.4.4' diff --git a/lib/bridgedb/util.py b/lib/bridgedb/util.py index 580f31f..7f906d7 100644 --- a/lib/bridgedb/util.py +++ b/lib/bridgedb/util.py @@ -180,26 +180,6 @@ def levenshteinDistance(s1, s2, len1=None, len2=None, memo[key] = distance return distance
-htmlify_string_map = { - '<': '<', - '>': '>', - '&': '&', - '"': '"', - "'": ''', - '\n': '<br/>' - } -def htmlify_string(s): - """Encode HTML special characters, and newlines, in s. - - >>> htmlify_string('<script>alert("badthink");</script>') - '<script>alert("badthink");</script>' - >>> htmlify_string('bridge 1\nbridge 2') - 'bridge 1<br/>bridge 2' - - :param str s: The string to encode. - """ - return ''.join(map((lambda ch: htmlify_string_map.get(ch, ch)), s)) - def isascii(s): """Return True if there are no non-ASCII characters in s, False otherwise.
@@ -235,6 +215,36 @@ def isascii_noncontrol(s): """ return all(map((lambda ch: 32 <= ord(ch) < 127), s))
+def replaceControlChars(text, replacement=None, encoding="utf-8"): + """Remove ASCII control characters [0-31, 92, 127]. + + >>> replaceControlChars('foo\n bar\ baz\r \t\0quux\n') + 'foo bar baz quux' + >>> replaceControlChars("\bI wonder if I'm outside the quotes now") + "I wonder if I'm outside the quotes now" + + :param str text: Some text to remove ASCII control characters from. + :param int replacement: If given, the **replacement** should be an integer + representing the decimal representation of the byte to replace + occurences of ASCII control characters with. For example, if they + should be replaced with the character ``'a'``, then ``97`` should be + used as the **replacement**, because ``ord('a') == 97``. + :param str encoding: The encoding of the **text**. + :rtype: str + :returns: The sanitized **text**. + """ + escaped = bytearray() + + for byte in bytearray(text, encoding): + if byte in range(0, 32) + [92, 127]: + if replacement: + byte = replacement + else: + continue + escaped += bytearray([byte]) + + return str(escaped) +
class JustifiedLogFormatter(logging.Formatter): """A logging formatter which pretty prints thread and calling function