commit 8a745093ecd27240053b7e251de46eb4c303df6f Author: Arturo Filastò arturo@filasto.net Date: Mon Apr 18 18:45:29 2016 +0200
Also record all the redirects
* Add monkey patch for bug in twisted RedirectAgent: https://twistedmatrix.com/trac/ticket/8265 --- ooni/templates/httpt.py | 22 ++++++++++++++-------- ooni/utils/trueheaders.py | 42 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py index 2b280f0..2a17f5b 100644 --- a/ooni/templates/httpt.py +++ b/ooni/templates/httpt.py @@ -7,6 +7,7 @@ from twisted.web.client import readBody, PartialDownloadError from twisted.internet import reactor from twisted.internet.endpoints import TCP4ClientEndpoint from ooni.utils.trueheaders import TrueHeadersAgent, TrueHeadersSOCKS5Agent +from ooni.utils.trueheaders import FixedRedirectAgent
from ooni.nettest import NetTestCase from ooni.utils import log, base64Dict @@ -37,8 +38,15 @@ class StreamListener(StreamListenerMixin): log.err("Tor Exit ip detection failed")
+def _representHeaders(headers): + represented_headers = {} + for name, value in headers.getAllRawHeaders(): + represented_headers[name] = value[0] + return represented_headers
def _representBody(body): + if not body: + return body # XXX perhaps add support for decoding gzip in the future. body = body.replace('\0', '') decoded = False @@ -115,9 +123,8 @@ class HTTPTest(NetTestCase):
if self.followRedirects: try: - from twisted.web.client import RedirectAgent - self.control_agent = RedirectAgent(self.control_agent) - self.agent = RedirectAgent(self.agent) + self.control_agent = FixedRedirectAgent(self.control_agent) + self.agent = FixedRedirectAgent(self.agent) self.report['agent'] = 'redirect' except: log.err("Warning! You are running an old version of twisted"\ @@ -147,11 +154,10 @@ class HTTPTest(NetTestCase):
failure (instance): An instance of :class:twisted.internet.failure.Failure """ - def _representHeaders(headers): - represented_headers = {} - for name, value in headers.getAllRawHeaders(): - represented_headers[name] = value[0] - return represented_headers + if response and response.previousResponse: + self.addToReport(request, response.previousResponse, + response_body=None, + failure_string=None)
log.debug("Adding %s to report" % request) request_headers = TrueHeaders(request['headers']) diff --git a/ooni/utils/trueheaders.py b/ooni/utils/trueheaders.py index d5aa206..a5b2fe3 100644 --- a/ooni/utils/trueheaders.py +++ b/ooni/utils/trueheaders.py @@ -7,9 +7,14 @@ import itertools from copy import copy
-from twisted.web import client, _newclient, http_headers +from twisted.python.failure import Failure + +from twisted.web import client, _newclient, http_headers, error + from twisted.web._newclient import RequestNotSent, RequestGenerationFailed from twisted.web._newclient import TransportProxyProducer, STATUS +from twisted.web._newclient import ResponseFailed + from twisted.internet import reactor from twisted.internet.defer import Deferred, fail, maybeDeferred, failure
@@ -179,3 +184,38 @@ class TrueHeadersSOCKS5Agent(SOCKS5Agent): self._wrappedAgent._pool = pool else: self._pool = pool + +class FixedRedirectAgent(client.BrowserLikeRedirectAgent): + """ + This is a redirect agent with this patch manually applied: + https://twistedmatrix.com/trac/ticket/8265 + """ + def _handleRedirect(self, response, method, uri, headers, redirectCount): + """ + Handle a redirect response, checking the number of redirects already + followed, and extracting the location header fields. + + This is patched to fix an infinite redirect loop bug. + """ + if redirectCount >= self._redirectLimit: + err = error.InfiniteRedirection( + response.code, + b'Infinite redirection detected', + location=uri) + raise ResponseFailed([Failure(err)], response) + locationHeaders = response.headers.getRawHeaders(b'location', []) + if not locationHeaders: + err = error.RedirectWithNoLocation( + response.code, b'No location header field', uri) + raise ResponseFailed([Failure(err)], response) + location = self._resolveLocation(response.request.absoluteURI, locationHeaders[0]) + deferred = self._agent.request(method, location, headers) + + def _chainResponse(newResponse): + newResponse.setPreviousResponse(response) + return newResponse + + deferred.addCallback(_chainResponse) + # This is the fix to properly handle redirects + return deferred.addCallback( + self._handleResponse, method, uri, headers, redirectCount + 1)