commit e496f0d1b3d395fec039574fd212675885b9e017
Author: Arturo Filastò <art@fuffa.org>
Date:   Sun Nov 25 08:20:57 2012 +0100

    Add proper documentation to HTTP Test template

    * Do some minor refactoring
---
 ooni/templates/httpt.py |  177 +++++++++++++++++++++++++++++------------------
 1 files changed, 110 insertions(+), 67 deletions(-)

diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py
index e166263..e36c049 100644
--- a/ooni/templates/httpt.py
+++ b/ooni/templates/httpt.py
@@ -24,6 +24,9 @@ from ooni.utils.net import BodyReceiver, StringProducer, userAgents
from ooni.utils.txagentwithsocks import Agent, SOCKSError, TrueHeaders
+class InvalidSocksProxyOption(Exception): + pass + class HTTPTest(NetTestCase): """ A utility class for dealing with HTTP based testing. It provides methods to @@ -51,11 +54,10 @@ class HTTPTest(NetTestCase): response = {}
def _setUp(self): - log.debug("Setting up HTTPTest") try: import OpenSSL except: - log.err("Warning! pyOpenSSL is not installed. https websites will" + log.err("Warning! pyOpenSSL is not installed. https websites will " "not work")
self.control_agent = Agent(reactor, sockshost="127.0.0.1", @@ -63,27 +65,35 @@ class HTTPTest(NetTestCase):
sockshost, socksport = (None, None) if self.localOptions['socksproxy']: - self.report['socksproxy'] = self.localOptions['socksproxy'] - sockshost, socksport = self.localOptions['socksproxy'].split(':') + try: + sockshost, socksport = self.localOptions['socksproxy'].split(':') + self.report['socksproxy'] = self.localOptions['socksproxy'] + except ValueError: + raise InvalidSocksProxyOption socksport = int(socksport)
- self.agent = Agent(reactor, sockshost=sockshost, - socksport=socksport) + self.agent = Agent(reactor, sockshost=sockshost, socksport=socksport) + + self.report['agent'] = 'agent'
if self.followRedirects: try: from twisted.web.client import RedirectAgent self.control_agent = RedirectAgent(self.control_agent) self.agent = RedirectAgent(self.agent) + self.report['agent'] = 'redirect' except: log.err("Warning! You are running an old version of twisted"\ "(<= 10.1). I will not be able to follow redirects."\ "This may make the testing less precise.") - self.report['errors'].append("Could not import RedirectAgent")
self.processInputs() log.debug("Finished test setup")
+ def randomize_useragent(self, request): + user_agent = random.choice(userAgents) + request['headers']['User-Agent'] = [user_agent] + def processInputs(self): pass
@@ -107,12 +117,14 @@ class HTTPTest(NetTestCase): else: self.processResponseBody(response_body)
- def processResponseBody(self, data): + def processResponseBody(self, body): """ - This should handle all the response body smushing for getting it ready - to be passed onto the control. + Overwrite this method if you wish to interact with the response body of + every request that is made. + + Args:
- @param data: The content of the body returned. + body (str): The body of the HTTP response """ pass
@@ -120,7 +132,9 @@ class HTTPTest(NetTestCase): """ This should take care of dealing with the returned HTTP headers.
- @param headers: The content of the returned headers. + Args: + + headers (dict): The returned header fields. """ pass
@@ -128,32 +142,96 @@ class HTTPTest(NetTestCase): """ Handle a redirection via a 3XX HTTP status code.
- @param location: the url that is being redirected to. + Here you may place logic that evaluates the destination that you are + being redirected to. Matches against known censor redirects, etc. + + Note: if self.followRedirects is set to True, then this method will + never be called. + XXX perhaps we may want to hook _handleResponse in RedirectAgent to + call processRedirect every time we get redirected. + + Args: + + location (str): the url that we are being redirected to. """ pass
+ def _cbResponse(self, response, request, + headers_processor, body_processor): + """ + This callback is fired once we have gotten a response for our request. + If we are using a RedirectAgent then this will fire once we have + reached the end of the redirect chain. + + Args: + + response (:twisted.web.iweb.IResponse:): a provider for getting our response + + request (dict): the dict containing our response (XXX this should be dropped) + + header_processor (func): a function to be called with argument a + dict containing the response headers. This will lead + self.headerProcessor to not be called. + + + body_processor (func): a function to be called with as argument the + body of the response. This will lead self.bodyProcessor to not + be called. + + """ + if not response: + log.err("Got no response for request %s" % request) + return + else: + log.debug("Got response %s" % response) + + if str(response.code).startswith('3'): + self.processRedirect(response.headers.getRawHeaders('Location')[0]) + + # [!] We are passing to the headers_processor the headers dict and + # not the Headers() object + response_headers_dict = list(response.headers.getAllRawHeaders()) + if headers_processor: + headers_processor(response_headers_dict) + else: + self.processResponseHeaders(response_headers_dict) + + finished = defer.Deferred() + response.deliverBody(BodyReceiver(finished)) + finished.addCallback(self._processResponseBody, request, + response, body_processor) + + return finished + def doRequest(self, url, method="GET", headers={}, body=None, headers_processor=None, body_processor=None, use_tor=False): """ - Perform an HTTP request with the specified method. + Perform an HTTP request with the specified method and headers. + + Args:
- url: the full url path of the request + url (str): the full URL of the request. The scheme may be either + http, https, or httpo for http over Tor Hidden Service.
- method: the HTTP Method to be used + Kwargs:
- headers: the request headers to be sent as a dict + method (str): the HTTP method name to use for the request
- body: the request body + headers (dict): the request headers to send
- headers_processor: a function to be used for processing the HTTP header - responses (defaults to self.processResponseHeaders). - This function takes as argument the HTTP headers as a - dict. + body (str): the request body
- body_processory: a function to be used for processing the HTTP response - body (defaults to self.processResponseBody). - This function takes the response body as an argument. + headers_processor : a function to be used for processing the HTTP + header responses (defaults to self.processResponseHeaders). + This function takes as argument the HTTP headers as a dict. + + body_processory: a function to be used for processing the HTTP + response body (defaults to self.processResponseBody). This + function takes the response body as an argument. + + use_tor (bool): specify if the HTTP request should be done over Tor + or not.
"""
@@ -170,7 +248,7 @@ class HTTPTest(NetTestCase): log.debug("Using SOCKS proxy %s for request" % (self.localOptions['socksproxy'])) url = 's'+url
- log.debug("Performing request %s %s %s" % (url, method, headers)) + log.msg("Performing request %s %s %s" % (url, method, headers))
request = {} request['method'] = method @@ -182,8 +260,6 @@ class HTTPTest(NetTestCase): log.debug("Randomizing user agent") self.randomize_useragent(request)
- log.debug("Writing to report the request") - if 'requests' not in self.report: self.report['requests'] = []
@@ -198,51 +274,18 @@ class HTTPTest(NetTestCase):
def errback(failure): failure.trap(ConnectionRefusedError, SOCKSError) - if type(failure.value) is ConnectionRefusedError: + if isinstance(failure.value, ConnectionRefusedError): log.err("Connection refused. The backend may be down") - else: - log.err("Sock error. The SOCK proxy may be down") - self.report["failure"] = str(failure.value) + self.report['failure'] = 'connection_refused_error'
- def finished(data): - return + elif isinstance(failure.value, SOCKSError): + log.err("Sock error. The SOCK proxy may be down") + self.report['failure'] = 'sockserror'
d = agent.request(request['method'], request['url'], headers, body_producer)
- d.addErrback(errback) d.addCallback(self._cbResponse, request, headers_processor, body_processor) - d.addCallback(finished) + d.addErrback(errback) return d
- def _cbResponse(self, response, request, headers_processor, - body_processor): - - if not response: - log.err("Got no response") - return - else: - log.debug("Got response %s" % response) - - if str(response.code).startswith('3'): - self.processRedirect(response.headers.getRawHeaders('Location')[0]) - - # [!] We are passing to the headers_processor the headers dict and - # not the Headers() object - response_headers_dict = list(response.headers.getAllRawHeaders()) - if headers_processor: - headers_processor(response_headers_dict) - else: - self.processResponseHeaders(response_headers_dict) - - finished = defer.Deferred() - response.deliverBody(BodyReceiver(finished)) - finished.addCallback(self._processResponseBody, request, - response, body_processor) - - return finished - - def randomize_useragent(self, request): - user_agent = random.choice(userAgents) - request['headers']['User-Agent'] = [user_agent] -
tor-commits@lists.torproject.org