[tor-commits] [ooni-probe/master] Add proper documentation to HTTP Test template

art at torproject.org art at torproject.org
Sun Nov 25 14:47:21 UTC 2012


commit e496f0d1b3d395fec039574fd212675885b9e017
Author: Arturo Filastò <art at fuffa.org>
Date:   Sun Nov 25 08:20:57 2012 +0100

    Add proper documentation to HTTP Test template
    * Do some minor refactoring
---
 ooni/templates/httpt.py |  177 +++++++++++++++++++++++++++++------------------
 1 files changed, 110 insertions(+), 67 deletions(-)

diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py
index e166263..e36c049 100644
--- a/ooni/templates/httpt.py
+++ b/ooni/templates/httpt.py
@@ -24,6 +24,9 @@ from ooni.utils.net import BodyReceiver, StringProducer, userAgents
 
 from ooni.utils.txagentwithsocks import Agent, SOCKSError, TrueHeaders
 
+class InvalidSocksProxyOption(Exception):
+    pass
+
 class HTTPTest(NetTestCase):
     """
     A utility class for dealing with HTTP based testing. It provides methods to
@@ -51,11 +54,10 @@ class HTTPTest(NetTestCase):
     response = {}
 
     def _setUp(self):
-        log.debug("Setting up HTTPTest")
         try:
             import OpenSSL
         except:
-            log.err("Warning! pyOpenSSL is not installed. https websites will"
+            log.err("Warning! pyOpenSSL is not installed. https websites will "
                      "not work")
 
         self.control_agent = Agent(reactor, sockshost="127.0.0.1",
@@ -63,27 +65,35 @@ class HTTPTest(NetTestCase):
 
         sockshost, socksport = (None, None)
         if self.localOptions['socksproxy']:
-            self.report['socksproxy'] = self.localOptions['socksproxy']
-            sockshost, socksport = self.localOptions['socksproxy'].split(':')
+            try:
+                sockshost, socksport = self.localOptions['socksproxy'].split(':')
+                self.report['socksproxy'] = self.localOptions['socksproxy']
+            except ValueError:
+                raise InvalidSocksProxyOption
             socksport = int(socksport)
 
-        self.agent = Agent(reactor, sockshost=sockshost,
-                socksport=socksport)
+        self.agent = Agent(reactor, sockshost=sockshost, socksport=socksport)
+
+        self.report['agent'] = 'agent'
 
         if self.followRedirects:
             try:
                 from twisted.web.client import RedirectAgent
                 self.control_agent = RedirectAgent(self.control_agent)
                 self.agent = RedirectAgent(self.agent)
+                self.report['agent'] = 'redirect'
             except:
                 log.err("Warning! You are running an old version of twisted"\
                         "(<= 10.1). I will not be able to follow redirects."\
                         "This may make the testing less precise.")
-                self.report['errors'].append("Could not import RedirectAgent")
 
         self.processInputs()
         log.debug("Finished test setup")
 
+    def randomize_useragent(self, request):
+        user_agent = random.choice(userAgents)
+        request['headers']['User-Agent'] = [user_agent]
+
     def processInputs(self):
         pass
 
@@ -107,12 +117,14 @@ class HTTPTest(NetTestCase):
         else:
             self.processResponseBody(response_body)
 
-    def processResponseBody(self, data):
+    def processResponseBody(self, body):
         """
-        This should handle all the response body smushing for getting it ready
-        to be passed onto the control.
+        Overwrite this method if you wish to interact with the response body of
+        every request that is made.
+
+        Args:
 
-        @param data: The content of the body returned.
+            body (str): The body of the HTTP response
         """
         pass
 
@@ -120,7 +132,9 @@ class HTTPTest(NetTestCase):
         """
         This should take care of dealing with the returned HTTP headers.
 
-        @param headers: The content of the returned headers.
+        Args:
+
+            headers (dict): The returned header fields.
         """
         pass
 
@@ -128,32 +142,96 @@ class HTTPTest(NetTestCase):
         """
         Handle a redirection via a 3XX HTTP status code.
 
-        @param location: the url that is being redirected to.
+        Here you may place logic that evaluates the destination that you are
+        being redirected to. Matches against known censor redirects, etc.
+
+        Note: if self.followRedirects is set to True, then this method will
+            never be called.
+            XXX perhaps we may want to hook _handleResponse in RedirectAgent to
+                call processRedirect every time we get redirected.
+
+        Args:
+
+            location (str): the url that we are being redirected to.
         """
         pass
 
+    def _cbResponse(self, response, request,
+            headers_processor, body_processor):
+        """
+        This callback is fired once we have gotten a response for our request.
+        If we are using a RedirectAgent then this will fire once we have
+        reached the end of the redirect chain.
+
+        Args:
+
+            response (:twisted.web.iweb.IResponse:): a provider for getting our response
+
+            request (dict): the dict describing our request (XXX this should be dropped)
+
+            headers_processor (func): a function to be called with the
+                response headers as its argument. If given,
+                self.processResponseHeaders will not be called.
+
+
+            body_processor (func): a function to be called with the body of
+                the response as its argument. If given,
+                self.processResponseBody will not be called.
+
+        """
+        if not response:
+            log.err("Got no response for request %s" % request)
+            return
+        else:
+            log.debug("Got response %s" % response)
+
+        if str(response.code).startswith('3'):
+            self.processRedirect(response.headers.getRawHeaders('Location')[0])
+
+        # [!] We are passing to the headers_processor the headers dict and
+        # not the Headers() object
+        response_headers_dict = list(response.headers.getAllRawHeaders())
+        if headers_processor:
+            headers_processor(response_headers_dict)
+        else:
+            self.processResponseHeaders(response_headers_dict)
+
+        finished = defer.Deferred()
+        response.deliverBody(BodyReceiver(finished))
+        finished.addCallback(self._processResponseBody, request,
+                response, body_processor)
+
+        return finished
+
     def doRequest(self, url, method="GET",
                   headers={}, body=None, headers_processor=None,
                   body_processor=None, use_tor=False):
         """
-        Perform an HTTP request with the specified method.
+        Perform an HTTP request with the specified method and headers.
+
+        Args:
 
-        url: the full url path of the request
+            url (str): the full URL of the request. The scheme may be either
+                http, https, or httpo for http over Tor Hidden Service.
 
-        method: the HTTP Method to be used
+        Kwargs:
 
-        headers: the request headers to be sent as a dict
+            method (str): the HTTP method name to use for the request
 
-        body: the request body
+            headers (dict): the request headers to send
 
-        headers_processor: a function to be used for processing the HTTP header
-                          responses (defaults to self.processResponseHeaders).
-                          This function takes as argument the HTTP headers as a
-                          dict.
+            body (str): the request body
 
-        body_processory: a function to be used for processing the HTTP response
-                         body (defaults to self.processResponseBody).
-                         This function takes the response body as an argument.
+            headers_processor: a function to be used for processing the HTTP
+                response headers (defaults to self.processResponseHeaders).
+                This function takes as argument the HTTP headers as a dict.
+
+            body_processor: a function to be used for processing the HTTP
+                response body (defaults to self.processResponseBody). This
+                function takes the response body as an argument.
+
+            use_tor (bool): specify if the HTTP request should be done over Tor
+                or not.
 
         """
 
@@ -170,7 +248,7 @@ class HTTPTest(NetTestCase):
             log.debug("Using SOCKS proxy %s for request" % (self.localOptions['socksproxy']))
             url = 's'+url
 
-        log.debug("Performing request %s %s %s" % (url, method, headers))
+        log.msg("Performing request %s %s %s" % (url, method, headers))
 
         request = {}
         request['method'] = method
@@ -182,8 +260,6 @@ class HTTPTest(NetTestCase):
             log.debug("Randomizing user agent")
             self.randomize_useragent(request)
 
-        log.debug("Writing to report the request")
-
         if 'requests' not in self.report:
             self.report['requests'] = []
 
@@ -198,51 +274,18 @@ class HTTPTest(NetTestCase):
 
         def errback(failure):
             failure.trap(ConnectionRefusedError, SOCKSError)
-            if type(failure.value) is ConnectionRefusedError:
+            if isinstance(failure.value, ConnectionRefusedError):
                 log.err("Connection refused. The backend may be down")
-            else:
-                 log.err("Sock error. The SOCK proxy may be down")
-            self.report["failure"] = str(failure.value)
+                self.report['failure'] = 'connection_refused_error'
 
-        def finished(data):
-            return
+            elif isinstance(failure.value, SOCKSError):
+                log.err("Sock error. The SOCK proxy may be down")
+                self.report['failure'] = 'sockserror'
 
         d = agent.request(request['method'], request['url'], headers,
                 body_producer)
 
-        d.addErrback(errback)
         d.addCallback(self._cbResponse, request, headers_processor, body_processor)
-        d.addCallback(finished)
+        d.addErrback(errback)
         return d
 
-    def _cbResponse(self, response, request, headers_processor,
-            body_processor):
-
-        if not response:
-            log.err("Got no response")
-            return
-        else:
-            log.debug("Got response %s" % response)
-
-        if str(response.code).startswith('3'):
-            self.processRedirect(response.headers.getRawHeaders('Location')[0])
-
-        # [!] We are passing to the headers_processor the headers dict and
-        # not the Headers() object
-        response_headers_dict = list(response.headers.getAllRawHeaders())
-        if headers_processor:
-            headers_processor(response_headers_dict)
-        else:
-            self.processResponseHeaders(response_headers_dict)
-
-        finished = defer.Deferred()
-        response.deliverBody(BodyReceiver(finished))
-        finished.addCallback(self._processResponseBody, request,
-                response, body_processor)
-
-        return finished
-
-    def randomize_useragent(self, request):
-        user_agent = random.choice(userAgents)
-        request['headers']['User-Agent'] = [user_agent]
-





More information about the tor-commits mailing list