[tor-commits] [ooni-probe/master] Improve HTTP Host test to work with the HTTP Requests test backend

art at torproject.org art at torproject.org
Sat Nov 10 20:58:49 UTC 2012


commit b142d1fa8aeda530e0b155e5fc5aae377f523f76
Author: Arturo Filastò <art at fuffa.org>
Date:   Sat Nov 10 21:11:54 2012 +0100

    Improve HTTP Host test to work with the HTTP Requests test backend
---
 nettests/core/http_host.py |   63 +++++++++++++++++++++++++++++++++++++++-----
 ooni/templates/httpt.py    |   15 +++++++---
 2 files changed, 66 insertions(+), 12 deletions(-)

diff --git a/nettests/core/http_host.py b/nettests/core/http_host.py
index b87594d..0e73f82 100644
--- a/nettests/core/http_host.py
+++ b/nettests/core/http_host.py
@@ -9,14 +9,17 @@
 # :authors: Arturo Filastò
 # :licence: see LICENSE
 
+import json
 from twisted.python import usage
+
+from ooni.utils import log
 from ooni.templates import httpt
 
 class UsageOptions(usage.Options):
-    optParameters = [
-                     ['backend', 'b', 'http://ooni.nu/test/', 'Test backend to use']
-                    ]
-
+    optParameters = [['backend', 'b', 'http://127.0.0.1:1234', 
+                        'URL of the test backend to use'],
+                     ['content', 'c', None, 
+                        'The file to read from containing the content of a block page']]
 
 class HTTPHost(httpt.HTTPTest):
     """
@@ -25,11 +28,13 @@ class HTTPHost(httpt.HTTPTest):
     """
     name = "HTTP Host"
     author = "Arturo Filastò"
-    version = 0.1
+    version = "0.2"
 
     usageOptions = UsageOptions
 
-    inputFile = ['urls', 'f', None, 'Urls file']
+    inputFile = ['file', 'f', None, 'List of hostnames to test for censorship']
+
+    requiredOptions = ['backend']
 
     def test_send_host_header(self):
         """
@@ -42,14 +47,58 @@ class HTTPHost(httpt.HTTPTest):
         headers["Host"] = [self.input]
         return self.doRequest(self.localOptions['backend'], headers=headers)
 
+    def check_for_censorship(self, body):
+        """
+        If we have specified what a censorship page looks like, we check
+        whether the page we are looking at matches it.
+
+        XXX this is not tested, though it is basically what was used to detect
+        censorship in the Palestine case.
+        """
+        if self.localOptions['content']:
+            self.report['censored'] = True
+
+            censorship_page = open(self.localOptions['content'])
+            response_page = iter(body.split("\n"))
+
+            for censorship_line in censorship_page.xreadlines():
+                response_line = response_page.next()
+                if response_line != censorship_line:
+                    self.report['censored'] = False
+                    break
+
+            censorship_page.close()
+
     def processResponseBody(self, body):
         """
         XXX this is to be filled in with either a domclass based classified or
         with a rule that will allow to detect that the body of the result is
         that of a censored site.
         """
-        if 'not censored' in body:
+        # If the body does not start with "{" it cannot be the JSON object we
+        # expect from the backend, so we know for sure that something is wrong
+        if not body.startswith("{"):
+            self.report['transparent_http_proxy'] = True
+            self.check_for_censorship(body)
+            return
+        try:
+            content = json.loads(body)
+        except:
+            log.debug("The json does not parse, this is not what we expected")
+            self.report['trans_http_proxy'] = True
+            self.check_for_censorship(body)
+            return
+
+        # We determine whether a transparent HTTP proxy is present by
+        # checking whether the response contains the JSON that is to be
+        # returned by an HTTP Request Test Helper
+        if 'request_method' in content and \
+                'request_uri' in content and \
+                'request_headers' in content:
+            log.debug("Found the keys I expected in %s" % content)
             self.report['trans_http_proxy'] = False
         else:
+            log.debug("Did not find the keys I expected in %s" % content)
             self.report['trans_http_proxy'] = True
 
+        self.check_for_censorship(body)
diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py
index 96b7cc8..4f3be3d 100644
--- a/ooni/templates/httpt.py
+++ b/ooni/templates/httpt.py
@@ -14,6 +14,7 @@ from twisted.internet.ssl import ClientContextFactory
 
 from twisted.web.client import Agent
 from twisted.internet import reactor
+from twisted.internet.error import ConnectionRefusedError
 
 from twisted.web._newclient import Request
 from twisted.web.http_headers import Headers
@@ -29,9 +30,11 @@ class HTTPTest(NetTestCase):
     The main functions to look at are processResponseBody and
     processResponseHeader that are invoked once the headers have been received
     and once the request body has been received.
+
+    XXX all of this requires some refactoring.
     """
     name = "HTTP Test"
-    version = 0.1
+    version = "0.1.1"
 
     randomizeUA = True
     followRedirects = False
@@ -127,9 +130,10 @@ class HTTPTest(NetTestCase):
 
         d = self.build_request(url, method, headers, body)
 
-        def errback(data):
-            log.err("Error in test %s" % data)
-            self.report["error"] = data
+        def errback(failure):
+            failure.trap(ConnectionRefusedError)
+            log.err("Connection refused. The backend may be down")
+            self.report["failure"] = str(failure.value)
 
         def finished(data):
             return
@@ -186,7 +190,8 @@ class HTTPTest(NetTestCase):
 
         finished = defer.Deferred()
         response.deliverBody(BodyReceiver(finished))
-        finished.addCallback(self._processResponseBody, body_processor)
+        finished.addCallback(self._processResponseBody, 
+                body_processor)
 
         return finished
 





More information about the tor-commits mailing list