[tor-commits] [ooni-probe/master] Test and document HTTP Requests test

art at torproject.org art at torproject.org
Thu Nov 29 14:42:49 UTC 2012


commit 4922b1ce9795b19a8e726217f45fc65ad17ea162
Author: Arturo Filastò <art at fuffa.org>
Date:   Thu Nov 29 13:35:27 2012 +0100

    Test and document HTTP Requests test
    * Add support for reporting the modification of response headers
    Note: This test used to be called HTTP Body Length, but got extended to support
    response headers.
---
 docs/source/tests/http_requests.rst   |  238 +++++++++++++++++++++++++++++++++
 nettests/blocking/http_body_length.py |   90 -------------
 nettests/blocking/http_requests.py    |  120 +++++++++++++++++
 3 files changed, 358 insertions(+), 90 deletions(-)

diff --git a/docs/source/tests/http_requests.rst b/docs/source/tests/http_requests.rst
new file mode 100644
index 0000000..cc98882
--- /dev/null
+++ b/docs/source/tests/http_requests.rst
@@ -0,0 +1,238 @@
+Details
+=======
+
+*Test Name*: HTTP Requests
+
+*Current version*: 0.1
+
+*NetTest*: HTTP Requests (https://gitweb.torproject.org/ooni-probe.git/blob/HEAD:/nettests/blocking/http_requests.py)
+
+*Test Helper*: None
+
+*Test Type*: Content Blocking
+
+*Requires Root*: No
+
+Description
+===========
+
+This test performs an HTTP GET request for the / resource over the test network
+and over Tor. It then compares the two responses to see whether the response
+bodies match, based on the proportion between the expected body length (the one
+over Tor, which acts as the control) and the one obtained over the test network.
+
+If the proportion between the two body lengths is below a certain tolerance
+factor (by default set to 0.8), then we say that they do not match.
+
+The reason for allowing this tolerance is that a lot of sites serve geolocalized
+content based on the location from which the request originated.
+
+How to run the test
+===================
+
+To test a single site run:
+
+`./bin/ooniprobe nettests/blocking/http_requests.py -u http://<test_site>/`
+
+To test a set of sites listed in an input file, run:
+
+`./bin/ooniprobe nettests/blocking/http_requests.py -f <input_file>`
+
+
+Sample report
+=============
+
+`./bin/ooniprobe nettests/blocking/http_requests.py -f example_inputs/url_lists_file.txt`
+
+::
+
+    ###########################################
+    # OONI Probe Report for HTTP Requests Test test
+    # Thu Nov 29 13:20:06 2012
+    ###########################################
+    ---
+    options:
+      collector: null
+      help: 0
+      logfile: null
+      pcapfile: null
+      reportfile: null
+      resume: 0
+      subargs: [-f, example_inputs/url_lists_file.txt]
+      test: nettests/blocking/http_requests.py
+    probe_asn: null
+    probe_cc: null
+    probe_ip: 127.0.0.1
+    software_name: ooniprobe
+    software_version: 0.0.7.1-alpha
+    start_time: 1354188006.0
+    test_name: HTTP Requests Test
+    test_version: '0.1'
+    ...
+    ---
+    input: http://ooni.nu/test
+    report:
+      agent: agent
+      body_length_match: false
+      body_proportion: 0.9732142857142857
+      factor: 0.8
+      requests:
+      - request:
+          body: null
+          headers:
+          - - User-Agent
+            - - &id001 [Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1), 'Internet
+                  Explorer 7, Windows Vista']
+          method: GET
+          url: http://ooni.nu/test
+        response:
+          body: "\n<html>\n    <head>\n        <meta http-equiv=\"refresh\" content=\"\
+            0;URL=http://ooni.nu/test/\">\n    </head>\n    <body bgcolor=\"#FFFFFF\"\
+            \ text=\"#000000\">\n    <a href=\"http://ooni.nu/test/\">click here</a>\n\
+            \    </body>\n</html>\n"
+          code: 302
+          headers:
+          - - Content-Length
+            - ['218']
+          - - Server
+            - [TwistedWeb/10.1.0]
+          - - Connection
+            - [close]
+          - - Location
+            - ['http://ooni.nu/test/']
+          - - Date
+            - ['Thu, 29 Nov 2012 12:20:25 GMT']
+          - - Content-Type
+            - [text/html]
+      - request:
+          body: null
+          headers:
+          - - User-Agent
+            - - *id001
+          method: GET
+          url: shttp://ooni.nu/test
+        response:
+          body: "\n<html>\n    <head>\n        <meta http-equiv=\"refresh\" content=\"\
+            0;URL=http://ooni.nu:80/test/\">\n    </head>\n    <body bgcolor=\"#FFFFFF\"\
+            \ text=\"#000000\">\n    <a href=\"http://ooni.nu:80/test/\">click here</a>\n\
+            \    </body>\n</html>\n"
+          code: 302
+          headers:
+          - - Content-Length
+            - ['224']
+          - - Server
+            - [TwistedWeb/10.1.0]
+          - - Connection
+            - [close]
+          - - Location
+            - ['http://ooni.nu:80/test/']
+          - - Date
+            - ['Thu, 29 Nov 2012 12:20:33 GMT']
+          - - Content-Type
+            - [text/html]
+      socksproxy: null
+    test_name: test_get
+    test_runtime: 9.357746124267578
+    test_started: 1354191606.333243
+    ...
+    ---
+    input: http://torproject.org/
+    report:
+      agent: agent
+      body_length_match: false
+      body_proportion: 1.0
+      factor: 0.8
+      requests:
+      - request:
+          body: null
+          headers:
+          - - User-Agent
+            - - &id001 [Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1), 'Internet
+                  Explorer 7, Windows Vista']
+          method: GET
+          url: http://torproject.org/
+        response:
+          body: '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
+
+            <html><head>
+
+            <title>302 Found</title>
+
+            </head><body>
+
+            <h1>Found</h1>
+
+            <p>The document has moved <a href="https://www.torproject.org/">here</a>.</p>
+
+            <hr>
+
+            <address>Apache Server at torproject.org Port 80</address>
+
+            </body></html>
+
+            '
+          code: 302
+          headers:
+          - - Content-Length
+            - ['275']
+          - - Vary
+            - [Accept-Encoding]
+          - - Server
+            - [Apache]
+          - - Connection
+            - [close]
+          - - Location
+            - ['https://www.torproject.org/']
+          - - Date
+            - ['Thu, 29 Nov 2012 12:20:08 GMT']
+          - - Content-Type
+            - [text/html; charset=iso-8859-1]
+      - request:
+          body: null
+          headers:
+          - - User-Agent
+            - - *id001
+          method: GET
+          url: shttp://torproject.org/
+        response:
+          body: '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
+
+            <html><head>
+
+            <title>302 Found</title>
+
+            </head><body>
+
+            <h1>Found</h1>
+
+            <p>The document has moved <a href="https://www.torproject.org/">here</a>.</p>
+
+            <hr>
+
+            <address>Apache Server at torproject.org Port 80</address>
+
+            </body></html>
+
+            '
+          code: 302
+          headers:
+          - - Content-Length
+            - ['275']
+          - - Vary
+            - [Accept-Encoding]
+          - - Server
+            - [Apache]
+          - - Connection
+            - [close]
+          - - Location
+            - ['https://www.torproject.org/']
+          - - Date
+            - ['Thu, 29 Nov 2012 12:20:16 GMT']
+          - - Content-Type
+            - [text/html; charset=iso-8859-1]
+      socksproxy: null
+    test_name: test_get
+    test_runtime: 8.688138008117676
+    test_started: 1354191607.287672
+    ...
+
diff --git a/nettests/blocking/http_body_length.py b/nettests/blocking/http_body_length.py
deleted file mode 100644
index 7c5b0d2..0000000
--- a/nettests/blocking/http_body_length.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# -*- encoding: utf-8 -*-
-#
-# :authors: Arturo Filastò
-# :licence: see LICENSE
-
-from twisted.internet import defer
-from twisted.python import usage
-from ooni.templates import httpt
-
-class UsageOptions(usage.Options):
-    optParameters = [
-                     ['url', 'u', None, 'Specify a single URL to test.'],
-                     ['factor', 'f', 0.8, 'What factor should be used for triggering censorship (0.8 == 80%)']
-                    ]
-
-class HTTPBodyLength(httpt.HTTPTest):
-    """
-    Performs a two GET requests to the set of sites to be tested for
-    censorship, one over a known good control channel (Tor), the other over the
-    test network.
-    We then look at the response body lengths and see if the control response
-    differs from the experiment response by a certain factor.
-    """
-    name = "HTTP Body length test"
-    author = "Arturo Filastò"
-    version = "0.1"
-
-    usageOptions = UsageOptions
-
-    inputFile = ['file', 'f', None, 
-            'List of URLS to perform GET and POST requests to']
-
-    # These values are used for determining censorship based on response body
-    # lengths
-    control_body_length = None
-    experiment_body_length = None
-
-    def setUp(self):
-        """
-        Check for inputs.
-        """
-        if self.input:
-            self.url = self.input
-        elif self.localOptions['url']:
-            self.url = self.localOptions['url']
-        else:
-            raise Exception("No input specified")
-
-        self.factor = self.localOptions['factor']
-
-    def compare_body_lengths(self):
-        body_length_a = self.control_body_length
-        body_length_b = self.experiment_body_length
-
-        rel = float(body_length_a)/float(body_length_b)
-        if rel > 1:
-            rel = 1/rel
-
-        self.report['body_proportion'] = rel
-        self.report['factor'] = self.factor
-        if rel < self.factor:
-            self.report['censorship'] = True
-        else:
-            self.report['censorship'] = False
-
-    def test_get(self):
-        def errback(failure):
-            log.err("There was an error while testing %s" % self.url)
-            log.exception(failure)
-
-        def control_body(result):
-            self.control_body_length = len(result)
-            if self.experiment_body_length:
-                self.compare_body_lengths()
-
-        def experiment_body(result):
-            self.experiment_body_length = len(result)
-            if self.control_body_length:
-                self.compare_body_lengths()
-
-        dl = []
-        experiment_request = self.doRequest(self.url, method="GET",
-                body_processor=experiment_body)
-        control_request = self.doRequest(self.url, method="GET",
-                use_tor=True, body_processor=control_body)
-        dl.append(experiment_request)
-        dl.append(control_request)
-        d = defer.DeferredList(dl)
-        return d
-
diff --git a/nettests/blocking/http_requests.py b/nettests/blocking/http_requests.py
new file mode 100644
index 0000000..bc2803b
--- /dev/null
+++ b/nettests/blocking/http_requests.py
@@ -0,0 +1,120 @@
+# -*- encoding: utf-8 -*-
+#
+# :authors: Arturo Filastò
+# :licence: see LICENSE
+
+from twisted.internet import defer
+from twisted.python import usage
+from ooni.templates import httpt
+
+class UsageOptions(usage.Options):
+    optParameters = [
+                     ['url', 'u', None, 'Specify a single URL to test.'],
+                     ['factor', 'f', 0.8, 'What factor should be used for triggering censorship (0.8 == 80%)']
+                    ]
+
+class HTTPRequestsTest(httpt.HTTPTest):
+    """
+    Performs a two GET requests to the set of sites to be tested for
+    censorship, one over a known good control channel (Tor), the other over the
+    test network.
+
+    We check to see if the response headers match and if the response body
+    lengths match.
+    """
+    name = "HTTP Requests Test"
+    author = "Arturo Filastò"
+    version = "0.1"
+
+    usageOptions = UsageOptions
+
+    inputFile = ['file', 'f', None, 
+            'List of URLS to perform GET and POST requests to']
+
+    # These values are used for determining censorship based on response body
+    # lengths
+    control_body_length = None
+    experiment_body_length = None
+
+    def setUp(self):
+        """
+        Check for inputs.
+        """
+        if self.input:
+            self.url = self.input
+        elif self.localOptions['url']:
+            self.url = self.localOptions['url']
+        else:
+            raise Exception("No input specified")
+
+        self.factor = self.localOptions['factor']
+
+    def compare_body_lengths(self):
+        body_length_a = self.control_body_length
+        body_length_b = self.experiment_body_length
+
+        rel = float(body_length_a)/float(body_length_b)
+        if rel > 1:
+            rel = 1/rel
+
+        self.report['body_proportion'] = rel
+        self.report['factor'] = self.factor
+        if rel < self.factor:
+            self.report['body_length_match'] = True
+        else:
+            self.report['body_length_match'] = False
+
+    def compare_headers(self):
+        diff = TrueHeaders(self.control_headers).getDiff(self.experiment_headers)
+        if diff:
+            self.report['headers_match'] = False
+        else:
+            self.report['headers_match'] = True
+
+    def test_get(self):
+        def errback(failure):
+            log.err("There was an error while testing %s" % self.url)
+            log.exception(failure)
+
+        def control_body(result):
+            """
+            Callback for processing the control HTTP body response.
+            """
+            self.control_body_length = len(result)
+            if self.experiment_body_length:
+                self.compare_body_lengths()
+
+        def experiment_body(result):
+            """
+            Callback for processing the experiment HTTP body response.
+            """
+            self.experiment_body_length = len(result)
+            if self.control_body_length:
+                self.compare_body_lengths()
+
+        def control_headers(headers_dict):
+            """
+            Callback for processing the control HTTP headers response.
+            """
+            self.control_headers = headers_dict
+
+        def experiment_headers(headers_dict):
+            """
+            Callback for processing the experiment HTTP headers response.
+            """
+            self.experiment_headers = headers_dict
+
+        dl = []
+        experiment_request = self.doRequest(self.url, method="GET",
+                body_processor=experiment_body,
+                headers_processor=control_headers)
+
+        control_request = self.doRequest(self.url, method="GET",
+                use_tor=True, body_processor=control_body,
+                headers_processor=control_headers)
+
+        dl.append(experiment_request)
+        dl.append(control_request)
+        d = defer.DeferredList(dl)
+        return d
+





More information about the tor-commits mailing list