[tor-commits] [ooni-probe/master] Test and Implement HTTP Header Field Manipulation Test

art at torproject.org art at torproject.org
Sun Nov 25 05:57:54 UTC 2012


commit fb5ab197c4efabd005286c32335010f24a102267
Author: Arturo Filastò <art at fuffa.org>
Date:   Sun Nov 25 06:50:37 2012 +0100

    Test and Implement HTTP Header Field Manipulation Test
    (rename it to what we had originally called it since it made most sense)
    * Extend TrueHeaders to support calculation of difference between two HTTP
      headers respectful of capitalization
    * Write unittests for TrueHeaders functions with decent code path coverage
    * Add commented out testdeck for running HTTP Header Field manipulation test
    * Fix bug in calculation of runtime of test
---
 before_i_commit.testdeck                        |    9 +
 nettests/core/http_header_field_manipulation.py |  181 ++++++++++++++++++++++
 nettests/core/http_requests.py                  |  187 -----------------------
 ooni/reporter.py                                |    2 +-
 ooni/utils/txagentwithsocks.py                  |   36 +++++
 oonib/testhelpers/http_helpers.py               |    5 +-
 tests/test_trueheaders.py                       |   41 +++++
 7 files changed, 272 insertions(+), 189 deletions(-)

diff --git a/before_i_commit.testdeck b/before_i_commit.testdeck
index e0b30ea..7fb33f9 100644
--- a/before_i_commit.testdeck
+++ b/before_i_commit.testdeck
@@ -38,3 +38,12 @@
     reportfile: http_url_lists.yamloo
     subargs: [-f, test_inputs/url_lists_file.txt]
     test: nettests/core/http_url_list.py
+# XXX this is disabled because it requires oonib to be running
+#- options:
+#    collector: null
+#    help: 0
+#    logfile: null
+#    pcapfile: null
+#    reportfile: null
+#    subargs: [-h, test_inputs/test_header_field_manipulation.txt]
+#    test: nettests/core/http_header_field_manipulation.py
diff --git a/nettests/core/http_header_field_manipulation.py b/nettests/core/http_header_field_manipulation.py
new file mode 100644
index 0000000..08ee8c7
--- /dev/null
+++ b/nettests/core/http_header_field_manipulation.py
@@ -0,0 +1,181 @@
+# -*- encoding: utf-8 -*-
+#
+# :authors: Arturo Filastò
+# :licence: see LICENSE
+
+import random
+import json
+import yaml
+
+from twisted.python import usage
+
+from ooni.utils import log, net, randomStr
+from ooni.templates import httpt
+from ooni.utils.txagentwithsocks import TrueHeaders
+
+def random_capitalization(string):
+    output = ""
+    original_string = string
+    string = string.swapcase()
+    for i in range(len(string)):
+        if random.randint(0, 1):
+            output += string[i].swapcase()
+        else:
+            output += string[i]
+    if original_string == output:
+        return random_capitalization(output)
+    else:
+        return output
+
+class UsageOptions(usage.Options):
+    optParameters = [
+            ['backend', 'b', 'http://127.0.0.1:57001', 
+                'URL of the backend to use for sending the requests'],
+            ['headers', 'h', None,
+                'Specify a yaml formatted file from which to read the request headers to send']
+            ]
+
+class HTTPHeaderFieldManipulation(httpt.HTTPTest):
+    """
+    It performs HTTP requests with request headers that vary capitalization
+    towards a backend. If we detect that the headers the backend received do
+    not match the ones we have sent then we have detected tampering.
+    """
+    name = "HTTP Header Field Manipulation"
+    author = "Arturo Filastò"
+    version = "0.1.3"
+
+    randomizeUA = False
+    usageOptions = UsageOptions
+
+    requiredOptions = ['backend']
+
+    def get_headers(self):
+        headers = {}
+        if self.localOptions['headers']:
+            try:
+                f = open(self.localOptions['headers'])
+            except IOError:
+                raise Exception("Specified input file does not exist")
+            content = ''.join(f.readlines())
+            f.close()
+            headers = yaml.safe_load(content)
+            return headers
+        else:
+            # XXX generate these from a random choice taken from whatheaders.com
+            # http://s3.amazonaws.com/data.whatheaders.com/whatheaders-latest.xml.zip
+            headers = {"User-Agent": [random.choice(net.userAgents)[0]],
+                "Accept": ["text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"],
+                "Accept-Encoding": ["gzip,deflate,sdch"],
+                "Accept-Language": ["en-US,en;q=0.8"],
+                "Accept-Charset": ["ISO-8859-1,utf-8;q=0.7,*;q=0.3"],
+                "Host": [randomStr(15)+'.com']
+            }
+            return headers
+
+    def get_random_caps_headers(self):
+        headers = {}
+        normal_headers = self.get_headers()
+        for k, v in normal_headers.items():
+            new_key = random_capitalization(k)
+            headers[new_key] = v
+        return headers
+
+    def processInputs(self):
+        if self.localOptions['backend']:
+            self.url = self.localOptions['backend']
+        else:
+            raise Exception("No backend specified")
+
+    def processResponseBody(self, data):
+        self.check_for_tampering(data)
+
+    def check_for_tampering(self, data):
+        """
+        Here we do checks to verify if the request we made has been tampered
+        with. We have 5 categories of tampering:
+
+        *  **total** when the response is not a json object and therefore we were not
+        able to reach the ooniprobe test backend
+
+        *  **request_line_capitalization** when the HTTP Request line (e.g. GET /
+        HTTP/1.1) does not match the capitalization we set.
+
+        *  **header_field_number** when the number of headers we sent does not match
+        with the ones the backend received
+
+        *  **header_name_capitalization** when the header field names do not match
+        those that we sent.
+
+        *  **header_field_value** when the header field value does not match with the
+        one we transmitted.
+        """
+        self.report['tampering'] = {
+            'total': False,
+            'request_line_capitalization': False,
+            'header_name_capitalization': False,
+            'header_field_value': False,
+            'header_field_number': False
+        }
+        try:
+            response = json.loads(data)
+        except ValueError:
+            self.report['tampering']['total'] = True
+            return
+
+        request_request_line = "%s / HTTP/1.1" % self.request_method
+
+        try:
+            response_request_line = response['request_line']
+            response_headers_dict = response['headers_dict']
+        except KeyError:
+            self.report['tampering']['total'] = True
+            return
+
+        if request_request_line != response_request_line:
+            self.report['tampering']['request_line_capitalization'] = True
+
+        request_headers = TrueHeaders(self.request_headers)
+        diff = request_headers.getDiff(response_headers_dict, ignore=['Connection'])
+        if diff:
+            self.report['tampering']['header_field_name'] = True
+        else:
+            self.report['tampering']['header_field_name'] = False
+        self.report['tampering']['header_name_diff'] = list(diff)
+
+    def test_get(self):
+        self.request_method = "GET"
+        self.request_headers = self.get_random_caps_headers()
+        return self.doRequest(self.url, self.request_method,
+                headers=self.request_headers)
+
+    def test_get_random_capitalization(self):
+        self.request_method = random_capitalization("GET")
+        self.request_headers = self.get_random_caps_headers()
+        return self.doRequest(self.url, self.request_method,
+                headers=self.request_headers)
+
+    def test_post(self):
+        self.request_method = "POST"
+        self.request_headers = self.get_headers()
+        return self.doRequest(self.url, self.request_method,
+                headers=self.request_headers)
+
+    def test_post_random_capitalization(self):
+        self.request_method = random_capitalization("POST")
+        self.request_headers = self.get_random_caps_headers()
+        return self.doRequest(self.url, self.request_method,
+                headers=self.request_headers)
+
+    def test_put(self):
+        self.request_method = "PUT"
+        self.request_headers = self.get_headers()
+        return self.doRequest(self.url, self.request_method,
+                headers=self.request_headers)
+
+    def test_put_random_capitalization(self):
+        self.request_method = random_capitalization("PUT")
+        self.request_headers = self.get_random_caps_headers()
+        return self.doRequest(self.url, self.request_method,
+                headers=self.request_headers)
+
diff --git a/nettests/core/http_requests.py b/nettests/core/http_requests.py
deleted file mode 100644
index 5d67070..0000000
--- a/nettests/core/http_requests.py
+++ /dev/null
@@ -1,187 +0,0 @@
-# -*- encoding: utf-8 -*-
-#
-# :authors: Arturo Filastò
-# :licence: see LICENSE
-
-import random
-import json
-
-from twisted.python import usage
-
-from ooni.utils import log, net, randomStr
-from ooni.templates import httpt
-
-def random_capitalization(string):
-    output = ""
-    original_string = string
-    string = string.swapcase()
-    for i in range(len(string)):
-        if random.randint(0, 1):
-            output += string[i].swapcase()
-        else:
-            output += string[i]
-    if original_string == output:
-        return random_capitalization(output)
-    else:
-        return output
-
-class UsageOptions(usage.Options):
-    optParameters = [
-            ['backend', 'b', 'http://127.0.0.1:57001', 
-                'URL of the backend to use for sending the requests'],
-            ['headers', 'h', None,
-                'Specify a yaml formatted file from which to read the request headers to send']
-            ]
-
-class HTTPRequests(httpt.HTTPTest):
-    """
-    This test is also known as Header Field manipulation. It performes HTTP
-    requests with variations in capitalization towards the backend.
-    """
-    name = "HTTP Requests"
-    author = "Arturo Filastò"
-    version = "0.1.1"
-
-    randomizeUA = False
-    usageOptions = UsageOptions
-
-    requiredOptions = ['backend']
-
-    def processInputs(self):
-        if self.localOptions['backend']:
-            self.url = self.localOptions['backend']
-        else:
-            raise Exception("No backend specified")
-
-    def processResponseBody(self, data):
-        self.check_for_tampering(data)
-
-    def check_for_tampering(self, data):
-        """
-        Here we do checks to verify if the request we made has been tampered
-        with. We have 3 categories of tampering:
-
-        *  **total** when the response is not a json object and therefore we were not
-        able to reach the ooniprobe test backend
-
-        *  **request_line_capitalization** when the HTTP Request line (e.x. GET /
-        HTTP/1.1) does not match the capitalization we set.
-
-        *  **header_field_number** when the number of headers we sent does not match
-        with the ones the backend received
-
-        *  **header_name_capitalization** when the header field names do not match
-        those that we sent.
-
-        *  **header_field_value** when the header field value does not match with the
-        one we transmitted.
-        """
-        self.report['tampering'] = {'total': False,
-                'request_line_capitalization': False, 
-                'header_name_capitalization': False,
-                'header_field_value': False,
-                'header_field_number': False
-        }
-
-        try:
-            response = json.loads(data)
-        except ValueError:
-            self.report['tampering']['total'] = True
-            return
-
-        requestLine = "%s / HTTP/1.1" % self.request_method
-        if response['request_line'] != requestLine:
-            self.report['tampering']['request_line_capitalization'] = True
-
-        # We compare against length -1 because the response will also contain
-        # the Connection: close header since we do not do persistent
-        # connections
-        if len(self.request_headers) != (len(response['headers_dict']) - 1):
-            self.report['tampering']['header_field_number'] = True
-
-        for header, value in self.request_headers.items():
-            # XXX this still needs some work
-            # in particular if the response headers are of different length or
-            # some extra headers get added in the response (so the lengths
-            # match), we will get header_name_capitalization set to true, while
-            # the actual tampering is the addition of an extraneous header
-            # field.
-            if header == "Connection":
-                # Ignore Connection header
-                continue
-            try:
-                response_value = response['headers_dict'][header]
-                if response_value != value[0]:
-                    log.msg("Tampering detected because %s != %s" % (response_value, value[0]))
-                    self.report['tampering']['header_field_value'] = True
-            except KeyError:
-                log.msg("Tampering detected because %s not in %s" % (header, response['headers_dict']))
-                self.report['tampering']['header_name_capitalization'] = True
-
-    def get_headers(self):
-        headers = {}
-        if self.localOptions['headers']:
-            # XXX test this code
-            try:
-                f = open(self.localOptions['headers'])
-            except IOError:
-                raise Exception("Specified input file does not exist")
-            content = ''.join(f.readlines())
-            f.close()
-            headers = yaml.load(content)
-            return headers
-        else:
-            headers = {"User-Agent": [random.choice(net.userAgents)[0]],
-                "Accept": ["text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"],
-                "Accept-Encoding": ["gzip,deflate,sdch"],
-                "Accept-Language": ["en-US,en;q=0.8"],
-                "Accept-Charset": ["ISO-8859-1,utf-8;q=0.7,*;q=0.3"],
-                "Host": [randomStr(15)+'.com']
-            }
-            return headers
-
-    def get_random_caps_headers(self):
-        headers = {}
-        normal_headers = self.get_headers()
-        for k, v in normal_headers.items():
-            new_key = random_capitalization(k)
-            headers[new_key] = v
-        return headers
-
-    def test_get(self):
-        self.request_method = "GET"
-        self.request_headers = self.get_random_caps_headers()
-        return self.doRequest(self.url, self.request_method,
-                headers=self.request_headers)
-
-    def test_get_random_capitalization(self):
-        self.request_method = random_capitalization("GET")
-        self.request_headers = self.get_random_caps_headers()
-        return self.doRequest(self.url, self.request_method,
-                headers=self.request_headers)
-
-    def test_post(self):
-        self.request_method = "POST"
-        self.request_headers = self.get_headers()
-        return self.doRequest(self.url, self.request_method,
-                headers=self.request_headers)
-
-    def test_post_random_capitalization(self):
-        self.request_method = random_capitalization("POST")
-        self.request_headers = self.get_random_caps_headers()
-        return self.doRequest(self.url, self.request_method,
-                headers=self.request_headers)
-
-    def test_put(self):
-        self.request_method = "PUT"
-        self.request_headers = self.get_headers()
-        return self.doRequest(self.url, self.request_method,
-                headers=self.request_headers)
-
-    def test_put_random_capitalization(self):
-        self.request_method = random_capitalization("PUT")
-        self.request_headers = self.get_random_caps_headers()
-        return self.doRequest(self.url, self.request_method,
-                headers=self.request_headers)
-
-
diff --git a/ooni/reporter.py b/ooni/reporter.py
index b79f27f..63f501e 100644
--- a/ooni/reporter.py
+++ b/ooni/reporter.py
@@ -184,7 +184,7 @@ class OReporter(object):
             test_input = test.input
 
         test_started = test._start_time
-        test_runtime = test_started - time.time()
+        test_runtime = time.time() - test_started
 
         report = {'input': test_input,
                 'test_name': test_name,
diff --git a/ooni/utils/txagentwithsocks.py b/ooni/utils/txagentwithsocks.py
index 57c27e4..7b000fc 100644
--- a/ooni/utils/txagentwithsocks.py
+++ b/ooni/utils/txagentwithsocks.py
@@ -4,6 +4,8 @@
 # :licence: see LICENSE
 
 import struct
+import itertools
+from copy import copy
 
 from zope.interface import implements
 from twisted.web import client, _newclient, http_headers
@@ -137,6 +139,40 @@ class TrueHeaders(http_headers.Headers):
         self._rawHeaders[name.lower()]['name'] = name
         self._rawHeaders[name.lower()]['values'] = values
 
+    def getDiff(self, header_dict, ignore=[]):
+        """
+        ignore: specify a list of header fields to ignore
+
+        Returns a set containing the header names that are not present in
+        header_dict or not present in self.
+        """
+        diff = set()
+        field_names = []
+
+        headers_a = copy(self)
+        headers_b = TrueHeaders(header_dict)
+        for name in ignore:
+            try:
+                del headers_a._rawHeaders[name.lower()]
+            except KeyError:
+                pass
+            try:
+                del headers_b._rawHeaders[name.lower()]
+            except KeyError:
+                pass
+
+        for k, v in itertools.chain(headers_a.getAllRawHeaders(), \
+                headers_b.getAllRawHeaders()):
+            field_names.append(k)
+
+        for name in field_names:
+            if self.getRawHeaders(name) and \
+                name in header_dict:
+                pass
+            else:
+                diff.add(name)
+        return diff
+
     def getAllRawHeaders(self):
         for k, v in self._rawHeaders.iteritems():
             yield v['name'], v['values']
diff --git a/oonib/testhelpers/http_helpers.py b/oonib/testhelpers/http_helpers.py
index b384216..1fa0ccb 100644
--- a/oonib/testhelpers/http_helpers.py
+++ b/oonib/testhelpers/http_helpers.py
@@ -77,7 +77,10 @@ class SimpleHTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
     def allHeadersReceived(self):
         headers_dict = {}
         for k, v in self.headers:
-            headers_dict[k] = v
+            if k not in headers_dict:
+                headers_dict[k] = []
+            headers_dict[k].append(v)
+
         response = {'request_headers': self.headers,
             'request_line': self.requestLine,
             'headers_dict': headers_dict
diff --git a/tests/test_trueheaders.py b/tests/test_trueheaders.py
new file mode 100644
index 0000000..33521b8
--- /dev/null
+++ b/tests/test_trueheaders.py
@@ -0,0 +1,41 @@
+from twisted.trial import unittest
+
+from ooni.utils.txagentwithsocks import TrueHeaders
+
+dummy_headers_dict = {
+        'Header1': ['Value1', 'Value2'],
+        'Header2': ['ValueA', 'ValueB']
+}
+
+dummy_headers_dict2 = {
+        'Header1': ['Value1', 'Value2'],
+        'Header2': ['ValueA', 'ValueB'],
+        'Header3': ['ValueA', 'ValueB'],
+}
+
+dummy_headers_dict3 = {
+        'Header1': ['Value1', 'Value2'],
+        'Header2': ['ValueA', 'ValueB'],
+        'Header4': ['ValueA', 'ValueB'],
+}
+
+
+class TestTrueHeaders(unittest.TestCase):
+    def test_names_match(self):
+        th = TrueHeaders(dummy_headers_dict)
+        self.assertEqual(th.getDiff(dummy_headers_dict), set())
+
+    def test_names_not_match(self):
+        th = TrueHeaders(dummy_headers_dict)
+        self.assertEqual(th.getDiff(dummy_headers_dict2), set(['Header3']))
+
+        th = TrueHeaders(dummy_headers_dict3)
+        self.assertEqual(th.getDiff(dummy_headers_dict2), set(['Header3', 'Header4']))
+
+    def test_names_match_expect_ignore(self):
+        th = TrueHeaders(dummy_headers_dict)
+        self.assertEqual(th.getDiff(dummy_headers_dict2, ignore=['Header3']), set())
+
+
+
+



More information about the tor-commits mailing list