commit fb5ab197c4efabd005286c32335010f24a102267 Author: Arturo Filastò art@fuffa.org Date: Sun Nov 25 06:50:37 2012 +0100
Test and implement the HTTP Header Field Manipulation test (renamed back to what we originally called it, since that name made the most sense) * Extend TrueHeaders to support computing the difference between two sets of HTTP headers in a capitalization-sensitive way * Write unit tests for the TrueHeaders functions with decent code path coverage * Add a commented-out testdeck entry for running the HTTP Header Field Manipulation test * Fix a bug in the calculation of the test runtime --- before_i_commit.testdeck | 9 + nettests/core/http_header_field_manipulation.py | 181 ++++++++++++++++++++++ nettests/core/http_requests.py | 187 ----------------------- ooni/reporter.py | 2 +- ooni/utils/txagentwithsocks.py | 36 +++++ oonib/testhelpers/http_helpers.py | 5 +- tests/test_trueheaders.py | 41 +++++ 7 files changed, 272 insertions(+), 189 deletions(-)
diff --git a/before_i_commit.testdeck b/before_i_commit.testdeck index e0b30ea..7fb33f9 100644 --- a/before_i_commit.testdeck +++ b/before_i_commit.testdeck @@ -38,3 +38,12 @@ reportfile: http_url_lists.yamloo subargs: [-f, test_inputs/url_lists_file.txt] test: nettests/core/http_url_list.py +# XXX this is disabled because it requires oonib to be running +#- options: +# collector: null +# help: 0 +# logfile: null +# pcapfile: null +# reportfile: null +# subargs: [-h, test_inputs/test_header_field_manipulation.txt] +# test: nettests/core/http_header_field_manipulation.py diff --git a/nettests/core/http_header_field_manipulation.py b/nettests/core/http_header_field_manipulation.py new file mode 100644 index 0000000..08ee8c7 --- /dev/null +++ b/nettests/core/http_header_field_manipulation.py @@ -0,0 +1,181 @@ +# -*- encoding: utf-8 -*- +# +# :authors: Arturo Filastò +# :licence: see LICENSE + +import random +import json +import yaml + +from twisted.python import usage + +from ooni.utils import log, net, randomStr +from ooni.templates import httpt +from ooni.utils.txagentwithsocks import TrueHeaders + +def random_capitalization(string): + output = "" + original_string = string + string = string.swapcase() + for i in range(len(string)): + if random.randint(0, 1): + output += string[i].swapcase() + else: + output += string[i] + if original_string == output: + return random_capitalization(output) + else: + return output + +class UsageOptions(usage.Options): + optParameters = [ + ['backend', 'b', 'http://127.0.0.1:57001', + 'URL of the backend to use for sending the requests'], + ['headers', 'h', None, + 'Specify a yaml formatted file from which to read the request headers to send'] + ] + +class HTTPHeaderFieldManipulation(httpt.HTTPTest): + """ + It performes HTTP requests with request headers that vary capitalization + towards a backend. If we detect that the headers the backend received + matches the ones we have sent then we have detected tampering. 
+ """ + name = "HTTP Header Field Manipulation" + author = "Arturo Filastò" + version = "0.1.3" + + randomizeUA = False + usageOptions = UsageOptions + + requiredOptions = ['backend'] + + def get_headers(self): + headers = {} + if self.localOptions['headers']: + try: + f = open(self.localOptions['headers']) + except IOError: + raise Exception("Specified input file does not exist") + content = ''.join(f.readlines()) + f.close() + headers = yaml.safe_load(content) + return headers + else: + # XXX generate these from a random choice taken from whatheaders.com + # http://s3.amazonaws.com/data.whatheaders.com/whatheaders-latest.xml.zip + headers = {"User-Agent": [random.choice(net.userAgents)[0]], + "Accept": ["text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"], + "Accept-Encoding": ["gzip,deflate,sdch"], + "Accept-Language": ["en-US,en;q=0.8"], + "Accept-Charset": ["ISO-8859-1,utf-8;q=0.7,*;q=0.3"], + "Host": [randomStr(15)+'.com'] + } + return headers + + def get_random_caps_headers(self): + headers = {} + normal_headers = self.get_headers() + for k, v in normal_headers.items(): + new_key = random_capitalization(k) + headers[new_key] = v + return headers + + def processInputs(self): + if self.localOptions['backend']: + self.url = self.localOptions['backend'] + else: + raise Exception("No backend specified") + + def processResponseBody(self, data): + self.check_for_tampering(data) + + def check_for_tampering(self, data): + """ + Here we do checks to verify if the request we made has been tampered + with. We have 3 categories of tampering: + + * **total** when the response is not a json object and therefore we were not + able to reach the ooniprobe test backend + + * **request_line_capitalization** when the HTTP Request line (e.x. GET / + HTTP/1.1) does not match the capitalization we set. 
+ + * **header_field_number** when the number of headers we sent does not match + with the ones the backend received + + * **header_name_capitalization** when the header field names do not match + those that we sent. + + * **header_field_value** when the header field value does not match with the + one we transmitted. + """ + self.report['tampering'] = { + 'total': False, + 'request_line_capitalization': False, + 'header_name_capitalization': False, + 'header_field_value': False, + 'header_field_number': False + } + try: + response = json.loads(data) + except ValueError: + self.report['tampering']['total'] = True + return + + request_request_line = "%s / HTTP/1.1" % self.request_method + + try: + response_request_line = response['request_line'] + response_headers_dict = response['headers_dict'] + except KeyError: + self.report['tampering']['total'] = True + return + + if request_request_line != response_request_line: + self.report['tampering']['request_line_capitalization'] = True + + request_headers = TrueHeaders(self.request_headers) + diff = request_headers.getDiff(response_headers_dict, ignore=['Connection']) + if diff: + self.report['tampering']['header_field_name'] = True + else: + self.report['tampering']['header_field_name'] = False + self.report['tampering']['header_name_diff'] = list(diff) + + def test_get(self): + self.request_method = "GET" + self.request_headers = self.get_random_caps_headers() + return self.doRequest(self.url, self.request_method, + headers=self.request_headers) + + def test_get_random_capitalization(self): + self.request_method = random_capitalization("GET") + self.request_headers = self.get_random_caps_headers() + return self.doRequest(self.url, self.request_method, + headers=self.request_headers) + + def test_post(self): + self.request_method = "POST" + self.request_headers = self.get_headers() + return self.doRequest(self.url, self.request_method, + headers=self.request_headers) + + def test_post_random_capitalization(self): + 
self.request_method = random_capitalization("POST") + self.request_headers = self.get_random_caps_headers() + return self.doRequest(self.url, self.request_method, + headers=self.request_headers) + + def test_put(self): + self.request_method = "PUT" + self.request_headers = self.get_headers() + return self.doRequest(self.url, self.request_method, + headers=self.request_headers) + + def test_put_random_capitalization(self): + self.request_method = random_capitalization("PUT") + self.request_headers = self.get_random_caps_headers() + return self.doRequest(self.url, self.request_method, + headers=self.request_headers) + diff --git a/nettests/core/http_requests.py b/nettests/core/http_requests.py deleted file mode 100644 index 5d67070..0000000 --- a/nettests/core/http_requests.py +++ /dev/null @@ -1,187 +0,0 @@ -# -*- encoding: utf-8 -*- -# -# :authors: Arturo Filastò -# :licence: see LICENSE - -import random -import json - -from twisted.python import usage - -from ooni.utils import log, net, randomStr -from ooni.templates import httpt - -def random_capitalization(string): - output = "" - original_string = string - string = string.swapcase() - for i in range(len(string)): - if random.randint(0, 1): - output += string[i].swapcase() - else: - output += string[i] - if original_string == output: - return random_capitalization(output) - else: - return output - -class UsageOptions(usage.Options): - optParameters = [ - ['backend', 'b', 'http://127.0.0.1:57001', - 'URL of the backend to use for sending the requests'], - ['headers', 'h', None, - 'Specify a yaml formatted file from which to read the request headers to send'] - ] - -class HTTPRequests(httpt.HTTPTest): - """ - This test is also known as Header Field manipulation. It performes HTTP - requests with variations in capitalization towards the backend. 
- """ - name = "HTTP Requests" - author = "Arturo Filastò" - version = "0.1.1" - - randomizeUA = False - usageOptions = UsageOptions - - requiredOptions = ['backend'] - - def processInputs(self): - if self.localOptions['backend']: - self.url = self.localOptions['backend'] - else: - raise Exception("No backend specified") - - def processResponseBody(self, data): - self.check_for_tampering(data) - - def check_for_tampering(self, data): - """ - Here we do checks to verify if the request we made has been tampered - with. We have 3 categories of tampering: - - * **total** when the response is not a json object and therefore we were not - able to reach the ooniprobe test backend - - * **request_line_capitalization** when the HTTP Request line (e.x. GET / - HTTP/1.1) does not match the capitalization we set. - - * **header_field_number** when the number of headers we sent does not match - with the ones the backend received - - * **header_name_capitalization** when the header field names do not match - those that we sent. - - * **header_field_value** when the header field value does not match with the - one we transmitted. 
- """ - self.report['tampering'] = {'total': False, - 'request_line_capitalization': False, - 'header_name_capitalization': False, - 'header_field_value': False, - 'header_field_number': False - } - - try: - response = json.loads(data) - except ValueError: - self.report['tampering']['total'] = True - return - - requestLine = "%s / HTTP/1.1" % self.request_method - if response['request_line'] != requestLine: - self.report['tampering']['request_line_capitalization'] = True - - # We compare against length -1 because the response will also contain - # the Connection: close header since we do not do persistent - # connections - if len(self.request_headers) != (len(response['headers_dict']) - 1): - self.report['tampering']['header_field_number'] = True - - for header, value in self.request_headers.items(): - # XXX this still needs some work - # in particular if the response headers are of different length or - # some extra headers get added in the response (so the lengths - # match), we will get header_name_capitalization set to true, while - # the actual tampering is the addition of an extraneous header - # field. 
- if header == "Connection": - # Ignore Connection header - continue - try: - response_value = response['headers_dict'][header] - if response_value != value[0]: - log.msg("Tampering detected because %s != %s" % (response_value, value[0])) - self.report['tampering']['header_field_value'] = True - except KeyError: - log.msg("Tampering detected because %s not in %s" % (header, response['headers_dict'])) - self.report['tampering']['header_name_capitalization'] = True - - def get_headers(self): - headers = {} - if self.localOptions['headers']: - # XXX test this code - try: - f = open(self.localOptions['headers']) - except IOError: - raise Exception("Specified input file does not exist") - content = ''.join(f.readlines()) - f.close() - headers = yaml.load(content) - return headers - else: - headers = {"User-Agent": [random.choice(net.userAgents)[0]], - "Accept": ["text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"], - "Accept-Encoding": ["gzip,deflate,sdch"], - "Accept-Language": ["en-US,en;q=0.8"], - "Accept-Charset": ["ISO-8859-1,utf-8;q=0.7,*;q=0.3"], - "Host": [randomStr(15)+'.com'] - } - return headers - - def get_random_caps_headers(self): - headers = {} - normal_headers = self.get_headers() - for k, v in normal_headers.items(): - new_key = random_capitalization(k) - headers[new_key] = v - return headers - - def test_get(self): - self.request_method = "GET" - self.request_headers = self.get_random_caps_headers() - return self.doRequest(self.url, self.request_method, - headers=self.request_headers) - - def test_get_random_capitalization(self): - self.request_method = random_capitalization("GET") - self.request_headers = self.get_random_caps_headers() - return self.doRequest(self.url, self.request_method, - headers=self.request_headers) - - def test_post(self): - self.request_method = "POST" - self.request_headers = self.get_headers() - return self.doRequest(self.url, self.request_method, - headers=self.request_headers) - - def 
test_post_random_capitalization(self): - self.request_method = random_capitalization("POST") - self.request_headers = self.get_random_caps_headers() - return self.doRequest(self.url, self.request_method, - headers=self.request_headers) - - def test_put(self): - self.request_method = "PUT" - self.request_headers = self.get_headers() - return self.doRequest(self.url, self.request_method, - headers=self.request_headers) - - def test_put_random_capitalization(self): - self.request_method = random_capitalization("PUT") - self.request_headers = self.get_random_caps_headers() - return self.doRequest(self.url, self.request_method, - headers=self.request_headers) - - diff --git a/ooni/reporter.py b/ooni/reporter.py index b79f27f..63f501e 100644 --- a/ooni/reporter.py +++ b/ooni/reporter.py @@ -184,7 +184,7 @@ class OReporter(object): test_input = test.input
test_started = test._start_time - test_runtime = test_started - time.time() + test_runtime = time.time() - test_started
report = {'input': test_input, 'test_name': test_name, diff --git a/ooni/utils/txagentwithsocks.py b/ooni/utils/txagentwithsocks.py index 57c27e4..7b000fc 100644 --- a/ooni/utils/txagentwithsocks.py +++ b/ooni/utils/txagentwithsocks.py @@ -4,6 +4,8 @@ # :licence: see LICENSE
import struct +import itertools +from copy import copy
from zope.interface import implements from twisted.web import client, _newclient, http_headers @@ -137,6 +139,40 @@ class TrueHeaders(http_headers.Headers): self._rawHeaders[name.lower()]['name'] = name self._rawHeaders[name.lower()]['values'] = values
+ def getDiff(self, header_dict, ignore=[]): + """ + ignore: specify a list of header fields to ignore + + Returns a set containing the header names that are not present in + header_dict or not present in self. + """ + diff = set() + field_names = [] + + headers_a = copy(self) + headers_b = TrueHeaders(header_dict) + for name in ignore: + try: + del headers_a._rawHeaders[name.lower()] + except KeyError: + pass + try: + del headers_b._rawHeaders[name.lower()] + except KeyError: + pass + + for k, v in itertools.chain(headers_a.getAllRawHeaders(), \ + headers_b.getAllRawHeaders()): + field_names.append(k) + + for name in field_names: + if self.getRawHeaders(name) and \ + name in header_dict: + pass + else: + diff.add(name) + return diff + def getAllRawHeaders(self): for k, v in self._rawHeaders.iteritems(): yield v['name'], v['values'] diff --git a/oonib/testhelpers/http_helpers.py b/oonib/testhelpers/http_helpers.py index b384216..1fa0ccb 100644 --- a/oonib/testhelpers/http_helpers.py +++ b/oonib/testhelpers/http_helpers.py @@ -77,7 +77,10 @@ class SimpleHTTPChannel(basic.LineReceiver, policies.TimeoutMixin): def allHeadersReceived(self): headers_dict = {} for k, v in self.headers: - headers_dict[k] = v + if k not in headers_dict: + headers_dict[k] = [] + headers_dict[k].append(v) + response = {'request_headers': self.headers, 'request_line': self.requestLine, 'headers_dict': headers_dict diff --git a/tests/test_trueheaders.py b/tests/test_trueheaders.py new file mode 100644 index 0000000..33521b8 --- /dev/null +++ b/tests/test_trueheaders.py @@ -0,0 +1,41 @@ +from twisted.trial import unittest + +from ooni.utils.txagentwithsocks import TrueHeaders + +dummy_headers_dict = { + 'Header1': ['Value1', 'Value2'], + 'Header2': ['ValueA', 'ValueB'] +} + +dummy_headers_dict2 = { + 'Header1': ['Value1', 'Value2'], + 'Header2': ['ValueA', 'ValueB'], + 'Header3': ['ValueA', 'ValueB'], +} + +dummy_headers_dict3 = { + 'Header1': ['Value1', 'Value2'], + 'Header2': ['ValueA', 
'ValueB'], + 'Header4': ['ValueA', 'ValueB'], +} + + +class TestTrueHeaders(unittest.TestCase): + def test_names_match(self): + th = TrueHeaders(dummy_headers_dict) + self.assertEqual(th.getDiff(dummy_headers_dict), set()) + + def test_names_not_match(self): + th = TrueHeaders(dummy_headers_dict) + self.assertEqual(th.getDiff(dummy_headers_dict2), set(['Header3'])) + + th = TrueHeaders(dummy_headers_dict3) + self.assertEqual(th.getDiff(dummy_headers_dict2), set(['Header3', 'Header4'])) + + def test_names_match_expect_ignore(self): + th = TrueHeaders(dummy_headers_dict) + self.assertEqual(th.getDiff(dummy_headers_dict2, ignore=['Header3']), set()) + + + +
tor-commits@lists.torproject.org