commit 4922b1ce9795b19a8e726217f45fc65ad17ea162 Author: Arturo Filastò art@fuffa.org Date: Thu Nov 29 13:35:27 2012 +0100
Test and document HTTP Requests test * Add support reporting the modification of response headers Note: This test used to be called HTTP Body Length, but got extended to support response headers. --- docs/source/tests/http_requests.rst | 238 +++++++++++++++++++++++++++++++++ nettests/blocking/http_body_length.py | 90 ------------- nettests/blocking/http_requests.py | 120 +++++++++++++++++ 3 files changed, 358 insertions(+), 90 deletions(-)
diff --git a/docs/source/tests/http_requests.rst b/docs/source/tests/http_requests.rst new file mode 100644 index 0000000..cc98882 --- /dev/null +++ b/docs/source/tests/http_requests.rst @@ -0,0 +1,238 @@ +Details +======= + +*Test Name*: HTTP Requests + +*Current version*: 0.1 + +*NetTest*: HTTP Requests (https://gitweb.torproject.org/ooni-probe.git/blob/HEAD:/nettests/blocking/ht...) + +*Test Helper*: None + +*Test Type*: Content Blocking + +*Requires Root*: No + +Description +=========== + +This test performs an HTTP GET request for the / resource over the test network +and over Tor. It then compares the two responses to see if the response bodies of the two requests match and if the +proportion between the expected body length (the one over Tor) and the one over +the test network match. + +If the proportion between the two body lengths is <= a certain tolerance +factor (by default set to 0.8), then we say that they do not match. + +The reason for doing so is that a lot of sites serve geolocalized content based +on the location from which the request originated. 
+ +How to run the test +=================== + +To test a single site run: + +`./bin/ooniprobe nettests/blocking/http_requests.py -u http://<test_site>/` + +To test a set of sites from a list containing sites to test run: + +`./bin/ooniprobe nettests/blocking/http_requests.py -f <input_file>` + + +Sample report +============= + +`./bin/ooniprobe nettests/blocking/http_requests.py -f example_inputs/url_lists_file.txt` + +:: + + ########################################### + # OONI Probe Report for HTTP Requests Test test + # Thu Nov 29 13:20:06 2012 + ########################################### + --- + options: + collector: null + help: 0 + logfile: null + pcapfile: null + reportfile: null + resume: 0 + subargs: [-f, example_inputs/url_lists_file.txt] + test: nettests/blocking/http_requests.py + probe_asn: null + probe_cc: null + probe_ip: 127.0.0.1 + software_name: ooniprobe + software_version: 0.0.7.1-alpha + start_time: 1354188006.0 + test_name: HTTP Requests Test + test_version: '0.1' + ... 
+ --- + input: http://ooni.nu/test + report: + agent: agent + body_length_match: false + body_proportion: 0.9732142857142857 + factor: 0.8 + requests: + - request: + body: null + headers: + - - User-Agent + - - &id001 [Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1), 'Internet + Explorer 7, Windows Vista'] + method: GET + url: http://ooni.nu/test + response: + body: "\n<html>\n <head>\n <meta http-equiv="refresh" content="\ + 0;URL=http://ooni.nu/test/%5C%22%3E%5Cn </head>\n <body bgcolor="#FFFFFF"\ + \ text="#000000">\n <a href="http://ooni.nu/test/%5C%22%3Eclick here</a>\n\ + \ </body>\n</html>\n" + code: 302 + headers: + - - Content-Length + - ['218'] + - - Server + - [TwistedWeb/10.1.0] + - - Connection + - [close] + - - Location + - ['http://ooni.nu/test/'] + - - Date + - ['Thu, 29 Nov 2012 12:20:25 GMT'] + - - Content-Type + - [text/html] + - request: + body: null + headers: + - - User-Agent + - - *id001 + method: GET + url: shttp://ooni.nu/test + response: + body: "\n<html>\n <head>\n <meta http-equiv="refresh" content="\ + 0;URL=http://ooni.nu:80/test/%5C%22%3E%5Cn </head>\n <body bgcolor="#FFFFFF"\ + \ text="#000000">\n <a href="http://ooni.nu:80/test/%5C%22%3Eclick here</a>\n\ + \ </body>\n</html>\n" + code: 302 + headers: + - - Content-Length + - ['224'] + - - Server + - [TwistedWeb/10.1.0] + - - Connection + - [close] + - - Location + - ['http://ooni.nu:80/test/'] + - - Date + - ['Thu, 29 Nov 2012 12:20:33 GMT'] + - - Content-Type + - [text/html] + socksproxy: null + test_name: test_get + test_runtime: 9.357746124267578 + test_started: 1354191606.333243 + ... 
+ --- + input: http://torproject.org/ + report: + agent: agent + body_length_match: false + body_proportion: 1.0 + factor: 0.8 + requests: + - request: + body: null + headers: + - - User-Agent + - - &id001 [Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1), 'Internet + Explorer 7, Windows Vista'] + method: GET + url: http://torproject.org/ + response: + body: '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"> + + <html><head> + + <title>302 Found</title> + + </head><body> + + <h1>Found</h1> + + <p>The document has moved <a href="https://www.torproject.org/">here</a>.</p> + + <hr> + + <address>Apache Server at torproject.org Port 80</address> + + </body></html> + + ' + code: 302 + headers: + - - Content-Length + - ['275'] + - - Vary + - [Accept-Encoding] + - - Server + - [Apache] + - - Connection + - [close] + - - Location + - ['https://www.torproject.org/'] + - - Date + - ['Thu, 29 Nov 2012 12:20:08 GMT'] + - - Content-Type + - [text/html; charset=iso-8859-1] + - request: + body: null + headers: + - - User-Agent + - - *id001 + method: GET + url: shttp://torproject.org/ + response: + body: '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"> + + <html><head> + + <title>302 Found</title> + + </head><body> + + <h1>Found</h1> + + <p>The document has moved <a href="https://www.torproject.org/">here</a>.</p> + + <hr> + + <address>Apache Server at torproject.org Port 80</address> + + </body></html> + + ' + code: 302 + headers: + - - Content-Length + - ['275'] + - - Vary + - [Accept-Encoding] + - - Server + - [Apache] + - - Connection + - [close] + - - Location + - ['https://www.torproject.org/'] + - - Date + - ['Thu, 29 Nov 2012 12:20:16 GMT'] + - - Content-Type + - [text/html; charset=iso-8859-1] + socksproxy: null + test_name: test_get + test_runtime: 8.688138008117676 + test_started: 1354191607.287672 + ... 
+ diff --git a/nettests/blocking/http_body_length.py b/nettests/blocking/http_body_length.py deleted file mode 100644 index 7c5b0d2..0000000 --- a/nettests/blocking/http_body_length.py +++ /dev/null @@ -1,90 +0,0 @@ -# -*- encoding: utf-8 -*- -# -# :authors: Arturo Filastò -# :licence: see LICENSE - -from twisted.internet import defer -from twisted.python import usage -from ooni.templates import httpt - -class UsageOptions(usage.Options): - optParameters = [ - ['url', 'u', None, 'Specify a single URL to test.'], - ['factor', 'f', 0.8, 'What factor should be used for triggering censorship (0.8 == 80%)'] - ] - -class HTTPBodyLength(httpt.HTTPTest): - """ - Performs a two GET requests to the set of sites to be tested for - censorship, one over a known good control channel (Tor), the other over the - test network. - We then look at the response body lengths and see if the control response - differs from the experiment response by a certain factor. - """ - name = "HTTP Body length test" - author = "Arturo Filastò" - version = "0.1" - - usageOptions = UsageOptions - - inputFile = ['file', 'f', None, - 'List of URLS to perform GET and POST requests to'] - - # These values are used for determining censorship based on response body - # lengths - control_body_length = None - experiment_body_length = None - - def setUp(self): - """ - Check for inputs. 
- """ - if self.input: - self.url = self.input - elif self.localOptions['url']: - self.url = self.localOptions['url'] - else: - raise Exception("No input specified") - - self.factor = self.localOptions['factor'] - - def compare_body_lengths(self): - body_length_a = self.control_body_length - body_length_b = self.experiment_body_length - - rel = float(body_length_a)/float(body_length_b) - if rel > 1: - rel = 1/rel - - self.report['body_proportion'] = rel - self.report['factor'] = self.factor - if rel < self.factor: - self.report['censorship'] = True - else: - self.report['censorship'] = False - - def test_get(self): - def errback(failure): - log.err("There was an error while testing %s" % self.url) - log.exception(failure) - - def control_body(result): - self.control_body_length = len(result) - if self.experiment_body_length: - self.compare_body_lengths() - - def experiment_body(result): - self.experiment_body_length = len(result) - if self.control_body_length: - self.compare_body_lengths() - - dl = [] - experiment_request = self.doRequest(self.url, method="GET", - body_processor=experiment_body) - control_request = self.doRequest(self.url, method="GET", - use_tor=True, body_processor=control_body) - dl.append(experiment_request) - dl.append(control_request) - d = defer.DeferredList(dl) - return d - diff --git a/nettests/blocking/http_requests.py b/nettests/blocking/http_requests.py new file mode 100644 index 0000000..bc2803b --- /dev/null +++ b/nettests/blocking/http_requests.py @@ -0,0 +1,120 @@ +# -*- encoding: utf-8 -*- +# +# :authors: Arturo Filastò +# :licence: see LICENSE + +from twisted.internet import defer +from twisted.python import usage +from ooni.templates import httpt + +class UsageOptions(usage.Options): + optParameters = [ + ['url', 'u', None, 'Specify a single URL to test.'], + ['factor', 'f', 0.8, 'What factor should be used for triggering censorship (0.8 == 80%)'] + ] + +class HTTPRequestsTest(httpt.HTTPTest): + """ + Performs a two GET requests 
to the set of sites to be tested for + censorship, one over a known good control channel (Tor), the other over the + test network. + + We check to see if the response headers match and if the response body + lengths match. + """ + name = "HTTP Requests Test" + author = "Arturo Filastò" + version = "0.1" + + usageOptions = UsageOptions + + inputFile = ['file', 'f', None, + 'List of URLS to perform GET and POST requests to'] + + # These values are used for determining censorship based on response body + # lengths + control_body_length = None + experiment_body_length = None + + def setUp(self): + """ + Check for inputs. + """ + if self.input: + self.url = self.input + elif self.localOptions['url']: + self.url = self.localOptions['url'] + else: + raise Exception("No input specified") + + self.factor = self.localOptions['factor'] + + def compare_body_lengths(self): + body_length_a = self.control_body_length + body_length_b = self.experiment_body_length + + rel = float(body_length_a)/float(body_length_b) + if rel > 1: + rel = 1/rel + + self.report['body_proportion'] = rel + self.report['factor'] = self.factor + if rel < self.factor: + self.report['body_length_match'] = True + else: + self.report['body_length_match'] = False + + def compare_headers(self): + diff = TrueHeaders(self.control_headers).getDiff(self.experiment_headers) + if diff: + self.report['headers_match'] = False + else: + self.report['headers_match'] = True + + def test_get(self): + def errback(failure): + log.err("There was an error while testing %s" % self.url) + log.exception(failure) + + def control_body(result): + """ + Callback for processing the control HTTP body response. + """ + self.control_body_length = len(result) + if self.experiment_body_length: + self.compare_body_lengths() + + def experiment_body(result): + """ + Callback for processing the experiment HTTP body response. 
+ """ + self.experiment_body_length = len(result) + if self.control_body_length: + self.compare_body_lengths() + + def control_headers(headers_dict): + """ + Callback for processing the control HTTP headers response. + """ + self.control_headers = headers_dict + + def experiment_headers(headers_dict): + """ + Callback for processing the experiment HTTP headers response. + """ + self.experiment_headers = headers_dict + + dl = [] + experiment_request = self.doRequest(self.url, method="GET", + body_processor=experiment_body, + headers_processor=control_headers) + + control_request = self.doRequest(self.url, method="GET", + use_tor=True, body_processor=control_body, + headers_processor=control_headers) + + dl.append(experiment_request) + dl.append(control_request) + d = defer.DeferredList(dl) + return d +