commit 958850898457e78e7091dd2aa5193e9956d26a8f Author: Arturo Filastò arturo@filasto.net Date: Mon May 9 15:56:35 2016 +0200
Also take into consideration whether the HTTP headers match when detecting blockpages.
This significantly brings down the false positive ratio, though there are still some instances of sites being mis-reported as blocked.
Examples of these are sites that have different web servers for sites in the various languages. --- ooni/nettests/blocking/web_connectivity.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/ooni/nettests/blocking/web_connectivity.py b/ooni/nettests/blocking/web_connectivity.py index 08cae57..eb05835 100644 --- a/ooni/nettests/blocking/web_connectivity.py +++ b/ooni/nettests/blocking/web_connectivity.py @@ -347,6 +347,8 @@ class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest): self.report['dns_consistency'] = 'inconsistent' tcp_connect = self.compare_tcp_experiments()
+ got_expected_web_page = (self.report['body_length_match'] or + self.report['headers_match'])
if (dns_consistent == True and tcp_connect == False and experiment_http_failure is not None): @@ -354,23 +356,30 @@ class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest):
# XXX we may want to have different codes for these two types of # blocking - elif (dns_consistent == True and tcp_connect == True and - self.report['body_length_match'] == False): - blocking = 'http' - elif (dns_consistent == True and tcp_connect == True and - experiment_http_failure is not None and - control_http_failure is None): + elif (dns_consistent == True and + tcp_connect == True and + got_expected_web_page == False): blocking = 'http'
+ elif (dns_consistent == True and + tcp_connect == True and + experiment_http_failure is not None and + control_http_failure is None): + if experiment_http_failure == 'dns_lookup_error': + blocking = 'dns' + else: + blocking = 'http' + elif (dns_consistent == False and - (experiment_http_failure is not None or - self.report['body_length_match'] == False)): + (got_expected_web_page == False or + experiment_http_failure is not None)): blocking = 'dns'
# This happens when the DNS resolution is injected, but the domain # doesn't have a valid record anymore or it resolves to an address # that is only accessible from within the country/network of the probe. elif (dns_consistent == False and + got_expected_web_page == False and (self.control['dns']['failure'] is not None or control_http_failure != experiment_http_failure)): blocking = 'dns'
tor-commits@lists.torproject.org