commit 8e7199d85c877353b83b3da98936c602d4c4a5b4 Author: Arturo Filastò arturo@filasto.net Date: Mon May 30 10:42:29 2016 +0200
Address feedback by @bassosimone
* In the web_connectivity test, skip title words shorter than 5 characters and compare the next word instead.
* Move the charset detection regexp into the correct module and update the unit test accordingly.
* Include ooni.common in setup.py
* Various code style fixes --- ooni/backend_client.py | 6 +----- ooni/common/http_utils.py | 2 +- ooni/nettests/blocking/web_connectivity.py | 21 ++++++++++++--------- ooni/templates/httpt.py | 4 ---- ooni/tests/test_common.py | 19 +++++++++++++++++++ ooni/tests/test_templates.py | 16 ---------------- ooni/utils/net.py | 1 - setup.py | 1 + 8 files changed, 34 insertions(+), 36 deletions(-)
diff --git a/ooni/backend_client.py b/ooni/backend_client.py index d4c463e..0e85dd7 100644 --- a/ooni/backend_client.py +++ b/ooni/backend_client.py @@ -207,9 +207,7 @@ class CollectorClient(OONIBClient): @d.addErrback def err(failure): failure.trap(Error) - if failure.value.status == '404': - return True - return False + return failure.value.status == '404'
return d
@@ -345,8 +343,6 @@ class CollectorClient(OONIBClient):
class WebConnectivityClient(OONIBClient): def isReachable(self): - # XXX maybe in the future we can have a dedicated API endpoint to - # test the reachability of the collector. d = self.queryBackend('GET', '/status')
@d.addCallback diff --git a/ooni/common/http_utils.py b/ooni/common/http_utils.py index 6d636d5..57c3a15 100644 --- a/ooni/common/http_utils.py +++ b/ooni/common/http_utils.py @@ -2,7 +2,7 @@ import re import codecs from base64 import b64encode
-META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>]*)') +META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>!;]+)')
def representBody(body): if not body: diff --git a/ooni/nettests/blocking/web_connectivity.py b/ooni/nettests/blocking/web_connectivity.py index 655bf76..b4ccff1 100644 --- a/ooni/nettests/blocking/web_connectivity.py +++ b/ooni/nettests/blocking/web_connectivity.py @@ -42,10 +42,9 @@ class UsageOptions(usage.Options): def is_public_ipv4_address(address): try: ip_address = IPv4Address(address) - if not any([ip_address.is_private, - ip_address.is_loopback]): - return True - return False + return not any( + [ip_address.is_private, ip_address.is_loopback] + ) except AddressValueError: return None
@@ -293,13 +292,17 @@ class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest): def compare_titles(self, experiment_http_response): experiment_title = extractTitle(experiment_http_response.body).strip() control_title = self.control['http_request']['title'].strip() - first_exp_word = experiment_title.split(' ')[0] - first_ctrl_word = control_title.split(' ')[0] - if len(first_exp_word) < 5: + + control_words = control_title.split(' ') + for exp_word, idx in enumerate(experiment_title.split(' ')): # We don't consider to match words that are shorter than 5 # characters (5 is the average word length for english) - return False - return (first_ctrl_word.lower() == first_exp_word.lower()) + if len(exp_word) < 5: + continue + try: + return control_words[idx].lower() == exp_word.lower() + except IndexError: + return False
def compare_http_experiments(self, experiment_http_response):
diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py index f8ea941..3f7e77d 100644 --- a/ooni/templates/httpt.py +++ b/ooni/templates/httpt.py @@ -1,5 +1,3 @@ -import re -import codecs import random
from txtorcon.interface import StreamListenerMixin @@ -22,8 +20,6 @@ from ooni.common.txextra import FixedRedirectAgent, TrueHeadersAgent from ooni.common.http_utils import representBody from ooni.errors import handleAllFailures
-META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>!;]+)') - class InvalidSocksProxyOption(Exception): pass
diff --git a/ooni/tests/test_common.py b/ooni/tests/test_common.py new file mode 100644 index 0000000..1cd77cf --- /dev/null +++ b/ooni/tests/test_common.py @@ -0,0 +1,19 @@ +from twisted.trial import unittest +from ooni.common.http_utils import META_CHARSET_REGEXP + +class TestHTTPUtils(unittest.TestCase): + def test_charset_detection(self): + no_charset_html = """ + <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html> +<head> + <title>Foo</title> +""" + with_charset_html = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">' + with_empty_charset = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=">' + with_two_charsets = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8;charset=utf-8">' + self.assertEqual(META_CHARSET_REGEXP.search(no_charset_html), None) + self.assertEqual(META_CHARSET_REGEXP.search(with_charset_html).group(1), 'iso-8859-1') + self.assertEqual(META_CHARSET_REGEXP.search( + with_two_charsets).group(1), 'UTF-8') + self.assertEqual(META_CHARSET_REGEXP.search(with_empty_charset), None) diff --git a/ooni/tests/test_templates.py b/ooni/tests/test_templates.py index ebd5b2e..7aa399b 100644 --- a/ooni/tests/test_templates.py +++ b/ooni/tests/test_templates.py @@ -46,22 +46,6 @@ class TestHTTPT(unittest.TestCase): yield self.assertFailure(http_test.doRequest('http://invaliddomain/'), DNSLookupError) assert http_test.report['requests'][0]['failure'] == 'dns_lookup_error'
- def test_charset_detection(self): - no_charset_html = """ - <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -<html> -<head> - <title>Foo</title> -""" - with_charset_html = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">' - with_empty_charset = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=">' - with_two_charsets = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8;charset=utf-8">' - self.assertEqual(httpt.META_CHARSET_REGEXP.search(no_charset_html), None) - self.assertEqual(httpt.META_CHARSET_REGEXP.search(with_charset_html).group(1), 'iso-8859-1') - self.assertEqual(httpt.META_CHARSET_REGEXP.search( - with_two_charsets).group(1), 'UTF-8') - self.assertEqual(httpt.META_CHARSET_REGEXP.search(with_empty_charset), None) - class TestDNST(unittest.TestCase): def setUp(self): if not is_internet_connected(): diff --git a/ooni/utils/net.py b/ooni/utils/net.py index 9918b60..79d17f4 100644 --- a/ooni/utils/net.py +++ b/ooni/utils/net.py @@ -1,4 +1,3 @@ -import re import sys import socket from random import randint diff --git a/setup.py b/setup.py index da0b967..3bfc5cd 100644 --- a/setup.py +++ b/setup.py @@ -171,6 +171,7 @@ data_files = [] packages = [ 'ooni', 'ooni.api', + 'ooni.common', 'ooni.deckgen', 'ooni.deckgen.processors', 'ooni.kit',
tor-commits@lists.torproject.org