commit d80a68098dc4fd13a4963e26d6c4e5ed2ad31b43 Author: Arturo Filastò arturo@filasto.net Date: Sun May 8 17:55:46 2016 +0200
Ignore empty charset values --- ooni/templates/httpt.py | 2 +- ooni/tests/test_templates.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py index 2a17f5b..51fba1a 100644 --- a/ooni/templates/httpt.py +++ b/ooni/templates/httpt.py @@ -17,7 +17,7 @@ from ooni.utils.net import StringProducer, userAgents from ooni.utils.trueheaders import TrueHeaders from ooni.errors import handleAllFailures
-META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"\']*([^\s"\'/>]*)') +META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"\']*([^\s"\'/>]+)')
class InvalidSocksProxyOption(Exception): pass diff --git a/ooni/tests/test_templates.py b/ooni/tests/test_templates.py index 5b2c77a..e8fe636 100644 --- a/ooni/tests/test_templates.py +++ b/ooni/tests/test_templates.py @@ -54,8 +54,10 @@ class TestHTTPT(unittest.TestCase): <title>Foo</title> """ with_charset_html = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">' + with_empty_charset = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=">' self.assertEqual(httpt.META_CHARSET_REGEXP.search(no_charset_html), None) self.assertEqual(httpt.META_CHARSET_REGEXP.search(with_charset_html).group(1), 'iso-8859-1') + self.assertEqual(httpt.META_CHARSET_REGEXP.search(with_empty_charset), None)
class TestDNST(unittest.TestCase): def setUp(self):