[tor-commits] [ooni-probe/master] Hardcoded vendor tests in separate function which optionally runs through setting in config, user-defined tests are optionally specified in asset file.

Mon Jul 9 14:39:04 UTC 2012

commit 6d57e84a19699a6f0dd2c6152ecafc87edacec9e
Author: Isis Lovecruft <isis at patternsinthevoid.net>
Date:   Mon Apr 9 19:32:12 2012 -0700

    Hardcoded vendor tests in separate function which optionally runs through setting in config, user-defined tests are optionally specified in asset file.
---
 assets/captive_portal_tests.txt |    4 +
 ooni-probe.conf                 |   13 ++-
 tests/captiveportal.py          |  208 ++++++++++++++++++++++++++++-----------
 3 files changed, 166 insertions(+), 59 deletions(-)

diff --git a/assets/captive_portal_tests.txt b/assets/captive_portal_tests.txt
new file mode 100644
index 0000000..1bd016f
--- /dev/null
+++ b/assets/captive_portal_tests.txt
@@ -0,0 +1,4 @@
+
+http://ooni.nu, Open Observatory of Network Interference, 200
+http://www.patternsinthevoid.net/2CDB8B35pub.asc, mQINBE5qkHABEADVnasCm9w9hUff1E4iKnzcAdp4lx6XU5USmYdwKg2RQt2VFqWQ, 200
+http://www.google.com, Search the world's information, 200
diff --git a/ooni-probe.conf b/ooni-probe.conf
index a744fcf..a19c1d0 100644
--- a/ooni-probe.conf
+++ b/ooni-probe.conf
@@ -40,13 +40,20 @@ dns_reverse_lookup = true
 
 ### captiveportal testing configuration parameters
 
+# This is an optional list of user-defined captive portal tests,
+# one per line, with each line in the format: 
+# experiment_url, control_result, control_code 
+# where experiment_url is the test page to retrieve,
+#       control_result is some unique text found on the test page,
+#   and control_code is the expected HTTP status code.
+captive_portal = captive_portal_tests.txt  
+
 # The default User Agent that ooni-probe should send for
 # HTTP requests (pretend we're a Windows box running FF10): 
 default_ua = Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2
 
-# This is the list of captive portal tests, in the format:
-# test_name, experiment_url, control_result, control_code
-captive_portal = captive_portal_tests.txt  
+# Enable vendor tests for captive portals:
+do_captive_portal_vendor_tests = true
 
 ### traceroute testing related config parameters
 
diff --git a/tests/captiveportal.py b/tests/captiveportal.py
index 324d3cc..494a4c3 100644
--- a/tests/captiveportal.py
+++ b/tests/captiveportal.py
@@ -23,7 +23,7 @@ __desc__ = "Captive portal detection test"
 
 class CaptivePortalAsset(Asset):
     """
-    Parses captive_portal_test.txt into an Asset.
+    Parses captive_portal_tests.txt into an Asset.
     """
     def __init__(self, file=None):
         self = Asset.__init__(self, file)
@@ -32,28 +32,13 @@ class CaptivePortalAsset(Asset):
         self = Asset.parse_line(self, line)
         return line.replace('\n', '').split(', ')
 
-    '''
-    def next_asset(self):
-        self = Asset.next_asset(self)
-        with self.fh as fh:
-            asset_list = []
-            lines = fh.readlines()
-            for line in lines:
-                parsed_line = self.parse_line(line)
-                if parsed_line:
-                    asset_list.append(parsed_line)
-                else:
-                    fh.seek(0)
-                    raise StopIteration
-            return asset_list
-    '''
-
 class CaptivePortal(Test):
     """
     Compares content and status codes of HTTP responses, and attempts
     to determine if content has been altered.
 
-    TODO: compare headers
+    TODO: compare headers, random URL requests with control obtained
+    through Tor.
     """
     def __init__(self, ooni):
         Test.__init__(self, ooni, name='test')
@@ -70,7 +55,7 @@ class CaptivePortal(Test):
         return response
  
     def http_content_match_fuzzy_opt(self, experimental_url, control_result,
-                                     fuzzy=False):
+                                     headers=None, fuzzy=False):
         """
         Makes an HTTP request on port 80 for experimental_url, then
         compares the response_content of experimental_url with the
@@ -78,13 +63,15 @@ class CaptivePortal(Test):
         True, the response_content is compared with a regex of the
         control_result. If the response_content from the
         experimental_url and the control_result match, returns True
-        with the HTTP status code, False if otherwise.
+        with the HTTP status code; otherwise returns False and the status code.
         """
         log = self.logger
-        default_ua = self.default_ua
 
-        response = self.http_fetch(experimental_url, 
-                                   headers={'User-Agent': default_ua})
+        if headers is None:
+            default_ua = self.default_ua
+            headers = {'User-Agent': default_ua}
+
+        response = self.http_fetch(experimental_url, headers)
         response_content = response.read()
         response_code = response.code
         if response_content is not None:
@@ -129,6 +116,89 @@ class CaptivePortal(Test):
             return False
         return True
 
+    def run_vendor_tests(self, *a, **kw):
+        """
+        These are several vendor tests used to detect the presence of
+        a captive portal. Each test compares HTTP status code and
+        content to the control results and has its own User-Agent
+        string, in order to emulate the test as it would occur on the
+        device it was intended for. Vendor tests are defined in the
+        format: 
+        [experiment_url, control_result, control_code, ua, test_name]
+        """
+        cm = self.http_content_match_fuzzy_opt
+        sm = self.http_status_code_match
+        snm = self.http_status_code_no_match
+        
+        log = self.logger
+
+        vendor_tests = [['http://www.apple.com/library/test/success.html',
+                         'Success',
+                         '200',
+                         'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1A543a Safari/419.3',
+                         'Apple HTTP Captive Portal'],
+                        ['http://tools.ietf.org/html/draft-nottingham-http-portal-02',
+                         '428 Network Authentication Required',
+                         '428',
+                         'Mozilla/5.0 (Windows NT 6.1; rv:5.0) Gecko/20100101 Firefox/5.0',
+                         'W3 Captive Portal'],
+                        ['http://www.msftncsi.com/ncsi.txt',
+                         'Microsoft NCSI',
+                         '200',
+                         'Microsoft NCSI',
+                         'MS HTTP Captive Portal',]]
+
+        log.debug("Getting vendor test data")
+        
+        for vt in vendor_tests:
+            experiment_url = vt[0]
+            control_result = vt[1]
+            control_code = vt[2]
+            ua = vt[3]
+            test_name = vt[4]
+
+            if test_name == "MS HTTP Captive Portal":
+                log.info("Running the %s test..." % test_name)
+                content_match, experiment_code = cm(experiment_url, control_result,
+                                                    headers={'User-Agent': ua})
+                status_match = sm(experiment_code, control_code)
+                if status_match and content_match:
+                    log.info("The %s test was unable to detect a captive portal."
+                             % test_name)
+                else:
+                    log.info("The %s test shows that your network is filtered." 
+                             % test_name)
+                
+            elif test_name == "Apple HTTP Captive Portal":
+                log.info("Running the %s test..." % test_name)
+                content_fuzzy_match, experiment_code = cm(experiment_url, 
+                                                          control_result,
+                                                          headers={'User-Agent': ua},
+                                                          fuzzy=True)
+                status_match = sm(experiment_code, control_code)
+                if status_match and content_fuzzy_match:
+                    log.info("The %s test was unable to detect a captive portal."
+                             % test_name)
+                else:
+                    log.info("The %s test shows that your network is filtered." 
+                             % test_name)
+                
+            elif test_name == "W3 Captive Portal":
+                log.info("Running the %s test..." % test_name)
+                content_fuzzy_match, experiment_code = cm(experiment_url, 
+                                                          control_result,
+                                                          headers={'User-Agent': ua},
+                                                          fuzzy=True)
+                status_no_match = snm(experiment_code, control_code)
+                if status_no_match and content_fuzzy_match:
+                    log.info("The %s test was unable to detect a captive portal."
+                             % test_name)
+                else:
+                    log.info("The %s test shows that your network is filtered." 
+                             % test_name)
+            else:
+                log.warn("Ooni is trying to run an undefined CP vendor test.")
+
     def experiment(self, *a, **kw):
         """
         Compares the content and status code of the HTTP response for
@@ -139,53 +209,79 @@ class CaptivePortal(Test):
         in the experimental content. Returns True if matches are found,
         and False if otherwise.
         """
-        test_name = kw['data'][0] 
-        experiment_url = kw['data'][1]
-        control_result = kw['data'][2]
-        control_code = kw['data'][3]
-
+        if (os.path.isfile(os.path.join(self.config.main.assetdir,
+                                        self.config.tests.captive_portal))):
+            kw['data'].append(None)
+            kw['data'].append('user defined')
+        
+        experiment_url = kw['data'][0]
+        control_result = kw['data'][1]
+        control_code = kw['data'][2]
+        ua = kw['data'][3]
+        test_name = kw['data'][4]
+    
         cm = self.http_content_match_fuzzy_opt
         sm = self.http_status_code_match
-
+        snm = self.http_status_code_no_match
+        
         log = self.logger
-        log.info("Running the %s test..." % test_name)
         
-        content_match, experiment_code = cm(experiment_url, control_result)
-        status_match = sm(experiment_code, control_code)
-
-        if status_match and content_match:
-            log.info("The %s test was unable to detect a captive portal."
-                     % test_name)
-            return True
-        elif status_match and not content_match:
-            log.info("The %s test detected mismatched content, retrying with " \
-                         "fuzzy match enabled." % test_name)
-            content_fuzzy_match, experiment_code = cm(experiment_url, 
-                                                      control_result,
-                                                      fuzzy=True)
-            if content_fuzzy_match:
-                return True
+        if test_name == "user defined":
+            log.info("Running the %s test for %s..." % (test_name, experiment_url))
+            content_match, experiment_code = cm(experiment_url, control_result)
+            status_match = sm(experiment_code, control_code)
+            if status_match and content_match:
+                log.info("The %s test was unable to detect a captive portal." 
+                         % test_name)
+                return True, test_name
+            elif status_match and not content_match:
+                log.info("The %s test detected mismatched content, retrying "
+                         "with fuzzy match enabled." % test_name)
+                content_fuzzy_match, experiment_code = cm(experiment_url, 
+                                                          control_result,
+                                                          fuzzy=True)
+                if content_fuzzy_match:
+                    return True, test_name
+                else:
+                    return False, test_name
             else:
-                return False
+                log.info("The %s test shows that your network is filtered." 
+                         % test_name)
+                return False, test_name
+        
         else:
-            log.info("The %s test shows that your network is filtered, possibly " \
-                         "due to a captive portal." % test_name)
-            return False
-
-        return False
+            log.warn("Ooni is trying to run an undefined captive portal test.")
+            return False, test_name
+        
 
 def run(ooni):
     """
-    Run the CaptivePortal(Test).
+    Runs the CaptivePortal(Test).
+
+    If do_captive_portal_vendor_tests is set to true, then vendor
+    specific captive portal tests will be run.
+
+    If captive_portal = filename.txt, then user-specified tests
+    will be run.
+
+    Either vendor tests or user-defined tests can be run, or both.
     """
     config = ooni.config
     log = ooni.logger
-    assets = [CaptivePortalAsset(os.path.join(config.main.assetdir, 
-                                              config.tests.captive_portal))]
 
+    assets = []
+    if (os.path.isfile(os.path.join(config.main.assetdir,
+                                    config.tests.captive_portal))):
+        assets.append(CaptivePortalAsset(os.path.join(config.main.assetdir, 
+                                                      config.tests.captive_portal)))
+    
     captiveportal = CaptivePortal(ooni)
     log.info("Starting captive portal test...")
+    log.info("Running user defined tests...")
     captiveportal.run(assets, {'index': 1})
-    log.info("Captive portal test finished!")
-
+    
+    if config.tests.do_captive_portal_vendor_tests:
+        log.info("Running vendor tests...")
+        captiveportal.run_vendor_tests()
 
+    log.info("Captive portal test finished!")