[tor-commits] [ooni-probe/master] Implement utility class for dealing with HTTP based testing

art at torproject.org art at torproject.org
Wed Jun 13 17:23:58 UTC 2012


commit b1e8d865f121854e9f96ca70b54eb4a656679b66
Author: Arturo Filastò <art at baculo.org>
Date:   Wed Jun 13 19:25:55 2012 +0200

    Implement utility class for dealing with HTTP based testing
---
 ooni/ooniprobe.py      |    3 +-
 ooni/plugoo/tests.py   |   13 ++++-
 ooni/plugoo/work.py    |    4 +-
 ooni/protocols/http.py |  118 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 132 insertions(+), 6 deletions(-)

diff --git a/ooni/ooniprobe.py b/ooni/ooniprobe.py
index 7248e70..a997041 100755
--- a/ooni/ooniprobe.py
+++ b/ooni/ooniprobe.py
@@ -64,7 +64,8 @@ def runTest(test, options, global_options):
 
     log.start(global_options['log'], 1)
 
-    wgen = work.WorkGenerator(test_class(options, global_options, report),
+    wgen = work.WorkGenerator(test_class(options, global_options, report,
+                                         reactor=reactor),
                               dict(options),
                               start=options['resume'])
 
diff --git a/ooni/plugoo/tests.py b/ooni/plugoo/tests.py
index 0632670..608cee3 100644
--- a/ooni/plugoo/tests.py
+++ b/ooni/plugoo/tests.py
@@ -19,12 +19,19 @@ from ooni.plugoo.interface import ITest
 class OONITest(object):
     blocking = False
 
-    def __init__(self, local_options, global_options, report, ooninet=None):
+    def __init__(self, local_options, global_options, report, ooninet=None,
+            reactor=None):
         self.local_options = local_options
         self.global_options = global_options
         self.assets = self.load_assets()
         self.report = report
         #self.ooninet = ooninet
+        self.reactor = reactor
+        self.initialize()
+        self.result = {}
+
+    def initialize(self):
+        pass
 
     def load_assets(self):
         """
@@ -40,11 +47,11 @@ class OONITest(object):
     def finished(self, control):
         #self.ooninet.report(result)
         self.end_time = datetime.now()
-        result = {}
+        result = self.result
         result['start_time'] = self.start_time
         result['end_time'] = self.end_time
         result['run_time'] = self.end_time - self.start_time
-        result['result'] = control
+        result['control'] = control
         log.msg("FINISHED %s" % result)
         self.report(result)
         return result
diff --git a/ooni/plugoo/work.py b/ooni/plugoo/work.py
index 16a6e2c..7e9be0c 100644
--- a/ooni/plugoo/work.py
+++ b/ooni/plugoo/work.py
@@ -54,8 +54,8 @@ class Worker(object):
         if self._running < self.maxconcurrent:
             asset, test, idx = workunit
             self._running += 1
-            test.startTest(asset).addBoth(self._run)
-            return
+            return test.startTest(asset).addBoth(self._run)
+
         d = defer.Deferred()
         self._queued.append((workunit, d))
         return d
diff --git a/ooni/protocols/http.py b/ooni/protocols/http.py
new file mode 100644
index 0000000..068ec0d
--- /dev/null
+++ b/ooni/protocols/http.py
@@ -0,0 +1,118 @@
+import random
+from zope.interface import implements
+from twisted.python import usage
+from twisted.plugin import IPlugin
+from twisted.internet import protocol, defer
+from ooni.plugoo.tests import ITest, OONITest
+from ooni.plugoo.assets import Asset
+from ooni.protocols.useragents import useragents
+from ooni import log
+
+useragents = [("Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6", "Firefox 2.0, Windows XP"),
+              ("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)", "Internet Explorer 7, Windows Vista"),
+              ("Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30)", "Internet Explorer 7, Windows XP"),
+              ("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322)", "Internet Explorer 6, Windows XP"),
+              ("Mozilla/4.0 (compatible; MSIE 5.0; Windows NT 5.1; .NET CLR 1.1.4322)", "Internet Explorer 5, Windows XP"),
+              ("Opera/9.20 (Windows NT 6.0; U; en)", "Opera 9.2, Windows Vista"),
+              ("Opera/9.00 (Windows NT 5.1; U; en)", "Opera 9.0, Windows XP"),
+              ("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 8.50", "Opera 8.5, Windows XP"),
+              ("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 8.0", "Opera 8.0, Windows XP"),
+              ("Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1) Opera 7.02 [en]", "Opera 7.02, Windows XP"),
+              ("Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20060127 Netscape/8.1", "Netscape 8.1, Windows XP")]
+
+class BodyReceiver(protocol.Protocol):
+    def __init__(self, finished):
+        self.finished = finished
+        self.data = ""
+
+    def dataReceived(self, bytes):
+        self.data += bytes
+
+    def connectionLost(self, reason):
+        self.finished.callback(self.data)
+
+
+from twisted.web.http_headers import Headers
+class HTTPTest(OONITest):
+    """
+    A utility class for dealing with HTTP based testing. It provides methods to
+    be overridden for dealing with HTTP based testing.
+    The main functions to look at are processResponseBody and
+    processResponseHeaders, which are invoked once the response body has been
+    received and once the headers have been received, respectively.
+    """
+    randomize_ua = True
+
+    def initialize(self, reactor=None):
+        from twisted.web.client import Agent
+        import yaml
+
+        if not self.reactor:
+            from twisted.internet import reactor
+            self.reactor = reactor
+        self.agent = Agent(self.reactor)
+        self.request = {}
+        self.response = {}
+
+    def _processResponseBody(self, data):
+        self.response['body'] = data
+        self.result['response'] = self.response
+        self.processResponseBody(data)
+
+    def processResponseBody(self, data):
+        """
+        This should handle all the response body smushing for getting it ready
+        to be passed onto the control.
+
+        @param data: The content of the body returned.
+        """
+        pass
+
+    def processResponseHeaders(self, headers):
+        """
+        This should take care of dealing with the returned HTTP headers.
+
+        @param headers: The content of the returned headers.
+        """
+        pass
+
+    def experiment(self, args):
+        log.msg("Running experiment")
+        d = self.build_request(args['url'])
+        def finished(data):
+            return data
+
+        d.addCallback(self._cbResponse)
+        d.addCallback(finished)
+        return d
+
+    def _cbResponse(self, response):
+        self.response['headers'] = list(response.headers.getAllRawHeaders())
+        self.processResponseHeaders(self.response['headers'])
+        finished = defer.Deferred()
+        response.deliverBody(BodyReceiver(finished))
+        finished.addCallback(self._processResponseBody)
+
+    def randomize_useragent(self):
+        user_agent = random.choice(useragents)
+        self.request['headers']['User-Agent'] = [user_agent]
+
+    def build_request(self, url, method="GET", headers=None, body=None):
+        self.request['method'] = method
+        self.request['url'] = url
+        self.request['headers'] = headers if headers else {}
+        self.request['body'] = body
+        if self.randomize_ua:
+            self.randomize_useragent()
+
+        self.result['request'] = self.request
+        return self.agent.request(self.request['method'], self.request['url'],
+                                  Headers(self.request['headers']),
+                                  self.request['body'])
+
+    def load_assets(self):
+        if self.local_options:
+            return {'url': Asset(self.local_options['asset'])}
+        else:
+            return {}
+





More information about the tor-commits mailing list