[tor-commits] [ooni-probe/master] Improve oonib

art at torproject.org art at torproject.org
Wed Nov 28 14:06:29 UTC 2012


commit 19d533e4d1a473365d9794834cf4dea0a5a931a1
Author: Arturo Filastò <art at fuffa.org>
Date:   Wed Nov 28 14:52:31 2012 +0100

    Improve oonib
    * More robust error handling
    * Add docstrings to the not-yet-implemented collector parts
    * Refactor ooni reporting system
---
 ooni/__init__.py               |    6 +-
 oonib/__init__.py              |    2 +
 oonib/report/__init__.py       |    8 ++
 oonib/report/api.py            |  156 ++++------------------------------------
 oonib/report/file_collector.py |  145 +++++++++++++++++++++++++++++++++++++
 5 files changed, 172 insertions(+), 145 deletions(-)

diff --git a/ooni/__init__.py b/ooni/__init__.py
index bf98e16..36afc9a 100644
--- a/ooni/__init__.py
+++ b/ooni/__init__.py
@@ -11,9 +11,9 @@ from . import runner
 from . import templates
 from . import utils
 
+__author__ = "Arturo Filastò"
+__version__ = "0.0.7.1-alpha"
+
 __all__ = ['config', 'inputunit', 'kit',
            'lib', 'nettest', 'oonicli', 'reporter',
            'runner', 'templates', 'utils']
-
-__author__ = "Arturo Filastò"
-__version__ = "0.0.7.1-alpha"
diff --git a/oonib/__init__.py b/oonib/__init__.py
index 1a853dd..ab7419c 100644
--- a/oonib/__init__.py
+++ b/oonib/__init__.py
@@ -14,6 +14,8 @@ from storm.uri import URI
 from storm.twisted.transact import Transactor
 from storm.databases.sqlite import SQLite
 
+__version__ = '0.0.1'
+
 from oonib import config
 
 database = SQLite(URI(config.main.database_uri))
diff --git a/oonib/report/__init__.py b/oonib/report/__init__.py
index e69de29..96667b4 100644
--- a/oonib/report/__init__.py
+++ b/oonib/report/__init__.py
@@ -0,0 +1,8 @@
+def generateReportID():
+    return otime.timestamp() + '_' + randomStr(20)
+
+class MissingField(Exception):
+    pass
+
+class InvalidRequestField(Exception):
+    pass
diff --git a/oonib/report/api.py b/oonib/report/api.py
index 5668857..9c42a54 100644
--- a/oonib/report/api.py
+++ b/oonib/report/api.py
@@ -1,7 +1,7 @@
 """
-/report/do
+/new
 
-/report/pcap
+/pcap
 
 This is the async pcap reporting system. It requires the client to have created a report already, but can work independently from test progress.
 
@@ -18,47 +18,9 @@ from cyclone import web
 
 from ooni.utils import randomStr, otime
 from oonib import models, config
-
-backend_version = '0.0.1'
-
-def generateReportID():
-    return otime.timestamp() + '_' + randomStr(20)
-
-class MissingField(Exception):
-    pass
-
-class InvalidRequestField(Exception):
-    pass
-
-def parseNewReportRequest(request):
-    """
-    Here we parse a new report request.
-    """
-    version_string = re.compile("[0-9A-Za-z_\-\.]+$")
-    name = re.compile("[a-zA-Z0-9_\- ]+$")
-    expected_request = {'software_name': name,
-     'software_version': version_string,
-     'test_name': name,
-     'test_version': version_string,
-     'progress': re.compile("[0-9]+$")
-    }
-    parsed_request = json.loads(request)
-    for k, regexp in expected_request.items():
-        try:
-            value_to_check = parsed_request[k]
-        except KeyError:
-            raise MissingField(k)
-        print "Matching %s with %s | %s" % (regexp, value_to_check, k)
-        if re.match(regexp, str(value_to_check)):
-            continue
-        else:
-            raise InvalidRequestField(k)
-    return parsed_request
+from oonib.report import file_collector
 
 def parseUpdateReportRequest(request):
-    # XXX this and the function above can probably be refactored into something
-    # more compact. There is quite a bit of code duplication going on here.
-
     #db_report_id_regexp = re.compile("[a-zA-Z0-9]+$")
 
     # this is the regexp for the reports that include the timestamp
@@ -80,92 +42,6 @@ def parseUpdateReportRequest(request):
 
     return parsed_request
 
-class NewReportHandlerFile(web.RequestHandler):
-    """
-    Responsible for creating and updating reports by writing to flat file.
-    """
-    def post(self):
-        """
-        Creates a new report with the input
-
-        * Request
-
-          {'software_name': 'XXX',
-           'software_version': 'XXX',
-           'test_name': 'XXX',
-           'test_version': 'XXX',
-           'progress': 'XXX',
-           'content': 'XXX'
-           }
-
-          Optional:
-            'test_helper': 'XXX'
-            'client_ip': 'XXX'
-
-        * Response
-
-          {'backend_version': 'XXX', 'report_id': 'XXX'}
-
-        """
-        # XXX here we should validate and sanitize the request
-        try:
-            report_data = parseNewReportRequest(self.request.body)
-        except InvalidRequestField, e:
-            raise web.HTTPError(400, "Invalid Request Field %s" % e)
-        except MissingField, e:
-            raise web.HTTPError(400, "Missing Request Field %s" % e)
-
-        print "Parsed this data %s" % report_data
-        software_name = report_data['software_name']
-        software_version = report_data['software_version']
-        test_name = report_data['test_name']
-        test_version = report_data['test_version']
-        content = report_data['content']
-
-        report_id = generateReportID()
-
-        #report_filename = '_'.join((report_id,
-        #    report_data['software_name'],
-        #    report_data['software_version'],
-        #    report_data['test_name'],
-        #    report_data['test_version']))
-
-        # The report filename contains the timestamp of the report plus a
-        # random nonce
-        report_filename = os.path.join(config.main.report_dir, report_id)
-        report_filename += '.yamloo'
-
-        response = {'backend_version': backend_version, 
-                'report_id': report_id
-                }
-
-        fp = open(report_filename, 'w+')
-        fp.write(report_data['content'])
-        fp.close()
-        self.write(response)
-
-    def put(self):
-        """
-        Update an already existing report.
-
-          {'report_id': 'XXX',
-           'content': 'XXX'
-          }
-        """
-        parsed_request = parseUpdateReportRequest(self.request.body)
-        report_id = parsed_request['report_id']
-        print "Got this request %s" % parsed_request
-
-        report_filename = os.path.join(config.main.report_dir, report_id)
-        report_filename += '.yamloo'
-        try:
-            with open(report_filename, 'a+') as f: 
-                # XXX this could be quite big. We should probably use the
-                # twisted.internet.fdesc module
-                print parsed_request['content']
-                f.write(parsed_request['content'])
-        except IOError as e:
-            web.HTTPError(404, "Report not found")
 
 class NewReportHandlerDB(web.RequestHandler):
     """
@@ -177,7 +53,8 @@ class NewReportHandlerDB(web.RequestHandler):
     @defer.inlineCallbacks
     def post(self):
         """
-        Creates a new report with the input
+        Creates a new report in the database from the input.
+        XXX this is not yet implemented.
 
         * Request
 
@@ -193,23 +70,23 @@ class NewReportHandlerDB(web.RequestHandler):
             'test_helper': 'XXX'
             'client_ip': 'XXX'
 
-        * Response
+          * Response
 
           {'backend_version': 'XXX', 'report_id': 'XXX'}
 
         """
-        parsed_request = json.loads(self.request.body)
-        # XXX here we should validate and sanitize the request
-        report_data = parsed_request
+        report_data = json.loads(self.request.body)
         new_report = models.Report()
-        print "Got %s as request" % parsed_request
+        log.debug("Got this request %s" % report_data)
         result = yield new_report.new(report_data)
         self.write(result)
         self.finish()
 
     def put(self):
         """
-        Update an already existing report.
+        Update an already existing report in the database.
+
+        XXX this is not yet implemented.
 
           {'report_id': 'XXX',
            'content': 'XXX'
@@ -217,15 +94,10 @@ class NewReportHandlerDB(web.RequestHandler):
         """
         pass
 
-class PCAPReportHandler(web.RequestHandler):
-    def get(self):
-        pass
-
-    def post(self):
-        pass
 
-reportingBackendAPI = [(r"/report/new", NewReportHandlerFile),
-    (r"/report/pcap", PCAPReportHandler)
+reportingBackendAPI = [
+    (r"/report", file_collector.NewReportHandlerFile),
+    (r"/pcap", file_collector.PCAPReportHandler)
 ]
 
 reportingBackend = web.Application(reportingBackendAPI, debug=True)
diff --git a/oonib/report/file_collector.py b/oonib/report/file_collector.py
new file mode 100644
index 0000000..9896784
--- /dev/null
+++ b/oonib/report/file_collector.py
@@ -0,0 +1,145 @@
+import random
+import string
+import json
+import re
+import os
+
+from oonib.report import generateReportID
+from oonib.report import MissingField, InvalidRequestField
+
+from cyclone import web
+
+def parseNewReportRequest(request):
+    """
+    Here we parse a new report request.
+    """
+    version_string = re.compile("[0-9A-Za-z_\-\.]+$")
+    name = re.compile("[a-zA-Z0-9_\- ]+$")
+
+    expected_request = {'software_name': name,
+     'software_version': version_string,
+     'test_name': name,
+     'test_version': version_string
+    }
+
+    parsed_request = json.loads(request)
+    for k, regexp in expected_request.items():
+        try:
+            value_to_check = parsed_request[k]
+        except KeyError:
+            raise MissingField(k)
+        print "Matching %s with %s | %s" % (regexp, value_to_check, k)
+        if re.match(regexp, str(value_to_check)):
+            continue
+        else:
+            raise InvalidRequestField(k)
+    return parsed_request
+
+class NewReportHandlerFile(web.RequestHandler):
+    """
+    Responsible for creating and updating reports by writing to flat file.
+    """
+    def post(self):
+        """
+        Creates a new report with the input
+
+        * Request
+
+          {'software_name': 'XXX',
+           'software_version': 'XXX',
+           'test_name': 'XXX',
+           'test_version': 'XXX',
+           'progress': 'XXX',
+           'content': 'XXX'
+           }
+
+          Optional:
+            'test_helper': 'XXX'
+            'client_ip': 'XXX'
+
+          (not implemented, neither in the client nor in the backend)
+          The idea behind these two fields is that it would be interesting to
+          also collect how the request was observed from the collector's point
+          of view.
+
+          We use the client_ip address and a time window as a unique key. We
+          then need to tell the selected test_helper the client_ip address
+          and tell it to expect a connection from a probe in that time
+          window.
+
+          Once the test_helper sees a connection from that client_ip, it will
+          store the data that it receives for the testing session.
+          When the probe completes the report (or the time window is over), the
+          final report will also include the data collected from the
+          collector's viewpoint.
+
+        * Response
+
+          {'backend_version': 'XXX', 'report_id': 'XXX'}
+
+        """
+        # XXX here we should validate and sanitize the request
+        try:
+            report_data = parseNewReportRequest(self.request.body)
+        except InvalidRequestField, e:
+            raise web.HTTPError(400, "Invalid Request Field %s" % e)
+        except MissingField, e:
+            raise web.HTTPError(400, "Missing Request Field %s" % e)
+
+        print "Parsed this data %s" % report_data
+        software_name = report_data['software_name']
+        software_version = report_data['software_version']
+        test_name = report_data['test_name']
+        test_version = report_data['test_version']
+        content = report_data['content']
+
+        report_id = generateReportID()
+
+        #report_filename = '_'.join((report_id,
+        #    report_data['software_name'],
+        #    report_data['software_version'],
+        #    report_data['test_name'],
+        #    report_data['test_version']))
+
+        # The report filename contains the timestamp of the report plus a
+        # random nonce
+        report_filename = os.path.join(config.main.report_dir, report_id)
+        report_filename += '.yamloo'
+
+        response = {'backend_version': backend_version,
+                'report_id': report_id
+                }
+
+        fp = open(report_filename, 'w+')
+        fp.write(report_data['content'])
+        fp.close()
+        self.write(response)
+
+    def put(self):
+        """
+        Update an already existing report.
+
+          {'report_id': 'XXX',
+           'content': 'XXX'
+          }
+        """
+        parsed_request = parseUpdateReportRequest(self.request.body)
+        report_id = parsed_request['report_id']
+        print "Got this request %s" % parsed_request
+
+        report_filename = os.path.join(config.main.report_dir, report_id)
+        report_filename += '.yamloo'
+        try:
+            with open(report_filename, 'a+') as f:
+                # XXX-Twisted change this to use t.i.a.fdesc and perhaps make a
+                # nice little object for it.
+                f.write(parsed_request['content'])
+        except IOError as e:
+            web.HTTPError(404, "Report not found")
+
+class PCAPReportHandler(web.RequestHandler):
+    def get(self):
+        pass
+
+    def post(self):
+        pass



More information about the tor-commits mailing list