[tor-commits] [oonib/master] Do not perform the conversions to JSON in the backend, but only write to JSON files when it's of file type JSON

art at torproject.org art at torproject.org
Mon May 9 17:57:02 UTC 2016


commit 1f2ec237404c7bc5fa0f5d6d8375256e07bda9f8
Author: Arturo Filastò <arturo at filasto.net>
Date:   Wed Jan 27 17:48:25 2016 +0100

    Do not perform the conversions to JSON in the backend, but only write to JSON files when it's of file type JSON
---
 oonib/__init__.py        |  22 +-------
 oonib/report/handlers.py | 128 ++++++++++++++++++++++-------------------------
 2 files changed, 61 insertions(+), 89 deletions(-)

diff --git a/oonib/__init__.py b/oonib/__init__.py
index 773f2db..2593f7d 100644
--- a/oonib/__init__.py
+++ b/oonib/__init__.py
@@ -61,25 +61,5 @@ def randomStr(length, num=True):
         chars += string.digits
     return ''.join(random.choice(chars) for x in range(length))
 
-def binary_to_base64_dict(data):
-    from base64 import b64encode
-    return {
-        "data": b64encode(data),
-        "format": "base64"
-    }
-
 def json_dumps(obj):
-    def _default(o):
-        if isinstance(o, set):
-            return list(o)
-        elif isinstance(o, str):
-            try:
-                o = unicode(o, 'ascii')
-            except UnicodeDecodeError:
-                try:
-                    o = unicode(o, 'utf-8')
-                except UnicodeDecodeError:
-                    o = binary_to_base64_dict(o)
-            return o
-
-    return json.dumps(obj, default=_default)
+    return json.dumps(obj)
diff --git a/oonib/report/handlers.py b/oonib/report/handlers.py
index 7671723..c3af0c6 100644
--- a/oonib/report/handlers.py
+++ b/oonib/report/handlers.py
@@ -18,6 +18,13 @@ from oonib.config import config
 
 def report_file_name(archive_dir, report_details):
     timestamp = datetime.fromtimestamp(report_details['start_time'])
+    if report_details['format'] == 'json':
+        ext = 'json'
+    elif report_details['format'] == 'yaml':
+        ext = 'yaml'
+    else:
+        ext = 'invalid'
+
     keys = dict(
         report_details.items(),
         iso8601_timestamp=otime.timestamp(timestamp),
@@ -26,9 +33,10 @@ def report_file_name(archive_dir, report_details):
         day=timestamp.strftime("%d"),
         hour=timestamp.strftime("%H"),
         minute=timestamp.strftime("%M"),
-        second=timestamp.strftime("%S")
+        second=timestamp.strftime("%S"),
+        ext=ext
     )
-    report_file_template = "{iso8601_timestamp}-{test_name}-{probe_asn}-{probe_cc}-probe-0.2.0.json"
+    report_file_template = "{iso8601_timestamp}-{test_name}-{probe_asn}-{probe_cc}-probe-0.2.0.{ext}"
     if config.main.report_file_template:
         report_file_template = config.main.report_file_template
     dst_filename = os.path.join(archive_dir, report_file_template.format(**keys))
@@ -45,9 +53,12 @@ class Report(object):
                  stale_time,
                  report_dir,
                  archive_dir,
-                 reports):
+                 reports,
+                 report_details):
         self.report_id = report_id
 
+        self.report_details = report_details
+
         self.stale_time = stale_time
         self.report_dir = report_dir
         self.archive_dir = archive_dir
@@ -75,13 +86,12 @@ class Report(object):
 
         report_filename = get_report_path(self.report_id)
         try:
-            with open(report_filename) as fd:
-                line = fd.readline()
-                report_details = json.loads(line.strip())
+            open(report_filename).close()
         except IOError:
             raise e.ReportNotFound
 
-        dst_filename = report_file_name(self.archive_dir, report_details)
+        dst_filename = report_file_name(self.archive_dir,
+                                        self.report_details)
         shutil.move(report_filename, dst_filename)
 
         if not self.delayed_call.called:
@@ -119,6 +129,7 @@ def parseNewReportRequest(request):
     version_string = re.compile("[0-9A-Za-z_\-\.]+$")
     name = re.compile("[a-zA-Z0-9_\- ]+$")
     probe_asn = re.compile("AS[0-9]+$")
+    probe_cc = re.compile("[A-Z]{2}$")
     test_helper = re.compile("[A-Za-z0-9_\-]+$")
 
     expected_request = {
@@ -139,7 +150,7 @@ def parseNewReportRequest(request):
         except KeyError:
             raise e.MissingField(k)
 
-        print "Matching %s with %s | %s" % (regexp, value_to_check, k)
+        log.debug("Matching %s with %s | %s" % (regexp, value_to_check, k))
         if re.match(regexp, str(value_to_check)):
             continue
         else:
@@ -152,39 +163,34 @@ def parseNewReportRequest(request):
     except KeyError:
         pass
 
-    parsed_request['format'] = parsed_request.get('format', 'yaml')
-
-    return parsed_request
-
 
-def validate_report_header(report_header):
-    required_keys = ['probe_asn', 'probe_cc', 'probe_ip', 'software_name',
-                     'software_version', 'test_name', 'test_version']
-    for key in required_keys:
-        if key not in report_header:
-            raise e.MissingReportHeaderKey(key)
-
-    if report_header['probe_asn'] is None:
-        report_header['probe_asn'] = 'AS0'
-
-    if not re.match('AS[0-9]+$', report_header['probe_asn']):
-        raise e.InvalidReportHeader('probe_asn')
-
-    # If no country is known, set it to be ZZ (user assigned value in ISO 3166)
-    if report_header['probe_cc'] is None:
-        report_header['probe_cc'] = 'ZZ'
-
-    if not re.match('[a-zA-Z]{2}$', report_header['probe_cc']):
-        raise e.InvalidReportHeader('probe_cc')
+    if 'start_time' not in parsed_request:
+        try:
+            header = yaml.safe_load(parsed_request['content'])
+            parsed_request['start_time'] = header['start_time']
+        except Exception as exc:
+            log.exception(exc)
+            raise e.InvalidRequestField("start_time")
 
-    if not re.match('[a-z_\-]+$', report_header['test_name']):
-        raise e.InvalidReportHeader('test_name')
+    try:
+        parsed_request['start_time'] = float(header['start_time'])
+    except ValueError as exc:
+        log.exception(exc)
+        raise e.InvalidRequestField("start_time")
 
-    if not re.match('([0-9]+\.)+[0-9]+$', report_header['test_version']):
-        raise e.InvalidReportHeader('test_version')
+    if 'probe_cc' not in parsed_request:
+        try:
+            header = yaml.safe_load(parsed_request['content'])
+            parsed_request['probe_cc'] = header['probe_cc']
+            if not re.match(probe_cc, parsed_request['probe_cc']):
+                raise Exception("Does not match the regexp")
+        except Exception as exc:
+            log.exception(exc)
+            raise e.InvalidRequestField("probe_cc")
 
-    return report_header
+    parsed_request['format'] = parsed_request.get('format', 'yaml')
 
+    return parsed_request
 
 class ReportHandler(OONIBHandler):
 
@@ -211,21 +217,14 @@ class UpdateReportMixin(object):
         content_format = parsed_request.get('format', 'yaml')
         if content_format == 'json':
             data = json_dumps(parsed_request['content'])
+            data += "\n"
         elif content_format == 'yaml':
-            try:
-                entry = yaml.safe_load_all(parsed_request['content']).next()
-                data = json_dumps(entry)
-            except Exception as exc:
-                log.error("Received an invalid entry")
-                log.msg(parsed_request['content'])
-                log.exception(exc)
-                raise e.OONIBError(400, "Invalid report entry")
+            data = parsed_request['content']
         else:
             raise e.InvalidFormatField
         try:
             with open(report_filename, 'a+') as fd:
                 fd.write(data)
-                fd.write("\n")
         except IOError:
             raise e.OONIBError(404, "Report not found")
         self.write({'status': 'success'})
@@ -288,12 +287,6 @@ class NewReportHandlerFile(ReportHandler, UpdateReportMixin):
 
         log.debug("Parsed this data %s" % report_data)
 
-        software_name = str(report_data['software_name'])
-        software_version = str(report_data['software_version'])
-
-        probe_asn = str(report_data['probe_asn'])
-        probe_cc = str(report_data.get('probe_cc', 'ZZ'))
-
         self.testName = str(report_data['test_name'])
         self.testVersion = str(report_data['test_version'])
 
@@ -304,22 +297,21 @@ class NewReportHandlerFile(ReportHandler, UpdateReportMixin):
                 raise e.InputHashNotProvided
             self.checkPolicy()
 
+        data = None
         if 'content' in report_data:
             if report_data['format'] == 'json':
                 content = report_data['content']
+                content['backend_version'] = config.backend_version
+                data = json_dumps(content)
             elif report_data['format'] == 'yaml':
-                content = None
+                content = yaml.safe_load(report_data['content'])
+                content['backend_version'] = config.backend_version
+                data = "---\n" + yaml.dump(content) + "...\n"
             else:
                 raise e.InvalidFormatField
-            content['backend_version'] = config.backend_version
-        else:
-            content = None
-
-        if not probe_asn:
-            probe_asn = "AS0"
 
         report_id = otime.timestamp() + '_' \
-            + probe_asn + '_' \
+            + report_data.get('probe_asn', 'AS0') + '_' \
             + randomStr(50)
 
         # The report filename contains the timestamp of the report plus a
@@ -340,15 +332,15 @@ class NewReportHandlerFile(ReportHandler, UpdateReportMixin):
             except KeyError:
                 raise e.TestHelperNotFound
 
-        self.reports[report_id] = Report(report_id,
-                                         self.stale_time,
-                                         self.report_dir,
-                                         self.archive_dir,
-                                         self.reports)
-        if content:
-            # XXX make sure to validate the cotent to ensure it's possible to
-            # encode as JSON
-            data = json_dumps(content) + "\n"
+        self.reports[report_id] = Report(report_id=report_id,
+            stale_time=self.stale_time,
+            report_dir=self.report_dir,
+            archive_dir=self.archive_dir,
+            reports=self.reports,
+            report_details=report_data
+        )
+
+        if data is not None:
             self.writeToReport(report_filename, data)
         else:
             open(report_filename, 'w+').close()





More information about the tor-commits mailing list