commit 973297a0ad8f5093ec9c08954a4d8419f50bd5d7
Author: Arturo Filastò <arturo(a)filasto.net>
Date: Wed Jan 27 16:52:46 2016 +0100
Do the conversion of YAML to JSON on the submission stage
---
oonib/__init__.py | 24 ++++++++++++++
oonib/errors.py | 4 +++
oonib/onion.py | 2 +-
oonib/otime.py | 2 +-
oonib/report/handlers.py | 84 ++++++++++++++++++++++--------------------------
5 files changed, 69 insertions(+), 47 deletions(-)
diff --git a/oonib/__init__.py b/oonib/__init__.py
index ba6dea3..773f2db 100644
--- a/oonib/__init__.py
+++ b/oonib/__init__.py
@@ -7,6 +7,7 @@ __version__ = '1.1.4'
__all__ = ['Storage', 'randomStr']
+import json
import string
from random import SystemRandom
random = SystemRandom()
@@ -59,3 +60,26 @@ def randomStr(length, num=True):
if num:
chars += string.digits
return ''.join(random.choice(chars) for x in range(length))
+
+def binary_to_base64_dict(data):
+ from base64 import b64encode
+ return {
+ "data": b64encode(data),
+ "format": "base64"
+ }
+
+def json_dumps(obj):
+ def _default(o):
+ if isinstance(o, set):
+ return list(o)
+ elif isinstance(o, str):
+ try:
+ o = unicode(o, 'ascii')
+ except UnicodeDecodeError:
+ try:
+ o = unicode(o, 'utf-8')
+ except UnicodeDecodeError:
+ o = binary_to_base64_dict(o)
+ return o
+
+ return json.dumps(obj, default=_default)
diff --git a/oonib/errors.py b/oonib/errors.py
index 6d97190..b82f8fe 100644
--- a/oonib/errors.py
+++ b/oonib/errors.py
@@ -111,6 +111,10 @@ class TestHelperNotFound(OONIBError):
log_message = "test-helper-not-found"
+class InvalidFormatField(OONIBError):
+ status_code = 400
+ log_message = "invalid-format-field"
+
class ConfigFileNotSpecified(Exception):
pass
diff --git a/oonib/onion.py b/oonib/onion.py
index 9c94d02..c06dc6b 100644
--- a/oonib/onion.py
+++ b/oonib/onion.py
@@ -73,7 +73,7 @@ def startTor(torconfig):
if os.path.exists(config.main.tor_datadir):
torconfig.DataDirectory = os.path.abspath(config.main.tor_datadir)
else:
- raise Exception
+ raise Exception("Could not find tor datadir")
tor_log_file = os.path.join(torconfig.DataDirectory, "tor.log")
torconfig.Log = ["notice stdout", "notice file %s" % tor_log_file]
diff --git a/oonib/otime.py b/oonib/otime.py
index 67a6bc6..31f0cc7 100644
--- a/oonib/otime.py
+++ b/oonib/otime.py
@@ -95,5 +95,5 @@ def timestamp(t=None):
"""
if t is None:
t = datetime.utcnow()
- ISO8601 = "%Y-%m-%dT%H%M%SZ"
+ ISO8601 = "%Y%m%dT%H%M%SZ"
return t.strftime(ISO8601)
diff --git a/oonib/report/handlers.py b/oonib/report/handlers.py
index 50e0cb7..7671723 100644
--- a/oonib/report/handlers.py
+++ b/oonib/report/handlers.py
@@ -12,17 +12,12 @@ from oonib.handlers import OONIBHandler
from oonib.policy.handlers import Policy
from datetime import datetime
-from oonib import randomStr, otime, log
+from oonib import randomStr, otime, log, json_dumps
from oonib.config import config
def report_file_name(archive_dir, report_details):
timestamp = datetime.fromtimestamp(report_details['start_time'])
- ext = report_details.get("format")
- if ext == "json":
- ext = "json"
- else:
- ext = "yamloo"
keys = dict(
report_details.items(),
iso8601_timestamp=otime.timestamp(timestamp),
@@ -31,10 +26,9 @@ def report_file_name(archive_dir, report_details):
day=timestamp.strftime("%d"),
hour=timestamp.strftime("%H"),
minute=timestamp.strftime("%M"),
- second=timestamp.strftime("%S"),
- ext=ext
+ second=timestamp.strftime("%S")
)
- report_file_template = "{probe_cc}/{test_name}-{iso8601_timestamp}-{probe_asn}-probe.{ext}"
+ report_file_template = "{iso8601_timestamp}-{test_name}-{probe_asn}-{probe_cc}-probe-0.2.0.json"
if config.main.report_file_template:
report_file_template = config.main.report_file_template
dst_filename = os.path.join(archive_dir, report_file_template.format(**keys))
@@ -51,14 +45,13 @@ class Report(object):
stale_time,
report_dir,
archive_dir,
- reports, file_format="yaml"):
+ reports):
self.report_id = report_id
self.stale_time = stale_time
self.report_dir = report_dir
self.archive_dir = archive_dir
self.reports = reports
- self.file_format = file_format
self.refresh()
@@ -83,24 +76,14 @@ class Report(object):
report_filename = get_report_path(self.report_id)
try:
with open(report_filename) as fd:
- if self.file_format == "json":
- line = fd.readline()
- json.loads(line)
- else:
- g = yaml.safe_load_all(fd)
- report_details = g.next()
+ line = fd.readline()
+ report_details = json.loads(line.strip())
except IOError:
raise e.ReportNotFound
dst_filename = report_file_name(self.archive_dir, report_details)
shutil.move(report_filename, dst_filename)
- if self.file_format == "json":
- report_details["record_type"] = "footer"
- with open(dst_filename, "a+") as fd:
- json.dump(report_details, dst_filename)
- fd.write("\n")
-
if not self.delayed_call.called:
self.delayed_call.cancel()
del self.reports[self.report_id]
@@ -169,6 +152,8 @@ def parseNewReportRequest(request):
except KeyError:
pass
+ parsed_request['format'] = parsed_request.get('format', 'yaml')
+
return parsed_request
@@ -213,7 +198,6 @@ class ReportHandler(OONIBHandler):
class UpdateReportMixin(object):
-
def updateReport(self, report_id, parsed_request):
log.debug("Got this request %s" % parsed_request)
@@ -224,9 +208,24 @@ class UpdateReportMixin(object):
except KeyError:
raise e.OONIBError(404, "Report not found")
+ content_format = parsed_request.get('format', 'yaml')
+ if content_format == 'json':
+ data = json_dumps(parsed_request['content'])
+ elif content_format == 'yaml':
+ try:
+ entry = yaml.safe_load_all(parsed_request['content']).next()
+ data = json_dumps(entry)
+ except Exception as exc:
+ log.error("Received an invalid entry")
+ log.msg(parsed_request['content'])
+ log.exception(exc)
+ raise e.OONIBError(400, "Invalid report entry")
+ else:
+ raise e.InvalidFormatField
try:
with open(report_filename, 'a+') as fd:
- fd.write(parsed_request['content'])
+ fd.write(data)
+ fd.write("\n")
except IOError:
raise e.OONIBError(404, "Report not found")
self.write({'status': 'success'})
@@ -306,25 +305,15 @@ class NewReportHandlerFile(ReportHandler, UpdateReportMixin):
self.checkPolicy()
if 'content' in report_data:
- content = yaml.safe_load(report_data['content'])
- report_header = validate_report_header(content)
-
+ if report_data['format'] == 'json':
+ content = report_data['content']
+ elif report_data['format'] == 'yaml':
+ content = None
+ else:
+ raise e.InvalidFormatField
+ content['backend_version'] = config.backend_version
else:
- content = {
- 'software_name': software_name,
- 'software_version': software_version,
- 'probe_asn': probe_asn,
- 'probe_cc': probe_cc,
- 'test_name': self.testName,
- 'test_version': self.testVersion,
- 'input_hashes': self.inputHashes,
- 'start_time': time.time()
- }
-
- content['backend_version'] = config.backend_version
-
- report_header = yaml.dump(content)
- content = "---\n" + report_header + '...\n'
+ content = None
if not probe_asn:
probe_asn = "AS0"
@@ -356,8 +345,13 @@ class NewReportHandlerFile(ReportHandler, UpdateReportMixin):
self.report_dir,
self.archive_dir,
self.reports)
-
- self.writeToReport(report_filename, content)
+ if content:
+ # XXX make sure to validate the content to ensure it's possible to
+ # encode as JSON
+ data = json_dumps(content) + "\n"
+ self.writeToReport(report_filename, data)
+ else:
+ open(report_filename, 'w+').close()
self.write(response)