commit b56ab41246a832b7cc5cc4bddc5eaa8f4c83c2cf
Author: Isis Lovecruft isis@torproject.org
Date:   Sat May 18 15:58:49 2013 +0000

    Remove oonib report archival script.
---
 scripts/archive_reports.py | 174 --------------------------------------------
 1 file changed, 174 deletions(-)
diff --git a/scripts/archive_reports.py b/scripts/archive_reports.py
deleted file mode 100755
index c49bd7a..0000000
--- a/scripts/archive_reports.py
+++ /dev/null
@@ -1,174 +0,0 @@
-#!/usr/bin/env python
-import yaml
-import sys
-import glob
-import fcntl
-import os
-import re
-from ipaddr import IPAddress
-from datetime import timedelta
-from datetime import datetime
-from ooni.otime import fromTimestamp, timestamp
-from ooni.otime import InvalidTimestampFormat, utcDateNow
-from ooni.utils import log
-
-###############################################################################
-# You can set some config options here #
-###############################################################################
-report_age = 1 # hours
-report_archive_dir = '/home/user/oonib/reports/archived'
-report_source_dir = '/home/user/oonib/reports'
-valid_test_versions = ['0.1', '0.1.1', '0.4', '0.1.3']
-default_probe_cc = '??'
-target_permission = 0444
-path_permission = 0755
-retry_attempts = 100
-###############################################################################
-
-now = utcDateNow()
-delta = timedelta(hours=report_age)
-
-def filter_reports_by_age(report):
-    try:
-        ts,__,__ = os.path.basename(report).split('_')
-        if now - fromTimestamp(ts) > delta:
-            return True
-    except (InvalidTimestampFormat, ValueError):
-        return False
-
-class InvalidReportField(Exception):
-    pass
-
-def validate_fields(fields):
-    log.debug("Report fields are: %s" % fields)
-
-    # check report version
-    if fields['test_version'] not in valid_test_versions:
-        raise InvalidReportField('test_version')
-
-    # check report CC
-    #XXX: confirm what value we use for default CC and whether
-    # or not we should support > 2 character CC
-    if fields['probe_cc'] is None:
-        fields['probe_cc'] = default_probe_cc
-    if not re.match('[A-Z?]{2,4}', fields['probe_cc'].upper()):
-        raise InvalidReportField('probe_cc')
-
-    # check report ASN
-    if fields['probe_asn'] is None:
-        fields['probe_asn'] = 'AS0'
-    if not re.match('^AS[0-9]{1,10}', fields['probe_asn'].upper()):
-        raise InvalidReportField('probe_asn')
-
-    # check report timestamp
-    try:
-        datetime_ts = datetime.fromtimestamp(fields['start_time'])
-        datetime_str = timestamp(datetime_ts)
-    except InvalidTimestampFormat:
-        raise InvalidReportField('start_time')
-
-    # check report IP
-    try:
-        IPAddress(fields['probe_ip'])
-    except ValueError:
-        raise InvalidReportField('probe_ip')
-
-    # all looks good!
-
-def get_report_header_fields(report_header):
-    required_fields = ['probe_asn', 'probe_cc', 'probe_ip', 'start_time',
-                       'test_name', 'test_version']
-    try:
-        return dict([(k,report_header[k]) for k in required_fields ])
-    except KeyError:
-        return None
-
-def get_test_name(fields):
-    test_name = fields['test_name'].lower().replace(' ', '_')
-    return test_name
-
-def get_target_or_fail(fields, report):
-    # set the target filename
-    reportFormatVersion = fields['test_version']
-    CC = fields['probe_cc']
-    # XXX: wouldn't hurt to check timestamp for sanity again?
-    dateInISO8601Format,__,__ = os.path.basename(report).split('_')
-    probeASNumber = fields['probe_asn']
-    testName = get_test_name(fields)
-
-    # make sure path reportFormatVersion/CC exists
-    path = os.path.abspath(report_archive_dir)
-    for component in [reportFormatVersion, CC]:
-        path = os.path.join(path, component)
-        if not os.path.isdir(path):
-            try:
-                os.mkdir(path, path_permission)
-                log.debug("mkdir path: %s" % path)
-            except OSError:
-                return None
-
-    # if the target file already exists, try to find another filename
-    filename = "%s-%s-%s.yamloo" % (testName, dateInISO8601Format, probeASNumber)
-    target = os.path.join(path, filename)
-
-    # try to get a unique filename. os.open as used below requires
-    # that the file not already exist
-    naming_attempts = 1
-    while os.path.exists(target) and naming_attempts < retry_attempts:
-        filename = "%s-%s-%s.%d.yamloo" % (testName, dateInISO8601Format,
-                                           probeASNumber, naming_attempts)
-        target = os.path.join(path, filename)
-        naming_attempts = naming_attempts + 1
-
-    if naming_attempts >= retry_attempts:
-        log.err("Failed getting unique filename %d times; skipping" % i)
-        return None
-    return target
-
-# grab list of reports
-reports = glob.glob(report_source_dir+'/*')
-reports_to_archive = filter(filter_reports_by_age, reports)
-
-# iterate over the reports to archive
-for report in reports_to_archive:
-    log.debug("Parsing report: %s" % report)
-    try:
-        #XXX: verify that os.fdopen works as expected
-        f = os.fdopen(os.open(report, os.O_RDONLY|os.O_EXCL|os.O_NONBLOCK))
-    except IOError:
-        log.err("Unable to get exclusive lock on %s; skipping" % report)
-        continue
-
-    # parse the header and validate it
-    yamloo = yaml.safe_load_all(f)
-    report_header = yamloo.next()
-    fields = get_report_header_fields(report_header)
-    try:
-        validate_fields(fields)
-    except InvalidReportField, field_name:
-        log.err("Report %s contains invalid field called %s" % (report, field_name))
-        continue
-    except:
-        log.err("An unhandled error occurred while processing %s" % report)
-        continue
-
-    # get a target filename or fail
-    target = get_target_or_fail(fields, report)
-    if not target:
-        continue
-
-    log.debug("target: %s" % target)
-
-    try:
-        #XXX: My system does not have os.O_EXLOCK. Verify this works as is.
-        g = os.fdopen(os.open(target, os.O_CREAT|os.O_EXCL|os.O_NONBLOCK))
-
-        os.rename(report, target)
-        os.chmod(target, target_permission)
-        f.close()
-        g.close()
-
-    except IOError:
-        # unable to lock the file... still held open?
-        log.err("Failed to lock target file. Possible race condition!")
-        continue