tor-commits
September 2016
- 20 participants
- 1187 discussions

[ooni-probe/master] Use NGDeck also for the command line version of ooniprobe
by art@torproject.org 19 Sep '16
commit 76cc0d4989de0a777fa62f4de5b60f07e0701da7
Author: Arturo Filastò <arturo@filasto.net>
Date: Tue Jul 26 02:05:58 2016 +0200
Use NGDeck also for the command line version of ooniprobe
* Remove code that is now dead.
---
bin/Makefile | 81 --------
bin/ooniprobe | 24 ---
bin/ooniprobe-dev | 10 -
bin/test/ooni/__init__.py | 1 -
ooni/backend_client.py | 95 +-----…---
ooni/deck.py | 356 +++++++++++++++++++++------------
ooni/director.py | 2 -
ooni/geoip.py | 6 +-
ooni/reporter.py | 2 +-
ooni/scripts/oonideckgen.py | 2 +-
ooni/scripts/ooniprobe.py | 19 ++
ooni/scripts/oonireport.py | 2 +-
ooni/templates/tcpt.py | 1 -
ooni/tests/mocks.py | 2 +-
ooni/tests/test_backend_client.py | 16 --
ooni/tests/test_deck.py | 243 +++++++++++-----------
ooni/ui/cli.py | 124 +++++-------
ooni/ui/web/client/index.html | 2 +-
scripts/__init__.py | 0
scripts/set_caps/Makefile | 81 ++++++++
scripts/set_caps/__init__.py | 0
scripts/set_caps/test/__init__.py | 0
scripts/set_caps/test/ooni/__init__.py | 1 +
23 files changed, 517 insertions(+), 553 deletions(-)
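
For orientation, a minimal sketch of the deck lifecycle the CLI converges on after this change (names are taken from the diff below; the deck path is hypothetical):

from ooni.deck import NGDeck

deck = NGDeck(global_options={}, no_collector=False)
deck.open("/usr/share/ooni/decks/web.deck")  # hypothetical path
deck.setup()          # must precede run(); run() asserts _is_setup
# deck.run(director)  # director is an ooni.director.Director; Tor is
                      # started once up front if any task requires it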
diff --git a/bin/Makefile b/bin/Makefile
deleted file mode 100644
index bc80843..0000000
--- a/bin/Makefile
+++ /dev/null
@@ -1,81 +0,0 @@
-# Wrappers for running ooniprobe as a non-root user.
-#
-# Build-Depends: cython, pythonX.Y-dev, libcap2-bin
-# Depends: libpythonX.Y
-#
-# $ make && make check
-# $ sudo make install # after installing the rest of ooni-probe
-# $ make installcheck_unsafe # runs complete tests as non-root
-#
-# `make` builds a program that has file capabilities set on it. This is just
-# ./ooniprobe compiled into a C program using Cython, so that one can set
-# capabilities directly on the resulting binary. This way, we avoid the need
-# for a separate child python interpreter with its own capabilities. Another
-# advantage is that libpython.so (needed by the program) would be automatically
-# upgraded by the system package manager. The version of python is hard-coded
-# into the wrapper at build time; making this dynamic is possible, but much
-# more complex and not yet implemented.
-#
-# Execution may additionally be limited to a particular unix group by using
-# chgrp(1) and chmod(1) to 'o-x,g+x' after installation.
-#
-
-# GNU Makefile conventions, see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
-prefix = /usr/local
-exec_prefix = $(prefix)
-bindir = $(exec_prefix)/bin
-
-INSTALL = install
-PYTHON = python
-PYTHON_CONFIG = python-config
-CYTHON = cython
-SETCAP = setcap
-
-INSTALL_PROGRAM = $(INSTALL)
-PY_CFLAGS = $(shell $(PYTHON_CONFIG) --cflags)
-PY_LDFLAGS = $(shell $(PYTHON_CONFIG) --ldflags)
-
-BUILDDIR := ./build
-SCRIPTDIR := .
-TESTDIR := ./test
-CAP_SCRIPT := ooniprobe
-CAP_NEEDED := cap_net_admin,cap_net_raw
-
-# Unfortunately cython --embed ignores the arguments in the shebang line
-# So we need to patch the generated code ourselves.
-CYTHON_PRE_MAIN = extern int Py_IgnoreEnvironmentFlag; \
- Py_IgnoreEnvironmentFlag++; \
- extern int Py_NoUserSiteDirectory; \
- Py_NoUserSiteDirectory++;
-
-all: $(BUILDDIR)/$(CAP_SCRIPT)
-
-$(BUILDDIR)/$(CAP_SCRIPT): $(BUILDDIR)/$(CAP_SCRIPT).c Makefile
- $(CC) $(PY_CFLAGS) $(PY_LDFLAGS) "$<" -o "$@"
-
-$(BUILDDIR)/$(CAP_SCRIPT).c: $(SCRIPTDIR)/$(CAP_SCRIPT) Makefile
- mkdir -p "$(BUILDDIR)"
- $(CYTHON) "$<" --embed=CYTHON_MAIN_SENTINEL -Werror -Wextra -o "$@"
- sed -i \
- -e 's/\(.*CYTHON_MAIN_SENTINEL.*{\)/\1 $(CYTHON_PRE_MAIN)/g' \
- -e '/CYTHON_MAIN_SENTINEL[^{]*$$/,/{/s/{/{ $(CYTHON_PRE_MAIN)/g' \
- -e 's/CYTHON_MAIN_SENTINEL/main/g' "$@"
-
-check: $(BUILDDIR)/$(CAP_SCRIPT)
- # test that setcapped binary ignores PYTHONPATH
- BIN="$$(realpath "$<")" && cd "$(TESTDIR)" && PYTHONPATH=. $$BIN --version
-
-install: $(BUILDDIR)/$(CAP_SCRIPT)
- mkdir -p "$(DESTDIR)$(bindir)"
- $(INSTALL_PROGRAM) -t "$(DESTDIR)$(bindir)" "$(BUILDDIR)/$(CAP_SCRIPT)"
- $(SETCAP) "$(CAP_NEEDED)"+eip "$(DESTDIR)$(bindir)/$(CAP_SCRIPT)"
-
-installcheck_unsafe: $(BUILDDIR)/$(CAP_SCRIPT)
- # run a standard check. note that because of hardcoded paths (for security)
- # this can only work after you've installed your development copy
- "./$<" -i /usr/share/ooni/decks/complete.deck
-
-clean:
- rm -rf "$(BUILDDIR)"
-
-.PHONY: clean all check install installcheck%
diff --git a/bin/ooniprobe b/bin/ooniprobe
deleted file mode 100755
index 0274899..0000000
--- a/bin/ooniprobe
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-import sys
-
-from twisted.internet import reactor
-
-from ooni.ui.cli import runWithDaemonDirector, runWithDirector
-from ooni.ui.cli import setupGlobalOptions
-
-exit_code=0
-
-global_options = setupGlobalOptions(logging=True, start_tor=True,
- check_incoherences=True)
-if global_options['queue']:
- d = runWithDaemonDirector(global_options)
-else:
- d = runWithDirector(global_options)
-@d.addBoth
-def cb(result):
- global exit_code
- if result is not None:
- exit_code=1
- reactor.stop()
-reactor.run()
-sys.exit(exit_code)
diff --git a/bin/ooniprobe-dev b/bin/ooniprobe-dev
deleted file mode 100755
index c44a8a6..0000000
--- a/bin/ooniprobe-dev
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-# Developer script for running ooniprobe directly from the repository.
-# We don't automatically add "$PWD" to PYTHONPATH as that is a security risk
-# when run as /usr/bin/ooniprobe on an end user's computer.
-ROOTDIR=$(cd $(dirname $(dirname $0)) && pwd -P)
-if [ $? -ne 0 ]; then
- echo "$0: cannot determine toplevel directory" 1>&2
- exit 1
-fi
-PYTHONPATH="$ROOTDIR" exec $ROOTDIR/bin/ooniprobe "$@"
diff --git a/bin/test/ooni/__init__.py b/bin/test/ooni/__init__.py
deleted file mode 100644
index f2cd353..0000000
--- a/bin/test/ooni/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-raise ValueError("test failed! wrapper did not ignore polluted PWD. either the wrapper is faulty, or ooni is still unpatched (Tor bug #13581)")
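
The file deleted above is a canary: if the setcap wrapper honours a polluted PYTHONPATH, "import ooni" resolves to this fake package and fails loudly. The same shadowing trick as a standalone sketch (temporary path, works in any interpreter):

import os, sys, tempfile

# Fake "ooni" package that raises on import, placed first on sys.path --
# the same shadowing a polluted PYTHONPATH would cause.
tmp = tempfile.mkdtemp()
os.mkdir(os.path.join(tmp, "ooni"))
with open(os.path.join(tmp, "ooni", "__init__.py"), "w") as f:
    f.write('raise ValueError("polluted path was honoured")\n')
sys.path.insert(0, tmp)
try:
    import ooni  # hits the canary, not a real package
except ValueError as exc:
    print("canary fired: %s" % exc)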
diff --git a/ooni/backend_client.py b/ooni/backend_client.py
index 506aba4..7721f6f 100644
--- a/ooni/backend_client.py
+++ b/ooni/backend_client.py
@@ -134,6 +134,8 @@ class OONIBClient(object):
return finished
def queryBackend(self, method, urn, query=None, retries=3):
+ log.debug("Querying backend {0}{1} with {2}".format(self.base_address,
+ urn, query))
bodyProducer = None
if query:
bodyProducer = StringProducer(json.dumps(query))
@@ -212,105 +214,12 @@ class CollectorClient(OONIBClient):
return d
- def getInput(self, input_hash):
- from ooni.deck import InputFile
-
- input_file = InputFile(input_hash)
- if input_file.descriptorCached:
- return defer.succeed(input_file)
- else:
- d = self.queryBackend('GET', '/input/' + input_hash)
-
- @d.addCallback
- def cb(descriptor):
- input_file.load(descriptor)
- input_file.save()
- return input_file
-
- @d.addErrback
- def err(err):
- log.err("Failed to get descriptor for input %s" % input_hash)
- log.exception(err)
-
- return d
-
- def getInputList(self):
- return self.queryBackend('GET', '/input')
-
- def downloadInput(self, input_hash):
- from ooni.deck import InputFile
-
- input_file = InputFile(input_hash)
-
- if input_file.fileCached:
- return defer.succeed(input_file)
- else:
- d = self.download('/input/' + input_hash + '/file', input_file.cached_file)
-
- @d.addCallback
- def cb(res):
- input_file.verify()
- return input_file
-
- @d.addErrback
- def err(err):
- log.err("Failed to download the input file %s" % input_hash)
- log.exception(err)
-
- return d
-
def getInputPolicy(self):
return self.queryBackend('GET', '/policy/input')
def getNettestPolicy(self):
return self.queryBackend('GET', '/policy/nettest')
- def getDeckList(self):
- return self.queryBackend('GET', '/deck')
-
- def getDeck(self, deck_hash):
- from ooni.deck import Deck
-
- deck = Deck(deck_hash)
- if deck.descriptorCached:
- return defer.succeed(deck)
- else:
- d = self.queryBackend('GET', '/deck/' + deck_hash)
-
- @d.addCallback
- def cb(descriptor):
- deck.load(descriptor)
- deck.save()
- return deck
-
- @d.addErrback
- def err(err):
- log.err("Failed to get descriptor for deck %s" % deck_hash)
- log.exception(err)
-
- return d
-
- def downloadDeck(self, deck_hash):
- from ooni.deck import Deck
-
- deck = Deck(deck_hash)
- if deck.fileCached:
- return defer.succeed(deck)
- else:
- d = self.download('/deck/' + deck_hash + '/file', deck.cached_file)
-
- @d.addCallback
- def cb(res):
- deck.verify()
- return deck
-
- @d.addErrback
- def err(err):
- log.err("Failed to download the deck %s" % deck_hash)
- log.exception(err)
-
- return d
-
def createReport(self, test_details):
request = {
'software_name': test_details['software_name'],
diff --git a/ooni/deck.py b/ooni/deck.py
index 1746d26..eacd256 100644
--- a/ooni/deck.py
+++ b/ooni/deck.py
@@ -24,59 +24,6 @@ from ooni.utils import log
from ooni.results import generate_summary
-class InputFile(object):
- def __init__(self, input_hash, base_path=config.inputs_directory):
- self.id = input_hash
- cache_path = os.path.join(os.path.abspath(base_path), input_hash)
- self.cached_file = cache_path
- self.cached_descriptor = cache_path + '.desc'
-
- @property
- def descriptorCached(self):
- if os.path.exists(self.cached_descriptor):
- with open(self.cached_descriptor) as f:
- descriptor = json.load(f)
- self.load(descriptor)
- return True
- return False
-
- @property
- def fileCached(self):
- if os.path.exists(self.cached_file):
- try:
- self.verify()
- except AssertionError:
- log.err("The input %s failed validation."
- "Going to consider it not cached." % self.id)
- return False
- return True
- return False
-
- def save(self):
- with open(self.cached_descriptor, 'w+') as f:
- json.dump({
- 'name': self.name,
- 'id': self.id,
- 'version': self.version,
- 'author': self.author,
- 'date': self.date,
- 'description': self.description
- }, f)
-
- def load(self, descriptor):
- self.name = descriptor['name']
- self.version = descriptor['version']
- self.author = descriptor['author']
- self.date = descriptor['date']
- self.description = descriptor['description']
-
- def verify(self):
- digest = os.path.basename(self.cached_file)
- with open(self.cached_file) as f:
- file_hash = sha256(f.read())
- assert file_hash.hexdigest() == digest
-
-
def nettest_to_path(path, allow_arbitrary_paths=False):
"""
Takes as input either a path or a nettest name.
@@ -136,6 +83,7 @@ def get_preferred_bouncer():
else:
return BouncerClient(bouncer_address)
+<<<<<<< d0fb4f37530aeb6b69fbc2985019464f8ec10312
class Deck(InputFile):
# this exists so we can mock it out in unittests
_BouncerClient = BouncerClient
@@ -274,6 +222,7 @@ class Deck(InputFile):
i['test_options'][i['key']] = input_file.cached_file
+@defer.inlineCallbacks
def lookup_collector_and_test_helpers(net_test_loaders,
bouncer,
preferred_backend,
@@ -329,19 +278,21 @@ def lookup_collector_and_test_helpers(net_test_loaders,
net_test_loader.testName)
collector, test_helpers = \
- find_collector_and_test_helpers(test_name=net_test_loader.testName,
- test_version=net_test_loader.testVersion,
- input_files=net_test_loader.inputFiles)
+ find_collector_and_test_helpers(
+ test_name=net_test_loader.testName,
+ test_version=net_test_loader.testVersion,
+ input_files=net_test_loader.inputFiles
+ )
for option, name in net_test_loader.missingTestHelpers:
test_helper_address_or_settings = test_helpers[name]
net_test_loader.localOptions[option] = test_helper_address_or_settings
net_test_loader.testHelpers[option] = test_helper_address_or_settings
- if not net_test_loader.collector:
+ if not net_test_loader.collector and not no_collector:
+ log.debug("Using collector {0}".format(collector))
net_test_loader.collector = collector
-
@defer.inlineCallbacks
def get_reachable_test_helpers_and_collectors(net_tests, preferred_backend):
for net_test in net_tests:
@@ -579,10 +530,29 @@ def options_to_args(options, prepath=None):
continue
if k == "file":
v = resolve_file_path(v, prepath)
- args.append('--'+k)
+ if v == False or v == 0:
+ continue
+ if (len(k)) == 1:
+ args.append('-'+k)
+ else:
+ args.append('--'+k)
+ if isinstance(v, bool) or isinstance(v, int):
+ continue
args.append(v)
return args
+def normalize_options(options):
+ """
+ Takes some options that have a mixture of - and _ and returns the
+ equivalent options with only '_'.
+ """
+ normalized_opts = {}
+ for k, v in options.items():
+ normalized_key = k.replace('-', '_')
+ assert normalized_key not in normalized_opts, "The key {0} cannot be normalized".format(k)
+ normalized_opts[normalized_key] = v
+ return normalized_opts
+
class UnknownTaskKey(Exception):
pass
@@ -593,8 +563,14 @@ class DeckTask(object):
_metadata_keys = ["name"]
_supported_tasks = ["ooni"]
- def __init__(self, data, parent_metadata={}, cwd=None):
- self.parent_metadata = parent_metadata
+ def __init__(self, data,
+ parent_metadata={},
+ global_options={},
+ cwd=None,
+ arbitrary_paths=False):
+
+ self.parent_metadata = normalize_options(parent_metadata)
+ self.global_options = global_options
self.cwd = cwd
self.data = deepcopy(data)
@@ -605,10 +581,16 @@ class DeckTask(object):
self.requires_tor = False
self.requires_bouncer = False
+ # If this is set to true a deck can specify any path. It should only
+ # be run against trusted decks or when you create a deck
+ # programmaticaly to a run test specified from the command line.
+ self._arbitrary_paths = arbitrary_paths
+
self.ooni = {
'bouncer_client': None,
'test_details': {}
}
+ self.output_path = None
self._load(data)
@@ -619,7 +601,8 @@ class DeckTask(object):
raise MissingTaskDataKey(required_key)
# This raises e.NetTestNotFound, we let it go onto the caller
- nettest_path = nettest_to_path(task_data.pop("test_name"))
+ nettest_path = nettest_to_path(task_data.pop("test_name"),
+ self._arbitrary_paths)
try:
annotations = task_data.pop('annotations')
@@ -631,8 +614,16 @@ class DeckTask(object):
except KeyError:
collector_address = self.parent_metadata.get('collector', None)
+ try:
+ self.output_path = task_data.pop('reportfile')
+ except KeyError:
+ self.output_path = self.global_options.get('reportfile', None)
+
+ if task_data.get('no-collector', False):
+ collector_address = None
+
net_test_loader = NetTestLoader(
- options_to_args(task_data),
+ options_to_args(task_data, self.cwd),
annotations=annotations,
test_file=nettest_path
)
@@ -658,11 +649,14 @@ class DeckTask(object):
self.requires_bouncer = True
self.ooni['net_test_loader'] = net_test_loader
- # Need to ensure that this is called only once we have looked up the
- # probe IP address and have geoip data.
- self.ooni['test_details'] = net_test_loader.getTestDetails()
+
+ def _setup_ooni(self):
+ self.ooni['test_details'] = self.ooni['net_test_loader'].getTestDetails()
self.id = generate_filename(self.ooni['test_details'])
+ def setup(self):
+ getattr(self, "_setup_"+self.type)()
+
def _load(self, data):
for key in self._metadata_keys:
try:
@@ -678,11 +672,81 @@ class DeckTask(object):
assert len(data) == 0
+class NotAnOption(Exception):
+ pass
+
+def subargs_to_options(subargs):
+ options = {}
+
+ def parse_option_name(arg):
+ if arg.startswith("--"):
+ return arg[2:]
+ elif arg.startswith("-"):
+ return arg[1:]
+ raise NotAnOption
+
+ subargs = iter(reversed(subargs))
+ for subarg in subargs:
+ try:
+ value = subarg
+ name = parse_option_name(subarg)
+ options[name] = True
+ except NotAnOption:
+ try:
+ name = parse_option_name(subargs.next())
+ options[name] = value
+ except StopIteration:
+ break
+
+ return options
+
+def convert_legacy_deck(deck_data):
+ """
+ I take a legacy deck list and convert it to the new deck format.
+
+ :param deck_data: in the legacy format
+ :return: deck_data in the new format
+ """
+ assert isinstance(deck_data, list), "Legacy decks are lists"
+ new_deck_data = {}
+ new_deck_data["name"] = "Legacy deck"
+ new_deck_data["description"] = "This is a legacy deck converted to the " \
+ "new format"
+ new_deck_data["bouncer"] = None
+ new_deck_data["tasks"] = []
+ for deck_item in deck_data:
+ deck_task = {"ooni": {}}
+
+ options = deck_item["options"]
+ deck_task["ooni"]["test_name"] = options.pop("test_file")
+ deck_task["ooni"]["annotations"] = options.pop("annotations", {})
+ deck_task["ooni"]["collector"] = options.pop("collector", None)
+
+ # XXX here we end up picking only the last not none bouncer_address
+ bouncer_address = options.pop("bouncer", None)
+ if bouncer_address is not None:
+ new_deck_data["bouncer"] = bouncer_address
+
+ subargs = options.pop("subargs", [])
+ for name, value in subargs_to_options(subargs).items():
+ deck_task["ooni"][name] = value
+
+ for name, value in options.items():
+ deck_task["ooni"][name] = value
+
+ new_deck_data["tasks"].append(deck_task)
+
+ return new_deck_data
+
class NGDeck(object):
- def __init__(self, deck_data=None,
- deck_path=None, no_collector=False):
+ def __init__(self,
+ deck_data=None,
+ deck_path=None,
+ global_options={},
+ no_collector=False,
+ arbitrary_paths=False):
# Used to resolve relative paths inside of decks.
- self.deck_directory = None
+ self.deck_directory = os.getcwd()
self.requires_tor = False
self.no_collector = no_collector
self.name = ""
@@ -690,8 +754,12 @@ class NGDeck(object):
self.schedule = None
self.metadata = {}
+ self.global_options = normalize_options(global_options)
self.bouncer = None
+ self._arbitrary_paths = arbitrary_paths
+ self._is_setup = False
+
self._measurement_path = FilePath(config.measurements_directory)
self._tasks = []
self.task_ids = []
@@ -701,35 +769,24 @@ class NGDeck(object):
elif deck_data is not None:
self.load(deck_data)
- def open(self, deck_path):
+ def open(self, deck_path, global_options=None):
with open(deck_path) as fh:
deck_data = yaml.safe_load(fh)
- self.load(deck_data)
+ self.deck_directory = os.path.abspath(os.path.dirname(deck_path))
+ self.load(deck_data, global_options)
- def write(self, fh):
- """
- Writes a properly formatted deck to the supplied file handle.
- :param fh: an open file handle
- :return:
- """
- deck_data = {
- "name": self.name,
- "description": self.description,
- "tasks": [task.data for task in self._tasks]
- }
- if self.schedule is not None:
- deck_data["schedule"] = self.schedule
- for key, value in self.metadata.items():
- deck_data[key] = value
+ def load(self, deck_data, global_options=None):
+ if global_options is not None:
+ self.global_options = global_options
- fh.write("---\n")
- yaml.safe_dump(deck_data, fh, default_flow_style=False)
+ if isinstance(deck_data, list):
+ deck_data = convert_legacy_deck(deck_data)
- def load(self, deck_data):
self.name = deck_data.pop("name", "Un-named Deck")
self.description = deck_data.pop("description", "No description")
- bouncer_address = deck_data.pop("bouncer", None)
+ bouncer_address = self.global_options.get('bouncer',
+ deck_data.pop("bouncer", None))
if bouncer_address is None:
self.bouncer = get_preferred_bouncer()
elif isinstance(bouncer_address, dict):
@@ -743,8 +800,17 @@ class NGDeck(object):
for key, metadata in deck_data.items():
self.metadata[key] = metadata
+ # We override the task metadata with the global options if present
+ self.metadata.update(self.global_options)
+
for task_data in tasks_data:
- deck_task = DeckTask(task_data, self.metadata, self.deck_directory)
+ deck_task = DeckTask(
+ data=task_data,
+ parent_metadata=self.metadata,
+ global_options=self.global_options,
+ cwd=self.deck_directory,
+ arbitrary_paths=self._arbitrary_paths
+ )
if deck_task.requires_tor:
self.requires_tor = True
if (deck_task.requires_bouncer and
@@ -753,6 +819,32 @@ class NGDeck(object):
self._tasks.append(deck_task)
self.task_ids.append(deck_task.id)
+ if self.metadata.get('no_collector', False):
+ self.no_collector = True
+
+ @property
+ def tasks(self):
+ return self._tasks
+
+ def write(self, fh):
+ """
+ Writes a properly formatted deck to the supplied file handle.
+ :param fh: an open file handle
+ :return:
+ """
+ deck_data = {
+ "name": self.name,
+ "description": self.description,
+ "tasks": [task.data for task in self._tasks]
+ }
+ if self.schedule is not None:
+ deck_data["schedule"] = self.schedule
+ for key, value in self.metadata.items():
+ deck_data[key] = value
+
+ fh.write("---\n")
+ yaml.safe_dump(deck_data, fh, default_flow_style=False)
+
@defer.inlineCallbacks
def query_bouncer(self):
preferred_backend = config.advanced.get(
@@ -772,60 +864,74 @@ class NGDeck(object):
preferred_backend,
self.no_collector
)
-
- def _measurement_completed(self, result, measurement_id):
- log.msg("{0}".format(result))
- measurement_dir = self._measurement_path.child(measurement_id)
- measurement_dir.child("measurements.njson.progress").moveTo(
- measurement_dir.child("measurements.njson")
- )
- generate_summary(
- measurement_dir.child("measurements.njson").path,
- measurement_dir.child("summary.json").path
- )
- measurement_dir.child("running.pid").remove()
-
- def _measurement_failed(self, failure, measurement_id):
- measurement_dir = self._measurement_path.child(measurement_id)
- measurement_dir.child("running.pid").remove()
- # XXX do we also want to delete measurements.njson.progress?
+ defer.returnValue(net_test_loaders)
+
+ def _measurement_completed(self, result, task):
+ if not task.output_path:
+ measurement_id = task.id
+ measurement_dir = self._measurement_path.child(measurement_id)
+ measurement_dir.child("measurements.njson.progress").moveTo(
+ measurement_dir.child("measurements.njson")
+ )
+ generate_summary(
+ measurement_dir.child("measurements.njson").path,
+ measurement_dir.child("summary.json").path
+ )
+ measurement_dir.child("running.pid").remove()
+
+ def _measurement_failed(self, failure, task):
+ if not task.output_path:
+ # XXX do we also want to delete measurements.njson.progress?
+ measurement_id = task.id
+ measurement_dir = self._measurement_path.child(measurement_id)
+ measurement_dir.child("running.pid").remove()
return failure
def _run_ooni_task(self, task, director):
net_test_loader = task.ooni["net_test_loader"]
test_details = task.ooni["test_details"]
- measurement_id = task.id
- measurement_dir = self._measurement_path.child(measurement_id)
- measurement_dir.createDirectory()
+ report_filename = task.output_path
+ if not task.output_path:
+ measurement_id = task.id
- report_filename = measurement_dir.child("measurements.njson.progress").path
- pid_file = measurement_dir.child("running.pid")
+ measurement_dir = self._measurement_path.child(measurement_id)
+ measurement_dir.createDirectory()
- with pid_file.open('w') as out_file:
- out_file.write("{0}".format(os.getpid()))
+ report_filename = measurement_dir.child("measurements.njson.progress").path
+ pid_file = measurement_dir.child("running.pid")
+
+ with pid_file.open('w') as out_file:
+ out_file.write("{0}".format(os.getpid()))
d = director.start_net_test_loader(
net_test_loader,
report_filename,
+ collector_client=net_test_loader.collector,
test_details=test_details
)
- d.addCallback(self._measurement_completed, measurement_id)
- d.addErrback(self._measurement_failed, measurement_id)
+ d.addCallback(self._measurement_completed, task)
+ d.addErrback(self._measurement_failed, task)
return d
+ def setup(self):
+ """
+ This method needs to be called before you are able to run a deck.
+ """
+ for task in self._tasks:
+ task.setup()
+ self._is_setup = True
+
@defer.inlineCallbacks
def run(self, director):
- tasks = []
- preferred_backend = config.advanced.get("preferred_backend", "onion")
+ assert self._is_setup, "You must call setup() before you can run a " \
+ "deck"
+ if self.requires_tor:
+ yield director.start_tor()
yield self.query_bouncer()
for task in self._tasks:
- if task.requires_tor:
- yield director.start_tor()
- elif task.requires_bouncer and preferred_backend == "onion":
- yield director.start_tor()
if task.type == "ooni":
- tasks.append(self._run_ooni_task(task, director))
- defer.returnValue(tasks)
+ yield self._run_ooni_task(task, director)
+ self._is_setup = False
input_store = InputStore()
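
A quick usage sketch for the two helpers added in the deck.py diff above (assumes this tree on the path; Python 2, since subargs_to_options calls the iterator's .next()):

from ooni.deck import subargs_to_options, convert_legacy_deck

print subargs_to_options(["--flag", "--key", "value"])
# {'key': 'value', 'flag': True}

legacy = [{"options": {"test_file": "blocking/web_connectivity",
                       "subargs": ["-f", "/path/to/urls.txt"]}}]
print convert_legacy_deck(legacy)["tasks"][0]["ooni"]["test_name"]
# blocking/web_connectivity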
diff --git a/ooni/director.py b/ooni/director.py
index 793975e..c239601 100644
--- a/ooni/director.py
+++ b/ooni/director.py
@@ -424,9 +424,7 @@ class Director(object):
log.debug("Setting SOCKS port as %s" % tor_config.SocksPort)
try:
yield start_tor(tor_config)
- log.err("Calling tor callback")
self._tor_starting.callback(self._tor_state)
- log.err("called")
except Exception as exc:
log.err("Failed to start tor")
log.exc(exc)
diff --git a/ooni/geoip.py b/ooni/geoip.py
index 28e0e1e..f118268 100644
--- a/ooni/geoip.py
+++ b/ooni/geoip.py
@@ -1,6 +1,7 @@
from __future__ import absolute_import
import re
import os
+import json
import random
from hashlib import sha256
@@ -137,11 +138,12 @@ class UbuntuGeoIP(HTTPGeoIPLookupper):
return probe_ip
class DuckDuckGoGeoIP(HTTPGeoIPLookupper):
- url = "https://duckduckgo.com/?q=ip&ia=answer"
+ url = "https://api.duckduckgo.com/?q=ip&format=json"
def parseResponse(self, response_body):
+ j = json.loads(response_body)
regexp = "Your IP address is (.*) in "
- probe_ip = re.search(regexp, response_body).group(1)
+ probe_ip = re.search(regexp, j['Answer']).group(1)
return probe_ip
class ProbeIP(object):
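
The geoip hunk above swaps the scraped HTML page for the JSON API, so the lookup reduces to the following (a sketch, assuming the API keeps returning an "Answer" string of that shape):

import json, re, urllib2

body = urllib2.urlopen("https://api.duckduckgo.com/?q=ip&format=json").read()
answer = json.loads(body)["Answer"]
print re.search("Your IP address is (.*) in ", answer).group(1)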
diff --git a/ooni/reporter.py b/ooni/reporter.py
index f07b3cf..e2f155f 100644
--- a/ooni/reporter.py
+++ b/ooni/reporter.py
@@ -534,7 +534,7 @@ class Report(object):
collector:
The address of the oonib collector for this report.
- no_yamloo:
+ no_njson:
If we should disable reporting to disk.
"""
self.test_details = test_details
diff --git a/ooni/scripts/oonideckgen.py b/ooni/scripts/oonideckgen.py
index fa675f9..b980a2c 100644
--- a/ooni/scripts/oonideckgen.py
+++ b/ooni/scripts/oonideckgen.py
@@ -92,7 +92,7 @@ def get_user_country_code():
@defer.inlineCallbacks
-def oonideckgen():
+def oonideckgen(reactor):
options = Options()
try:
options.parseOptions()
diff --git a/ooni/scripts/ooniprobe.py b/ooni/scripts/ooniprobe.py
new file mode 100644
index 0000000..24493da
--- /dev/null
+++ b/ooni/scripts/ooniprobe.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+from twisted.internet import task
+
+def ooniprobe(reactor):
+ from ooni.ui.cli import runWithDaemonDirector, runWithDirector
+ from ooni.ui.cli import setupGlobalOptions
+
+ global_options = setupGlobalOptions(logging=True, start_tor=True,
+ check_incoherences=True)
+ if global_options['queue']:
+ return runWithDaemonDirector(global_options)
+ else:
+ return runWithDirector(global_options)
+
+def run():
+ task.react(ooniprobe)
+
+if __name__ == "__main__":
+ run()
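
The new entry point leans on twisted's task.react, which starts the reactor, calls the given function with it, waits for the returned Deferred, then stops the reactor and exits with an appropriate status -- replacing the hand-rolled reactor.run()/sys.exit() dance in the deleted bin/ooniprobe. In isolation:

from twisted.internet import task

def main(reactor):
    # any Deferred will do; react() tears everything down once it fires
    return task.deferLater(reactor, 0.1, lambda: None)

task.react(main)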
diff --git a/ooni/scripts/oonireport.py b/ooni/scripts/oonireport.py
index 6facadf..d75c925 100644
--- a/ooni/scripts/oonireport.py
+++ b/ooni/scripts/oonireport.py
@@ -202,7 +202,7 @@ def tor_check():
sys.exit(1)
-def oonireport(args=sys.argv[1:]):
+def oonireport(reactor, args=sys.argv[1:]):
options = Options()
try:
options.parseOptions(args)
diff --git a/ooni/templates/tcpt.py b/ooni/templates/tcpt.py
index 5e1d21a..bb080e6 100644
--- a/ooni/templates/tcpt.py
+++ b/ooni/templates/tcpt.py
@@ -96,4 +96,3 @@ class TCPTest(NetTestCase):
d2.addCallback(connected)
d2.addErrback(errback)
return d1
-
diff --git a/ooni/tests/mocks.py b/ooni/tests/mocks.py
index c15beec..d623e0b 100644
--- a/ooni/tests/mocks.py
+++ b/ooni/tests/mocks.py
@@ -191,7 +191,7 @@ class MockTaskManager(TaskManager):
class MockBouncerClient(object):
def __init__(self, *args, **kw):
- pass
+ self.backend_type = "onion"
def lookupTestHelpers(self, required_test_helpers):
ret = {
diff --git a/ooni/tests/test_backend_client.py b/ooni/tests/test_backend_client.py
index 212abb6..fab1fa2 100644
--- a/ooni/tests/test_backend_client.py
+++ b/ooni/tests/test_backend_client.py
@@ -60,22 +60,6 @@ class TestEnd2EndBackendClient(ConfigTestCase):
def test_download_input(self):
yield self.collector_client.downloadInput(input_id)
- @defer.inlineCallbacks
- def test_get_deck_list(self):
- deck_list = yield self.collector_client.getDeckList()
- self.assertTrue(isinstance(deck_list, list))
-
- @defer.inlineCallbacks
- def test_get_deck_descriptor(self):
- deck_descriptor = yield self.collector_client.getDeck(deck_id)
- for key in ['name', 'description',
- 'version', 'author', 'date', 'id']:
- self.assertTrue(hasattr(deck_descriptor, key))
-
- @defer.inlineCallbacks
- def test_download_deck(self):
- yield self.collector_client.downloadDeck(deck_id)
-
def test_lookup_invalid_helpers(self):
bouncer_client = BouncerClient('http://127.0.0.1:8888')
return self.failUnlessFailure(
diff --git a/ooni/tests/test_deck.py b/ooni/tests/test_deck.py
index 455f4e1..3d07959 100644
--- a/ooni/tests/test_deck.py
+++ b/ooni/tests/test_deck.py
@@ -1,15 +1,20 @@
import os
+from StringIO import StringIO
from copy import deepcopy
+import yaml
+
+from mock import patch, MagicMock
+
from twisted.internet import defer
from twisted.trial import unittest
from hashlib import sha256
from ooni import errors
from ooni.deck import input_store, lookup_collector_and_test_helpers
-from ooni.nettest import NetTestLoader
-from ooni.deck import InputFile, Deck, nettest_to_path, DeckTask, NGDeck
+from ooni.deck import nettest_to_path, NGDeck
+from ooni.deck import convert_legacy_deck
from ooni.tests.bases import ConfigTestCase
from ooni.tests.mocks import MockBouncerClient, MockCollectorClient
@@ -80,45 +85,6 @@ class BaseTestCase(unittest.TestCase):
"""
super(BaseTestCase, self).setUp()
-
-class TestInputFile(BaseTestCase):
- def tearDown(self):
- if self.filename != "":
- os.remove(self.filename)
-
- def test_file_cached(self):
- self.filename = file_hash = sha256(self.dummy_deck_content).hexdigest()
- input_file = InputFile(file_hash, base_path='.')
- with open(file_hash, 'w+') as f:
- f.write(self.dummy_deck_content)
- assert input_file.fileCached
-
- def test_file_invalid_hash(self):
- self.filename = invalid_hash = 'a' * 64
- with open(invalid_hash, 'w+') as f:
- f.write("b" * 100)
- input_file = InputFile(invalid_hash, base_path='.')
- self.assertRaises(AssertionError, input_file.verify)
-
- def test_save_descriptor(self):
- descriptor = {
- 'name': 'spam',
- 'id': 'spam',
- 'version': 'spam',
- 'author': 'spam',
- 'date': 'spam',
- 'description': 'spam'
- }
- file_id = 'a' * 64
- self.filename = file_id + '.desc'
- input_file = InputFile(file_id, base_path='.')
- input_file.load(descriptor)
- input_file.save()
- assert os.path.isfile(self.filename)
-
- assert input_file.descriptorCached
-
-
class TestDeck(BaseTestCase, ConfigTestCase):
def setUp(self):
super(TestDeck, self).setUp()
@@ -137,60 +103,47 @@ class TestDeck(BaseTestCase, ConfigTestCase):
super(TestDeck, self).tearDown()
def test_open_deck(self):
- deck = Deck(bouncer=FAKE_BOUNCER_ADDRESS,
- decks_directory=".")
- deck.loadDeck(self.deck_file)
- assert len(deck.netTestLoaders) == 1
+ deck = NGDeck()
+ deck.open(self.deck_file)
+ assert len(deck.tasks.ooni['net_test_loaders']) == 1
def test_load_deck_with_global_options(self):
global_options = {
"annotations": {"spam": "ham"},
"collector": "httpo://thirteenchars123.onion"
}
- deck = Deck(bouncer=FAKE_BOUNCER_ADDRESS,
- decks_directory=".")
- deck.loadDeck(self.deck_file,
- global_options=global_options)
+ deck = NGDeck(global_options=global_options)
+ deck.open(self.deck_file)
self.assertEqual(
- deck.netTestLoaders[0].annotations,
+ deck.tasks.ooni['net_test_loaders'][0].annotations,
global_options['annotations']
)
self.assertEqual(
- deck.netTestLoaders[0].collector.base_address,
+ deck.tasks.ooni['net_test_loaders'][0].collector.base_address,
global_options['collector'].replace("httpo://", "http://")
)
- def test_save_deck_descriptor(self):
- deck = Deck(bouncer=FAKE_BOUNCER_ADDRESS,
- decks_directory=".")
- deck.loadDeck(self.deck_file)
- deck.load({'name': 'spam',
- 'id': 'spam',
- 'version': 'spam',
- 'author': 'spam',
- 'date': 'spam',
- 'description': 'spam'
- })
- deck.save()
- self.filename = self.deck_file + ".desc"
- deck.verify()
-
+ @patch('ooni.deck.BouncerClient', MockBouncerClient)
+ @patch('ooni.deck.CollectorClient', MockCollectorClient)
@defer.inlineCallbacks
def test_lookup_test_helpers_and_collector(self):
- deck = Deck(bouncer=FAKE_BOUNCER_ADDRESS,
- decks_directory=".")
+ deck = NGDeck()
deck.bouncer = MockBouncerClient(FAKE_BOUNCER_ADDRESS)
- deck._BouncerClient = MockBouncerClient
- deck._CollectorClient = MockCollectorClient
- deck.loadDeck(self.deck_file)
+ deck.open(self.deck_file)
- self.assertEqual(len(deck.netTestLoaders[0].missingTestHelpers), 1)
+ self.assertEqual(
+ len(deck.tasks.ooni['net_test_loaders'][0].missingTestHelpers), 1)
- yield lookup_collector_and_test_helpers(deck.preferred_backend,
- deck.netTestLoaders)
+ yield lookup_collector_and_test_helpers(
+ net_test_loaders=deck.netTestLoaders,
+ preferred_backend=deck.preferred_backend,
+ bouncer=deck.bouncer
+ )
- self.assertEqual(deck.netTestLoaders[0].collector.settings['address'],
- 'httpo://thirteenchars123.onion')
+ self.assertEqual(
+ deck.tasks.ooni['net_test_loaders'][0].collector.settings['address'],
+ 'httpo://thirteenchars123.onion'
+ )
self.assertEqual(deck.netTestLoaders[0].localOptions['backend'],
'127.0.0.1')
@@ -202,15 +155,15 @@ class TestDeck(BaseTestCase, ConfigTestCase):
self.deck_file = os.path.join(self.cwd, deck_hash)
with open(self.deck_file, 'w+') as f:
f.write(self.dummy_deck_content_with_many_tests)
- deck = Deck(decks_directory=".")
- deck.loadDeck(self.deck_file)
+ deck = NGDeck()
+ deck.open(self.deck_file)
self.assertEqual(
- deck.netTestLoaders[0].localOptions['backend'],
+ deck.tasks[0].ooni['net_test_loader'].localOptions['backend'],
'1.1.1.1'
)
self.assertEqual(
- deck.netTestLoaders[1].localOptions['backend'],
+ deck.tasks[1].ooni['net_test_loader'].localOptions['backend'],
'2.2.2.2'
)
@@ -222,58 +175,65 @@ class TestDeck(BaseTestCase, ConfigTestCase):
nettest_to_path,
"invalid_test")
+ @patch('ooni.deck.BouncerClient', MockBouncerClient)
+ @patch('ooni.deck.CollectorClient', MockCollectorClient)
@defer.inlineCallbacks
def test_lookup_test_helpers_and_collector_cloudfront(self):
self.config.advanced.preferred_backend = "cloudfront"
- deck = Deck(bouncer=FAKE_BOUNCER_ADDRESS,
- decks_directory=".")
- deck.bouncer = MockBouncerClient(FAKE_BOUNCER_ADDRESS)
- deck._BouncerClient = MockBouncerClient
- deck._CollectorClient = MockCollectorClient
- deck.loadDeck(self.deck_file)
-
- self.assertEqual(len(deck.netTestLoaders[0].missingTestHelpers), 1)
-
- yield lookup_collector_and_test_helpers(deck.preferred_backend,
- deck.netTestLoaders)
+ deck = NGDeck()
+ deck.open(self.deck_file)
+ first_net_test_loader = deck.tasks[0].ooni['net_test_loader']
+ net_test_loaders = map(lambda task: task.ooni['net_test_loader'],
+ deck.tasks)
+ self.assertEqual(len(first_net_test_loader.missingTestHelpers), 1)
+
+ yield lookup_collector_and_test_helpers(
+ net_test_loaders=net_test_loaders ,
+ preferred_backend='cloudfront',
+ bouncer=deck.bouncer
+ )
self.assertEqual(
- deck.netTestLoaders[0].collector.settings['address'],
+ first_net_test_loader.collector.settings['address'],
'https://address.cloudfront.net'
)
self.assertEqual(
- deck.netTestLoaders[0].collector.settings['front'],
+ first_net_test_loader.collector.settings['front'],
'front.cloudfront.net'
)
self.assertEqual(
- deck.netTestLoaders[0].localOptions['backend'],
+ first_net_test_loader.localOptions['backend'],
'127.0.0.1'
)
-
+ @patch('ooni.deck.BouncerClient', MockBouncerClient)
+ @patch('ooni.deck.CollectorClient', MockCollectorClient)
@defer.inlineCallbacks
def test_lookup_test_helpers_and_collector_https(self):
self.config.advanced.preferred_backend = "https"
- deck = Deck(bouncer=FAKE_BOUNCER_ADDRESS,
- decks_directory=".")
- deck.bouncer = MockBouncerClient(FAKE_BOUNCER_ADDRESS)
- deck._BouncerClient = MockBouncerClient
- deck._CollectorClient = MockCollectorClient
- deck.loadDeck(self.deck_file)
+ deck = NGDeck()
+ deck.open(self.deck_file)
+
+ first_net_test_loader = deck.tasks[0].ooni['net_test_loader']
+ net_test_loaders = map(lambda task: task.ooni['net_test_loader'],
+ deck.tasks)
- self.assertEqual(len(deck.netTestLoaders[0].missingTestHelpers), 1)
+ self.assertEqual(len(first_net_test_loader .missingTestHelpers), 1)
- yield lookup_collector_and_test_helpers(deck.preferred_backend,
- deck.netTestLoaders)
+ yield lookup_collector_and_test_helpers(
+ net_test_loaders=net_test_loaders,
+ preferred_backend='https',
+ bouncer=deck.bouncer
+ )
self.assertEqual(
- deck.netTestLoaders[0].collector.settings['address'],
+ first_net_test_loader.collector.settings['address'],
'https://collector.ooni.io'
)
self.assertEqual(
- deck.netTestLoaders[0].localOptions['backend'],
+ first_net_test_loader.localOptions['backend'],
'127.0.0.1'
)
@@ -301,20 +261,65 @@ DECK_DATA = {
deepcopy(TASK_DATA)
]
}
+
+LEGACY_DECK = """
+- options:
+ annotations: null
+ bouncer: null
+ collector: null
+ no-collector: 0
+ no-geoip: 0
+ no-yamloo: 0
+ reportfile: null
+ subargs: [--flag, --key, value]
+ test_file: manipulation/http_invalid_request_line
+ verbose: 0
+- options:
+ annotations: null
+ bouncer: null
+ collector: null
+ no-collector: 0
+ no-geoip: 0
+ no-yamloo: 0
+ reportfile: null
+ subargs: []
+ test_file: manipulation/http_header_field_manipulation
+ verbose: 0
+- options:
+ annotations: null
+ bouncer: null
+ collector: null
+ no-collector: 0
+ no-geoip: 0
+ no-yamloo: 0
+ reportfile: null
+ subargs: [-f, /path/to/citizenlab-urls-global.txt]
+ test_file: blocking/web_connectivity
+ verbose: 0
+"""
+
class TestNGDeck(ConfigTestCase):
- skip = True
def test_deck_task(self):
- if self.skip:
- self.skipTest("Skip is set to true")
- yield input_store.update("ZZ")
- deck_task = DeckTask(TASK_DATA)
- self.assertIsInstance(deck_task.ooni["net_test_loader"],
- NetTestLoader)
+ #yield input_store.update("ZZ")
+ #deck_task = DeckTask(TASK_DATA)
+ #self.assertIsInstance(deck_task.ooni["net_test_loader"],
+ # NetTestLoader)
+ pass
- @defer.inlineCallbacks
def test_deck_load(self):
- if self.skip:
- self.skipTest("Skip is set to true")
- yield input_store.update("ZZ")
- deck = NGDeck(deck_data=DECK_DATA)
- self.assertEqual(len(deck.tasks), 1)
+ #yield input_store.update("ZZ")
+ #deck = NGDeck(deck_data=DECK_DATA)
+ #self.assertEqual(len(deck.tasks), 1)
+ pass
+
+ def test_convert_legacy_deck(self):
+ legacy_deck = yaml.safe_load(StringIO(LEGACY_DECK))
+ ng_deck = convert_legacy_deck(legacy_deck)
+ self.assertEqual(len(ng_deck['tasks']), 3)
+ task_names = map(lambda task: task['ooni']['test_name'],
+ ng_deck['tasks'])
+ self.assertItemsEqual(task_names, [
+ "manipulation/http_invalid_request_line",
+ "manipulation/http_header_field_manipulation",
+ "blocking/web_connectivity"
+ ])
diff --git a/ooni/ui/cli.py b/ooni/ui/cli.py
index fe24bf6..6550b3d 100644
--- a/ooni/ui/cli.py
+++ b/ooni/ui/cli.py
@@ -26,19 +26,18 @@ class Options(usage.Options):
optFlags = [["help", "h"],
["no-collector", "n", "Disable writing to collector"],
- ["no-yamloo", "N", "Disable writing to YAML file"],
+ ["no-njson", "N", "Disable writing to disk"],
["no-geoip", "g", "Disable geoip lookup on start"],
["list", "s", "List the currently installed ooniprobe "
"nettests"],
- ["printdeck", "p", "Print the equivalent deck for the "
- "provided command"],
["verbose", "v", "Show more verbose information"]
]
optParameters = [
- ["reportfile", "o", None, "Specify the report file name to write to."],
+ ["reportfile", "o", None, "Specify the report file name to write "
+ "to."],
["testdeck", "i", None, "Specify as input a test deck: a yaml file "
- "containing the tests to run and their "
+ "containing the tests to run and their "
"arguments."],
["collector", "c", None, "Specify the address of the collector for "
"test results. In most cases a user will "
@@ -132,7 +131,8 @@ def director_startup_handled_failures(failure):
errors.CouldNotFindTestCollector,
errors.ProbeIPUnknown,
errors.InvalidInputFile,
- errors.ConfigFileIncoherent)
+ errors.ConfigFileIncoherent,
+ SystemExit)
if isinstance(failure.value, errors.TorNotRunning):
log.err("Tor does not appear to be running")
@@ -236,64 +236,71 @@ def setupCollector(global_options, collector_client):
return collector_client
def createDeck(global_options, url=None):
- from ooni.nettest import NetTestLoader
- from ooni.deck import Deck, nettest_to_path
- from ooni.backend_client import CollectorClient
+ from ooni.deck import NGDeck, subargs_to_options
if url:
log.msg("Creating deck for: %s" % (url))
- if global_options['no-yamloo']:
- log.msg("Will not write to a yamloo report file")
-
- deck = Deck(bouncer=global_options['bouncer'],
- no_collector=global_options['no-collector'])
-
+ test_deck_path = global_options.pop('testdeck', None)
+ test_name = global_options.pop('test_file', None)
+ no_collector = global_options.pop('no-collector', False)
try:
- if global_options['testdeck']:
- deck.loadDeck(global_options['testdeck'], global_options)
+ if test_deck_path is not None:
+ deck = NGDeck(
+ global_options=global_options,
+ no_collector=no_collector
+ )
+ deck.open(test_deck_path)
else:
+ deck = NGDeck(
+ global_options=global_options,
+ no_collector=no_collector,
+ arbitrary_paths=True
+ )
log.debug("No test deck detected")
- test_file = nettest_to_path(global_options['test_file'], True)
if url is not None:
args = ('-u', url)
else:
args = tuple()
if any(global_options['subargs']):
args = global_options['subargs'] + args
- net_test_loader = NetTestLoader(args,
- test_file=test_file,
- annotations=global_options['annotations'])
- if global_options['collector']:
- net_test_loader.collector = \
- CollectorClient(global_options['collector'])
- deck.insert(net_test_loader)
+
+ test_options = subargs_to_options(args)
+ test_options['test_name'] = test_name
+ deck.load({
+ "tasks": [
+ {"ooni": test_options}
+ ]
+ })
except errors.MissingRequiredOption as option_name:
log.err('Missing required option: "%s"' % option_name)
incomplete_net_test_loader = option_name.net_test_loader
print incomplete_net_test_loader.usageOptions().getUsage()
- sys.exit(2)
+ raise SystemExit(2)
+
except errors.NetTestNotFound as path:
log.err('Requested NetTest file not found (%s)' % path)
- sys.exit(3)
+ raise SystemExit(3)
+
except errors.OONIUsageError as e:
log.err(e)
print e.net_test_loader.usageOptions().getUsage()
- sys.exit(4)
+ raise SystemExit(4)
+
except errors.HTTPSCollectorUnsupported:
log.err("HTTPS collectors require a twisted version of at least 14.0.2.")
- sys.exit(6)
+ raise SystemExit(6)
except errors.InsecureBackend:
log.err("Attempting to report to an insecure collector.")
log.err("To enable reporting to insecure collector set the "
"advanced->insecure_backend option to true in "
"your ooniprobe.conf file.")
- sys.exit(7)
+ raise SystemExit(7)
except Exception as e:
if config.advanced.debug:
log.exception(e)
log.err(e)
- sys.exit(5)
+ raise SystemExit(5)
return deck
@@ -301,45 +308,21 @@ def createDeck(global_options, url=None):
def runTestWithDirector(director, global_options, url=None, start_tor=True):
deck = createDeck(global_options, url=url)
- start_tor |= deck.requiresTor
-
- d = director.start(start_tor=start_tor,
- check_incoherences=global_options['check_incoherences'])
-
- def setup_nettest(_):
+ d = director.start()
+ @defer.inlineCallbacks
+ def post_director_start(_):
try:
- return deck.setup()
+ deck.setup()
+ yield deck.run(director)
except errors.UnableToLoadDeckInput as error:
- return defer.failure.Failure(error)
+ raise defer.failure.Failure(error)
except errors.NoReachableTestHelpers as error:
- return defer.failure.Failure(error)
+ raise defer.failure.Failure(error)
except errors.NoReachableCollectors as error:
- return defer.failure.Failure(error)
+ raise defer.failure.Failure(error)
+ except SystemExit as error:
+ raise error
- # Wait until director has started up (including bootstrapping Tor)
- # before adding tests
- @defer.inlineCallbacks
- def post_director_start(_):
- for net_test_loader in deck.netTestLoaders:
- # Decks can specify different collectors
- # for each net test, so that each NetTest
- # may be paired with a test_helper and its collector
- # However, a user can override this behavior by
- # specifying a collector from the command-line (-c).
- # If a collector is not specified in the deck, or the
- # deck is a singleton, the default collector set in
- # ooniprobe.conf will be used
- collector_client = None
- if not global_options['no-collector']:
- collector_client = setupCollector(global_options,
- net_test_loader.collector)
-
- yield director.start_net_test_loader(net_test_loader,
- global_options['reportfile'],
- collector_client,
- global_options['no-yamloo'])
-
- d.addCallback(setup_nettest)
d.addCallback(post_director_start)
d.addErrback(director_startup_handled_failures)
d.addErrback(director_startup_other_failures)
@@ -379,14 +362,7 @@ def runWithDirector(global_options):
print "Note: Third party tests require an external "\
"application to run properly."
- sys.exit(0)
-
- elif global_options['printdeck']:
- del global_options['printdeck']
- print "# Copy and paste the lines below into a test deck to run the specified test with the specified arguments"
- print yaml.safe_dump([{'options': global_options}]).strip()
-
- sys.exit(0)
+ raise SystemExit(0)
if global_options.get('annotations') is not None:
global_options['annotations'] = setupAnnotations(global_options)
@@ -427,7 +403,7 @@ def runWithDaemonDirector(global_options):
except ImportError:
print "Pika is required for queue connection."
print "Install with \"pip install pika\"."
- sys.exit(7)
+ raise SystemExit(7)
director = Director()
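
Note how the sys.exit(n) calls in cli.py became raise SystemExit(n): createDeck now runs inside the Deferred chain (post_director_start), so the exception surfaces as a Failure that director_startup_handled_failures -- which gained SystemExit in its trap list earlier in this diff -- can handle, instead of aborting the process from inside a callback. A minimal standalone sketch:

from twisted.internet import defer

def create(_):
    raise SystemExit(3)  # stands in for createDeck's error paths

def handle(failure):
    failure.trap(SystemExit)
    print "trapped exit code: %s" % failure.value.code

d = defer.succeed(None)
d.addCallback(create)
d.addErrback(handle)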
diff --git a/ooni/ui/web/client/index.html b/ooni/ui/web/client/index.html
index e306ef2..e90ef83 100644
--- a/ooni/ui/web/client/index.html
+++ b/ooni/ui/web/client/index.html
@@ -13,5 +13,5 @@
<app>
Loading...
</app>
- <script type="text/javascript" src="app.bundle.js?7ed7d7510803fa1a4ad8"></script></body>
+ <script type="text/javascript" src="app.bundle.js?6b15c1dd202a0f5a80e7"></script></body>
</html>
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/set_caps/Makefile b/scripts/set_caps/Makefile
new file mode 100644
index 0000000..bc80843
--- /dev/null
+++ b/scripts/set_caps/Makefile
@@ -0,0 +1,81 @@
+# Wrappers for running ooniprobe as a non-root user.
+#
+# Build-Depends: cython, pythonX.Y-dev, libcap2-bin
+# Depends: libpythonX.Y
+#
+# $ make && make check
+# $ sudo make install # after installing the rest of ooni-probe
+# $ make installcheck_unsafe # runs complete tests as non-root
+#
+# `make` builds a program that has file capabilities set on it. This is just
+# ./ooniprobe compiled into a C program using Cython, so that one can set
+# capabilities directly on the resulting binary. This way, we avoid the need
+# for a separate child python interpreter with its own capabilities. Another
+# advantage is that libpython.so (needed by the program) would be automatically
+# upgraded by the system package manager. The version of python is hard-coded
+# into the wrapper at build time; making this dynamic is possible, but much
+# more complex and not yet implemented.
+#
+# Execution may additionally be limited to a particular unix group by using
+# chgrp(1) and chmod(1) to 'o-x,g+x' after installation.
+#
+
+# GNU Makefile conventions, see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
+prefix = /usr/local
+exec_prefix = $(prefix)
+bindir = $(exec_prefix)/bin
+
+INSTALL = install
+PYTHON = python
+PYTHON_CONFIG = python-config
+CYTHON = cython
+SETCAP = setcap
+
+INSTALL_PROGRAM = $(INSTALL)
+PY_CFLAGS = $(shell $(PYTHON_CONFIG) --cflags)
+PY_LDFLAGS = $(shell $(PYTHON_CONFIG) --ldflags)
+
+BUILDDIR := ./build
+SCRIPTDIR := .
+TESTDIR := ./test
+CAP_SCRIPT := ooniprobe
+CAP_NEEDED := cap_net_admin,cap_net_raw
+
+# Unfortunately cython --embed ignores the arguments in the shebang line
+# So we need to patch the generated code ourselves.
+CYTHON_PRE_MAIN = extern int Py_IgnoreEnvironmentFlag; \
+ Py_IgnoreEnvironmentFlag++; \
+ extern int Py_NoUserSiteDirectory; \
+ Py_NoUserSiteDirectory++;
+
+all: $(BUILDDIR)/$(CAP_SCRIPT)
+
+$(BUILDDIR)/$(CAP_SCRIPT): $(BUILDDIR)/$(CAP_SCRIPT).c Makefile
+ $(CC) $(PY_CFLAGS) $(PY_LDFLAGS) "$<" -o "$@"
+
+$(BUILDDIR)/$(CAP_SCRIPT).c: $(SCRIPTDIR)/$(CAP_SCRIPT) Makefile
+ mkdir -p "$(BUILDDIR)"
+ $(CYTHON) "$<" --embed=CYTHON_MAIN_SENTINEL -Werror -Wextra -o "$@"
+ sed -i \
+ -e 's/\(.*CYTHON_MAIN_SENTINEL.*{\)/\1 $(CYTHON_PRE_MAIN)/g' \
+ -e '/CYTHON_MAIN_SENTINEL[^{]*$$/,/{/s/{/{ $(CYTHON_PRE_MAIN)/g' \
+ -e 's/CYTHON_MAIN_SENTINEL/main/g' "$@"
+
+check: $(BUILDDIR)/$(CAP_SCRIPT)
+ # test that setcapped binary ignores PYTHONPATH
+ BIN="$$(realpath "$<")" && cd "$(TESTDIR)" && PYTHONPATH=. $$BIN --version
+
+install: $(BUILDDIR)/$(CAP_SCRIPT)
+ mkdir -p "$(DESTDIR)$(bindir)"
+ $(INSTALL_PROGRAM) -t "$(DESTDIR)$(bindir)" "$(BUILDDIR)/$(CAP_SCRIPT)"
+ $(SETCAP) "$(CAP_NEEDED)"+eip "$(DESTDIR)$(bindir)/$(CAP_SCRIPT)"
+
+installcheck_unsafe: $(BUILDDIR)/$(CAP_SCRIPT)
+ # run a standard check. note that because of hardcoded paths (for security)
+ # this can only work after you've installed your development copy
+ "./$<" -i /usr/share/ooni/decks/complete.deck
+
+clean:
+ rm -rf "$(BUILDDIR)"
+
+.PHONY: clean all check install installcheck%
diff --git a/scripts/set_caps/__init__.py b/scripts/set_caps/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/set_caps/test/__init__.py b/scripts/set_caps/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/set_caps/test/ooni/__init__.py b/scripts/set_caps/test/ooni/__init__.py
new file mode 100644
index 0000000..f2cd353
--- /dev/null
+++ b/scripts/set_caps/test/ooni/__init__.py
@@ -0,0 +1 @@
+raise ValueError("test failed! wrapper did not ignore polluted PWD. either the wrapper is faulty, or ooni is still unpatched (Tor bug #13581)")
commit 5a89a2c028fa5058bf56260676a9d0e7c9dc32c6
Author: Arturo Filastò <arturo@filasto.net>
Date: Tue Jul 19 18:36:38 2016 +0200
Move oonicli into ui/cli component
---
bin/ooniprobe | 4 +-
ooni/oonicli.py | 520 ---------------------------------------------
ooni/tests/test_oonicli.py | 12 +-
ooni/ui/cli.py | 520 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 528 insertions(+), 528 deletions(-)
diff --git a/bin/ooniprobe b/bin/ooniprobe
index ad97553..0274899 100755
--- a/bin/ooniprobe
+++ b/bin/ooniprobe
@@ -3,8 +3,8 @@ import sys
from twisted.internet import reactor
-from ooni.oonicli import setupGlobalOptions
-from ooni.oonicli import runWithDaemonDirector, runWithDirector
+from ooni.ui.cli import runWithDaemonDirector, runWithDirector
+from ooni.ui.cli import setupGlobalOptions
exit_code=0
diff --git a/ooni/oonicli.py b/ooni/oonicli.py
deleted file mode 100644
index 2eb1d9f..0000000
--- a/ooni/oonicli.py
+++ /dev/null
@@ -1,520 +0,0 @@
-import sys
-
-import os
-import json
-import yaml
-import random
-import textwrap
-import urlparse
-
-from twisted.python import usage
-from twisted.internet import defer
-
-from ooni import errors, __version__
-from ooni.constants import CANONICAL_BOUNCER_ONION
-from ooni.settings import config
-from ooni.utils import log
-
-class LifetimeExceeded(Exception): pass
-
-class Options(usage.Options):
- synopsis = """%s [options] [path to test].py
- """ % (os.path.basename(sys.argv[0]),)
-
- longdesc = ("ooniprobe loads and executes a suite or a set of suites of"
- " network tests. These are loaded from modules, packages and"
- " files listed on the command line.")
-
- optFlags = [["help", "h"],
- ["no-collector", "n", "Disable writing to collector"],
- ["no-yamloo", "N", "Disable writing to YAML file"],
- ["no-geoip", "g", "Disable geoip lookup on start"],
- ["list", "s", "List the currently installed ooniprobe "
- "nettests"],
- ["printdeck", "p", "Print the equivalent deck for the "
- "provided command"],
- ["verbose", "v", "Show more verbose information"]
- ]
-
- optParameters = [
- ["reportfile", "o", None, "Specify the report file name to write to."],
- ["testdeck", "i", None, "Specify as input a test deck: a yaml file "
- "containing the tests to run and their "
- "arguments."],
- ["collector", "c", None, "Specify the address of the collector for "
- "test results. In most cases a user will "
- "prefer to specify a bouncer over this."],
- ["bouncer", "b", None, "Specify the bouncer used to "
- "obtain the address of the "
- "collector and test helpers."],
- ["logfile", "l", None, "Write to this logs to this filename."],
- ["pcapfile", "O", None, "Write a PCAP of the ooniprobe session to "
- "this filename."],
- ["configfile", "f", None, "Specify a path to the ooniprobe "
- "configuration file."],
- ["datadir", "d", None, "Specify a path to the ooniprobe data "
- "directory."],
- ["annotations", "a", None, "Annotate the report with a key:value[, "
- "key:value] format."],
- ["preferred-backend", "P", None, "Set the preferred backend to use "
- "when submitting results and/or "
- "communicating with test helpers. "
- "Can be either onion, "
- "https or cloudfront"],
- ["queue", "Q", None, "AMQP Queue URL amqp://user:pass@host:port/vhost/queue"]
- ]
-
- compData = usage.Completions(
- extraActions=[usage.CompleteFiles(
- "*.py", descr="file | module | package | TestCase | testMethod",
- repeat=True)],)
-
- tracer = None
-
- def __init__(self):
- usage.Options.__init__(self)
-
- def getUsage(self, width=None):
- return super(Options, self).getUsage(width) + """
-To get started you may want to run:
-
-$ oonideckgen
-
-This will tell you how to run ooniprobe :)
-"""
-
- def opt_spew(self):
- """
- Print an insanely verbose log of everything that happens.
- Useful when debugging freezes or locks in complex code.
- """
- from twisted.python.util import spewer
- sys.settrace(spewer)
-
- def opt_version(self):
- """
- Display the ooniprobe version and exit.
- """
- print "ooniprobe version:", __version__
- sys.exit(0)
-
- def parseArgs(self, *args):
- if self['testdeck'] or self['list']:
- return
- try:
- self['test_file'] = args[0]
- self['subargs'] = args[1:]
- except:
- raise usage.UsageError("No test filename specified!")
-
-
-def parseOptions():
- print "WARNING: running ooniprobe involves some risk that varies greatly"
- print " from country to country. You should be aware of this when"
- print " running the tool. Read more about this in the manpage or README."
- cmd_line_options = Options()
- if len(sys.argv) == 1:
- cmd_line_options.getUsage()
- try:
- cmd_line_options.parseOptions()
- except usage.UsageError as ue:
- print cmd_line_options.getUsage()
- raise SystemExit("%s: %s" % (sys.argv[0], ue))
-
- return dict(cmd_line_options)
-
-
-def director_startup_handled_failures(failure):
- log.err("Could not start the director")
- failure.trap(errors.TorNotRunning,
- errors.InvalidOONIBCollectorAddress,
- errors.UnableToLoadDeckInput,
- errors.CouldNotFindTestHelper,
- errors.CouldNotFindTestCollector,
- errors.ProbeIPUnknown,
- errors.InvalidInputFile,
- errors.ConfigFileIncoherent)
-
- if isinstance(failure.value, errors.TorNotRunning):
- log.err("Tor does not appear to be running")
- log.err("Reporting with a collector is not possible")
- log.msg(
- "Try with a different collector or disable collector reporting with -n")
-
- elif isinstance(failure.value, errors.InvalidOONIBCollectorAddress):
- log.err("Invalid format for oonib collector address.")
- log.msg(
- "Should be in the format http://<collector_address>:<port>")
- log.msg("for example: ooniprobe -c httpo://nkvphnp3p6agi5qq.onion")
-
- elif isinstance(failure.value, errors.UnableToLoadDeckInput):
- log.err("Unable to fetch the required inputs for the test deck.")
- log.msg(
- "Please file a ticket on our issue tracker: https://github.com/thetorproject/ooni-probe/issues")
-
- elif isinstance(failure.value, errors.CouldNotFindTestHelper):
- log.err("Unable to obtain the required test helpers.")
- log.msg(
- "Try with a different bouncer or check that Tor is running properly.")
-
- elif isinstance(failure.value, errors.CouldNotFindTestCollector):
- log.err("Could not find a valid collector.")
- log.msg(
- "Try with a different bouncer, specify a collector with -c or disable reporting to a collector with -n.")
-
- elif isinstance(failure.value, errors.ProbeIPUnknown):
- log.err("Failed to lookup probe IP address.")
- log.msg("Check your internet connection.")
-
- elif isinstance(failure.value, errors.InvalidInputFile):
- log.err("Invalid input file \"%s\"" % failure.value)
-
- elif isinstance(failure.value, errors.ConfigFileIncoherent):
- log.err("Incoherent config file")
-
- if config.advanced.debug:
- log.exception(failure)
-
-def director_startup_other_failures(failure):
- log.err("An unhandled exception occurred while starting the director!")
- log.exception(failure)
-
-def setupGlobalOptions(logging, start_tor, check_incoherences):
- global_options = parseOptions()
-
- config.global_options = global_options
- config.set_paths()
- config.initialize_ooni_home()
- try:
- config.read_config_file(check_incoherences=check_incoherences)
- except errors.ConfigFileIncoherent:
- sys.exit(6)
-
- if global_options['verbose']:
- config.advanced.debug = True
-
- if not start_tor:
- config.advanced.start_tor = False
-
- if logging:
- log.start(global_options['logfile'])
-
- if config.privacy.includepcap or global_options['pcapfile']:
- from ooni.utils.net import hasRawSocketPermission
- if hasRawSocketPermission():
- from ooni.utils.txscapy import ScapyFactory
- config.scapyFactory = ScapyFactory(config.advanced.interface)
- else:
- log.err("Insufficient Privileges to capture packets."
- " See ooniprobe.conf privacy.includepcap")
- sys.exit(2)
- global_options['check_incoherences'] = check_incoherences
- return global_options
-
-def setupAnnotations(global_options):
- annotations={}
- for annotation in global_options["annotations"].split(","):
- pair = annotation.split(":")
- if len(pair) == 2:
- key = pair[0].strip()
- value = pair[1].strip()
- annotations[key] = value
- else:
- log.err("Invalid annotation: %s" % annotation)
- sys.exit(1)
- global_options["annotations"] = annotations
- return annotations
-
-def setupCollector(global_options, collector_client):
- from backend_client import CollectorClient
-
- if global_options['collector']:
- collector_client = CollectorClient(global_options['collector'])
- elif config.reports.get('collector', None) is not None:
- collector_client = CollectorClient(config.reports['collector'])
- if not collector_client.isSupported():
- raise errors.CollectorUnsupported
- return collector_client
-
-def createDeck(global_options, url=None):
- from ooni.nettest import NetTestLoader
- from ooni.deck import Deck, nettest_to_path
- from backend_client import CollectorClient
-
- if url:
- log.msg("Creating deck for: %s" % (url))
-
- if global_options['no-yamloo']:
- log.msg("Will not write to a yamloo report file")
-
- deck = Deck(bouncer=global_options['bouncer'],
- no_collector=global_options['no-collector'])
-
- try:
- if global_options['testdeck']:
- deck.loadDeck(global_options['testdeck'], global_options)
- else:
- log.debug("No test deck detected")
- test_file = nettest_to_path(global_options['test_file'], True)
- if url is not None:
- args = ('-u', url)
- else:
- args = tuple()
- if any(global_options['subargs']):
- args = global_options['subargs'] + args
- net_test_loader = NetTestLoader(args,
- test_file=test_file,
- annotations=global_options['annotations'])
- if global_options['collector']:
- net_test_loader.collector = \
- CollectorClient(global_options['collector'])
- deck.insert(net_test_loader)
- except errors.MissingRequiredOption as option_name:
- log.err('Missing required option: "%s"' % option_name)
- incomplete_net_test_loader = option_name.net_test_loader
- print incomplete_net_test_loader.usageOptions().getUsage()
- sys.exit(2)
- except errors.NetTestNotFound as path:
- log.err('Requested NetTest file not found (%s)' % path)
- sys.exit(3)
- except errors.OONIUsageError as e:
- log.err(e)
- print e.net_test_loader.usageOptions().getUsage()
- sys.exit(4)
- except errors.HTTPSCollectorUnsupported:
- log.err("HTTPS collectors require a twisted version of at least 14.0.2.")
- sys.exit(6)
- except errors.InsecureBackend:
- log.err("Attempting to report to an insecure collector.")
- log.err("To enable reporting to insecure collector set the "
- "advanced->insecure_backend option to true in "
- "your ooniprobe.conf file.")
- sys.exit(7)
- except Exception as e:
- if config.advanced.debug:
- log.exception(e)
- log.err(e)
- sys.exit(5)
-
- return deck
-
-
-def runTestWithDirector(director, global_options, url=None, start_tor=True):
- deck = createDeck(global_options, url=url)
-
- start_tor |= deck.requiresTor
-
- d = director.start(start_tor=start_tor,
- check_incoherences=global_options['check_incoherences'])
-
- def setup_nettest(_):
- try:
- return deck.setup()
- except errors.UnableToLoadDeckInput as error:
- return defer.failure.Failure(error)
- except errors.NoReachableTestHelpers as error:
- return defer.failure.Failure(error)
- except errors.NoReachableCollectors as error:
- return defer.failure.Failure(error)
-
- # Wait until director has started up (including bootstrapping Tor)
- # before adding tests
- @defer.inlineCallbacks
- def post_director_start(_):
- for net_test_loader in deck.netTestLoaders:
- # Decks can specify different collectors
- # for each net test, so that each NetTest
- # may be paired with a test_helper and its collector
- # However, a user can override this behavior by
- # specifying a collector from the command-line (-c).
- # If a collector is not specified in the deck, or the
- # deck is a singleton, the default collector set in
- # ooniprobe.conf will be used
- collector_client = None
- if not global_options['no-collector']:
- collector_client = setupCollector(global_options,
- net_test_loader.collector)
-
- yield director.startNetTest(net_test_loader,
- global_options['reportfile'],
- collector_client,
- global_options['no-yamloo'])
-
- d.addCallback(setup_nettest)
- d.addCallback(post_director_start)
- d.addErrback(director_startup_handled_failures)
- d.addErrback(director_startup_other_failures)
- return d
-
-def runWithDirector(global_options):
- """
- Instantiate the director, parse the command line options and start an
- ooniprobe test.
- """
- from ooni.director import Director
- start_tor = False
- director = Director()
- if global_options['list']:
- net_tests = [net_test for net_test in director.getNetTests().items()]
- print ""
- print "Installed nettests"
- print "=================="
- for net_test_id, net_test in net_tests:
- optList = []
- for name, details in net_test['arguments'].items():
- optList.append({'long': name, 'doc': details['description']})
-
- desc = ('\n' +
- net_test['name'] +
- '\n' +
- '-'*len(net_test['name']) +
- '\n' +
- '\n'.join(textwrap.wrap(net_test['description'], 80)) +
- '\n\n' +
- '$ ooniprobe {}/{}'.format(net_test['category'],
- net_test['id']) +
- '\n\n' +
- ''.join(usage.docMakeChunks(optList))
- )
- print desc
- print "Note: Third party tests require an external "\
- "application to run properly."
-
- sys.exit(0)
-
- elif global_options['printdeck']:
- del global_options['printdeck']
- print "# Copy and paste the lines below into a test deck to run the specified test with the specified arguments"
- print yaml.safe_dump([{'options': global_options}]).strip()
-
- sys.exit(0)
-
- if global_options.get('annotations') is not None:
- global_options['annotations'] = setupAnnotations(global_options)
-
- if global_options.get('preferred-backend') is not None:
- config.advanced.preferred_backend = global_options['preferred-backend']
-
- if global_options['no-collector']:
- log.msg("Not reporting using a collector")
- global_options['collector'] = None
- start_tor = False
- elif config.advanced.get("preferred_backend", "onion") == "onion":
- start_tor = True
-
- if (global_options['collector'] and
- config.advanced.get("preferred_backend", "onion") == "onion"):
- start_tor |= True
-
- return runTestWithDirector(director=director,
- start_tor=start_tor,
- global_options=global_options)
-
-
-# This variant of runWithDirector splits the process in two, allowing a
-# single director instance to be reused across multiple decks.
-
-def runWithDaemonDirector(global_options):
- """
- Instantiate the director and run ooniprobe tests for URLs received
- over an AMQP queue.
- """
- from twisted.internet import reactor, protocol
- from ooni.director import Director
- try:
- import pika
- from pika import exceptions
- from pika.adapters import twisted_connection
- except ImportError:
- print "Pika is required for queue connection."
- print "Install with \"pip install pika\"."
- sys.exit(7)
-
- director = Director()
-
- if global_options.get('annotations') is not None:
- global_options['annotations'] = setupAnnotations(global_options)
-
- if global_options['no-collector']:
- log.msg("Not reporting using a collector")
- global_options['collector'] = None
- start_tor = False
- else:
- start_tor = True
-
- finished = defer.Deferred()
-
- @defer.inlineCallbacks
- def readmsg(_, channel, queue_object, consumer_tag, counter):
-
- # Wait for a message and decode it.
- if counter >= lifetime:
- log.msg("Counter")
- queue_object.close(LifetimeExceeded())
- yield channel.basic_cancel(consumer_tag=consumer_tag)
- finished.callback(None)
-
- else:
- log.msg("Waiting for message")
-
- try:
- ch, method, properties, body = yield queue_object.get()
- log.msg("Got message")
- data = json.loads(body)
- counter += 1
-
- log.msg("Received %d/%d: %s" % (counter, lifetime, data['url'],))
- # acknowledge the message
- ch.basic_ack(delivery_tag=method.delivery_tag)
-
- d = runTestWithDirector(director=director,
- start_tor=start_tor,
- global_options=global_options,
- url=data['url'].encode('utf8'))
- # When the test has been completed, go back to waiting for a message.
- d.addCallback(readmsg, channel, queue_object, consumer_tag, counter+1)
- except exceptions.AMQPError as v:
- log.msg("Error")
- log.exception(v)
- finished.errback(v)
-
-
-
- @defer.inlineCallbacks
- def runQueue(connection, name, qos):
- # Set up the queue consumer. When a message is received, run readmsg
- channel = yield connection.channel()
- yield channel.basic_qos(prefetch_count=qos)
- queue_object, consumer_tag = yield channel.basic_consume(
- queue=name,
- no_ack=False)
- readmsg(None, channel, queue_object, consumer_tag, 0)
-
-
-
- # Create the AMQP connection. This could be refactored to allow test URLs
- # to be submitted through an HTTP server interface or something.
- urlp = urlparse.urlparse(config.global_options['queue'])
- urlargs = dict(urlparse.parse_qsl(urlp.query))
-
- # Random number of requests to process in this worker's lifetime
- lifetime = random.randint(820, 1032)
-
- # AMQP connection details are sent through the cmdline parameter '-Q'
- creds = pika.PlainCredentials(urlp.username or 'guest',
- urlp.password or 'guest')
- parameters = pika.ConnectionParameters(urlp.hostname,
- urlp.port or 5672,
- urlp.path.rsplit('/',1)[0] or '/',
- creds,
- heartbeat_interval=120,
- )
- cc = protocol.ClientCreator(reactor,
- twisted_connection.TwistedProtocolConnection,
- parameters)
- d = cc.connectTCP(urlp.hostname, urlp.port or 5672)
- d.addCallback(lambda protocol: protocol.ready)
- # start the wait/process sequence.
- d.addCallback(runQueue, urlp.path.rsplit('/',1)[-1], int(urlargs.get('qos',1)))
-
- return finished
diff --git a/ooni/tests/test_oonicli.py b/ooni/tests/test_oonicli.py
index 4a58736..8ca8d0c 100644
--- a/ooni/tests/test_oonicli.py
+++ b/ooni/tests/test_oonicli.py
@@ -1,17 +1,17 @@
+import exceptions
import os
import sys
-import yaml
+import yaml
from twisted.internet import defer
-import exceptions
from ooni import errors
+from ooni.settings import config
from ooni.tests import is_internet_connected
from ooni.tests.bases import ConfigTestCase
-from ooni.settings import config
-from ooni.oonicli import runWithDirector, setupGlobalOptions
-from ooni.oonicli import setupAnnotations, setupCollector
-from ooni.oonicli import createDeck
+from ooni.ui.cli import createDeck
+from ooni.ui.cli import runWithDirector, setupGlobalOptions
+from ooni.ui.cli import setupAnnotations, setupCollector
from ooni.utils.net import hasRawSocketPermission
diff --git a/ooni/ui/cli.py b/ooni/ui/cli.py
new file mode 100644
index 0000000..2b402c2
--- /dev/null
+++ b/ooni/ui/cli.py
@@ -0,0 +1,520 @@
+import sys
+
+import os
+import json
+import yaml
+import random
+import textwrap
+import urlparse
+
+from twisted.python import usage
+from twisted.internet import defer
+
+from ooni import errors, __version__
+from ooni.constants import CANONICAL_BOUNCER_ONION
+from ooni.settings import config
+from ooni.utils import log
+
+class LifetimeExceeded(Exception): pass
+
+class Options(usage.Options):
+ synopsis = """%s [options] [path to test].py
+ """ % (os.path.basename(sys.argv[0]),)
+
+ longdesc = ("ooniprobe loads and executes a suite or a set of suites of"
+ " network tests. These are loaded from modules, packages and"
+ " files listed on the command line.")
+
+ optFlags = [["help", "h"],
+ ["no-collector", "n", "Disable writing to collector"],
+ ["no-yamloo", "N", "Disable writing to YAML file"],
+ ["no-geoip", "g", "Disable geoip lookup on start"],
+ ["list", "s", "List the currently installed ooniprobe "
+ "nettests"],
+ ["printdeck", "p", "Print the equivalent deck for the "
+ "provided command"],
+ ["verbose", "v", "Show more verbose information"]
+ ]
+
+ optParameters = [
+ ["reportfile", "o", None, "Specify the report file name to write to."],
+ ["testdeck", "i", None, "Specify as input a test deck: a yaml file "
+ "containing the tests to run and their "
+ "arguments."],
+ ["collector", "c", None, "Specify the address of the collector for "
+ "test results. In most cases a user will "
+ "prefer to specify a bouncer over this."],
+ ["bouncer", "b", None, "Specify the bouncer used to "
+ "obtain the address of the "
+ "collector and test helpers."],
+ ["logfile", "l", None, "Write to this logs to this filename."],
+ ["pcapfile", "O", None, "Write a PCAP of the ooniprobe session to "
+ "this filename."],
+ ["configfile", "f", None, "Specify a path to the ooniprobe "
+ "configuration file."],
+ ["datadir", "d", None, "Specify a path to the ooniprobe data "
+ "directory."],
+ ["annotations", "a", None, "Annotate the report with a key:value[, "
+ "key:value] format."],
+ ["preferred-backend", "P", None, "Set the preferred backend to use "
+ "when submitting results and/or "
+ "communicating with test helpers. "
+ "Can be either onion, "
+ "https or cloudfront"],
+ ["queue", "Q", None, "AMQP Queue URL amqp://user:pass@host:port/vhost/queue"]
+ ]
+
+ compData = usage.Completions(
+ extraActions=[usage.CompleteFiles(
+ "*.py", descr="file | module | package | TestCase | testMethod",
+ repeat=True)],)
+
+ tracer = None
+
+ def __init__(self):
+ usage.Options.__init__(self)
+
+ def getUsage(self, width=None):
+ return super(Options, self).getUsage(width) + """
+To get started you may want to run:
+
+$ oonideckgen
+
+This will tell you how to run ooniprobe :)
+"""
+
+ def opt_spew(self):
+ """
+ Print an insanely verbose log of everything that happens.
+ Useful when debugging freezes or locks in complex code.
+ """
+ from twisted.python.util import spewer
+ sys.settrace(spewer)
+
+ def opt_version(self):
+ """
+ Display the ooniprobe version and exit.
+ """
+ print "ooniprobe version:", __version__
+ sys.exit(0)
+
+ def parseArgs(self, *args):
+ if self['testdeck'] or self['list']:
+ return
+ try:
+ self['test_file'] = args[0]
+ self['subargs'] = args[1:]
+ except IndexError:
+ raise usage.UsageError("No test filename specified!")
+
+
+def parseOptions():
+ print "WARNING: running ooniprobe involves some risk that varies greatly"
+ print " from country to country. You should be aware of this when"
+ print " running the tool. Read more about this in the manpage or README."
+ cmd_line_options = Options()
+ if len(sys.argv) == 1:
+ print cmd_line_options.getUsage()
+ try:
+ cmd_line_options.parseOptions()
+ except usage.UsageError as ue:
+ print cmd_line_options.getUsage()
+ raise SystemExit("%s: %s" % (sys.argv[0], ue))
+
+ return dict(cmd_line_options)
+
+
+def director_startup_handled_failures(failure):
+ log.err("Could not start the director")
+ failure.trap(errors.TorNotRunning,
+ errors.InvalidOONIBCollectorAddress,
+ errors.UnableToLoadDeckInput,
+ errors.CouldNotFindTestHelper,
+ errors.CouldNotFindTestCollector,
+ errors.ProbeIPUnknown,
+ errors.InvalidInputFile,
+ errors.ConfigFileIncoherent)
+
+ if isinstance(failure.value, errors.TorNotRunning):
+ log.err("Tor does not appear to be running")
+ log.err("Reporting with a collector is not possible")
+ log.msg(
+ "Try with a different collector or disable collector reporting with -n")
+
+ elif isinstance(failure.value, errors.InvalidOONIBCollectorAddress):
+ log.err("Invalid format for oonib collector address.")
+ log.msg(
+ "Should be in the format http://<collector_address>:<port>")
+ log.msg("for example: ooniprobe -c httpo://nkvphnp3p6agi5qq.onion")
+
+ elif isinstance(failure.value, errors.UnableToLoadDeckInput):
+ log.err("Unable to fetch the required inputs for the test deck.")
+ log.msg(
+ "Please file a ticket on our issue tracker: https://github.com/thetorproject/ooni-probe/issues")
+
+ elif isinstance(failure.value, errors.CouldNotFindTestHelper):
+ log.err("Unable to obtain the required test helpers.")
+ log.msg(
+ "Try with a different bouncer or check that Tor is running properly.")
+
+ elif isinstance(failure.value, errors.CouldNotFindTestCollector):
+ log.err("Could not find a valid collector.")
+ log.msg(
+ "Try with a different bouncer, specify a collector with -c or disable reporting to a collector with -n.")
+
+ elif isinstance(failure.value, errors.ProbeIPUnknown):
+ log.err("Failed to lookup probe IP address.")
+ log.msg("Check your internet connection.")
+
+ elif isinstance(failure.value, errors.InvalidInputFile):
+ log.err("Invalid input file \"%s\"" % failure.value)
+
+ elif isinstance(failure.value, errors.ConfigFileIncoherent):
+ log.err("Incoherent config file")
+
+ if config.advanced.debug:
+ log.exception(failure)
+
+def director_startup_other_failures(failure):
+ log.err("An unhandled exception occurred while starting the director!")
+ log.exception(failure)
+
+def setupGlobalOptions(logging, start_tor, check_incoherences):
+ global_options = parseOptions()
+
+ config.global_options = global_options
+ config.set_paths()
+ config.initialize_ooni_home()
+ try:
+ config.read_config_file(check_incoherences=check_incoherences)
+ except errors.ConfigFileIncoherent:
+ sys.exit(6)
+
+ if global_options['verbose']:
+ config.advanced.debug = True
+
+ if not start_tor:
+ config.advanced.start_tor = False
+
+ if logging:
+ log.start(global_options['logfile'])
+
+ if config.privacy.includepcap or global_options['pcapfile']:
+ from ooni.utils.net import hasRawSocketPermission
+ if hasRawSocketPermission():
+ from ooni.utils.txscapy import ScapyFactory
+ config.scapyFactory = ScapyFactory(config.advanced.interface)
+ else:
+ log.err("Insufficient Privileges to capture packets."
+ " See ooniprobe.conf privacy.includepcap")
+ sys.exit(2)
+ global_options['check_incoherences'] = check_incoherences
+ return global_options
+
+def setupAnnotations(global_options):
+ annotations={}
+ for annotation in global_options["annotations"].split(","):
+ pair = annotation.split(":")
+ if len(pair) == 2:
+ key = pair[0].strip()
+ value = pair[1].strip()
+ annotations[key] = value
+ else:
+ log.err("Invalid annotation: %s" % annotation)
+ sys.exit(1)
+ global_options["annotations"] = annotations
+ return annotations
+
+def setupCollector(global_options, collector_client):
+ from ooni.backend_client import CollectorClient
+
+ if global_options['collector']:
+ collector_client = CollectorClient(global_options['collector'])
+ elif config.reports.get('collector', None) is not None:
+ collector_client = CollectorClient(config.reports['collector'])
+ if not collector_client.isSupported():
+ raise errors.CollectorUnsupported
+ return collector_client
+
+def createDeck(global_options, url=None):
+ from ooni.nettest import NetTestLoader
+ from ooni.deck import Deck, nettest_to_path
+ from ooni.backend_client import CollectorClient
+
+ if url:
+ log.msg("Creating deck for: %s" % (url))
+
+ if global_options['no-yamloo']:
+ log.msg("Will not write to a yamloo report file")
+
+ deck = Deck(bouncer=global_options['bouncer'],
+ no_collector=global_options['no-collector'])
+
+ try:
+ if global_options['testdeck']:
+ deck.loadDeck(global_options['testdeck'], global_options)
+ else:
+ log.debug("No test deck detected")
+ test_file = nettest_to_path(global_options['test_file'], True)
+ if url is not None:
+ args = ('-u', url)
+ else:
+ args = tuple()
+ if any(global_options['subargs']):
+ args = global_options['subargs'] + args
+ net_test_loader = NetTestLoader(args,
+ test_file=test_file,
+ annotations=global_options['annotations'])
+ if global_options['collector']:
+ net_test_loader.collector = \
+ CollectorClient(global_options['collector'])
+ deck.insert(net_test_loader)
+ except errors.MissingRequiredOption as option_name:
+ log.err('Missing required option: "%s"' % option_name)
+ incomplete_net_test_loader = option_name.net_test_loader
+ print incomplete_net_test_loader.usageOptions().getUsage()
+ sys.exit(2)
+ except errors.NetTestNotFound as path:
+ log.err('Requested NetTest file not found (%s)' % path)
+ sys.exit(3)
+ except errors.OONIUsageError as e:
+ log.err(e)
+ print e.net_test_loader.usageOptions().getUsage()
+ sys.exit(4)
+ except errors.HTTPSCollectorUnsupported:
+ log.err("HTTPS collectors require a twisted version of at least 14.0.2.")
+ sys.exit(6)
+ except errors.InsecureBackend:
+ log.err("Attempting to report to an insecure collector.")
+ log.err("To enable reporting to insecure collector set the "
+ "advanced->insecure_backend option to true in "
+ "your ooniprobe.conf file.")
+ sys.exit(7)
+ except Exception as e:
+ if config.advanced.debug:
+ log.exception(e)
+ log.err(e)
+ sys.exit(5)
+
+ return deck
+
+
+def runTestWithDirector(director, global_options, url=None, start_tor=True):
+ deck = createDeck(global_options, url=url)
+
+ start_tor |= deck.requiresTor
+
+ d = director.start(start_tor=start_tor,
+ check_incoherences=global_options['check_incoherences'])
+
+ def setup_nettest(_):
+ try:
+ return deck.setup()
+ except errors.UnableToLoadDeckInput as error:
+ return defer.failure.Failure(error)
+ except errors.NoReachableTestHelpers as error:
+ return defer.failure.Failure(error)
+ except errors.NoReachableCollectors as error:
+ return defer.failure.Failure(error)
+
+ # Wait until director has started up (including bootstrapping Tor)
+ # before adding tests
+ @defer.inlineCallbacks
+ def post_director_start(_):
+ for net_test_loader in deck.netTestLoaders:
+ # Decks can specify different collectors
+ # for each net test, so that each NetTest
+ # may be paired with a test_helper and its collector
+ # However, a user can override this behavior by
+ # specifying a collector from the command-line (-c).
+ # If a collector is not specified in the deck, or the
+ # deck is a singleton, the default collector set in
+ # ooniprobe.conf will be used
+ collector_client = None
+ if not global_options['no-collector']:
+ collector_client = setupCollector(global_options,
+ net_test_loader.collector)
+
+ yield director.startNetTest(net_test_loader,
+ global_options['reportfile'],
+ collector_client,
+ global_options['no-yamloo'])
+
+ d.addCallback(setup_nettest)
+ d.addCallback(post_director_start)
+ d.addErrback(director_startup_handled_failures)
+ d.addErrback(director_startup_other_failures)
+ return d
+
+def runWithDirector(global_options):
+ """
+ Instantiate the director, parse the command line options and start an
+ ooniprobe test.
+ """
+ from ooni.director import Director
+ start_tor = False
+ director = Director()
+ if global_options['list']:
+ net_tests = [net_test for net_test in director.getNetTests().items()]
+ print ""
+ print "Installed nettests"
+ print "=================="
+ for net_test_id, net_test in net_tests:
+ optList = []
+ for name, details in net_test['arguments'].items():
+ optList.append({'long': name, 'doc': details['description']})
+
+ desc = ('\n' +
+ net_test['name'] +
+ '\n' +
+ '-'*len(net_test['name']) +
+ '\n' +
+ '\n'.join(textwrap.wrap(net_test['description'], 80)) +
+ '\n\n' +
+ '$ ooniprobe {}/{}'.format(net_test['category'],
+ net_test['id']) +
+ '\n\n' +
+ ''.join(usage.docMakeChunks(optList))
+ )
+ print desc
+ print "Note: Third party tests require an external "\
+ "application to run properly."
+
+ sys.exit(0)
+
+ elif global_options['printdeck']:
+ del global_options['printdeck']
+ print "# Copy and paste the lines below into a test deck to run the specified test with the specified arguments"
+ print yaml.safe_dump([{'options': global_options}]).strip()
+
+ sys.exit(0)
+
+ if global_options.get('annotations') is not None:
+ global_options['annotations'] = setupAnnotations(global_options)
+
+ if global_options.get('preferred-backend') is not None:
+ config.advanced.preferred_backend = global_options['preferred-backend']
+
+ if global_options['no-collector']:
+ log.msg("Not reporting using a collector")
+ global_options['collector'] = None
+ start_tor = False
+ elif config.advanced.get("preferred_backend", "onion") == "onion":
+ start_tor = True
+
+ if (global_options['collector'] and
+ config.advanced.get("preferred_backend", "onion") == "onion"):
+ start_tor |= True
+
+ return runTestWithDirector(director=director,
+ start_tor=start_tor,
+ global_options=global_options)
+
+
+# This variant of runWithDirector splits the process in two, allowing a
+# single director instance to be reused across multiple decks.
+
+def runWithDaemonDirector(global_options):
+ """
+ Instantiate the director and run ooniprobe tests for URLs received
+ over an AMQP queue.
+ """
+ from twisted.internet import reactor, protocol
+ from ooni.director import Director
+ try:
+ import pika
+ from pika import exceptions
+ from pika.adapters import twisted_connection
+ except ImportError:
+ print "Pika is required for queue connection."
+ print "Install with \"pip install pika\"."
+ sys.exit(7)
+
+ director = Director()
+
+ if global_options.get('annotations') is not None:
+ global_options['annotations'] = setupAnnotations(global_options)
+
+ if global_options['no-collector']:
+ log.msg("Not reporting using a collector")
+ global_options['collector'] = None
+ start_tor = False
+ else:
+ start_tor = True
+
+ finished = defer.Deferred()
+
+ @defer.inlineCallbacks
+ def readmsg(_, channel, queue_object, consumer_tag, counter):
+
+ # Wait for a message and decode it.
+ if counter >= lifetime:
+ log.msg("Counter")
+ queue_object.close(LifetimeExceeded())
+ yield channel.basic_cancel(consumer_tag=consumer_tag)
+ finished.callback(None)
+
+ else:
+ log.msg("Waiting for message")
+
+ try:
+ ch, method, properties, body = yield queue_object.get()
+ log.msg("Got message")
+ data = json.loads(body)
+ counter += 1
+
+ log.msg("Received %d/%d: %s" % (counter, lifetime, data['url'],))
+ # acknowledge the message
+ ch.basic_ack(delivery_tag=method.delivery_tag)
+
+ d = runTestWithDirector(director=director,
+ start_tor=start_tor,
+ global_options=global_options,
+ url=data['url'].encode('utf8'))
+ # When the test has been completed, go back to waiting for a message.
+ d.addCallback(readmsg, channel, queue_object, consumer_tag, counter+1)
+ except exceptions.AMQPError as v:
+ log.msg("Error")
+ log.exception(v)
+ finished.errback(v)
+
+
+
+ @defer.inlineCallbacks
+ def runQueue(connection, name, qos):
+ # Set up the queue consumer. When a message is received, run readmsg
+ channel = yield connection.channel()
+ yield channel.basic_qos(prefetch_count=qos)
+ queue_object, consumer_tag = yield channel.basic_consume(
+ queue=name,
+ no_ack=False)
+ readmsg(None, channel, queue_object, consumer_tag, 0)
+
+
+
+ # Create the AMQP connection. This could be refactored to allow test URLs
+ # to be submitted through an HTTP server interface or something.
+ urlp = urlparse.urlparse(config.global_options['queue'])
+ urlargs = dict(urlparse.parse_qsl(urlp.query))
+
+ # Random number of requests to process in this worker's lifetime
+ lifetime = random.randint(820, 1032)
+
+ # AMQP connection details are sent through the cmdline parameter '-Q'
+ creds = pika.PlainCredentials(urlp.username or 'guest',
+ urlp.password or 'guest')
+ parameters = pika.ConnectionParameters(urlp.hostname,
+ urlp.port or 5672,
+ urlp.path.rsplit('/',1)[0] or '/',
+ creds,
+ heartbeat_interval=120,
+ )
+ cc = protocol.ClientCreator(reactor,
+ twisted_connection.TwistedProtocolConnection,
+ parameters)
+ d = cc.connectTCP(urlp.hostname, urlp.port or 5672)
+ d.addCallback(lambda protocol: protocol.ready)
+ # start the wait/process sequence.
+ d.addCallback(runQueue, urlp.path.rsplit('/',1)[-1], int(urlargs.get('qos',1)))
+
+ return finished
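For reference, here is how the '-Q' queue URL documented above is decomposed
by the same urlparse calls used in runWithDaemonDirector. This is a
standalone Python 2 sketch; the host, vhost and queue names are made up:
import urlparse
urlp = urlparse.urlparse("amqp://user:pass@mq.example.net:5672/ooni/urls?qos=2")
urlargs = dict(urlparse.parse_qsl(urlp.query))
print(urlp.hostname)                 # mq.example.net
print(urlp.port)                     # 5672
print(urlp.path.rsplit('/', 1)[0])   # /ooni -> the vhost handed to pika
print(urlp.path.rsplit('/', 1)[-1])  # urls  -> the queue name to consume from
print(urlargs.get('qos', 1))         # 2     -> used as the basic_qos prefetch count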
[ooni-probe/master] Import croniter library from: https://github.com/taichino/croniter/commit/194d6f2e1593a2fbbc9b2abf0dcda91242e90ad5
by art@torproject.org 19 Sep '16
commit 8ea58487e6683214464ed0056dc0cf0f4517abe3
Author: Arturo Filastò <arturo(a)filasto.net>
Date: Tue Jul 26 16:53:55 2016 +0200
Import croniter library from: https://github.com/taichino/croniter/commit/194d6f2e1593a2fbbc9b2abf0dcda91242e90ad5
---
ooni/contrib/__init__.py | 0
ooni/contrib/croniter.py | 419 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 419 insertions(+)
diff --git a/ooni/contrib/__init__.py b/ooni/contrib/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ooni/contrib/croniter.py b/ooni/contrib/croniter.py
new file mode 100644
index 0000000..327326f
--- /dev/null
+++ b/ooni/contrib/croniter.py
@@ -0,0 +1,419 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import, print_function
+import re
+from time import time
+import datetime
+from dateutil.relativedelta import relativedelta
+from dateutil.tz import tzutc
+
+search_re = re.compile(r'^([^-]+)-([^-/]+)(/(.*))?$')
+only_int_re = re.compile(r'^\d+$')
+any_int_re = re.compile(r'^\d+')
+star_or_int_re = re.compile(r'^(\d+|\*)$')
+
+__all__ = ('croniter',)
+
+
+class croniter(object):
+ MONTHS_IN_YEAR = 12
+ RANGES = (
+ (0, 59),
+ (0, 23),
+ (1, 31),
+ (1, 12),
+ (0, 6),
+ (0, 59)
+ )
+ DAYS = (
+ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
+ )
+
+ ALPHACONV = (
+ {},
+ {},
+ {"l": "l"},
+ {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
+ 'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12},
+ {'sun': 0, 'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4, 'fri': 5, 'sat': 6},
+ {}
+ )
+
+ LOWMAP = (
+ {},
+ {},
+ {0: 1},
+ {0: 1},
+ {7: 0},
+ {},
+ )
+
+ bad_length = 'Exactly 5 or 6 columns have to be specified for the ' \
+ 'iterator expression.'
+
+ def __init__(self, expr_format, start_time=None, ret_type=float):
+ self._ret_type = ret_type
+ if start_time is None:
+ start_time = time()
+
+ self.tzinfo = None
+ if isinstance(start_time, datetime.datetime):
+ self.tzinfo = start_time.tzinfo
+ start_time = self._datetime_to_timestamp(start_time)
+
+ self.cur = start_time
+ self.exprs = expr_format.split()
+
+ if len(self.exprs) != 5 and len(self.exprs) != 6:
+ raise ValueError(self.bad_length)
+
+ expanded = []
+
+ for i, expr in enumerate(self.exprs):
+ e_list = expr.split(',')
+ res = []
+
+ while len(e_list) > 0:
+ e = e_list.pop()
+ t = re.sub(r'^\*(/.+)$', r'%d-%d\1' % (
+ self.RANGES[i][0],
+ self.RANGES[i][1]),
+ str(e))
+ m = search_re.search(t)
+
+ if m:
+ (low, high, step) = m.group(1), m.group(2), m.group(4) or 1
+
+ if not any_int_re.search(low):
+ low = "{0}".format(self.ALPHACONV[i][low.lower()])
+
+ if not any_int_re.search(high):
+ high = "{0}".format(self.ALPHACONV[i][high.lower()])
+
+ if (
+ not low or not high or int(low) > int(high)
+ or not only_int_re.search(str(step))
+ ):
+ raise ValueError(
+ "[{0}] is not acceptable".format(expr_format))
+
+ low, high, step = map(int, [low, high, step])
+ e_list += range(low, high + 1, step)
+ # other solution
+ #try:
+ # for j in xrange(int(low), int(high) + 1):
+ # if j % int(step) == 0:
+ # e_list.append(j)
+ #except NameError:
+ # for j in range(int(low), int(high) + 1):
+ # if j % int(step) == 0:
+ # e_list.append(j)
+ else:
+ if not star_or_int_re.search(t):
+ t = self.ALPHACONV[i][t.lower()]
+
+ try:
+ t = int(t)
+ except ValueError:
+ pass
+
+ if t in self.LOWMAP[i]:
+ t = self.LOWMAP[i][t]
+
+ if (
+ t not in ["*", "l"]
+ and (int(t) < self.RANGES[i][0] or
+ int(t) > self.RANGES[i][1])
+ ):
+ raise ValueError(
+ "[{0}] is not acceptable, out of range".format(
+ expr_format))
+
+ res.append(t)
+
+ res.sort()
+ expanded.append(['*'] if (len(res) == 1
+ and res[0] == '*')
+ else res)
+ self.expanded = expanded
+
+ def get_next(self, ret_type=None):
+ return self._get_next(ret_type or self._ret_type, is_prev=False)
+
+ def get_prev(self, ret_type=None):
+ return self._get_next(ret_type or self._ret_type, is_prev=True)
+
+ def get_current(self, ret_type=None):
+ ret_type = ret_type or self._ret_type
+ if ret_type == datetime.datetime:
+ return self._timestamp_to_datetime(self.cur)
+ return self.cur
+
+ def _datetime_to_timestamp(self, d):
+ """
+ Converts a `datetime` object `d` into a UNIX timestamp.
+ """
+ if d.tzinfo is not None:
+ d = d.replace(tzinfo=None) - d.utcoffset()
+
+ return self._timedelta_to_seconds(d - datetime.datetime(1970, 1, 1))
+
+ def _timestamp_to_datetime(self, timestamp):
+ """
+ Converts a UNIX timestamp `timestamp` into a `datetime` object.
+ """
+ result = datetime.datetime.utcfromtimestamp(timestamp)
+ if self.tzinfo:
+ result = result.replace(tzinfo=tzutc()).astimezone(self.tzinfo)
+
+ return result
+
+ @classmethod
+ def _timedelta_to_seconds(cls, td):
+ """
+ Converts a 'datetime.timedelta' object `td` into seconds contained in
+ the duration.
+ Note: We cannot use `timedelta.total_seconds()` because this is not
+ supported by Python 2.6.
+ """
+ return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) \
+ / 10**6
+
+ # iterator protocol, to enable direct use of croniter
+ # objects in a loop, like "for dt in croniter('5 0 * * *'): ..."
+ # or for combining multiple croniters into single
+ # dates feed using 'itertools' module
+ def __iter__(self):
+ return self
+ __next__ = next = get_next
+
+ def all_next(self, ret_type=None):
+ '''Generator of all consecutive dates. Can be used instead of
+ implicit call to __iter__, whenever non-default
+ 'ret_type' has to be specified.
+ '''
+ while True:
+ yield self._get_next(ret_type or self._ret_type, is_prev=False)
+
+ def all_prev(self, ret_type=None):
+ '''Generator of all previous dates.'''
+ while True:
+ yield self._get_next(ret_type or self._ret_type, is_prev=True)
+
+ iter = all_next # alias, you can call .iter() instead of .all_next()
+
+ def _get_next(self, ret_type=None, is_prev=False):
+ expanded = self.expanded[:]
+
+ ret_type = ret_type or self._ret_type
+
+ if ret_type not in (float, datetime.datetime):
+ raise TypeError("Invalid ret_type, only 'float' or 'datetime' "
+ "is acceptable.")
+
+ if expanded[2][0] != '*' and expanded[4][0] != '*':
+ bak = expanded[4]
+ expanded[4] = ['*']
+ t1 = self._calc(self.cur, expanded, is_prev)
+ expanded[4] = bak
+ expanded[2] = ['*']
+
+ t2 = self._calc(self.cur, expanded, is_prev)
+ if not is_prev:
+ result = t1 if t1 < t2 else t2
+ else:
+ result = t1 if t1 > t2 else t2
+ else:
+ result = self._calc(self.cur, expanded, is_prev)
+ self.cur = result
+
+ if ret_type == datetime.datetime:
+ result = self._timestamp_to_datetime(result)
+
+ return result
+
+ def _calc(self, now, expanded, is_prev):
+ if is_prev:
+ nearest_diff_method = self._get_prev_nearest_diff
+ sign = -1
+ else:
+ nearest_diff_method = self._get_next_nearest_diff
+ sign = 1
+
+ offset = len(expanded) == 6 and 1 or 60
+ dst = now = self._timestamp_to_datetime(now + sign * offset)
+
+ day, month, year = dst.day, dst.month, dst.year
+ current_year = now.year
+ DAYS = self.DAYS
+
+ def proc_month(d):
+ if expanded[3][0] != '*':
+ diff_month = nearest_diff_method(
+ d.month, expanded[3], self.MONTHS_IN_YEAR)
+ days = DAYS[month - 1]
+ if month == 2 and self.is_leap(year) is True:
+ days += 1
+
+ reset_day = 1
+
+ if diff_month is not None and diff_month != 0:
+ if is_prev:
+ d += relativedelta(months=diff_month)
+ reset_day = DAYS[d.month - 1]
+ d += relativedelta(
+ day=reset_day, hour=23, minute=59, second=59)
+ else:
+ d += relativedelta(months=diff_month, day=reset_day,
+ hour=0, minute=0, second=0)
+ return True, d
+ return False, d
+
+ def proc_day_of_month(d):
+ if expanded[2][0] != '*':
+ days = DAYS[month - 1]
+ if month == 2 and self.is_leap(year) is True:
+ days += 1
+ if 'l' in expanded[2] and days==d.day:
+ return False, d
+
+ if is_prev:
+ days_in_prev_month = DAYS[
+ (month - 2) % self.MONTHS_IN_YEAR]
+ diff_day = nearest_diff_method(
+ d.day, expanded[2], days_in_prev_month)
+ else:
+ diff_day = nearest_diff_method(d.day, expanded[2], days)
+
+ if diff_day is not None and diff_day != 0:
+ if is_prev:
+ d += relativedelta(
+ days=diff_day, hour=23, minute=59, second=59)
+ else:
+ d += relativedelta(
+ days=diff_day, hour=0, minute=0, second=0)
+ return True, d
+ return False, d
+
+ def proc_day_of_week(d):
+ if expanded[4][0] != '*':
+ diff_day_of_week = nearest_diff_method(
+ d.isoweekday() % 7, expanded[4], 7)
+ if diff_day_of_week is not None and diff_day_of_week != 0:
+ if is_prev:
+ d += relativedelta(days=diff_day_of_week,
+ hour=23, minute=59, second=59)
+ else:
+ d += relativedelta(days=diff_day_of_week,
+ hour=0, minute=0, second=0)
+ return True, d
+ return False, d
+
+ def proc_hour(d):
+ if expanded[1][0] != '*':
+ diff_hour = nearest_diff_method(d.hour, expanded[1], 24)
+ if diff_hour is not None and diff_hour != 0:
+ if is_prev:
+ d += relativedelta(
+ hours=diff_hour, minute=59, second=59)
+ else:
+ d += relativedelta(hours=diff_hour, minute=0, second=0)
+ return True, d
+ return False, d
+
+ def proc_minute(d):
+ if expanded[0][0] != '*':
+ diff_min = nearest_diff_method(d.minute, expanded[0], 60)
+ if diff_min is not None and diff_min != 0:
+ if is_prev:
+ d += relativedelta(minutes=diff_min, second=59)
+ else:
+ d += relativedelta(minutes=diff_min, second=0)
+ return True, d
+ return False, d
+
+ def proc_second(d):
+ if len(expanded) == 6:
+ if expanded[5][0] != '*':
+ diff_sec = nearest_diff_method(d.second, expanded[5], 60)
+ if diff_sec is not None and diff_sec != 0:
+ d += relativedelta(seconds=diff_sec)
+ return True, d
+ else:
+ d += relativedelta(second=0)
+ return False, d
+
+ procs = [proc_month,
+ proc_day_of_month,
+ proc_day_of_week,
+ proc_hour,
+ proc_minute,
+ proc_second]
+
+ while abs(year - current_year) <= 1:
+ next = False
+ for proc in procs:
+ (changed, dst) = proc(dst)
+ if changed:
+ day, month, year = dst.day, dst.month, dst.year
+ next = True
+ break
+ if next:
+ continue
+ return self._datetime_to_timestamp(dst.replace(microsecond=0))
+
+ raise Exception("failed to find prev date")
+
+ def _get_next_nearest(self, x, to_check):
+ small = [item for item in to_check if item < x]
+ large = [item for item in to_check if item >= x]
+ large.extend(small)
+ return large[0]
+
+ def _get_prev_nearest(self, x, to_check):
+ small = [item for item in to_check if item <= x]
+ large = [item for item in to_check if item > x]
+ small.reverse()
+ large.reverse()
+ small.extend(large)
+ return small[0]
+
+ def _get_next_nearest_diff(self, x, to_check, range_val):
+ for i, d in enumerate(to_check):
+ if d == "l":
+ # if 'l' then it is the last day of month
+ # => its value of range_val
+ d = range_val
+ if d >= x:
+ return d - x
+ return to_check[0] - x + range_val
+
+ def _get_prev_nearest_diff(self, x, to_check, range_val):
+ candidates = to_check[:]
+ candidates.reverse()
+ for d in candidates:
+ if d != 'l' and d <= x:
+ return d - x
+ if 'l' in candidates:
+ return -x
+ candidate = candidates[0]
+ for c in candidates:
+ if c < range_val:
+ candidate = c
+ break
+
+ return (candidate - x - range_val)
+
+ def is_leap(self, year):
+ if year % 400 == 0 or (year % 4 == 0 and year % 100 != 0):
+ return True
+ else:
+ return False
+
+if __name__ == '__main__':
+
+ base = datetime.datetime(2010, 1, 25)
+ itr = croniter('0 0 1 * *', base)
+ n1 = itr.get_next(datetime.datetime)
+ print(n1)
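For reviewers unfamiliar with the library, a minimal usage sketch of the
vendored module (assumes python-dateutil is importable, since at this commit
croniter still imports it from the system; the schedule and timestamps are
illustrative):
import datetime
from ooni.contrib.croniter import croniter
base = datetime.datetime(2016, 7, 26, 16, 53)
itr = croniter('*/15 * * * *', base)    # fire at minutes 0, 15, 30, 45
print(itr.get_next(datetime.datetime))  # 2016-07-26 17:00:00
print(itr.get_next(datetime.datetime))  # 2016-07-26 17:15:00
print(itr.get_prev(datetime.datetime))  # 2016-07-26 17:00:00
gen = itr.all_next(datetime.datetime)   # endless generator of upcoming times
print(next(gen))                        # 2016-07-26 17:15:00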
commit d24f57a7b56baa02a04e912b21a7eeb0d37590b4
Author: Arturo Filastò <arturo(a)filasto.net>
Date: Tue Jul 26 18:31:14 2016 +0200
Import the needed dateutil modules
---
ooni/contrib/__init__.py | 1 +
ooni/contrib/croniter.py | 4 +-
ooni/contrib/dateutil/__init__.py | 0
ooni/contrib/dateutil/relativedelta.py | 539 +++++++++++++
ooni/contrib/dateutil/tz/__init__.py | 4 +
ooni/contrib/dateutil/tz/_common.py | 100 +++
ooni/contrib/dateutil/tz/tz.py | 1339 ++++++++++++++++++++++++++++++++
ooni/contrib/dateutil/tz/win.py | 354 +++++++++
8 files changed, 2339 insertions(+), 2 deletions(-)
diff --git a/ooni/contrib/__init__.py b/ooni/contrib/__init__.py
index e69de29..50b6b54 100644
--- a/ooni/contrib/__init__.py
+++ b/ooni/contrib/__init__.py
@@ -0,0 +1 @@
+from ._crontab import CronTab
diff --git a/ooni/contrib/croniter.py b/ooni/contrib/croniter.py
index 327326f..5864603 100644
--- a/ooni/contrib/croniter.py
+++ b/ooni/contrib/croniter.py
@@ -5,8 +5,8 @@ from __future__ import absolute_import, print_function
import re
from time import time
import datetime
-from dateutil.relativedelta import relativedelta
-from dateutil.tz import tzutc
+from .dateutil.relativedelta import relativedelta
+from .dateutil.tz import tzutc
search_re = re.compile(r'^([^-]+)-([^-/]+)(/(.*))?$')
only_int_re = re.compile(r'^\d+$')
diff --git a/ooni/contrib/dateutil/__init__.py b/ooni/contrib/dateutil/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ooni/contrib/dateutil/relativedelta.py b/ooni/contrib/dateutil/relativedelta.py
new file mode 100644
index 0000000..c1c9cc0
--- /dev/null
+++ b/ooni/contrib/dateutil/relativedelta.py
@@ -0,0 +1,539 @@
+# -*- coding: utf-8 -*-
+import datetime
+import calendar
+
+import operator
+from math import copysign
+
+from six import integer_types
+from warnings import warn
+
+__all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"]
+
+
+class weekday(object):
+ __slots__ = ["weekday", "n"]
+
+ def __init__(self, weekday, n=None):
+ self.weekday = weekday
+ self.n = n
+
+ def __call__(self, n):
+ if n == self.n:
+ return self
+ else:
+ return self.__class__(self.weekday, n)
+
+ def __eq__(self, other):
+ try:
+ if self.weekday != other.weekday or self.n != other.n:
+ return False
+ except AttributeError:
+ return False
+ return True
+
+ def __repr__(self):
+ s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday]
+ if not self.n:
+ return s
+ else:
+ return "%s(%+d)" % (s, self.n)
+
+MO, TU, WE, TH, FR, SA, SU = weekdays = tuple([weekday(x) for x in range(7)])
+
+
+class relativedelta(object):
+ """
+ The relativedelta type is based on the specification of the excellent
+ work done by M.-A. Lemburg in his
+ `mx.DateTime <http://www.egenix.com/files/python/mxDateTime.html>`_ extension.
+ However, notice that this type does *NOT* implement the same algorithm as
+ his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
+
+ There are two different ways to build a relativedelta instance. The
+ first one is passing it two date/datetime classes::
+
+ relativedelta(datetime1, datetime2)
+
+ The second one is passing it any number of the following keyword arguments::
+
+ relativedelta(arg1=x,arg2=y,arg3=z...)
+
+ year, month, day, hour, minute, second, microsecond:
+ Absolute information (argument is singular); adding or subtracting a
+ relativedelta with absolute information does not perform an arithmetic
+ operation, but rather REPLACES the corresponding value in the
+ original datetime with the value(s) in relativedelta.
+
+ years, months, weeks, days, hours, minutes, seconds, microseconds:
+ Relative information, may be negative (argument is plural); adding
+ or subtracting a relativedelta with relative information performs
+ the corresponding arithmetic operation on the original datetime value
+ with the information in the relativedelta.
+
+ weekday:
+ One of the weekday instances (MO, TU, etc). These instances may
+ receive a parameter N, specifying the Nth weekday, which could
+ be positive or negative (like MO(+1) or MO(-2)). Not specifying
+ it is the same as specifying +1. You can also use an integer,
+ where 0=MO.
+
+ leapdays:
+ Will add given days to the date found, if year is a leap
+ year, and the date found is post 28 of february.
+
+ yearday, nlyearday:
+ Set the yearday or the non-leap year day (jump leap days).
+ These are converted to day/month/leapdays information.
+
+ Here is the behavior of operations with relativedelta:
+
+ 1. Calculate the absolute year, using the 'year' argument, or the
+ original datetime year, if the argument is not present.
+
+ 2. Add the relative 'years' argument to the absolute year.
+
+ 3. Do steps 1 and 2 for month/months.
+
+ 4. Calculate the absolute day, using the 'day' argument, or the
+ original datetime day, if the argument is not present. Then,
+ subtract from the day until it fits in the year and month
+ found after their operations.
+
+ 5. Add the relative 'days' argument to the absolute day. Notice
+ that the 'weeks' argument is multiplied by 7 and added to
+ 'days'.
+
+ 6. Do steps 1 and 2 for hour/hours, minute/minutes, second/seconds,
+ microsecond/microseconds.
+
+ 7. If the 'weekday' argument is present, calculate the weekday,
+ with the given (wday, nth) tuple. wday is the index of the
+ weekday (0-6, 0=Mon), and nth is the number of weeks to add
+ forward or backward, depending on its signal. Notice that if
+ the calculated date is already Monday, for example, using
+ (0, 1) or (0, -1) won't change the day.
+ """
+
+ def __init__(self, dt1=None, dt2=None,
+ years=0, months=0, days=0, leapdays=0, weeks=0,
+ hours=0, minutes=0, seconds=0, microseconds=0,
+ year=None, month=None, day=None, weekday=None,
+ yearday=None, nlyearday=None,
+ hour=None, minute=None, second=None, microsecond=None):
+
+ # Check for non-integer values in integer-only quantities
+ if any(x is not None and x != int(x) for x in (years, months)):
+ raise ValueError("Non-integer years and months are "
+ "ambiguous and not currently supported.")
+
+ if dt1 and dt2:
+ # datetime is a subclass of date. So both must be date
+ if not (isinstance(dt1, datetime.date) and
+ isinstance(dt2, datetime.date)):
+ raise TypeError("relativedelta only diffs datetime/date")
+
+ # We allow two dates, or two datetimes, so we coerce them to be
+ # of the same type
+ if (isinstance(dt1, datetime.datetime) !=
+ isinstance(dt2, datetime.datetime)):
+ if not isinstance(dt1, datetime.datetime):
+ dt1 = datetime.datetime.fromordinal(dt1.toordinal())
+ elif not isinstance(dt2, datetime.datetime):
+ dt2 = datetime.datetime.fromordinal(dt2.toordinal())
+
+ self.years = 0
+ self.months = 0
+ self.days = 0
+ self.leapdays = 0
+ self.hours = 0
+ self.minutes = 0
+ self.seconds = 0
+ self.microseconds = 0
+ self.year = None
+ self.month = None
+ self.day = None
+ self.weekday = None
+ self.hour = None
+ self.minute = None
+ self.second = None
+ self.microsecond = None
+ self._has_time = 0
+
+ # Get year / month delta between the two
+ months = (dt1.year - dt2.year) * 12 + (dt1.month - dt2.month)
+ self._set_months(months)
+
+ # Remove the year/month delta so the timedelta is just well-defined
+ # time units (seconds, days and microseconds)
+ dtm = self.__radd__(dt2)
+
+ # If we've overshot our target, make an adjustment
+ if dt1 < dt2:
+ compare = operator.gt
+ increment = 1
+ else:
+ compare = operator.lt
+ increment = -1
+
+ while compare(dt1, dtm):
+ months += increment
+ self._set_months(months)
+ dtm = self.__radd__(dt2)
+
+ # Get the timedelta between the "months-adjusted" date and dt1
+ delta = dt1 - dtm
+ self.seconds = delta.seconds + delta.days * 86400
+ self.microseconds = delta.microseconds
+ else:
+ # Relative information
+ self.years = years
+ self.months = months
+ self.days = days + weeks * 7
+ self.leapdays = leapdays
+ self.hours = hours
+ self.minutes = minutes
+ self.seconds = seconds
+ self.microseconds = microseconds
+
+ # Absolute information
+ self.year = year
+ self.month = month
+ self.day = day
+ self.hour = hour
+ self.minute = minute
+ self.second = second
+ self.microsecond = microsecond
+
+ if any(x is not None and int(x) != x
+ for x in (year, month, day, hour,
+ minute, second, microsecond)):
+ # For now we'll deprecate floats - later it'll be an error.
+ warn("Non-integer value passed as absolute information. " +
+ "This is not a well-defined condition and will raise " +
+ "errors in future versions.", DeprecationWarning)
+
+
+ if isinstance(weekday, integer_types):
+ self.weekday = weekdays[weekday]
+ else:
+ self.weekday = weekday
+
+ yday = 0
+ if nlyearday:
+ yday = nlyearday
+ elif yearday:
+ yday = yearday
+ if yearday > 59:
+ self.leapdays = -1
+ if yday:
+ ydayidx = [31, 59, 90, 120, 151, 181, 212,
+ 243, 273, 304, 334, 366]
+ for idx, ydays in enumerate(ydayidx):
+ if yday <= ydays:
+ self.month = idx+1
+ if idx == 0:
+ self.day = yday
+ else:
+ self.day = yday-ydayidx[idx-1]
+ break
+ else:
+ raise ValueError("invalid year day (%d)" % yday)
+
+ self._fix()
+
+ def _fix(self):
+ if abs(self.microseconds) > 999999:
+ s = _sign(self.microseconds)
+ div, mod = divmod(self.microseconds * s, 1000000)
+ self.microseconds = mod * s
+ self.seconds += div * s
+ if abs(self.seconds) > 59:
+ s = _sign(self.seconds)
+ div, mod = divmod(self.seconds * s, 60)
+ self.seconds = mod * s
+ self.minutes += div * s
+ if abs(self.minutes) > 59:
+ s = _sign(self.minutes)
+ div, mod = divmod(self.minutes * s, 60)
+ self.minutes = mod * s
+ self.hours += div * s
+ if abs(self.hours) > 23:
+ s = _sign(self.hours)
+ div, mod = divmod(self.hours * s, 24)
+ self.hours = mod * s
+ self.days += div * s
+ if abs(self.months) > 11:
+ s = _sign(self.months)
+ div, mod = divmod(self.months * s, 12)
+ self.months = mod * s
+ self.years += div * s
+ if (self.hours or self.minutes or self.seconds or self.microseconds
+ or self.hour is not None or self.minute is not None or
+ self.second is not None or self.microsecond is not None):
+ self._has_time = 1
+ else:
+ self._has_time = 0
+
+ @property
+ def weeks(self):
+ return self.days // 7
+ @weeks.setter
+ def weeks(self, value):
+ self.days = self.days - (self.weeks * 7) + value * 7
+
+ def _set_months(self, months):
+ self.months = months
+ if abs(self.months) > 11:
+ s = _sign(self.months)
+ div, mod = divmod(self.months * s, 12)
+ self.months = mod * s
+ self.years = div * s
+ else:
+ self.years = 0
+
+ def normalized(self):
+ """
+ Return a version of this object represented entirely using integer
+ values for the relative attributes.
+
+ >>> relativedelta(days=1.5, hours=2).normalized()
+ relativedelta(days=1, hours=14)
+
+ :return:
+ Returns a :class:`dateutil.relativedelta.relativedelta` object.
+ """
+ # Cascade remainders down (rounding each to roughly nearest microsecond)
+ days = int(self.days)
+
+ hours_f = round(self.hours + 24 * (self.days - days), 11)
+ hours = int(hours_f)
+
+ minutes_f = round(self.minutes + 60 * (hours_f - hours), 10)
+ minutes = int(minutes_f)
+
+ seconds_f = round(self.seconds + 60 * (minutes_f - minutes), 8)
+ seconds = int(seconds_f)
+
+ microseconds = round(self.microseconds + 1e6 * (seconds_f - seconds))
+
+ # Constructor carries overflow back up with call to _fix()
+ return self.__class__(years=self.years, months=self.months,
+ days=days, hours=hours, minutes=minutes,
+ seconds=seconds, microseconds=microseconds,
+ leapdays=self.leapdays, year=self.year,
+ month=self.month, day=self.day,
+ weekday=self.weekday, hour=self.hour,
+ minute=self.minute, second=self.second,
+ microsecond=self.microsecond)
+
+ def __add__(self, other):
+ if isinstance(other, relativedelta):
+ return self.__class__(years=other.years + self.years,
+ months=other.months + self.months,
+ days=other.days + self.days,
+ hours=other.hours + self.hours,
+ minutes=other.minutes + self.minutes,
+ seconds=other.seconds + self.seconds,
+ microseconds=(other.microseconds +
+ self.microseconds),
+ leapdays=other.leapdays or self.leapdays,
+ year=other.year or self.year,
+ month=other.month or self.month,
+ day=other.day or self.day,
+ weekday=other.weekday or self.weekday,
+ hour=other.hour or self.hour,
+ minute=other.minute or self.minute,
+ second=other.second or self.second,
+ microsecond=(other.microsecond or
+ self.microsecond))
+ if not isinstance(other, datetime.date):
+ return NotImplemented
+ elif self._has_time and not isinstance(other, datetime.datetime):
+ other = datetime.datetime.fromordinal(other.toordinal())
+ year = (self.year or other.year)+self.years
+ month = self.month or other.month
+ if self.months:
+ assert 1 <= abs(self.months) <= 12
+ month += self.months
+ if month > 12:
+ year += 1
+ month -= 12
+ elif month < 1:
+ year -= 1
+ month += 12
+ day = min(calendar.monthrange(year, month)[1],
+ self.day or other.day)
+ repl = {"year": year, "month": month, "day": day}
+ for attr in ["hour", "minute", "second", "microsecond"]:
+ value = getattr(self, attr)
+ if value is not None:
+ repl[attr] = value
+ days = self.days
+ if self.leapdays and month > 2 and calendar.isleap(year):
+ days += self.leapdays
+ ret = (other.replace(**repl)
+ + datetime.timedelta(days=days,
+ hours=self.hours,
+ minutes=self.minutes,
+ seconds=self.seconds,
+ microseconds=self.microseconds))
+ if self.weekday:
+ weekday, nth = self.weekday.weekday, self.weekday.n or 1
+ jumpdays = (abs(nth) - 1) * 7
+ if nth > 0:
+ jumpdays += (7 - ret.weekday() + weekday) % 7
+ else:
+ jumpdays += (ret.weekday() - weekday) % 7
+ jumpdays *= -1
+ ret += datetime.timedelta(days=jumpdays)
+ return ret
+
+ def __radd__(self, other):
+ return self.__add__(other)
+
+ def __rsub__(self, other):
+ return self.__neg__().__radd__(other)
+
+ def __sub__(self, other):
+ if not isinstance(other, relativedelta):
+ return NotImplemented # In case the other object defines __rsub__
+ return self.__class__(years=self.years - other.years,
+ months=self.months - other.months,
+ days=self.days - other.days,
+ hours=self.hours - other.hours,
+ minutes=self.minutes - other.minutes,
+ seconds=self.seconds - other.seconds,
+ microseconds=self.microseconds - other.microseconds,
+ leapdays=self.leapdays or other.leapdays,
+ year=self.year or other.year,
+ month=self.month or other.month,
+ day=self.day or other.day,
+ weekday=self.weekday or other.weekday,
+ hour=self.hour or other.hour,
+ minute=self.minute or other.minute,
+ second=self.second or other.second,
+ microsecond=self.microsecond or other.microsecond)
+
+ def __neg__(self):
+ return self.__class__(years=-self.years,
+ months=-self.months,
+ days=-self.days,
+ hours=-self.hours,
+ minutes=-self.minutes,
+ seconds=-self.seconds,
+ microseconds=-self.microseconds,
+ leapdays=self.leapdays,
+ year=self.year,
+ month=self.month,
+ day=self.day,
+ weekday=self.weekday,
+ hour=self.hour,
+ minute=self.minute,
+ second=self.second,
+ microsecond=self.microsecond)
+
+ def __bool__(self):
+ return not (not self.years and
+ not self.months and
+ not self.days and
+ not self.hours and
+ not self.minutes and
+ not self.seconds and
+ not self.microseconds and
+ not self.leapdays and
+ self.year is None and
+ self.month is None and
+ self.day is None and
+ self.weekday is None and
+ self.hour is None and
+ self.minute is None and
+ self.second is None and
+ self.microsecond is None)
+ # Compatibility with Python 2.x
+ __nonzero__ = __bool__
+
+ def __mul__(self, other):
+ try:
+ f = float(other)
+ except TypeError:
+ return NotImplemented
+
+ return self.__class__(years=int(self.years * f),
+ months=int(self.months * f),
+ days=int(self.days * f),
+ hours=int(self.hours * f),
+ minutes=int(self.minutes * f),
+ seconds=int(self.seconds * f),
+ microseconds=int(self.microseconds * f),
+ leapdays=self.leapdays,
+ year=self.year,
+ month=self.month,
+ day=self.day,
+ weekday=self.weekday,
+ hour=self.hour,
+ minute=self.minute,
+ second=self.second,
+ microsecond=self.microsecond)
+
+ __rmul__ = __mul__
+
+ def __eq__(self, other):
+ if not isinstance(other, relativedelta):
+ return NotImplemented
+ if self.weekday or other.weekday:
+ if not self.weekday or not other.weekday:
+ return False
+ if self.weekday.weekday != other.weekday.weekday:
+ return False
+ n1, n2 = self.weekday.n, other.weekday.n
+ if n1 != n2 and not ((not n1 or n1 == 1) and (not n2 or n2 == 1)):
+ return False
+ return (self.years == other.years and
+ self.months == other.months and
+ self.days == other.days and
+ self.hours == other.hours and
+ self.minutes == other.minutes and
+ self.seconds == other.seconds and
+ self.microseconds == other.microseconds and
+ self.leapdays == other.leapdays and
+ self.year == other.year and
+ self.month == other.month and
+ self.day == other.day and
+ self.hour == other.hour and
+ self.minute == other.minute and
+ self.second == other.second and
+ self.microsecond == other.microsecond)
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def __div__(self, other):
+ try:
+ reciprocal = 1 / float(other)
+ except TypeError:
+ return NotImplemented
+
+ return self.__mul__(reciprocal)
+
+ __truediv__ = __div__
+
+ def __repr__(self):
+ l = []
+ for attr in ["years", "months", "days", "leapdays",
+ "hours", "minutes", "seconds", "microseconds"]:
+ value = getattr(self, attr)
+ if value:
+ l.append("{attr}={value:+g}".format(attr=attr, value=value))
+ for attr in ["year", "month", "day", "weekday",
+ "hour", "minute", "second", "microsecond"]:
+ value = getattr(self, attr)
+ if value is not None:
+ l.append("{attr}={value}".format(attr=attr, value=repr(value)))
+ return "{classname}({attrs})".format(classname=self.__class__.__name__,
+ attrs=", ".join(l))
+
+def _sign(x):
+ return int(copysign(1, x))
+
+# vim:ts=4:sw=4:et
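
To make the clamping and weekday-jump rules above concrete, here is a minimal sketch; the import path assumes upstream python-dateutil, which exposes the same API as this vendored copy (ooni.contrib.dateutil.relativedelta):

    import datetime
    from dateutil.relativedelta import relativedelta, SU

    # Month arithmetic clamps the day to the length of the target month:
    print(datetime.date(2016, 1, 31) + relativedelta(months=+1))
    # -> 2016-02-29 (2016 is a leap year)

    # Absolute fields (month=1) replace, relative fields (days=+3) shift:
    print(datetime.date(2016, 9, 19) + relativedelta(month=1, days=+3))
    # -> 2016-01-22

    # weekday=SU(+2) jumps to the 2nd Sunday on or after the intermediate result:
    print(datetime.date(2016, 9, 1) + relativedelta(weekday=SU(+2)))
    # -> 2016-09-11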
diff --git a/ooni/contrib/dateutil/tz/__init__.py b/ooni/contrib/dateutil/tz/__init__.py
new file mode 100644
index 0000000..1cba7b9
--- /dev/null
+++ b/ooni/contrib/dateutil/tz/__init__.py
@@ -0,0 +1,4 @@
+from .tz import *
+
+__all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
+ "tzstr", "tzical", "tzwin", "tzwinlocal", "gettz"]
diff --git a/ooni/contrib/dateutil/tz/_common.py b/ooni/contrib/dateutil/tz/_common.py
new file mode 100644
index 0000000..6e862a9
--- /dev/null
+++ b/ooni/contrib/dateutil/tz/_common.py
@@ -0,0 +1,100 @@
+from six import PY3
+from six.moves import _thread
+
+import datetime
+import copy
+
+__all__ = ['tzname_in_python2']
+
+def tzname_in_python2(namefunc):
+ """Change unicode output into bytestrings in Python 2
+
+ The tzname() API changed in Python 3: it used to return bytes, but now
+ returns unicode strings.
+ """
+ def adjust_encoding(*args, **kwargs):
+ name = namefunc(*args, **kwargs)
+ if name is not None and not PY3:
+ name = name.encode()
+
+ return name
+
+ return adjust_encoding
+
+
+class _tzinfo(datetime.tzinfo):
+ """
+ Base class for all `dateutil` `tzinfo` objects.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super(_tzinfo, self).__init__(*args, **kwargs)
+
+ self._fold = None
+
+ def _as_fold_naive(self):
+ tzi = copy.copy(self)
+ tzi._fold = None
+
+ return tzi
+
+ def _fold_status(self, dt_utc, dt_wall):
+ """
+ Determine the fold status of a "wall" datetime, given a representation
+ of the same datetime as a (naive) UTC datetime. This is calculated based
+ on the assumption that ``dt.utcoffset() - dt.dst()`` is constant for all
+ datetimes, and that this offset is the actual number of hours separating
+ ``dt_utc`` and ``dt_wall``.
+
+ :param dt_utc:
+ Representation of the datetime as UTC
+
+ :param dt_wall:
+ Representation of the datetime as "wall time". This parameter must
+ either have a `fold` attribute or have a fold-naive
+ :class:`datetime.tzinfo` attached, otherwise the calculation may
+ fail.
+ """
+ _fold = getattr(dt_wall, 'fold', None) # PEP 495
+
+ if _fold is None:
+ # This is always true on the DST side, but _fold has no meaning
+ # outside of ambiguous times.
+ _fold = (dt_wall - dt_utc) != (dt_utc.utcoffset() - dt_utc.dst())
+
+ return _fold
+
+ def fromutc(self, dt):
+ """
+ Given a timezone-aware datetime in a given timezone, calculates a
+ timezone-aware datetime in a new timezone.
+
+ Since this is the one time that we *know* we have an unambiguous
+ datetime object, we take this opportunity to determine whether the
+ datetime is ambiguous and in a "fold" state (e.g. if it's the first
+ occurrence, chronologically, of the ambiguous datetime).
+
+ .. caution::
+
+ This creates a stateful ``tzinfo`` object that may not behave as
+ expected when performing arithmetic on timezone-aware datetimes.
+
+ :param dt:
+ A timezone-aware :class:`datetime.datetime` object.
+ """
+ # Use a fold-naive version of this tzinfo for calculations
+ tzi = self._as_fold_naive()
+ dt = dt.replace(tzinfo=tzi)
+
+ dt_wall = super(_tzinfo, tzi).fromutc(dt)
+
+ # Calculate the fold status given the two datetimes.
+ _fold = self._fold_status(dt, dt_wall)
+
+ # Set the default fold value for ambiguous dates
+ if _fold != self._fold:
+ tzi._fold = _fold
+ else:
+ dt_wall = dt_wall.replace(tzinfo=self)
+
+ return dt_wall
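
The fold bookkeeping above becomes visible when two UTC instants map onto the repeated wall-clock hour of a DST-to-STD transition. A sketch, assuming upstream python-dateutil 2.5 semantics (the same as this vendored copy) and the pre-2007 default US rules that tzstr falls back to, under which DST ends on the last Sunday of October:

    import datetime
    from dateutil import tz

    eastern = tz.tzstr('EST5EDT')  # no explicit rules: default transitions apply
    utc = tz.tzutc()

    # 01:30 wall time on 2016-10-30 occurs twice; two UTC instants reach it:
    first = datetime.datetime(2016, 10, 30, 5, 30, tzinfo=utc).astimezone(eastern)
    second = datetime.datetime(2016, 10, 30, 6, 30, tzinfo=utc).astimezone(eastern)
    print(first.strftime('%H:%M %Z'))   # prints "01:30 EDT" (before the fold)
    print(second.strftime('%H:%M %Z'))  # prints "01:30 EST" (after the fold)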
diff --git a/ooni/contrib/dateutil/tz/tz.py b/ooni/contrib/dateutil/tz/tz.py
new file mode 100644
index 0000000..6db3cba
--- /dev/null
+++ b/ooni/contrib/dateutil/tz/tz.py
@@ -0,0 +1,1339 @@
+# -*- coding: utf-8 -*-
+"""
+This module offers timezone implementations subclassing the abstract
+:py:class:`datetime.tzinfo` type. There are classes to handle tzfile format
+files (usually found in :file:`/etc/localtime` or :file:`/usr/share/zoneinfo`),
+TZ environment strings (in all known formats), given ranges (with the help of
+relative deltas), the local machine timezone, fixed offset timezones, and the
+UTC timezone.
+"""
+import datetime
+import struct
+import time
+import sys
+import os
+import bisect
+import copy
+
+from operator import itemgetter
+
+from contextlib import contextmanager
+
+from six import string_types, PY3
+from ._common import tzname_in_python2, _tzinfo
+
+try:
+ from .win import tzwin, tzwinlocal
+except ImportError:
+ tzwin = tzwinlocal = None
+
+ZERO = datetime.timedelta(0)
+EPOCH = datetime.datetime.utcfromtimestamp(0)
+EPOCHORDINAL = EPOCH.toordinal()
+
+class tzutc(datetime.tzinfo):
+ """
+ This is a tzinfo object that represents the UTC time zone.
+ """
+ def utcoffset(self, dt):
+ return ZERO
+
+ def dst(self, dt):
+ return ZERO
+
+ @tzname_in_python2
+ def tzname(self, dt):
+ return "UTC"
+
+ def __eq__(self, other):
+ if not isinstance(other, (tzutc, tzoffset)):
+ return NotImplemented
+
+ return (isinstance(other, tzutc) or
+ (isinstance(other, tzoffset) and other._offset == ZERO))
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __repr__(self):
+ return "%s()" % self.__class__.__name__
+
+ __reduce__ = object.__reduce__
+
+
+class tzoffset(datetime.tzinfo):
+ """
+ A simple class for representing a fixed offset from UTC.
+
+ :param name:
+ The timezone name, to be returned when ``tzname()`` is called.
+
+ :param offset:
+ The time zone offset in seconds.
+ """
+ def __init__(self, name, offset):
+ self._name = name
+ self._offset = datetime.timedelta(seconds=offset)
+
+ def utcoffset(self, dt):
+ return self._offset
+
+ def dst(self, dt):
+ return ZERO
+
+ @tzname_in_python2
+ def tzname(self, dt):
+ return self._name
+
+ def __eq__(self, other):
+ if not isinstance(other, tzoffset):
+ return NotImplemented
+
+ return self._offset == other._offset
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __repr__(self):
+ return "%s(%s, %s)" % (self.__class__.__name__,
+ repr(self._name),
+ int(_total_seconds(self._offset)))
+
+ __reduce__ = object.__reduce__
+
+
+class tzlocal(_tzinfo):
+ """
+ A :class:`tzinfo` subclass built around the ``time`` timezone functions.
+ """
+ def __init__(self):
+ super(tzlocal, self).__init__()
+
+ self._std_offset = datetime.timedelta(seconds=-time.timezone)
+ if time.daylight:
+ self._dst_offset = datetime.timedelta(seconds=-time.altzone)
+ else:
+ self._dst_offset = self._std_offset
+
+ self._dst_saved = self._dst_offset - self._std_offset
+ self._hasdst = bool(self._dst_saved)
+
+ def utcoffset(self, dt):
+ if dt is None and self._hasdst:
+ return None
+
+ if self._isdst(dt):
+ return self._dst_offset
+ else:
+ return self._std_offset
+
+ def dst(self, dt):
+ if dt is None and self._hasdst:
+ return None
+
+ if self._isdst(dt):
+ return self._dst_offset - self._std_offset
+ else:
+ return ZERO
+
+ @tzname_in_python2
+ def tzname(self, dt):
+ return time.tzname[self._isdst(dt)]
+
+ def _isdst(self, dt):
+ # We can't use mktime here. It is unstable when deciding if
+ # the hour near to a change is DST or not.
+ #
+ # timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour,
+ # dt.minute, dt.second, dt.weekday(), 0, -1))
+ # return time.localtime(timestamp).tm_isdst
+ #
+ # The code above yields the following result:
+ #
+ # >>> import tz, datetime
+ # >>> t = tz.tzlocal()
+ # >>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
+ # 'BRDT'
+ # >>> datetime.datetime(2003,2,16,0,tzinfo=t).tzname()
+ # 'BRST'
+ # >>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
+ # 'BRST'
+ # >>> datetime.datetime(2003,2,15,22,tzinfo=t).tzname()
+ # 'BRDT'
+ # >>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
+ # 'BRDT'
+ #
+ # Here is a more stable implementation:
+ #
+ if not self._hasdst:
+ return False
+
+ dstval = self._naive_is_dst(dt)
+
+ # Check for ambiguous times:
+ if not dstval and self._fold is not None:
+ dst_fold_offset = self._naive_is_dst(dt - self._dst_saved)
+
+ if dst_fold_offset:
+ return self._fold
+
+ return dstval
+
+ def _naive_is_dst(self, dt):
+ timestamp = _datetime_to_timestamp(dt)
+ return time.localtime(timestamp + time.timezone).tm_isdst
+
+ def __eq__(self, other):
+ if not isinstance(other, tzlocal):
+ return NotImplemented
+
+ return (self._std_offset == other._std_offset and
+ self._dst_offset == other._dst_offset)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __repr__(self):
+ return "%s()" % self.__class__.__name__
+
+ __reduce__ = object.__reduce__
+
+
+class _ttinfo(object):
+ __slots__ = ["offset", "delta", "isdst", "abbr",
+ "isstd", "isgmt", "dstoffset"]
+
+ def __init__(self):
+ for attr in self.__slots__:
+ setattr(self, attr, None)
+
+ def __repr__(self):
+ l = []
+ for attr in self.__slots__:
+ value = getattr(self, attr)
+ if value is not None:
+ l.append("%s=%s" % (attr, repr(value)))
+ return "%s(%s)" % (self.__class__.__name__, ", ".join(l))
+
+ def __eq__(self, other):
+ if not isinstance(other, _ttinfo):
+ return NotImplemented
+
+ return (self.offset == other.offset and
+ self.delta == other.delta and
+ self.isdst == other.isdst and
+ self.abbr == other.abbr and
+ self.isstd == other.isstd and
+ self.isgmt == other.isgmt and
+ self.dstoffset == other.dstoffset)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __getstate__(self):
+ state = {}
+ for name in self.__slots__:
+ state[name] = getattr(self, name, None)
+ return state
+
+ def __setstate__(self, state):
+ for name in self.__slots__:
+ if name in state:
+ setattr(self, name, state[name])
+
+
+class _tzfile(object):
+ """
+ Lightweight class for holding the relevant transition and time zone
+ information read from binary tzfiles.
+ """
+ attrs = ['trans_list', 'trans_idx', 'ttinfo_list',
+ 'ttinfo_std', 'ttinfo_dst', 'ttinfo_before', 'ttinfo_first']
+
+ def __init__(self, **kwargs):
+ for attr in self.attrs:
+ setattr(self, attr, kwargs.get(attr, None))
+
+
+class tzfile(_tzinfo):
+ """
+ This is a ``tzinfo`` subclass that allows one to use the ``tzfile(5)``
+ format timezone files to extract current and historical zone information.
+
+ :param fileobj:
+ This can be an opened file stream or a file name that the time zone
+ information can be read from.
+
+ :param filename:
+ This is an optional parameter specifying the source of the time zone
+ information in the event that ``fileobj`` is a file object. If omitted
+ and ``fileobj`` is a file stream, this parameter will be set either to
+ ``fileobj``'s ``name`` attribute or to ``repr(fileobj)``.
+
+ See `Sources for Time Zone and Daylight Saving Time Data
+ <http://www.twinsun.com/tz/tz-link.htm>`_ for more information. Time zone
+ files can be compiled from the `IANA Time Zone database files
+ <https://www.iana.org/time-zones>`_ with the `zic time zone compiler
+ <https://www.freebsd.org/cgi/man.cgi?query=zic&sektion=8>`_
+ """
+
+ def __init__(self, fileobj, filename=None):
+ super(tzfile, self).__init__()
+
+ file_opened_here = False
+ if isinstance(fileobj, string_types):
+ self._filename = fileobj
+ fileobj = open(fileobj, 'rb')
+ file_opened_here = True
+ elif filename is not None:
+ self._filename = filename
+ elif hasattr(fileobj, "name"):
+ self._filename = fileobj.name
+ else:
+ self._filename = repr(fileobj)
+
+ if fileobj is not None:
+ if not file_opened_here:
+ fileobj = _ContextWrapper(fileobj)
+
+ with fileobj as file_stream:
+ tzobj = self._read_tzfile(file_stream)
+
+ self._set_tzdata(tzobj)
+
+ def _set_tzdata(self, tzobj):
+ """ Set the time zone data of this object from a _tzfile object """
+ # Copy the relevant attributes over as private attributes
+ for attr in _tzfile.attrs:
+ setattr(self, '_' + attr, getattr(tzobj, attr))
+
+ def _read_tzfile(self, fileobj):
+ out = _tzfile()
+
+ # From tzfile(5):
+ #
+ # The time zone information files used by tzset(3)
+ # begin with the magic characters "TZif" to identify
+ # them as time zone information files, followed by
+ # sixteen bytes reserved for future use, followed by
+ # six four-byte values of type long, written in a
+ # ``standard'' byte order (the high-order byte
+ # of the value is written first).
+ if fileobj.read(4).decode() != "TZif":
+ raise ValueError("magic not found")
+
+ fileobj.read(16)
+
+ (
+ # The number of UTC/local indicators stored in the file.
+ ttisgmtcnt,
+
+ # The number of standard/wall indicators stored in the file.
+ ttisstdcnt,
+
+ # The number of leap seconds for which data is
+ # stored in the file.
+ leapcnt,
+
+ # The number of "transition times" for which data
+ # is stored in the file.
+ timecnt,
+
+ # The number of "local time types" for which data
+ # is stored in the file (must not be zero).
+ typecnt,
+
+ # The number of characters of "time zone
+ # abbreviation strings" stored in the file.
+ charcnt,
+
+ ) = struct.unpack(">6l", fileobj.read(24))
+
+ # The above header is followed by tzh_timecnt four-byte
+ # values of type long, sorted in ascending order.
+ # These values are written in ``standard'' byte order.
+ # Each is used as a transition time (as returned by
+ # time(2)) at which the rules for computing local time
+ # change.
+
+ if timecnt:
+ out.trans_list = list(struct.unpack(">%dl" % timecnt,
+ fileobj.read(timecnt*4)))
+ else:
+ out.trans_list = []
+
+ # Next come tzh_timecnt one-byte values of type unsigned
+ # char; each one tells which of the different types of
+ # ``local time'' types described in the file is associated
+ # with the same-indexed transition time. These values
+ # serve as indices into an array of ttinfo structures that
+ # appears next in the file.
+
+ if timecnt:
+ out.trans_idx = struct.unpack(">%dB" % timecnt,
+ fileobj.read(timecnt))
+ else:
+ out.trans_idx = []
+
+ # Each ttinfo structure is written as a four-byte value
+ # for tt_gmtoff of type long, in a standard byte
+ # order, followed by a one-byte value for tt_isdst
+ # and a one-byte value for tt_abbrind. In each
+ # structure, tt_gmtoff gives the number of
+ # seconds to be added to UTC, tt_isdst tells whether
+ # tm_isdst should be set by localtime(3), and
+ # tt_abbrind serves as an index into the array of
+ # time zone abbreviation characters that follow the
+ # ttinfo structure(s) in the file.
+
+ ttinfo = []
+
+ for i in range(typecnt):
+ ttinfo.append(struct.unpack(">lbb", fileobj.read(6)))
+
+ abbr = fileobj.read(charcnt).decode()
+
+ # Then there are tzh_leapcnt pairs of four-byte
+ # values, written in standard byte order; the
+ # first value of each pair gives the time (as
+ # returned by time(2)) at which a leap second
+ # occurs; the second gives the total number of
+ # leap seconds to be applied after the given time.
+ # The pairs of values are sorted in ascending order
+ # by time.
+
+ # Not used, for now (but read anyway for correct file position)
+ if leapcnt:
+ leap = struct.unpack(">%dl" % (leapcnt*2),
+ fileobj.read(leapcnt*8))
+
+ # Then there are tzh_ttisstdcnt standard/wall
+ # indicators, each stored as a one-byte value;
+ # they tell whether the transition times associated
+ # with local time types were specified as standard
+ # time or wall clock time, and are used when
+ # a time zone file is used in handling POSIX-style
+ # time zone environment variables.
+
+ if ttisstdcnt:
+ isstd = struct.unpack(">%db" % ttisstdcnt,
+ fileobj.read(ttisstdcnt))
+
+ # Finally, there are tzh_ttisgmtcnt UTC/local
+ # indicators, each stored as a one-byte value;
+ # they tell whether the transition times associated
+ # with local time types were specified as UTC or
+ # local time, and are used when a time zone file
+ # is used in handling POSIX-style time zone envi-
+ # ronment variables.
+
+ if ttisgmtcnt:
+ isgmt = struct.unpack(">%db" % ttisgmtcnt,
+ fileobj.read(ttisgmtcnt))
+
+ # Build ttinfo list
+ out.ttinfo_list = []
+ for i in range(typecnt):
+ gmtoff, isdst, abbrind = ttinfo[i]
+ # Round to full-minutes if that's not the case. Python's
+ # datetime doesn't accept sub-minute timezones. Check
+ # http://python.org/sf/1447945 for some information.
+ gmtoff = 60 * ((gmtoff + 30) // 60)
+ tti = _ttinfo()
+ tti.offset = gmtoff
+ tti.dstoffset = datetime.timedelta(0)
+ tti.delta = datetime.timedelta(seconds=gmtoff)
+ tti.isdst = isdst
+ tti.abbr = abbr[abbrind:abbr.find('\x00', abbrind)]
+ tti.isstd = (ttisstdcnt > i and isstd[i] != 0)
+ tti.isgmt = (ttisgmtcnt > i and isgmt[i] != 0)
+ out.ttinfo_list.append(tti)
+
+ # Replace ttinfo indexes for ttinfo objects.
+ out.trans_idx = [out.ttinfo_list[idx] for idx in out.trans_idx]
+
+ # Set standard, dst, and before ttinfos. before will be
+ # used when a given time is before any transitions,
+ # and will be set to the first non-dst ttinfo, or to
+ # the first dst, if all of them are dst.
+ out.ttinfo_std = None
+ out.ttinfo_dst = None
+ out.ttinfo_before = None
+ if out.ttinfo_list:
+ if not out.trans_list:
+ out.ttinfo_std = out.ttinfo_first = out.ttinfo_list[0]
+ else:
+ for i in range(timecnt-1, -1, -1):
+ tti = out.trans_idx[i]
+ if not out.ttinfo_std and not tti.isdst:
+ out.ttinfo_std = tti
+ elif not out.ttinfo_dst and tti.isdst:
+ out.ttinfo_dst = tti
+
+ if out.ttinfo_std and out.ttinfo_dst:
+ break
+ else:
+ if out.ttinfo_dst and not out.ttinfo_std:
+ out.ttinfo_std = out.ttinfo_dst
+
+ for tti in out.ttinfo_list:
+ if not tti.isdst:
+ out.ttinfo_before = tti
+ break
+ else:
+ out.ttinfo_before = out.ttinfo_list[0]
+
+ # Now fix transition times to become relative to wall time.
+ #
+ # I'm not sure about this. In my tests, the tz source file
+ # is setup to wall time, and in the binary file isstd and
+ # isgmt are off, so it should be in wall time. OTOH, it's
+ # always in gmt time. Let me know if you have comments
+ # about this.
+ laststdoffset = None
+ for i, tti in enumerate(out.trans_idx):
+ if not tti.isdst:
+ offset = tti.offset
+ laststdoffset = offset
+ else:
+ if laststdoffset is not None:
+ # Store the DST offset as well and update it in the list
+ tti.dstoffset = tti.offset - laststdoffset
+ out.trans_idx[i] = tti
+
+ offset = laststdoffset or 0
+
+ out.trans_list[i] += offset
+
+ # In case we missed any DST offsets on the way in for some reason, make
+ # a second pass over the list, looking for the /next/ DST offset.
+ laststdoffset = None
+ for i in reversed(range(len(out.trans_idx))):
+ tti = out.trans_idx[i]
+ if tti.isdst:
+ if not (tti.dstoffset or laststdoffset is None):
+ tti.dstoffset = tti.offset - laststdoffset
+ else:
+ laststdoffset = tti.offset
+
+ if not isinstance(tti.dstoffset, datetime.timedelta):
+ tti.dstoffset = datetime.timedelta(seconds=tti.dstoffset)
+
+ out.trans_idx[i] = tti
+
+ out.trans_idx = tuple(out.trans_idx)
+ out.trans_list = tuple(out.trans_list)
+
+ return out
+
+ def _find_last_transition(self, dt):
+ # If there's no list, there are no transitions to find
+ if not self._trans_list:
+ return None
+
+ timestamp = _datetime_to_timestamp(dt)
+
+ # Find where the timestamp fits in the transition list - if the
+ # timestamp is a transition time, it's part of the "after" period.
+ idx = bisect.bisect_right(self._trans_list, timestamp)
+
+ # We want to know when the previous transition was, so subtract off 1
+ return idx - 1
+
+ def _get_ttinfo(self, idx):
+ # For no list or after the last transition, default to _ttinfo_std
+ if idx is None or (idx + 1) == len(self._trans_list):
+ return self._ttinfo_std
+
+ # If there is a list and the time is before it, return _ttinfo_before
+ if idx < 0:
+ return self._ttinfo_before
+
+ return self._trans_idx[idx]
+
+ def _find_ttinfo(self, dt):
+ idx = self._resolve_ambiguous_time(dt)
+
+ return self._get_ttinfo(idx)
+
+ def _resolve_ambiguous_time(self, dt, idx=None):
+ if idx is None:
+ idx = self._find_last_transition(dt)
+
+ # If we're fold-naive or we have no transitions, return the index.
+ if self._fold is None or idx is None:
+ return idx
+
+ timestamp = _datetime_to_timestamp(dt)
+ tti = self._get_ttinfo(idx)
+
+ if idx > 0:
+ # Calculate the difference in offsets from the current to previous
+ od = self._get_ttinfo(idx - 1).offset - tti.offset
+ tt = self._trans_list[idx] # Transition time
+
+ if timestamp < tt + od:
+ if self._fold:
+ return idx - 1
+ else:
+ return idx
+
+ if idx < len(self._trans_list) - 1:
+ # Calculate the difference in offsets from the previous to current
+ od = self._get_ttinfo(idx + 1).offset - tti.offset
+ tt = self._trans_list[idx + 1]
+
+ if timestamp > tt - od:
+ if self._fold:
+ return idx + 1
+ else:
+ return idx
+
+ return idx
+
+ def utcoffset(self, dt):
+ if dt is None:
+ return None
+
+ if not self._ttinfo_std:
+ return ZERO
+
+ return self._find_ttinfo(dt).delta
+
+ def dst(self, dt):
+ if not self._ttinfo_dst:
+ return ZERO
+
+ tti = self._find_ttinfo(dt)
+
+ if not tti.isdst:
+ return ZERO
+
+ # The documentation says that utcoffset()-dst() must
+ # be constant for every dt.
+ return tti.dstoffset
+
+ @tzname_in_python2
+ def tzname(self, dt):
+ if not self._ttinfo_std:
+ return None
+ return self._find_ttinfo(dt).abbr
+
+ def __eq__(self, other):
+ if not isinstance(other, tzfile):
+ return NotImplemented
+ return (self._trans_list == other._trans_list and
+ self._trans_idx == other._trans_idx and
+ self._ttinfo_list == other._ttinfo_list)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __repr__(self):
+ return "%s(%s)" % (self.__class__.__name__, repr(self._filename))
+
+ def __reduce__(self):
+ return self.__reduce_ex__(None)
+
+ def __reduce_ex__(self, protocol):
+ return (self.__class__, (None, self._filename), self.__dict__)
+
+
+class tzrange(_tzinfo):
+ """
+ The ``tzrange`` object is a time zone specified by a set of offsets and
+ abbreviations, equivalent to the way the ``TZ`` variable can be specified
+ in POSIX-like systems, but using Python delta objects to specify DST
+ start, end and offsets.
+
+ :param stdabbr:
+ The abbreviation for standard time (e.g. ``'EST'``).
+
+ :param stdoffset:
+ An integer or :class:`datetime.timedelta` object or equivalent
+ specifying the base offset from UTC.
+
+ If unspecified, +00:00 is used.
+
+ :param dstabbr:
+ The abbreviation for DST / "Summer" time (e.g. ``'EDT'``).
+
+ If specified, with no other DST information, DST is assumed to occur
+ and the default behavior for ``dstoffset``, ``start`` and ``end`` is
+ used. If unspecified and no other DST information is specified, it
+ is assumed that this zone has no DST.
+
+ If this is unspecified and other DST information *is* specified,
+ DST occurs in the zone but the time zone abbreviation is left
+ unchanged.
+
+ :param dstoffset:
+ An integer or :class:`datetime.timedelta` object or equivalent
+ specifying the UTC offset during DST. If unspecified and any other DST
+ information is specified, it is assumed to be the STD offset +1 hour.
+
+ :param start:
+ A :class:`relativedelta.relativedelta` object or equivalent specifying
+ the time and time of year that daylight savings time starts. To specify,
+ for example, that DST starts at 2AM on the 2nd Sunday in March, pass:
+
+ ``relativedelta(hours=2, month=3, day=1, weekday=SU(+2))``
+
+ If unspecified and any other DST information is specified, the default
+ value is 2 AM on the first Sunday in April.
+
+ :param end:
+ A :class:`relativedelta.relativedelta` object or equivalent representing
+ the time and time of year that daylight savings time ends, with the
+ same specification method as in ``start``. One note is that this should
+ point to the first time in the *standard* zone, so if a transition
+ occurs at 2AM in the DST zone and the clocks are set back 1 hour to 1AM,
+ set the `hours` parameter to +1.
+
+
+ **Examples:**
+
+ .. testsetup:: tzrange
+
+ from dateutil.tz import tzrange, tzstr
+
+ .. doctest:: tzrange
+
+ >>> tzstr('EST5EDT') == tzrange("EST", -18000, "EDT")
+ True
+
+ >>> from dateutil.relativedelta import *
+ >>> range1 = tzrange("EST", -18000, "EDT")
+ >>> range2 = tzrange("EST", -18000, "EDT", -14400,
+ ... relativedelta(hours=+2, month=4, day=1,
+ ... weekday=SU(+1)),
+ ... relativedelta(hours=+1, month=10, day=31,
+ ... weekday=SU(-1)))
+ >>> tzstr('EST5EDT') == range1 == range2
+ True
+
+ """
+ def __init__(self, stdabbr, stdoffset=None,
+ dstabbr=None, dstoffset=None,
+ start=None, end=None):
+ super(tzrange, self).__init__()
+
+ global relativedelta
+ from dateutil import relativedelta
+
+ self._std_abbr = stdabbr
+ self._dst_abbr = dstabbr
+
+ try:
+ stdoffset = _total_seconds(stdoffset)
+ except (TypeError, AttributeError):
+ pass
+
+ try:
+ dstoffset = _total_seconds(dstoffset)
+ except (TypeError, AttributeError):
+ pass
+
+ if stdoffset is not None:
+ self._std_offset = datetime.timedelta(seconds=stdoffset)
+ else:
+ self._std_offset = ZERO
+
+ if dstoffset is not None:
+ self._dst_offset = datetime.timedelta(seconds=dstoffset)
+ elif dstabbr and stdoffset is not None:
+ self._dst_offset = self._std_offset+datetime.timedelta(hours=+1)
+ else:
+ self._dst_offset = ZERO
+
+ if dstabbr and start is None:
+ self._start_delta = relativedelta.relativedelta(
+ hours=+2, month=4, day=1, weekday=relativedelta.SU(+1))
+ else:
+ self._start_delta = start
+
+ if dstabbr and end is None:
+ self._end_delta = relativedelta.relativedelta(
+ hours=+1, month=10, day=31, weekday=relativedelta.SU(-1))
+ else:
+ self._end_delta = end
+
+ self._dst_base_offset = self._dst_offset - self._std_offset
+
+ def utcoffset(self, dt):
+ if dt is None:
+ return None
+
+ if self._isdst(dt):
+ return self._dst_offset
+ else:
+ return self._std_offset
+
+ def dst(self, dt):
+ if self._isdst(dt):
+ return self._dst_offset - self._std_offset
+ else:
+ return ZERO
+
+ @tzname_in_python2
+ def tzname(self, dt):
+ if self._isdst(dt):
+ return self._dst_abbr
+ else:
+ return self._std_abbr
+
+ def _isdst(self, dt):
+ transitions = self._transitions(dt.year)
+
+ if transitions is None:
+ return False
+
+ start, end = transitions
+
+ dt = dt.replace(tzinfo=None)
+
+ # Handle ambiguous dates
+ if self._fold is not None:
+ if end <= dt < end + self._dst_base_offset:
+ return self._fold
+
+ if start < end:
+ return start <= dt < end
+ else:
+ return not end <= dt < start
+
+ def _transitions(self, year):
+ if not self._start_delta:
+ return None
+
+ base_year = datetime.datetime(year, 1, 1)
+
+ start = base_year + self._start_delta
+ end = base_year + self._end_delta
+
+ return (start, end)
+
+ def __eq__(self, other):
+ if not isinstance(other, tzrange):
+ return NotImplemented
+
+ return (self._std_abbr == other._std_abbr and
+ self._dst_abbr == other._dst_abbr and
+ self._std_offset == other._std_offset and
+ self._dst_offset == other._dst_offset and
+ self._start_delta == other._start_delta and
+ self._end_delta == other._end_delta)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __repr__(self):
+ return "%s(...)" % self.__class__.__name__
+
+ __reduce__ = object.__reduce__
+
+
+class tzstr(tzrange):
+ """
+ ``tzstr`` objects are time zone objects specified by a time-zone string as
+ it would be passed to a ``TZ`` variable on POSIX-style systems (see
+ the `GNU C Library: TZ Variable`_ for more details).
+
+ There is one notable exception, which is that POSIX-style time zones use an
+ inverted offset format, so normally ``GMT+3`` would be parsed as an offset
+ 3 hours *behind* GMT. The ``tzstr`` time zone object will parse this as an
+ offset 3 hours *ahead* of GMT. If you would like to maintain the POSIX
+ behavior, pass a ``True`` value to ``posix_offset``.
+
+ The :class:`tzrange` object provides the same functionality, but is
+ specified using :class:`relativedelta.relativedelta` objects rather than
+ strings.
+
+ :param s:
+ A time zone string in ``TZ`` variable format. This can be a
+ :class:`bytes` (2.x: :class:`str`), :class:`str` (2.x: :class:`unicode`)
+ or a stream emitting unicode characters (e.g. :class:`StringIO`).
+
+ :param posix_offset:
+ Optional. If set to ``True``, interpret strings such as ``GMT+3`` or
+ ``UTC+3`` as being 3 hours *behind* UTC rather than ahead, per the
+ POSIX standard.
+
+ .. _`GNU C Library: TZ Variable`:
+ https://www.gnu.org/software/libc/manual/html_node/TZ-Variable.html
+ """
+ def __init__(self, s, posix_offset=False):
+ global parser
+ from dateutil import parser
+
+ self._s = s
+
+ res = parser._parsetz(s)
+ if res is None:
+ raise ValueError("unknown string format")
+
+ # Here we break the compatibility with the TZ variable handling.
+ # "GMT-3" actually *means* the timezone UTC-3.
+ if res.stdabbr in ("GMT", "UTC") and not posix_offset:
+ res.stdoffset *= -1
+
+ # We must initialize it first, since _delta() needs
+ # _std_offset and _dst_offset set. Use False in start/end
+ # to avoid building it two times.
+ tzrange.__init__(self, res.stdabbr, res.stdoffset,
+ res.dstabbr, res.dstoffset,
+ start=False, end=False)
+
+ if not res.dstabbr:
+ self._start_delta = None
+ self._end_delta = None
+ else:
+ self._start_delta = self._delta(res.start)
+ if self._start_delta:
+ self._end_delta = self._delta(res.end, isend=1)
+
+ def _delta(self, x, isend=0):
+ kwargs = {}
+ if x.month is not None:
+ kwargs["month"] = x.month
+ if x.weekday is not None:
+ kwargs["weekday"] = relativedelta.weekday(x.weekday, x.week)
+ if x.week > 0:
+ kwargs["day"] = 1
+ else:
+ kwargs["day"] = 31
+ elif x.day:
+ kwargs["day"] = x.day
+ elif x.yday is not None:
+ kwargs["yearday"] = x.yday
+ elif x.jyday is not None:
+ kwargs["nlyearday"] = x.jyday
+ if not kwargs:
+ # Default is to start on first sunday of april, and end
+ # on last sunday of october.
+ if not isend:
+ kwargs["month"] = 4
+ kwargs["day"] = 1
+ kwargs["weekday"] = relativedelta.SU(+1)
+ else:
+ kwargs["month"] = 10
+ kwargs["day"] = 31
+ kwargs["weekday"] = relativedelta.SU(-1)
+ if x.time is not None:
+ kwargs["seconds"] = x.time
+ else:
+ # Default is 2AM.
+ kwargs["seconds"] = 7200
+ if isend:
+ # Convert to standard time, to follow the documented way
+ # of working with the extra hour. See the documentation
+ # of the tzinfo class.
+ delta = self._dst_offset - self._std_offset
+ kwargs["seconds"] -= delta.seconds + delta.days * 86400
+ return relativedelta.relativedelta(**kwargs)
+
+ def __repr__(self):
+ return "%s(%s)" % (self.__class__.__name__, repr(self._s))
+
+
+class _tzicalvtzcomp(object):
+ def __init__(self, tzoffsetfrom, tzoffsetto, isdst,
+ tzname=None, rrule=None):
+ self.tzoffsetfrom = datetime.timedelta(seconds=tzoffsetfrom)
+ self.tzoffsetto = datetime.timedelta(seconds=tzoffsetto)
+ self.tzoffsetdiff = self.tzoffsetto - self.tzoffsetfrom
+ self.isdst = isdst
+ self.tzname = tzname
+ self.rrule = rrule
+
+
+class _tzicalvtz(_tzinfo):
+ def __init__(self, tzid, comps=[]):
+ super(_tzicalvtz, self).__init__()
+
+ self._tzid = tzid
+ self._comps = comps
+ self._cachedate = []
+ self._cachecomp = []
+
+ def _find_comp(self, dt):
+ if len(self._comps) == 1:
+ return self._comps[0]
+
+ dt = dt.replace(tzinfo=None)
+
+ try:
+ return self._cachecomp[self._cachedate.index((dt, self._fold))]
+ except ValueError:
+ pass
+
+
+ lastcompdt = None
+ lastcomp = None
+
+ for comp in self._comps:
+ compdt = self._find_compdt(comp, dt)
+
+ if compdt and (not lastcompdt or lastcompdt < compdt):
+ lastcompdt = compdt
+ lastcomp = comp
+
+ if not lastcomp:
+ # RFC says nothing about what to do when a given
+ # time is before the first onset date. We'll look for the
+ # first standard component, or the first component, if
+ # none is found.
+ for comp in self._comps:
+ if not comp.isdst:
+ lastcomp = comp
+ break
+ else:
+ lastcomp = self._comps[0]
+
+ self._cachedate.insert(0, (dt, self._fold))
+ self._cachecomp.insert(0, lastcomp)
+
+ if len(self._cachedate) > 10:
+ self._cachedate.pop()
+ self._cachecomp.pop()
+
+ return lastcomp
+
+ def _find_compdt(self, comp, dt):
+ if comp.tzoffsetdiff < ZERO and not self._fold:
+ dt -= comp.tzoffsetdiff
+
+ compdt = comp.rrule.before(dt, inc=True)
+
+ return compdt
+
+ def utcoffset(self, dt):
+ if dt is None:
+ return None
+
+ return self._find_comp(dt).tzoffsetto
+
+ def dst(self, dt):
+ comp = self._find_comp(dt)
+ if comp.isdst:
+ return comp.tzoffsetdiff
+ else:
+ return ZERO
+
+ @tzname_in_python2
+ def tzname(self, dt):
+ return self._find_comp(dt).tzname
+
+ def __repr__(self):
+ return "<tzicalvtz %s>" % repr(self._tzid)
+
+ __reduce__ = object.__reduce__
+
+
+class tzical(object):
+ """
+ This object is designed to parse an iCalendar-style ``VTIMEZONE`` structure
+ as set out in `RFC 2445`_ Section 4.6.5 into one or more `tzinfo` objects.
+
+ :param `fileobj`:
+ A file or stream in iCalendar format, which should be UTF-8 encoded
+ with CRLF endings.
+
+ .. _`RFC 2445`: https://www.ietf.org/rfc/rfc2445.txt
+ """
+ def __init__(self, fileobj):
+ global rrule
+ from dateutil import rrule
+
+ if isinstance(fileobj, string_types):
+ self._s = fileobj
+ # ical should be encoded in UTF-8 with CRLF
+ fileobj = open(fileobj, 'r')
+ file_opened_here = True
+ else:
+ self._s = getattr(fileobj, 'name', repr(fileobj))
+ fileobj = _ContextWrapper(fileobj)
+
+ self._vtz = {}
+
+ with fileobj as fobj:
+ self._parse_rfc(fobj.read())
+
+ def keys(self):
+ """
+ Retrieves the available time zones as a list.
+ """
+ return list(self._vtz.keys())
+
+ def get(self, tzid=None):
+ """
+ Retrieve a :py:class:`datetime.tzinfo` object by its ``tzid``.
+
+ :param tzid:
+ If there is exactly one time zone available, omitting ``tzid``
+ or passing a :py:const:`None` value returns it. Otherwise a valid
+ key (which can be retrieved from :func:`keys`) is required.
+
+ :raises ValueError:
+ Raised if ``tzid`` is not specified but there are either more
+ or fewer than 1 zone defined.
+
+ :returns:
+ Returns either a :py:class:`datetime.tzinfo` object representing
+ the relevant time zone or :py:const:`None` if the ``tzid`` was
+ not found.
+ """
+ if tzid is None:
+ if len(self._vtz) == 0:
+ raise ValueError("no timezones defined")
+ elif len(self._vtz) > 1:
+ raise ValueError("more than one timezone available")
+ tzid = next(iter(self._vtz))
+
+ return self._vtz.get(tzid)
+
+ def _parse_offset(self, s):
+ s = s.strip()
+ if not s:
+ raise ValueError("empty offset")
+ if s[0] in ('+', '-'):
+ signal = (-1, +1)[s[0] == '+']
+ s = s[1:]
+ else:
+ signal = +1
+ if len(s) == 4:
+ return (int(s[:2]) * 3600 + int(s[2:]) * 60) * signal
+ elif len(s) == 6:
+ return (int(s[:2]) * 3600 + int(s[2:4]) * 60 + int(s[4:])) * signal
+ else:
+ raise ValueError("invalid offset: " + s)
+
+ def _parse_rfc(self, s):
+ lines = s.splitlines()
+ if not lines:
+ raise ValueError("empty string")
+
+ # Unfold
+ i = 0
+ while i < len(lines):
+ line = lines[i].rstrip()
+ if not line:
+ del lines[i]
+ elif i > 0 and line[0] == " ":
+ lines[i-1] += line[1:]
+ del lines[i]
+ else:
+ i += 1
+
+ tzid = None
+ comps = []
+ invtz = False
+ comptype = None
+ for line in lines:
+ if not line:
+ continue
+ name, value = line.split(':', 1)
+ parms = name.split(';')
+ if not parms:
+ raise ValueError("empty property name")
+ name = parms[0].upper()
+ parms = parms[1:]
+ if invtz:
+ if name == "BEGIN":
+ if value in ("STANDARD", "DAYLIGHT"):
+ # Process component
+ pass
+ else:
+ raise ValueError("unknown component: "+value)
+ comptype = value
+ founddtstart = False
+ tzoffsetfrom = None
+ tzoffsetto = None
+ rrulelines = []
+ tzname = None
+ elif name == "END":
+ if value == "VTIMEZONE":
+ if comptype:
+ raise ValueError("component not closed: "+comptype)
+ if not tzid:
+ raise ValueError("mandatory TZID not found")
+ if not comps:
+ raise ValueError(
+ "at least one component is needed")
+ # Process vtimezone
+ self._vtz[tzid] = _tzicalvtz(tzid, comps)
+ invtz = False
+ elif value == comptype:
+ if not founddtstart:
+ raise ValueError("mandatory DTSTART not found")
+ if tzoffsetfrom is None:
+ raise ValueError(
+ "mandatory TZOFFSETFROM not found")
+ if tzoffsetto is None:
+ raise ValueError(
+ "mandatory TZOFFSETTO not found")
+ # Process component
+ rr = None
+ if rrulelines:
+ rr = rrule.rrulestr("\n".join(rrulelines),
+ compatible=True,
+ ignoretz=True,
+ cache=True)
+ comp = _tzicalvtzcomp(tzoffsetfrom, tzoffsetto,
+ (comptype == "DAYLIGHT"),
+ tzname, rr)
+ comps.append(comp)
+ comptype = None
+ else:
+ raise ValueError("invalid component end: "+value)
+ elif comptype:
+ if name == "DTSTART":
+ rrulelines.append(line)
+ founddtstart = True
+ elif name in ("RRULE", "RDATE", "EXRULE", "EXDATE"):
+ rrulelines.append(line)
+ elif name == "TZOFFSETFROM":
+ if parms:
+ raise ValueError(
+ "unsupported %s parm: %s " % (name, parms[0]))
+ tzoffsetfrom = self._parse_offset(value)
+ elif name == "TZOFFSETTO":
+ if parms:
+ raise ValueError(
+ "unsupported TZOFFSETTO parm: "+parms[0])
+ tzoffsetto = self._parse_offset(value)
+ elif name == "TZNAME":
+ if parms:
+ raise ValueError(
+ "unsupported TZNAME parm: "+parms[0])
+ tzname = value
+ elif name == "COMMENT":
+ pass
+ else:
+ raise ValueError("unsupported property: "+name)
+ else:
+ if name == "TZID":
+ if parms:
+ raise ValueError(
+ "unsupported TZID parm: "+parms[0])
+ tzid = value
+ elif name in ("TZURL", "LAST-MODIFIED", "COMMENT"):
+ pass
+ else:
+ raise ValueError("unsupported property: "+name)
+ elif name == "BEGIN" and value == "VTIMEZONE":
+ tzid = None
+ comps = []
+ invtz = True
+
+ def __repr__(self):
+ return "%s(%s)" % (self.__class__.__name__, repr(self._s))
+
+if sys.platform != "win32":
+ TZFILES = ["/etc/localtime", "localtime"]
+ TZPATHS = ["/usr/share/zoneinfo", "/usr/lib/zoneinfo", "/etc/zoneinfo"]
+else:
+ TZFILES = []
+ TZPATHS = []
+
+
+def gettz(name=None):
+ tz = None
+ if not name:
+ try:
+ name = os.environ["TZ"]
+ except KeyError:
+ pass
+ if name is None or name == ":":
+ for filepath in TZFILES:
+ if not os.path.isabs(filepath):
+ filename = filepath
+ for path in TZPATHS:
+ filepath = os.path.join(path, filename)
+ if os.path.isfile(filepath):
+ break
+ else:
+ continue
+ if os.path.isfile(filepath):
+ try:
+ tz = tzfile(filepath)
+ break
+ except (IOError, OSError, ValueError):
+ pass
+ else:
+ tz = tzlocal()
+ else:
+ if name.startswith(":"):
+ name = name[1:]
+ if os.path.isabs(name):
+ if os.path.isfile(name):
+ tz = tzfile(name)
+ else:
+ tz = None
+ else:
+ for path in TZPATHS:
+ filepath = os.path.join(path, name)
+ if not os.path.isfile(filepath):
+ filepath = filepath.replace(' ', '_')
+ if not os.path.isfile(filepath):
+ continue
+ try:
+ tz = tzfile(filepath)
+ break
+ except (IOError, OSError, ValueError):
+ pass
+ else:
+ tz = None
+ if tzwin is not None:
+ try:
+ tz = tzwin(name)
+ except WindowsError:
+ tz = None
+ if not tz:
+ from dateutil.zoneinfo import gettz
+ tz = gettz(name)
+ if not tz:
+ for c in name:
+ # name must have at least one offset to be a tzstr
+ if c in "0123456789":
+ try:
+ tz = tzstr(name)
+ except ValueError:
+ pass
+ break
+ else:
+ if name in ("GMT", "UTC"):
+ tz = tzutc()
+ elif name in time.tzname:
+ tz = tzlocal()
+ return tz
+
+def _total_seconds(td):
+ # Python 2.6 doesn't have a total_seconds() method on timedelta objects
+ return ((td.seconds + td.days * 86400) * 1000000 +
+ td.microseconds) // 1000000
+
+_total_seconds = getattr(datetime.timedelta, 'total_seconds', _total_seconds)
+
+def _datetime_to_timestamp(dt):
+ """
+ Convert a :class:`datetime.datetime` object to an epoch timestamp in seconds
+ since January 1, 1970, ignoring the time zone.
+ """
+ return _total_seconds((dt.replace(tzinfo=None) - EPOCH))
+
+class _ContextWrapper(object):
+ """
+ Class for wrapping contexts so that they are passed through in a
+ with statement.
+ """
+ def __init__(self, context):
+ self.context = context
+
+ def __enter__(self):
+ return self.context
+
+ def __exit__(self, *args, **kwargs):
+ pass
+
+# vim:ts=4:sw=4:et
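
As a usage sketch of the lookup order gettz() implements above (the TZ environment variable, /etc/localtime, the zoneinfo search paths, the bundled zoneinfo archive, and finally a tzstr parse once the name contains a digit); the zone names are illustrative and the import path assumes upstream python-dateutil:

    from dateutil import tz

    local = tz.gettz()              # TZ env var if set, else /etc/localtime
    rome = tz.gettz('Europe/Rome')  # tzfile found under /usr/share/zoneinfo
    posix = tz.gettz('CET-1CEST')   # no matching file: contains a digit, so tzstr
    utc = tz.gettz('UTC')           # "GMT"/"UTC" fall back to tzutc()
    print(local, rome, posix, utc)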
diff --git a/ooni/contrib/dateutil/tz/win.py b/ooni/contrib/dateutil/tz/win.py
new file mode 100644
index 0000000..7203c78
--- /dev/null
+++ b/ooni/contrib/dateutil/tz/win.py
@@ -0,0 +1,354 @@
+# This code was originally contributed by Jeffrey Harris.
+import datetime
+import struct
+
+from six.moves import winreg
+from six import text_type
+
+try:
+ import ctypes
+ from ctypes import wintypes
+except ValueError:
+ # ValueError is raised on non-Windows systems for some horrible reason.
+ raise ImportError("Running tzwin on non-Windows system")
+
+from ._common import tzname_in_python2, _tzinfo
+
+__all__ = ["tzwin", "tzwinlocal", "tzres"]
+
+ONEWEEK = datetime.timedelta(7)
+
+TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones"
+TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones"
+TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"
+
+
+def _settzkeyname():
+ handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
+ try:
+ winreg.OpenKey(handle, TZKEYNAMENT).Close()
+ TZKEYNAME = TZKEYNAMENT
+ except WindowsError:
+ TZKEYNAME = TZKEYNAME9X
+ handle.Close()
+ return TZKEYNAME
+
+TZKEYNAME = _settzkeyname()
+
+
+class tzres(object):
+ """
+ Class for accessing `tzres.dll`, which contains timezone name related
+ resources.
+
+ .. versionadded:: 2.5.0
+ """
+ p_wchar = ctypes.POINTER(wintypes.WCHAR) # Pointer to a wide char
+
+ def __init__(self, tzres_loc='tzres.dll'):
+ # Load the user32 DLL so we can load strings from tzres
+ user32 = ctypes.WinDLL('user32')
+
+ # Specify the LoadStringW function
+ user32.LoadStringW.argtypes = (wintypes.HINSTANCE,
+ wintypes.UINT,
+ wintypes.LPWSTR,
+ ctypes.c_int)
+
+ self.LoadStringW = user32.LoadStringW
+ self._tzres = ctypes.WinDLL(tzres_loc)
+ self.tzres_loc = tzres_loc
+
+ def load_name(self, offset):
+ """
+ Load a timezone name from a DLL offset (integer).
+
+ >>> from dateutil.tzwin import tzres
+ >>> tzr = tzres()
+ >>> print(tzr.load_name(112))
+ 'Eastern Standard Time'
+
+ :param offset:
+ A positive integer value referring to a string from the tzres dll.
+
+ .. note::
+ Offsets found in the registry are generally of the form
+ `@tzres.dll,-114`. The offset in this case is 114, not -114.
+
+ """
+ resource = self.p_wchar()
+ lpBuffer = ctypes.cast(ctypes.byref(resource), wintypes.LPWSTR)
+ nchar = self.LoadStringW(self._tzres._handle, offset, lpBuffer, 0)
+ return resource[:nchar]
+
+ def name_from_string(self, tzname_str):
+ """
+ Parse strings as returned from the Windows registry into the time zone
+ name as defined in the registry.
+
+ >>> from dateutil.tzwin import tzres
+ >>> tzr = tzres()
+ >>> print(tzr.name_from_string('@tzres.dll,-251'))
+ 'Dateline Daylight Time'
+ >>> print(tzr.name_from_string('Eastern Standard Time'))
+ 'Eastern Standard Time'
+
+ :param tzname_str:
+ A timezone name string as returned from a Windows registry key.
+
+ :return:
+ Returns the localized timezone string from tzres.dll if the string
+ is of the form `@tzres.dll,-offset`, else returns the input string.
+ """
+ if not tzname_str.startswith('@'):
+ return tzname_str
+
+ name_splt = tzname_str.split(',-')
+ try:
+ offset = int(name_splt[1])
+ except (IndexError, ValueError):
+ raise ValueError("Malformed timezone string.")
+
+ return self.load_name(offset)
+
+
+class tzwinbase(_tzinfo):
+ """tzinfo class based on win32's timezones available in the registry."""
+ def __eq__(self, other):
+ # Compare on all relevant dimensions, including name.
+ if not isinstance(other, tzwinbase):
+ return NotImplemented
+
+ return (self._stdoffset == other._stdoffset and
+ self._dstoffset == other._dstoffset and
+ self._stddayofweek == other._stddayofweek and
+ self._dstdayofweek == other._dstdayofweek and
+ self._stdweeknumber == other._stdweeknumber and
+ self._dstweeknumber == other._dstweeknumber and
+ self._stdhour == other._stdhour and
+ self._dsthour == other._dsthour and
+ self._stdminute == other._stdminute and
+ self._dstminute == other._dstminute and
+ self._stdname == other._stdname and
+ self._dstname == other._dstname)
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def utcoffset(self, dt):
+ isdst = self._isdst(dt)
+
+ if isdst is None:
+ return None
+ elif isdst:
+ return datetime.timedelta(minutes=self._dstoffset)
+ else:
+ return datetime.timedelta(minutes=self._stdoffset)
+
+ def dst(self, dt):
+ isdst = self._isdst(dt)
+
+ if isdst is None:
+ return None
+ elif isdst:
+ return self._dst_base_offset
+ else:
+ return datetime.timedelta(0)
+
+ @tzname_in_python2
+ def tzname(self, dt):
+ if self._isdst(dt):
+ return self._dstname
+ else:
+ return self._stdname
+
+ @staticmethod
+ def list():
+ """Return a list of all time zones known to the system."""
+ with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
+ with winreg.OpenKey(handle, TZKEYNAME) as tzkey:
+ result = [winreg.EnumKey(tzkey, i)
+ for i in range(winreg.QueryInfoKey(tzkey)[0])]
+ return result
+
+ def display(self):
+ return self._display
+
+ def transitions(self, year):
+ """ Gets the transition day and times for a given year """
+ dston = picknthweekday(year, self._dstmonth, self._dstdayofweek,
+ self._dsthour, self._dstminute,
+ self._dstweeknumber)
+
+ dstoff = picknthweekday(year, self._stdmonth, self._stddayofweek,
+ self._stdhour, self._stdminute,
+ self._stdweeknumber)
+
+ # Ambiguous dates default to the STD side
+ dstoff -= self._dst_base_offset
+
+ return dston, dstoff
+
+ def _isdst(self, dt):
+ if not self._dstmonth:
+ # dstmonth == 0 signals the zone has no daylight saving time
+ return False
+ elif dt is None:
+ return None
+
+ dston, dstoff = self.transitions(dt.year)
+
+ naive_dt = dt.replace(tzinfo=None)
+
+ # Check to see if we're in an ambiguous time
+ if self._fold is not None:
+ dst_base_offset = self._dst_base_offset
+ if dstoff <= naive_dt < dstoff + dst_base_offset:
+ return self._fold
+
+ if dston < dstoff:
+ return dston <= naive_dt < dstoff
+ else:
+ return not dstoff <= naive_dt < dston
+
+ @property
+ def _dst_base_offset(self):
+ # Get the offset between DST and STD
+ return datetime.timedelta(minutes=(self._dstoffset - self._stdoffset))
+
+
+class tzwin(tzwinbase):
+
+ def __init__(self, name):
+ super(tzwin, self).__init__()
+
+ self._name = name
+
+ # multiple contexts only possible in 2.7 and 3.1, we still support 2.6
+ with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
+ tzkeyname = text_type("{kn}\{name}").format(kn=TZKEYNAME, name=name)
+ with winreg.OpenKey(handle, tzkeyname) as tzkey:
+ keydict = valuestodict(tzkey)
+
+ self._stdname = keydict["Std"]
+ self._dstname = keydict["Dlt"]
+
+ self._display = keydict["Display"]
+
+ # See http://www.jsiinc.com/SUBA/tip0300/rh0398.htm
+ tup = struct.unpack("=3l16h", keydict["TZI"])
+ self._stdoffset = -tup[0]-tup[1] # Bias + StandardBias * -1
+ self._dstoffset = self._stdoffset-tup[2] # + DaylightBias * -1
+
+ # for the meaning see the win32 TIME_ZONE_INFORMATION structure docs
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/ms725481(v=vs.85).a…
+ (self._stdmonth,
+ self._stddayofweek, # Sunday = 0
+ self._stdweeknumber, # Last = 5
+ self._stdhour,
+ self._stdminute) = tup[4:9]
+
+ (self._dstmonth,
+ self._dstdayofweek, # Sunday = 0
+ self._dstweeknumber, # Last = 5
+ self._dsthour,
+ self._dstminute) = tup[12:17]
+
+ def __repr__(self):
+ return "tzwin(%s)" % repr(self._name)
+
+ def __reduce__(self):
+ return (self.__class__, (self._name,))
+
+
+class tzwinlocal(tzwinbase):
+ def __init__(self):
+ super(tzwinlocal, self).__init__()
+ with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
+ with winreg.OpenKey(handle, TZLOCALKEYNAME) as tzlocalkey:
+ keydict = valuestodict(tzlocalkey)
+
+ self._stdname = keydict["StandardName"]
+ self._dstname = keydict["DaylightName"]
+
+ try:
+ tzkeyname = text_type('{kn}\{sn}').format(kn=TZKEYNAME,
+ sn=self._stdname)
+ with winreg.OpenKey(handle, tzkeyname) as tzkey:
+ _keydict = valuestodict(tzkey)
+ self._display = _keydict["Display"]
+ except OSError:
+ self._display = None
+
+ self._stdoffset = -keydict["Bias"]-keydict["StandardBias"]
+ self._dstoffset = self._stdoffset-keydict["DaylightBias"]
+
+ # For reasons unclear, in this particular key, the day of week has been
+ # moved to the END of the SYSTEMTIME structure.
+ tup = struct.unpack("=8h", keydict["StandardStart"])
+
+ (self._stdmonth,
+ self._stdweeknumber, # Last = 5
+ self._stdhour,
+ self._stdminute) = tup[1:5]
+
+ self._stddayofweek = tup[7]
+
+ tup = struct.unpack("=8h", keydict["DaylightStart"])
+
+ (self._dstmonth,
+ self._dstweeknumber, # Last = 5
+ self._dsthour,
+ self._dstminute) = tup[1:5]
+
+ self._dstdayofweek = tup[7]
+
+ def __repr__(self):
+ return "tzwinlocal()"
+
+ def __str__(self):
+ # str will return the standard name, not the daylight name.
+ return "tzwinlocal(%s)" % repr(self._stdname)
+
+ def __reduce__(self):
+ return (self.__class__, ())
+
+
+def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
+ """ dayofweek == 0 means Sunday, whichweek 5 means last instance """
+ first = datetime.datetime(year, month, 1, hour, minute)
+
+ # This will work if dayofweek is ISO weekday (1-7) or Microsoft-style (0-6),
+ # because 7 % 7 = 0
+ weekdayone = first.replace(day=((dayofweek - first.isoweekday()) % 7) + 1)
+ wd = weekdayone + ((whichweek - 1) * ONEWEEK)
+ if (wd.month != month):
+ wd -= ONEWEEK
+
+ return wd
+
+
+def valuestodict(key):
+ """Convert a registry key's values to a dictionary."""
+ dout = {}
+ size = winreg.QueryInfoKey(key)[1]
+ tz_res = None
+
+ for i in range(size):
+ key_name, value, dtype = winreg.EnumValue(key, i)
+ if dtype == winreg.REG_DWORD or dtype == winreg.REG_DWORD_LITTLE_ENDIAN:
+ # If it's a DWORD (32-bit integer), it's stored as unsigned - convert
+ # that to a proper signed integer
+ if value & (1 << 31):
+ value = value - (1 << 32)
+ elif dtype == winreg.REG_SZ:
+ # If it's a reference to the tzres DLL, load the actual string
+ if value.startswith('@tzres'):
+ tz_res = tz_res or tzres()
+ value = tz_res.name_from_string(value)
+
+ value = value.rstrip('\x00') # Remove trailing nulls
+
+ dout[key_name] = value
+
+ return dout
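
A small self-check of the nth-weekday helper above (Microsoft convention: dayofweek 0 is Sunday, whichweek 5 means the last occurrence in the month). The function is copied verbatim so the check also runs on non-Windows hosts, where importing this module fails on winreg:

    import datetime

    ONEWEEK = datetime.timedelta(7)

    def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
        # Verbatim copy of the helper defined in win.py above.
        first = datetime.datetime(year, month, 1, hour, minute)
        weekdayone = first.replace(day=((dayofweek - first.isoweekday()) % 7) + 1)
        wd = weekdayone + ((whichweek - 1) * ONEWEEK)
        if wd.month != month:
            wd -= ONEWEEK
        return wd

    # 2nd Sunday of March 2016 at 02:00 (US DST start):
    assert picknthweekday(2016, 3, 0, 2, 0, 2) == datetime.datetime(2016, 3, 13, 2, 0)
    # whichweek=5 ("last"): the last Sunday of October 2016 is the 30th.
    assert picknthweekday(2016, 10, 0, 2, 0, 5) == datetime.datetime(2016, 10, 30, 2, 0)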
[ooni-probe/master] Create a set of scheduled tasks to be run by the agent in background
by art@torproject.org 19 Sep '16
19 Sep '16
commit 546d1e3b2d9b1cbf0bebe2b032acc0d8b87e13e4
Author: Arturo Filastò <arturo(a)filasto.net>
Date: Tue Jul 26 21:18:04 2016 +0200
Create a set of scheduled tasks to be run by the agent in background
* Remove unneeded filename hash and sprinkle notes on its future deprecation
* Fix some bugs in the resources update procedure
---
ooni/agent/scheduler.py | 144 ++++++++++++++++++++-
ooni/contrib/__init__.py | 2 +-
ooni/contrib/croniter.py | 11 ++
ooni/deck.py | 38 +++---
ooni/director.py | 35 +++--
ooni/geoip.py | 12 +-
ooni/nettest.py | 25 +---
.../manipulation/http_invalid_request_line.py | 14 +-
ooni/resources.py | 71 ++++++----
ooni/settings.py | 2 +
ooni/ui/cli.py | 3 +-
ooni/ui/web/server.py | 34 +----
12 files changed, 271 insertions(+), 120 deletions(-)
diff --git a/ooni/agent/scheduler.py b/ooni/agent/scheduler.py
index 1004597..ace3bc5 100644
--- a/ooni/agent/scheduler.py
+++ b/ooni/agent/scheduler.py
@@ -1,5 +1,126 @@
+from datetime import datetime
+
from twisted.application import service
-from twisted.internet import task
+from twisted.internet import task, defer
+from twisted.python.filepath import FilePath
+
+from ooni import resources
+from ooni.utils import log
+from ooni.deck import input_store
+from ooni.settings import config
+from ooni.contrib import croniter
+
+class ScheduledTask(object):
+ _time_format = "%Y-%m-%dT%H:%M:%SZ"
+ schedule = None
+ identifier = None
+
+ def __init__(self, schedule=None):
+ if schedule is not None:
+ self.schedule = schedule
+
+ assert self.identifier is not None, "self.identifier must be set"
+ assert self.schedule is not None, "self.schedule must be set"
+ scheduler_directory = config.scheduler_directory
+
+ self._last_run = FilePath(scheduler_directory).child(self.identifier)
+ self._last_run_lock = defer.DeferredFilesystemLock(
+ FilePath(scheduler_directory).child(self.identifier + ".lock").path
+ )
+
+ @property
+ def should_run(self):
+ current_time = datetime.utcnow()
+ next_cycle = croniter(self.schedule, self.last_run).get_next(datetime)
+ if next_cycle <= current_time:
+ return True
+ return False
+
+ @property
+ def last_run(self):
+ if not self._last_run.exists():
+ return datetime.fromtimestamp(0)
+ with self._last_run.open('r') as in_file:
+ date_str = in_file.read()
+ return datetime.strptime(date_str, self._time_format)
+
+ def _update_last_run(self):
+ with self._last_run.open('w') as out_file:
+ current_time = datetime.utcnow()
+ out_file.write(current_time.strftime(self._time_format))
+
+ def task(self):
+ raise NotImplementedError()
+
+ @defer.inlineCallbacks
+ def run(self):
+ yield self._last_run_lock.deferUntilLocked()
+ if not self.should_run:
+ self._last_run_lock.unlock()
+ defer.returnValue(None)
+ try:
+ yield self.task()
+ self._update_last_run()
+ finally:
+ self._last_run_lock.unlock()
+
+class UpdateInputsAndResources(ScheduledTask):
+ identifier = "ooni-update-inputs"
+ schedule = "@daily"
+
+ @defer.inlineCallbacks
+ def task(self):
+ log.debug("Updating the inputs")
+ yield resources.check_for_update(config.probe_ip.geodata['countrycode'])
+ yield input_store.update(config.probe_ip.geodata['countrycode'])
+
+class UpdateProbeIp(ScheduledTask):
+ identifier = "ooni-update-probe-ip"
+ schedule = "@hourly"
+ # XXX we need to ensure this is always run the first time ooniprobe or
+ # ooniprobe-agent is started or implement on disk caching of the users
+ # IP address.
+
+ def task(self):
+ log.debug("Updating the probe IP")
+ return config.probe_ip.lookup()
+
+class CleanupInProgressReports(ScheduledTask):
+ identifier = 'ooni-cleanup-reports'
+ schedule = '@daily'
+
+class UploadMissingReports(ScheduledTask):
+ identifier = 'ooni-upload-reports'
+ schedule = '@weekly'
+
+# Order matters
+SYSTEM_TASKS = [
+ UpdateProbeIp,
+ UpdateInputsAndResources
+]
+
+@defer.inlineCallbacks
+def run_system_tasks(no_geoip=False, no_input_store=False):
+ task_classes = SYSTEM_TASKS[:]
+
+ if no_geoip:
+ log.debug("Not updating probe IP")
+ task_classes.remove(UpdateProbeIp)
+
+ if no_input_store:
+ log.debug("Not updating the inputs")
+ task_classes.remove(UpdateInputsAndResources)
+
+ for task_class in task_classes:
+ task = task_class()
+ log.debug("Running task {0}".format(task.identifier))
+ try:
+ yield task.run()
+ except Exception as exc:
+ log.err("Failed to run task {0}".format(task.identifier))
+ log.exception(exc)
class SchedulerService(service.MultiService):
"""
@@ -10,16 +131,35 @@ class SchedulerService(service.MultiService):
self.director = director
self.interval = interval
self._looping_call = task.LoopingCall(self._should_run)
+ self._scheduled_tasks = []
+
+ def schedule(self, task):
+ self._scheduled_tasks.append(task)
+
+ def _task_failed(self, failure, task):
+ log.msg("Failed to run {0}".format(task.identifier))
+ log.exception(failure)
+
+ def _task_success(self, result, task):
+ log.msg("Ran {0}".format(task.identifier))
def _should_run(self):
"""
This function is called every self.interval seconds to check
which periodic tasks should be run.
"""
- pass
+ for task in self._scheduled_tasks:
+ log.debug("Running task {0}".format(task.identifier))
+ d = task.run()
+ d.addErrback(self._task_failed, task)
+ d.addCallback(self._task_success, task)
def startService(self):
service.MultiService.startService(self)
+
+ self.schedule(UpdateProbeIp())
+ self.schedule(UpdateInputsAndResources())
+
self._looping_call.start(self.interval)
def stopService(self):
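To illustrate the pattern these scheduler changes establish, here is a minimal sketch of a hypothetical periodic job (the class name, identifier and schedule are invented for the example):

from twisted.internet import defer

class RotateLogs(ScheduledTask):
    # The identifier names the last-run marker and lock file kept under
    # config.scheduler_directory; the schedule is a cron expression or
    # one of the @-aliases added to the vendored croniter below.
    identifier = "ooni-rotate-logs"
    schedule = "@weekly"

    def task(self):
        # Return a Deferred; run() takes care of locking, the should_run
        # check against the cron schedule, and updating the last-run stamp.
        return defer.succeed(None)

An instance would then be registered with SchedulerService.schedule() and picked up on the next _should_run tick.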
diff --git a/ooni/contrib/__init__.py b/ooni/contrib/__init__.py
index 50b6b54..28aad30 100644
--- a/ooni/contrib/__init__.py
+++ b/ooni/contrib/__init__.py
@@ -1 +1 @@
-from ._crontab import CronTab
+from .croniter import croniter
diff --git a/ooni/contrib/croniter.py b/ooni/contrib/croniter.py
index 5864603..653dbbf 100644
--- a/ooni/contrib/croniter.py
+++ b/ooni/contrib/croniter.py
@@ -49,6 +49,15 @@ class croniter(object):
{},
)
+ ALIASES = {
+ '@yearly': '0 0 1 1 *',
+ '@annually': '0 0 1 1 *',
+ '@monthly': '0 0 1 * *',
+ '@weekly': '0 0 * * 0',
+ '@daily': '0 0 * * *',
+ '@hourly': '0 * * * *',
+ }
+
bad_length = 'Exactly 5 or 6 columns has to be specified for iterator ' \
'expression.'
@@ -63,6 +72,8 @@ class croniter(object):
start_time = self._datetime_to_timestamp(start_time)
self.cur = start_time
+ if expr_format in self.ALIASES:
+ expr_format = self.ALIASES[expr_format]
self.exprs = expr_format.split()
if len(self.exprs) != 5 and len(self.exprs) != 6:
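For context, a quick sketch of what the new aliases buy, mirroring how ScheduledTask.should_run uses the vendored croniter:

from datetime import datetime
from ooni.contrib import croniter

# '@daily' expands to '0 0 * * *' via the ALIASES table above, so the
# next cycle after 02:05 on July 26th is midnight on July 27th.
last_run = datetime(2016, 7, 26, 2, 5)
next_cycle = croniter('@daily', last_run).get_next(datetime)
assert next_cycle == datetime(2016, 7, 27, 0, 0)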
diff --git a/ooni/deck.py b/ooni/deck.py
index 4794d30..0434d81 100644
--- a/ooni/deck.py
+++ b/ooni/deck.py
@@ -4,7 +4,6 @@ import csv
import json
from copy import deepcopy
-from hashlib import sha256
import yaml
@@ -236,7 +235,8 @@ def lookup_collector_and_test_helpers(net_test_loaders,
'name': net_test_loader.testName,
'version': net_test_loader.testVersion,
'test-helpers': [],
- 'input-hashes': [x['hash'] for x in net_test_loader.inputFiles]
+ # XXX deprecate this very soon
+ 'input-hashes': []
}
if not net_test_loader.collector and not no_collector:
requires_collector = True
@@ -262,15 +262,16 @@ def lookup_collector_and_test_helpers(net_test_loaders,
log.err("Could not find any reachable test helpers")
raise
- def find_collector_and_test_helpers(test_name, test_version, input_files):
- input_files = [u""+x['hash'] for x in input_files]
+ def find_collector_and_test_helpers(test_name, test_version):
+ # input_files = [u""+x['hash'] for x in input_files]
for net_test in provided_net_tests:
if net_test['name'] != test_name:
continue
if net_test['version'] != test_version:
continue
- if set(net_test['input-hashes']) != set(input_files):
- continue
+ # XXX remove the notion of policies based on input file hashes
+ # if set(net_test['input-hashes']) != set(input_files):
+ # continue
return net_test['collector'], net_test['test-helpers']
for net_test_loader in net_test_loaders:
@@ -280,8 +281,8 @@ def lookup_collector_and_test_helpers(net_test_loaders,
collector, test_helpers = \
find_collector_and_test_helpers(
test_name=net_test_loader.testName,
- test_version=net_test_loader.testVersion,
- input_files=net_test_loader.inputFiles
+ test_version=net_test_loader.testVersion
+ # input_files=net_test_loader.inputFiles
)
for option, name in net_test_loader.missingTestHelpers:
@@ -455,6 +456,7 @@ class InputStore(object):
"id": "citizenlab_{0}_urls".format(cc),
"type": "file/url"
}, out_fh)
+ self._cache_stale = True
@defer.inlineCallbacks
def create(self, country_code=None):
@@ -523,13 +525,11 @@ def resolve_file_path(v, prepath=None):
return FilePath(prepath).preauthChild(v).path
return v
-def options_to_args(options, prepath=None):
+def options_to_args(options):
args = []
for k, v in options.items():
if v is None:
continue
- if k == "file":
- v = resolve_file_path(v, prepath)
if v == False or v == 0:
continue
if (len(k)) == 1:
@@ -625,7 +625,7 @@ class DeckTask(object):
collector_address = None
net_test_loader = NetTestLoader(
- options_to_args(task_data, self.cwd),
+ options_to_args(task_data),
annotations=annotations,
test_file=nettest_path
)
@@ -653,6 +653,9 @@ class DeckTask(object):
self.ooni['net_test_loader'] = net_test_loader
def _setup_ooni(self):
+ for input_file in self.ooni['net_test_loader'].inputFiles:
+ file_path = resolve_file_path(input_file['filename'], self.cwd)
+ input_file['test_options'][input_file['key']] = file_path
self.ooni['test_details'] = self.ooni['net_test_loader'].getTestDetails()
self.id = generate_filename(self.ooni['test_details'])
@@ -670,15 +673,8 @@ class DeckTask(object):
if task_type not in self._supported_tasks:
raise UnknownTaskKey(task_type)
self.type = task_type
- try:
- getattr(self, "_load_"+task_type)(task_data)
- except InputNotFound:
- log.debug(
- "Will skip running this test because I can't find the input"
- )
- self._skip = True
-
- assert len(data) == 0
+ getattr(self, "_load_"+task_type)(task_data)
+ assert len(data) == 0, "Got an unidentified key"
class NotAnOption(Exception):
pass
diff --git a/ooni/director.py b/ooni/director.py
index 84bc9aa..1ab076b 100644
--- a/ooni/director.py
+++ b/ooni/director.py
@@ -13,6 +13,7 @@ from ooni.settings import config
from ooni.nettest import normalizeTestName
from ooni.deck import InputStore
+from ooni.agent.scheduler import run_system_tasks
from ooni.utils.onion import start_tor, connect_to_control_port
class DirectorEvent(object):
@@ -139,12 +140,15 @@ class Director(object):
self._tor_starting.addCallback(self._tor_startup_success)
def _tor_startup_failure(self, failure):
+ log.msg("Failed to start tor")
+ log.exception(failure)
self._reset_tor_state()
self.notify(DirectorEvent("error",
"Failed to start Tor"))
return failure
def _tor_startup_success(self, result):
+ log.msg("Tor has started")
self._tor_state = 'running'
self.notify(DirectorEvent("success",
"Successfully started Tor"))
@@ -187,22 +191,21 @@ class Director(object):
if start_tor:
yield self.start_tor(check_incoherences)
- if config.global_options.get('no-geoip'):
+ no_geoip = config.global_options.get('no-geoip', False)
+ if no_geoip:
aux = [False]
if config.global_options.get('annotations') is not None:
annotations = [k.lower() for k in config.global_options['annotations'].keys()]
aux = map(lambda x: x in annotations, ["city", "country", "asn"])
if not all(aux):
log.msg("You should add annotations for the country, city and ASN")
- else:
- yield config.probe_ip.lookup()
- self.notify(DirectorEvent("success",
- "Looked up Probe IP"))
- if create_input_store:
- yield self.input_store.create(config.probe_ip.geodata["countrycode"])
- self.notify(DirectorEvent("success",
- "Created input store"))
+ self.notify(DirectorEvent("success",
+ "Running system tasks"))
+ yield run_system_tasks(no_geoip=no_geoip,
+ no_input_store=not create_input_store)
+ self.notify(DirectorEvent("success",
+ "Ran system tasks"))
@defer.inlineCallbacks
def start(self, start_tor=False, check_incoherences=True,
@@ -284,7 +287,8 @@ class Director(object):
def netTestDone(self, net_test):
self.notify(DirectorEvent("success",
- "Successfully ran net_test"))
+ "Successfully ran test {0}".format(
+ net_test.testDetails['test_name'])))
self.activeNetTests.remove(net_test)
if len(self.activeNetTests) == 0:
self.allTestsDone.callback(None)
@@ -371,13 +375,18 @@ class Director(object):
log.debug("Tor is already running")
defer.returnValue(self._tor_state)
elif self._tor_state == 'starting':
+ log.debug("Tor is starting")
yield self._tor_starting
defer.returnValue(self._tor_state)
log.msg("Starting Tor...")
self._tor_state = 'starting'
if check_incoherences:
- yield config.check_tor()
+ try:
+ yield config.check_tor()
+ except Exception as exc:
+ self._tor_starting.errback(Failure(exc))
+ raise exc
if config.advanced.start_tor and config.tor_state is None:
tor_config = TorConfig()
@@ -438,3 +447,7 @@ class Director(object):
self._tor_starting.callback(self._tor_state)
except Exception as exc:
self._tor_starting.errback(Failure(exc))
+ else:
+ # This happens when we require tor to not be started and the
+ # socks port is set.
+ self._tor_starting.callback(self._tor_state)
diff --git a/ooni/geoip.py b/ooni/geoip.py
index f118268..2a7ec92 100644
--- a/ooni/geoip.py
+++ b/ooni/geoip.py
@@ -31,8 +31,12 @@ class GeoIPDataFilesNotFound(Exception):
def IPToLocation(ipaddr):
from ooni.settings import config
- country_file = config.get_data_file_path('GeoIP/GeoIP.dat')
- asn_file = config.get_data_file_path('GeoIP/GeoIPASNum.dat')
+ country_file = config.get_data_file_path(
+ 'resources/maxmind-geoip/GeoIP.dat'
+ )
+ asn_file = config.get_data_file_path(
+ 'resources/maxmind-geoip/GeoIPASNum.dat'
+ )
location = {'city': None, 'countrycode': 'ZZ', 'asn': 'AS0'}
if not asn_file or not country_file:
@@ -69,7 +73,9 @@ def database_version():
}
for key in version.keys():
- geoip_file = config.get_data_file_path("GeoIP/" + key + ".dat")
+ geoip_file = config.get_data_file_path(
+ "resources/maxmind-geoip/" + key + ".dat"
+ )
if not geoip_file or not os.path.isfile(geoip_file):
continue
timestamp = os.stat(geoip_file).st_mtime
diff --git a/ooni/nettest.py b/ooni/nettest.py
index 2a33a2f..88e4953 100644
--- a/ooni/nettest.py
+++ b/ooni/nettest.py
@@ -2,7 +2,6 @@ import os
import re
import time
import sys
-from hashlib import sha256
from twisted.internet import defer
from twisted.trial.runner import filenameToModule
@@ -199,6 +198,7 @@ class NetTestLoader(object):
'probe_city': config.probe_ip.geodata['city'],
'software_name': 'ooniprobe',
'software_version': ooniprobe_version,
+ # XXX only sanitize the input files
'options': sanitize_options(self.options),
'annotations': self.annotations,
'data_format_version': '0.2.0',
@@ -206,8 +206,8 @@ class NetTestLoader(object):
'test_version': self.testVersion,
'test_helpers': self.testHelpers,
'test_start_time': otime.timestampNowLongUTC(),
- 'input_hashes': [input_file['hash']
- for input_file in self.inputFiles],
+ # XXX We should deprecate this key very soon
+ 'input_hashes': [],
'report_id': self.reportId
}
@@ -235,29 +235,14 @@ class NetTestLoader(object):
input_file = {
'key': key,
'test_options': self.localOptions,
- 'hash': None,
-
- 'url': None,
- 'address': None,
-
'filename': None
}
m = ONION_INPUT_REGEXP.match(filename)
if m:
- input_file['url'] = filename
- input_file['address'] = m.group(1)
- input_file['hash'] = m.group(2)
+ raise e.InvalidInputFile("Input files hosted on hidden services "
+ "are not longer supported")
else:
input_file['filename'] = filename
- try:
- with open(filename) as f:
- h = sha256()
- for l in f:
- h.update(l)
- except Exception as exc:
- log.exception(exc)
- raise e.InvalidInputFile(filename)
- input_file['hash'] = h.hexdigest()
self.inputFiles.append(input_file)
def _accumulateTestOptions(self, test_class):
diff --git a/ooni/nettests/manipulation/http_invalid_request_line.py b/ooni/nettests/manipulation/http_invalid_request_line.py
index 94b0b99..be0497c 100644
--- a/ooni/nettests/manipulation/http_invalid_request_line.py
+++ b/ooni/nettests/manipulation/http_invalid_request_line.py
@@ -42,14 +42,14 @@ class HTTPInvalidRequestLine(tcpt.TCPTest):
self.address = self.localOptions['backend']
self.report['tampering'] = None
- def check_for_manipulation(self, response, payload):
+ def check_for_manipulation(self, response, payload, manipulation_type):
log.debug("Checking if %s == %s" % (response, payload))
if response != payload:
- log.msg("Detected manipulation!")
+ log.msg("{0}: Detected manipulation!".format(manipulation_type))
log.msg(response)
self.report['tampering'] = True
else:
- log.msg("No manipulation detected.")
+ log.msg("{0}: No manipulation detected.".format(manipulation_type))
self.report['tampering'] = False
def test_random_invalid_method(self):
@@ -75,7 +75,7 @@ class HTTPInvalidRequestLine(tcpt.TCPTest):
payload = randomSTR(4) + " / HTTP/1.1\n\r"
d = self.sendPayload(payload)
- d.addCallback(self.check_for_manipulation, payload)
+ d.addCallback(self.check_for_manipulation, payload, 'random_invalid_method')
return d
def test_random_invalid_field_count(self):
@@ -91,7 +91,7 @@ class HTTPInvalidRequestLine(tcpt.TCPTest):
payload += "\n\r"
d = self.sendPayload(payload)
- d.addCallback(self.check_for_manipulation, payload)
+ d.addCallback(self.check_for_manipulation, payload, 'random_invalid_field_count')
return d
def test_random_big_request_method(self):
@@ -103,7 +103,7 @@ class HTTPInvalidRequestLine(tcpt.TCPTest):
payload = randomStr(1024) + ' / HTTP/1.1\n\r'
d = self.sendPayload(payload)
- d.addCallback(self.check_for_manipulation, payload)
+ d.addCallback(self.check_for_manipulation, payload, 'random_big_request_method')
return d
def test_random_invalid_version_number(self):
@@ -116,5 +116,5 @@ class HTTPInvalidRequestLine(tcpt.TCPTest):
payload += '\n\r'
d = self.sendPayload(payload)
- d.addCallback(self.check_for_manipulation, payload)
+ d.addCallback(self.check_for_manipulation, payload, 'random_invalid_version_number')
return d
diff --git a/ooni/resources.py b/ooni/resources.py
index d49e679..d67908c 100644
--- a/ooni/resources.py
+++ b/ooni/resources.py
@@ -1,5 +1,9 @@
+import os
+import gzip
import json
+import shutil
+from twisted.python.runtime import platform
from twisted.python.filepath import FilePath
from twisted.internet import defer
from twisted.web.client import downloadPage, getPage
@@ -66,11 +70,27 @@ def get_out_of_date_resources(current_manifest, new_manifest,
# the manifest claims we have a more up to date version.
# This happens if an update by country_code happened and a new
# country code is now required.
+ if filename.endswith(".gz"):
+ filename = filename[:-3]
if not _resources.child(pre_path).child(filename).exists():
paths_to_update.append(info)
return paths_to_update, paths_to_delete
+def gunzip(file_path):
+ tmp_location = FilePath(file_path).temporarySibling()
+ in_file = gzip.open(file_path)
+ with tmp_location.open('w') as out_file:
+ shutil.copyfileobj(in_file, out_file)
+ in_file.close()
+ rename(tmp_location.path, file_path)
+
+def rename(src, dst):
+ # Best effort atomic renaming
+ if platform.isWindows() and os.path.exists(dst):
+ os.unlink(dst)
+ os.rename(src, dst)
+
@defer.inlineCallbacks
def check_for_update(country_code=None):
"""
@@ -88,44 +108,48 @@ def check_for_update(country_code=None):
current_version = get_current_version()
latest_version = yield get_latest_version()
- # We are already at the latest version
- if current_version == latest_version:
- defer.returnValue(latest_version)
-
resources_dir = FilePath(config.resources_directory)
resources_dir.makedirs(ignoreExistingDirectory=True)
current_manifest = resources_dir.child("manifest.json")
- new_manifest = current_manifest.temporarySibling()
- new_manifest.alwaysCreate = 0
-
- temporary_files.append((current_manifest, new_manifest))
-
- try:
- yield downloadPage(
- get_download_url(latest_version, "manifest.json"),
- new_manifest.path
- )
- except:
- cleanup()
- raise UpdateFailure("Failed to download manifest")
-
- new_manifest_data = json.loads(new_manifest.getContent())
-
if current_manifest.exists():
with current_manifest.open("r") as f:
- current_manifest_data = json.loads(f)
+ current_manifest_data = json.load(f)
else:
current_manifest_data = {
"resources": []
}
+ # We should download a newer manifest
+ if current_version < latest_version:
+ new_manifest = current_manifest.temporarySibling()
+ new_manifest.alwaysCreate = 0
+
+ temporary_files.append((current_manifest, new_manifest))
+
+ try:
+ yield downloadPage(
+ get_download_url(latest_version, "manifest.json"),
+ new_manifest.path
+ )
+ except:
+ cleanup()
+ raise UpdateFailure("Failed to download manifest")
+
+ new_manifest_data = json.loads(new_manifest.getContent())
+ else:
+ new_manifest_data = current_manifest_data
+
to_update, to_delete = get_out_of_date_resources(
current_manifest_data, new_manifest_data, country_code)
try:
for resource in to_update:
+ gzipped = False
pre_path, filename = resource["path"].split("/")
+ if filename.endswith(".gz"):
+ filename = filename[:-3]
+ gzipped = True
dst_file = resources_dir.child(pre_path).child(filename)
dst_file.parent().makedirs(ignoreExistingDirectory=True)
src_file = dst_file.temporarySibling()
@@ -135,8 +159,9 @@ def check_for_update(country_code=None):
# The paths for the download require replacing "/" with "."
download_url = get_download_url(latest_version,
resource["path"].replace("/", "."))
- print("Downloading {0}".format(download_url))
yield downloadPage(download_url, src_file.path)
+ if gzipped:
+ gunzip(src_file.path)
except Exception as exc:
cleanup()
log.exception(exc)
@@ -145,7 +170,7 @@ def check_for_update(country_code=None):
for dst_file, src_file in temporary_files:
log.msg("Moving {0} to {1}".format(src_file.path,
dst_file.path))
- src_file.moveTo(dst_file)
+ rename(src_file.path, dst_file.path)
for resource in to_delete:
log.msg("Deleting old resources")
diff --git a/ooni/settings.py b/ooni/settings.py
index b73e2f2..632dbe4 100644
--- a/ooni/settings.py
+++ b/ooni/settings.py
@@ -101,6 +101,8 @@ class OConfig(object):
else:
self.inputs_directory = os.path.join(self.ooni_home, 'inputs')
+ self.scheduler_directory = os.path.join(self.ooni_home, 'scheduler')
+
if self.advanced.decks_dir:
self.decks_directory = self.advanced.decks_dir
else:
diff --git a/ooni/ui/cli.py b/ooni/ui/cli.py
index 57924ec..e8d747c 100644
--- a/ooni/ui/cli.py
+++ b/ooni/ui/cli.py
@@ -305,7 +305,8 @@ def createDeck(global_options, url=None):
return deck
-def runTestWithDirector(director, global_options, url=None, start_tor=True,
+def runTestWithDirector(director, global_options, url=None,
+ start_tor=True,
create_input_store=True):
deck = createDeck(global_options, url=url)
diff --git a/ooni/ui/web/server.py b/ooni/ui/web/server.py
index f9886ac..0a3d1ca 100644
--- a/ooni/ui/web/server.py
+++ b/ooni/ui/web/server.py
@@ -84,10 +84,7 @@ class WebUIAPI(object):
"software_name": "ooniprobe",
"asn": config.probe_ip.geodata['asn'],
"country_code": config.probe_ip.geodata['countrycode'],
- "active_measurements": {},
- "completed_measurements": [],
- "director_started": False,
- "failures": []
+ "director_started": False
}
self.status_poller = LongPoller(
@@ -103,33 +100,17 @@ class WebUIAPI(object):
d = self.director.start()
d.addCallback(self.director_started)
- d.addErrback(self.director_startup_failed)
d.addBoth(lambda _: self.status_poller.notify())
def handle_director_event(self, event):
log.msg("Handling event {0}".format(event.type))
self.director_event_poller.notify(event)
- def add_failure(self, failure):
- self.status['failures'].append(str(failure))
-
def director_started(self, _):
self.status['director_started'] = True
self.status["asn"] = config.probe_ip.geodata['asn']
self.status["country_code"] = config.probe_ip.geodata['countrycode']
- def director_startup_failed(self, failure):
- self.add_failure(failure)
-
- def completed_measurement(self, measurement_id):
- del self.status['active_measurements'][measurement_id]
- self.status['completed_measurements'].append(measurement_id)
-
- def failed_measurement(self, measurement_id, failure):
- log.exception(failure)
- del self.status['active_measurements'][measurement_id]
- self.add_failure(str(failure))
-
@app.handle_errors(NotFound)
def not_found(self, request, _):
request.redirect('/client/')
@@ -188,18 +169,9 @@ class WebUIAPI(object):
return self.render_json({"command": "deck-list"}, request)
def run_deck(self, deck):
- for task_id in deck.task_ids:
- self.status['active_measurements'][task_id] = {
- 'test_name': 'foobar',
- 'test_start_time': 'some start time'
- }
- self.status_poller.notify()
deck.setup()
- d = deck.run(self.director)
- d.addCallback(lambda _:
- self.completed_measurement(task_id))
- d.addErrback(lambda failure:
- self.failed_measurement(task_id, failure))
+ # Here there is a dangling deferred
+ deck.run(self.director)
@app.route('/api/nettest/<string:test_name>/start', methods=["POST"])
def api_nettest_start(self, request, test_name):
commit 72c123125ac19d0a62f7eb5d3864efd01525d48a
Author: Arturo Filastò <arturo(a)filasto.net>
Date: Wed Jul 27 00:44:04 2016 +0200
Move gunzip and rename to utils
---
ooni/agent/scheduler.py | 4 ++--
ooni/resources.py | 10 +---------
ooni/ui/cli.py | 1 -
ooni/utils/__init__.py | 33 ++++++++++++++++++++-------------
4 files changed, 23 insertions(+), 25 deletions(-)
diff --git a/ooni/agent/scheduler.py b/ooni/agent/scheduler.py
index ace3bc5..3a9da20 100644
--- a/ooni/agent/scheduler.py
+++ b/ooni/agent/scheduler.py
@@ -137,11 +137,11 @@ class SchedulerService(service.MultiService):
self._scheduled_tasks.append(task)
def _task_failed(self, failure, task):
- log.msg("Failed to run {0}".format(task.identifier))
+ log.debug("Failed to run {0}".format(task.identifier))
log.exception(failure)
def _task_success(self, result, task):
- log.msg("Ran {0}".format(task.identifier))
+ log.debug("Ran {0}".format(task.identifier))
def _should_run(self):
"""
diff --git a/ooni/resources.py b/ooni/resources.py
index d67908c..8a0a57f 100644
--- a/ooni/resources.py
+++ b/ooni/resources.py
@@ -1,14 +1,12 @@
-import os
import gzip
import json
import shutil
-from twisted.python.runtime import platform
from twisted.python.filepath import FilePath
from twisted.internet import defer
from twisted.web.client import downloadPage, getPage
-from ooni.utils import log
+from ooni.utils import log, gunzip, rename
from ooni.settings import config
class UpdateFailure(Exception):
@@ -85,12 +83,6 @@ def gunzip(file_path):
in_file.close()
rename(tmp_location.path, file_path)
-def rename(src, dst):
- # Best effort atomic renaming
- if platform.isWindows() and os.path.exists(dst):
- os.unlink(dst)
- os.rename(src, dst)
-
@defer.inlineCallbacks
def check_for_update(country_code=None):
"""
diff --git a/ooni/ui/cli.py b/ooni/ui/cli.py
index e8d747c..d8a4a8f 100644
--- a/ooni/ui/cli.py
+++ b/ooni/ui/cli.py
@@ -2,7 +2,6 @@ import sys
import os
import json
-import yaml
import random
import textwrap
import urlparse
diff --git a/ooni/utils/__init__.py b/ooni/utils/__init__.py
index d202d97..f8ee953 100644
--- a/ooni/utils/__init__.py
+++ b/ooni/utils/__init__.py
@@ -1,12 +1,15 @@
import shutil
import string
import random
+import gzip
import os
-from datetime import datetime
-import gzip
+from datetime import datetime
from zipfile import ZipFile
+from twisted.python.filepath import FilePath
+from twisted.python.runtime import platform
+
from ooni import errors
@@ -128,6 +131,11 @@ def sanitize_options(options):
sanitized_options.append(option)
return sanitized_options
+def rename(src, dst):
+ # Best effort atomic renaming
+ if platform.isWindows() and os.path.exists(dst):
+ os.unlink(dst)
+ os.rename(src, dst)
def unzip(filename, dst):
assert filename.endswith('.zip')
@@ -141,17 +149,16 @@ def unzip(filename, dst):
return dst_path
-def gunzip(filename, dst):
- assert filename.endswith(".gz")
- dst_path = os.path.join(
- dst,
- os.path.basename(filename).replace(".gz", "")
- )
- with open(dst_path, "w+") as fw:
- gzip_file = gzip.open(filename)
- shutil.copyfileobj(gzip_file, fw)
- gzip_file.close()
-
+def gunzip(file_path):
+ """
+ gunzip a file in place.
+ """
+ tmp_location = FilePath(file_path).temporarySibling()
+ in_file = gzip.open(file_path)
+ with tmp_location.open('w') as out_file:
+ shutil.copyfileobj(in_file, out_file)
+ in_file.close()
+ rename(tmp_location.path, file_path)
def get_ooni_root():
script = os.path.join(__file__, '..')
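A quick sketch of the relocated helper's in-place semantics (the path below is hypothetical): the decompressed bytes go to a temporary sibling, which is then moved over the original with the best-effort atomic rename, so the file keeps its name but holds the decompressed contents.

from ooni.utils import gunzip

path = "/tmp/GeoIPASNum.dat.gz"  # hypothetical downloaded resource
gunzip(path)  # 'path' now contains the decompressed data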
commit 10a802e72c955d450a6a1b56fcb71b4e60d46934
Author: Arturo Filastò <arturo(a)filasto.net>
Date: Wed Jul 27 14:24:51 2016 +0200
Move probe_ip singleton to the geoip module.
Make it cache the probe_ip for 10 minutes and support waiting if another
probe_ip lookup is already in progress.
Also perform a probe IP lookup inside of the deck.
---
ooni/agent/scheduler.py | 25 +++++-----------------
 ooni/deck.py | 8 ++++++--
ooni/director.py | 7 +++++--
ooni/geoip.py | 33 ++++++++++++++++++++++++++++--
ooni/nettest.py | 9 ++++----
ooni/nettests/blocking/web_connectivity.py | 8 ++++----
ooni/resources.py | 5 ++++-
ooni/scripts/oonideckgen.py | 3 +--
ooni/settings.py | 3 +--
ooni/templates/httpt.py | 5 +++--
ooni/templates/process.py | 7 ++++---
ooni/tests/test_geoip.py | 2 +-
ooni/ui/cli.py | 2 +-
ooni/ui/web/client/index.html | 2 +-
ooni/ui/web/server.py | 25 ++++++++++++++--------
15 files changed, 89 insertions(+), 55 deletions(-)
diff --git a/ooni/agent/scheduler.py b/ooni/agent/scheduler.py
index 3a9da20..9784f8a 100644
--- a/ooni/agent/scheduler.py
+++ b/ooni/agent/scheduler.py
@@ -9,6 +9,7 @@ from ooni.utils import log
from ooni.deck import input_store
from ooni.settings import config
from ooni.contrib import croniter
+from ooni.geoip import probe_ip
class ScheduledTask(object):
_time_format = "%Y-%m-%dT%H:%M:%SZ"
@@ -73,19 +74,9 @@ class UpdateInputsAndResources(ScheduledTask):
@defer.inlineCallbacks
def task(self):
log.debug("Updating the inputs")
- yield resources.check_for_update(config.probe_ip.geodata['countrycode'])
- yield input_store.update(config.probe_ip.geodata['countrycode'])
-
-class UpdateProbeIp(ScheduledTask):
- identifier = "ooni-update-probe-ip"
- schedule = "@hourly"
- # XXX we need to ensure this is always run the first time ooniprobe or
- # ooniprobe-agent is started or implement on disk caching of the users
- # IP address.
-
- def task(self):
- log.debug("Updating the probe IP")
- return config.probe_ip.lookup()
+ yield probe_ip.lookup()
+ yield resources.check_for_update(probe_ip.geodata['countrycode'])
+ yield input_store.update(probe_ip.geodata['countrycode'])
class CleanupInProgressReports(ScheduledTask):
identifier = 'ooni-cleanup-reports'
@@ -97,18 +88,13 @@ class UploadMissingReports(ScheduledTask):
# Order matters
SYSTEM_TASKS = [
- UpdateProbeIp,
UpdateInputsAndResources
]
@defer.inlineCallbacks
-def run_system_tasks(no_geoip=False, no_input_store=False):
+def run_system_tasks(no_input_store=False):
task_classes = SYSTEM_TASKS[:]
- if no_geoip:
- log.debug("Not updating probe IP")
- task_classes.pop(UpdateProbeIp)
-
if no_input_store:
log.debug("Not updating the inputs")
task_classes.remove(UpdateInputsAndResources)
@@ -157,7 +143,6 @@ class SchedulerService(service.MultiService):
def startService(self):
service.MultiService.startService(self)
- self.schedule(UpdateProbeIp())
self.schedule(UpdateInputsAndResources())
self._looping_call.start(self.interval)
diff --git a/ooni/deck.py b/ooni/deck.py
index 0434d81..7976e5e 100644
--- a/ooni/deck.py
+++ b/ooni/deck.py
@@ -20,6 +20,7 @@ from ooni.resources import check_for_update
from ooni.settings import config
from ooni.utils import generate_filename
from ooni.utils import log
+from ooni.geoip import probe_ip
from ooni.results import generate_summary
@@ -652,7 +653,9 @@ class DeckTask(object):
self.ooni['net_test_loader'] = net_test_loader
+ @defer.inlineCallbacks
def _setup_ooni(self):
+ yield probe_ip.lookup()
for input_file in self.ooni['net_test_loader'].inputFiles:
file_path = resolve_file_path(input_file['filename'], self.cwd)
input_file['test_options'][input_file['key']] = file_path
@@ -660,7 +663,7 @@ class DeckTask(object):
self.id = generate_filename(self.ooni['test_details'])
def setup(self):
- getattr(self, "_setup_"+self.type)()
+ return getattr(self, "_setup_"+self.type)()
def _load(self, data):
for key in self._metadata_keys:
@@ -918,12 +921,13 @@ class NGDeck(object):
d.addErrback(self._measurement_failed, task)
return d
+ @defer.inlineCallbacks
def setup(self):
"""
This method needs to be called before you are able to run a deck.
"""
for task in self._tasks:
- task.setup()
+ yield task.setup()
self._is_setup = True
@defer.inlineCallbacks
diff --git a/ooni/director.py b/ooni/director.py
index 1ab076b..ad6d1e2 100644
--- a/ooni/director.py
+++ b/ooni/director.py
@@ -12,6 +12,7 @@ from ooni.nettest import NetTest, getNetTestInformation
from ooni.settings import config
from ooni.nettest import normalizeTestName
from ooni.deck import InputStore
+from ooni.geoip import probe_ip
from ooni.agent.scheduler import run_system_tasks
from ooni.utils.onion import start_tor, connect_to_control_port
@@ -199,11 +200,13 @@ class Director(object):
aux = map(lambda x: x in annotations, ["city", "country", "asn"])
if not all(aux):
log.msg("You should add annotations for the country, city and ASN")
+ else:
+ yield probe_ip.lookup()
+ self.notify(DirectorEvent("success", "Looked up probe IP"))
self.notify(DirectorEvent("success",
"Running system tasks"))
- yield run_system_tasks(no_geoip=no_geoip,
- no_input_store=not create_input_store)
+ yield run_system_tasks(no_input_store=not create_input_store)
self.notify(DirectorEvent("success",
"Ran system tasks"))
diff --git a/ooni/geoip.py b/ooni/geoip.py
index 2a7ec92..f271790 100644
--- a/ooni/geoip.py
+++ b/ooni/geoip.py
@@ -2,6 +2,7 @@ from __future__ import absolute_import
import re
import os
import json
+import time
import random
from hashlib import sha256
@@ -28,7 +29,7 @@ except ImportError:
class GeoIPDataFilesNotFound(Exception):
pass
-def IPToLocation(ipaddr):
+def ip_to_location(ipaddr):
from ooni.settings import config
country_file = config.get_data_file_path(
@@ -152,9 +153,14 @@ class DuckDuckGoGeoIP(HTTPGeoIPLookupper):
probe_ip = re.search(regexp, j['Answer']).group(1)
return probe_ip
+INITIAL = 0
+IN_PROGRESS = 1
+
class ProbeIP(object):
strategy = None
address = None
+ # How long should we consider geoip results valid?
+ _expire_in = 10*60
def __init__(self):
self.geoIPServices = {
@@ -168,10 +174,23 @@ class ProbeIP(object):
'ip': '127.0.0.1'
}
+ self._last_lookup = 0
+ self._reset_state()
+
+ def _reset_state(self):
+ self._state = INITIAL
+ self._looking_up = defer.Deferred()
+ self._looking_up.addCallback(self._looked_up)
+
+ def _looked_up(self, result):
+ self._last_lookup = time.time()
+ self._reset_state()
+ return result
+
def resolveGeodata(self):
from ooni.settings import config
- self.geodata = IPToLocation(self.address)
+ self.geodata = ip_to_location(self.address)
self.geodata['ip'] = self.address
if not config.privacy.includeasn:
self.geodata['asn'] = 'AS0'
@@ -182,13 +201,20 @@ class ProbeIP(object):
@defer.inlineCallbacks
def lookup(self):
+ if self._state == IN_PROGRESS:
+ yield self._looking_up
+ elif self._last_lookup < time.time() - self._expire_in:
+ self.address = None
+
if self.address:
defer.returnValue(self.address)
else:
+ self._state = IN_PROGRESS
try:
yield self.askTor()
log.msg("Found your IP via Tor")
self.resolveGeodata()
+ self._looking_up.callback(self.address)
defer.returnValue(self.address)
except errors.TorStateNotFound:
log.debug("Tor is not running. Skipping IP lookup via Tor.")
@@ -199,6 +225,7 @@ class ProbeIP(object):
yield self.askGeoIPService()
log.msg("Found your IP via a GeoIP service")
self.resolveGeodata()
+ self._looking_up.callback(self.address)
defer.returnValue(self.address)
except Exception:
log.msg("Unable to lookup the probe IP via GeoIPService")
@@ -241,3 +268,5 @@ class ProbeIP(object):
return d
else:
raise errors.TorStateNotFound
+
+probe_ip = ProbeIP()
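A sketch of the behaviour the singleton above is meant to provide (assuming a running reactor and a reachable lookup service): concurrent callers share one in-flight lookup, and the result is cached for ten minutes.

from twisted.internet import defer
from ooni.geoip import probe_ip

@defer.inlineCallbacks
def example():
    first = probe_ip.lookup()   # starts a lookup, state -> IN_PROGRESS
    second = probe_ip.lookup()  # waits on the same _looking_up Deferred
    addr = yield first
    addr_again = yield second   # served from the cached address
    assert addr == addr_again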
diff --git a/ooni/nettest.py b/ooni/nettest.py
index 88e4953..566c391 100644
--- a/ooni/nettest.py
+++ b/ooni/nettest.py
@@ -13,6 +13,7 @@ from ooni.tasks import Measurement
from ooni.utils import log, sanitize_options, randomStr
from ooni.utils.net import hasRawSocketPermission
from ooni.settings import config
+from ooni.geoip import probe_ip
from ooni import errors as e
@@ -192,10 +193,10 @@ class NetTestLoader(object):
def getTestDetails(self):
return {
- 'probe_asn': config.probe_ip.geodata['asn'],
- 'probe_cc': config.probe_ip.geodata['countrycode'],
- 'probe_ip': config.probe_ip.geodata['ip'],
- 'probe_city': config.probe_ip.geodata['city'],
+ 'probe_asn': probe_ip.geodata['asn'],
+ 'probe_cc': probe_ip.geodata['countrycode'],
+ 'probe_ip': probe_ip.geodata['ip'],
+ 'probe_city': probe_ip.geodata['city'],
'software_name': 'ooniprobe',
'software_version': ooniprobe_version,
# XXX only sanitize the input files
diff --git a/ooni/nettests/blocking/web_connectivity.py b/ooni/nettests/blocking/web_connectivity.py
index dde6b6f..d8d539f 100644
--- a/ooni/nettests/blocking/web_connectivity.py
+++ b/ooni/nettests/blocking/web_connectivity.py
@@ -350,10 +350,10 @@ class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest):
if len(control_addrs.intersection(experiment_addrs)) > 0:
return True
- experiment_asns = set(map(lambda x: geoip.IPToLocation(x)['asn'],
- experiment_addrs))
- control_asns = set(map(lambda x: geoip.IPToLocation(x)['asn'],
- control_addrs))
+ experiment_asns = set(map(lambda x: geoip.ip_to_location(x)['asn'],
+ experiment_addrs))
+ control_asns = set(map(lambda x: geoip.ip_to_location(x)['asn'],
+ control_addrs))
# Remove the instance of AS0 when we fail to find the ASN
control_asns.discard('AS0')
diff --git a/ooni/resources.py b/ooni/resources.py
index 8a0a57f..47ebf86 100644
--- a/ooni/resources.py
+++ b/ooni/resources.py
@@ -4,7 +4,10 @@ import shutil
from twisted.python.filepath import FilePath
from twisted.internet import defer
-from twisted.web.client import downloadPage, getPage
+from twisted.web.client import downloadPage, getPage, HTTPClientFactory
+
+# Disable logs of HTTPClientFactory
+HTTPClientFactory.noisy = False
from ooni.utils import log, gunzip, rename
from ooni.settings import config
diff --git a/ooni/scripts/oonideckgen.py b/ooni/scripts/oonideckgen.py
index 10f8673..9b087f9 100644
--- a/ooni/scripts/oonideckgen.py
+++ b/ooni/scripts/oonideckgen.py
@@ -10,7 +10,7 @@ from twisted.python import usage
from ooni.otime import prettyDateNowUTC
from ooni import errors
-from ooni.geoip import ProbeIP
+from ooni.geoip import probe_ip
from ooni.resources import check_for_update
from ooni.settings import config
from ooni.deck import NGDeck
@@ -86,7 +86,6 @@ def generate_deck(options):
@defer.inlineCallbacks
def get_user_country_code():
config.privacy.includecountry = True
- probe_ip = ProbeIP()
yield probe_ip.lookup()
defer.returnValue(probe_ip.geodata['countrycode'])
diff --git a/ooni/settings.py b/ooni/settings.py
index 36670ac..acb1895 100644
--- a/ooni/settings.py
+++ b/ooni/settings.py
@@ -23,8 +23,7 @@ class OConfig(object):
self.reports = Storage()
self.scapyFactory = None
self.tor_state = None
- # This is used to store the probes IP address obtained via Tor
- self.probe_ip = geoip.ProbeIP()
+
self.logging = True
self.basic = Storage()
self.advanced = Storage()
diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py
index 857392e..cba8702 100644
--- a/ooni/templates/httpt.py
+++ b/ooni/templates/httpt.py
@@ -19,6 +19,7 @@ from ooni.common.txextra import TrueHeaders
from ooni.common.txextra import FixedRedirectAgent, TrueHeadersAgent
from ooni.common.http_utils import representBody
from ooni.errors import handleAllFailures
+from ooni.geoip import probe_ip
class InvalidSocksProxyOption(Exception):
pass
@@ -159,9 +160,9 @@ class HTTPTest(NetTestCase):
else:
response_body = ''
# Attempt to redact the IP address of the probe from the responses
- if (config.privacy.includeip is False and config.probe_ip.address is not None and
+ if (config.privacy.includeip is False and probe_ip.address is not None and
(isinstance(response_body, str) or isinstance(response_body, unicode))):
- response_body = response_body.replace(config.probe_ip.address, "[REDACTED]")
+ response_body = response_body.replace(probe_ip.address, "[REDACTED]")
if (getattr(response, 'request', None) and
getattr(response.request, 'absoluteURI', None)):
session['request']['url'] = response.request.absoluteURI
diff --git a/ooni/templates/process.py b/ooni/templates/process.py
index faf0a66..56fe0fd 100644
--- a/ooni/templates/process.py
+++ b/ooni/templates/process.py
@@ -3,6 +3,7 @@ from twisted.internet import protocol, defer, reactor
from ooni.settings import config
from ooni.nettest import NetTestCase
from ooni.utils import log
+from ooni.geoip import probe_ip
class ProcessDirector(protocol.ProcessProtocol):
@@ -108,9 +109,9 @@ class ProcessTest(NetTestCase):
self.report['commands'] = []
# Attempt to redact the IP address of the probe from the standard output
- if config.privacy.includeip is False and config.probe_ip.address is not None:
- result['stdout'] = result['stdout'].replace(config.probe_ip.address, "[REDACTED]")
- result['stderr'] = result['stderr'].replace(config.probe_ip.address, "[REDACTED]")
+ if config.privacy.includeip is False and probe_ip.address is not None:
+ result['stdout'] = result['stdout'].replace(probe_ip.address, "[REDACTED]")
+ result['stderr'] = result['stderr'].replace(probe_ip.address, "[REDACTED]")
self.report['commands'].append({
'command_name': ' '.join(command),
diff --git a/ooni/tests/test_geoip.py b/ooni/tests/test_geoip.py
index 66ba13e..8eb964d 100644
--- a/ooni/tests/test_geoip.py
+++ b/ooni/tests/test_geoip.py
@@ -7,7 +7,7 @@ from ooni import geoip
class TestGeoIP(bases.ConfigTestCase):
def test_ip_to_location(self):
- location = geoip.IPToLocation('8.8.8.8')
+ location = geoip.ip_to_location('8.8.8.8')
assert 'countrycode' in location
assert 'asn' in location
assert 'city' in location
diff --git a/ooni/ui/cli.py b/ooni/ui/cli.py
index d8a4a8f..51ed3b9 100644
--- a/ooni/ui/cli.py
+++ b/ooni/ui/cli.py
@@ -313,7 +313,7 @@ def runTestWithDirector(director, global_options, url=None,
@defer.inlineCallbacks
def post_director_start(_):
try:
- deck.setup()
+ yield deck.setup()
yield deck.run(director)
except errors.UnableToLoadDeckInput as error:
raise defer.failure.Failure(error)
diff --git a/ooni/ui/web/client/index.html b/ooni/ui/web/client/index.html
index 7859216..c6d83b7 100644
--- a/ooni/ui/web/client/index.html
+++ b/ooni/ui/web/client/index.html
@@ -13,5 +13,5 @@
<app>
Loading...
</app>
- <script type="text/javascript" src="app.bundle.js?de2d27ce59f4cee8dd96"></script></body>
+ <script type="text/javascript" src="app.bundle.js?16bac0b4c21c5b120b04"></script></body>
</html>
diff --git a/ooni/ui/web/server.py b/ooni/ui/web/server.py
index 0a3d1ca..f14f6b8 100644
--- a/ooni/ui/web/server.py
+++ b/ooni/ui/web/server.py
@@ -18,6 +18,7 @@ from ooni.settings import config
from ooni.utils import log
from ooni.director import DirectorEvent
from ooni.results import generate_summary
+from ooni.geoip import probe_ip
config.advanced.debug = True
@@ -82,8 +83,8 @@ class WebUIAPI(object):
self.status = {
"software_version": ooniprobe_version,
"software_name": "ooniprobe",
- "asn": config.probe_ip.geodata['asn'],
- "country_code": config.probe_ip.geodata['countrycode'],
+ "asn": probe_ip.geodata['asn'],
+ "country_code": probe_ip.geodata['countrycode'],
"director_started": False
}
@@ -108,8 +109,8 @@ class WebUIAPI(object):
def director_started(self, _):
self.status['director_started'] = True
- self.status["asn"] = config.probe_ip.geodata['asn']
- self.status["country_code"] = config.probe_ip.geodata['countrycode']
+ self.status["asn"] = probe_ip.geodata['asn']
+ self.status["country_code"] = probe_ip.geodata['countrycode']
@app.handle_errors(NotFound)
def not_found(self, request, _):
@@ -168,10 +169,15 @@ class WebUIAPI(object):
return self.render_json({"command": "deck-list"}, request)
+ @defer.inlineCallbacks
def run_deck(self, deck):
- deck.setup()
- # Here there is a dangling deferred
- deck.run(self.director)
+ # These are dangling deferreds
+ try:
+ yield deck.setup()
+ yield deck.run(self.director)
+ except:
+ self.director_event_poller.notify(DirectorEvent("error",
+ "Failed to start deck"))
@app.route('/api/nettest/<string:test_name>/start', methods=["POST"])
def api_nettest_start(self, request, test_name):
@@ -219,7 +225,10 @@ class WebUIAPI(object):
@app.route('/api/input', methods=["GET"])
def api_input_list(self, request):
- return self.render_json(self.director.input_store.list(), request)
+ input_store_list = self.director.input_store.list()
+ for key, value in input_store_list.items():
+ value.pop('filepath')
+ return self.render_json(input_store_list, request)
@app.route('/api/input/<string:input_id>/content', methods=["GET"])
def api_input_content(self, request, input_id):
commit e8ac20f398a36c155317d963d039647fa358fb94
Author: Arturo Filastò <arturo(a)filasto.net>
Date: Tue Jul 26 21:38:52 2016 +0200
Initialise the scheduler directory
---
ooni/settings.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/ooni/settings.py b/ooni/settings.py
index 632dbe4..36670ac 100644
--- a/ooni/settings.py
+++ b/ooni/settings.py
@@ -102,6 +102,8 @@ class OConfig(object):
self.inputs_directory = os.path.join(self.ooni_home, 'inputs')
 self.scheduler_directory = os.path.join(self.ooni_home, 'scheduler')
+ if not os.path.exists(self.scheduler_directory):
+ os.mkdir(self.scheduler_directory)
if self.advanced.decks_dir:
self.decks_directory = self.advanced.decks_dir
[ooni-probe/master] The report log is now used only for measurements that are written to the ~/.ooni/measurements directory.
by art@torproject.org 19 Sep '16
commit 0362ad88fbe2945f5311b3db5b67031a0e8b218d
Author: Arturo Filastò <arturo(a)filasto.net>
Date: Thu Jul 28 17:32:12 2016 +0200
The report log is now used only for measurements that are written to the ~/.ooni/measurements directory.
* Move tor-related functions into utils.onion.
---
ooni/deck/deck.py | 4 +-
ooni/director.py | 55 +-------
ooni/reporter.py | 296 +++++++++++++++++++++++---------------------
 ooni/scripts/oonireport.py | 128 ++++++++++++++-----
ooni/tests/test_reporter.py | 87 ++++++-------
ooni/utils/log.py | 4 +-
ooni/utils/onion.py | 49 ++++++++
7 files changed, 352 insertions(+), 271 deletions(-)
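The per-measurement layout implied by the refactor (paths inferred from the OONIBReportLog code below; the measurement id itself is generated elsewhere via generate_filename):

~/.ooni/measurements/<measurement_id>/measurements.njson  # the report entries
~/.ooni/measurements/<measurement_id>/report_log.json     # upload status: pid, status, collector, last_update
~/.ooni/measurements/<measurement_id>/report_log.lock     # DeferredFilesystemLock guarding the log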
diff --git a/ooni/deck/deck.py b/ooni/deck/deck.py
index b11e174..c9b3fc5 100644
--- a/ooni/deck/deck.py
+++ b/ooni/deck/deck.py
@@ -218,6 +218,7 @@ class NGDeck(object):
net_test_loader = task.ooni["net_test_loader"]
test_details = task.ooni["test_details"]
+ measurement_id = None
report_filename = task.output_path
if not task.output_path:
measurement_id = task.id
@@ -235,7 +236,8 @@ class NGDeck(object):
net_test_loader,
report_filename,
collector_client=net_test_loader.collector,
- test_details=test_details
+ test_details=test_details,
+ measurement_id=measurement_id
)
d.addCallback(self._measurement_completed, task)
d.addErrback(self._measurement_failed, task)
diff --git a/ooni/director.py b/ooni/director.py
index e3df907..464a203 100644
--- a/ooni/director.py
+++ b/ooni/director.py
@@ -1,4 +1,3 @@
-import pwd
import os
from twisted.internet import defer
@@ -7,7 +6,6 @@ from twisted.python.failure import Failure
from ooni.managers import ReportEntryManager, MeasurementManager
from ooni.reporter import Report
from ooni.utils import log, generate_filename
-from ooni.utils.net import randomFreePort
from ooni.nettest import NetTest, getNetTestInformation
from ooni.settings import config
from ooni.nettest import normalizeTestName
@@ -15,7 +13,7 @@ from ooni.deck.store import InputStore
from ooni.geoip import probe_ip
from ooni.agent.scheduler import run_system_tasks
-from ooni.utils.onion import start_tor, connect_to_control_port
+from ooni.utils.onion import start_tor, connect_to_control_port, get_tor_config
class DirectorEvent(object):
def __init__(self, type="update", message=""):
@@ -299,7 +297,7 @@ class Director(object):
@defer.inlineCallbacks
def start_net_test_loader(self, net_test_loader, report_filename,
collector_client=None, no_yamloo=False,
- test_details=None):
+ test_details=None, measurement_id=None):
"""
Create the Report for the NetTest and start the report NetTest.
@@ -319,7 +317,8 @@ class Director(object):
report = Report(test_details, report_filename,
self.reportEntryManager,
collector_client,
- no_yamloo)
+ no_yamloo,
+ measurement_id)
yield report.open()
net_test = NetTest(test_cases, test_details, report)
@@ -392,50 +391,8 @@ class Director(object):
raise exc
if config.advanced.start_tor and config.tor_state is None:
- tor_config = TorConfig()
- if config.tor.control_port is None:
- config.tor.control_port = int(randomFreePort())
- if config.tor.socks_port is None:
- config.tor.socks_port = int(randomFreePort())
-
- tor_config.ControlPort = config.tor.control_port
- tor_config.SocksPort = config.tor.socks_port
-
- if config.tor.data_dir:
- data_dir = os.path.expanduser(config.tor.data_dir)
-
- if not os.path.exists(data_dir):
- log.debug("%s does not exist. Creating it." % data_dir)
- os.makedirs(data_dir)
- tor_config.DataDirectory = data_dir
-
- if config.tor.bridges:
- tor_config.UseBridges = 1
- if config.advanced.obfsproxy_binary:
- tor_config.ClientTransportPlugin = (
- 'obfs2,obfs3 exec %s managed' %
- config.advanced.obfsproxy_binary
- )
- bridges = []
- with open(config.tor.bridges) as f:
- for bridge in f:
- if 'obfs' in bridge:
- if config.advanced.obfsproxy_binary:
- bridges.append(bridge.strip())
- else:
- bridges.append(bridge.strip())
- tor_config.Bridge = bridges
-
- if config.tor.torrc:
- for i in config.tor.torrc.keys():
- setattr(tor_config, i, config.tor.torrc[i])
-
- if os.geteuid() == 0:
- tor_config.User = pwd.getpwuid(os.geteuid()).pw_name
-
- tor_config.save()
- log.debug("Setting control port as %s" % tor_config.ControlPort)
- log.debug("Setting SOCKS port as %s" % tor_config.SocksPort)
+ tor_config = get_tor_config()
+
try:
yield start_tor(tor_config)
self._tor_starting.callback(self._tor_state)
diff --git a/ooni/reporter.py b/ooni/reporter.py
index 20a13f5..cf5341d 100644
--- a/ooni/reporter.py
+++ b/ooni/reporter.py
@@ -13,6 +13,7 @@ from yaml.emitter import Emitter
from yaml.serializer import Serializer
from yaml.resolver import Resolver
+from twisted.python.filepath import FilePath
from twisted.python.util import untilConcludes
from twisted.internet import defer
from twisted.internet.error import ConnectionRefusedError
@@ -362,154 +363,161 @@ class OONIBReporter(OReporter):
log.debug("Closing report with id %s" % self.reportId)
return self.collector_client.closeReport(self.reportId)
+class NoReportLog(Exception):
+ pass
+
class OONIBReportLog(object):
"""
Used to keep track of report creation on a collector backend.
"""
+ _date_format = "%Y%m%dT%H:%M:%SZ"
- def __init__(self, file_name=None):
- if file_name is None:
- file_name = config.report_log_file
- self.file_name = file_name
- self.create_report_log()
-
- def get_report_log(self):
- with open(self.file_name) as f:
- report_log = yaml.safe_load(f)
- if not report_log:
- report_log = {} # consumers expect dictionary structure
- return report_log
-
- @property
- def reports_incomplete(self):
- reports = []
- report_log = self.get_report_log()
- for report_file, value in report_log.items():
- if value['status'] in ('created'):
- try:
- os.kill(value['pid'], 0)
- except:
- reports.append((report_file, value))
- elif value['status'] in ('incomplete'):
- reports.append((report_file, value))
- return reports
-
- @property
- def reports_in_progress(self):
- reports = []
- report_log = self.get_report_log()
- for report_file, value in report_log.items():
- if value['status'] in ('created'):
- try:
- os.kill(value['pid'], 0)
- reports.append((report_file, value))
- except:
- pass
- return reports
-
- @property
- def reports_to_upload(self):
- reports = []
- report_log = self.get_report_log()
- for report_file, value in report_log.items():
- if value['status'] in ('creation-failed', 'not-created'):
- reports.append((report_file, value))
- return reports
-
- def run(self, f, *arg, **kw):
- lock = defer.DeferredFilesystemLock(self.file_name + '.lock')
- d = lock.deferUntilLocked()
-
- def unlockAndReturn(r):
+ def __init__(self):
+ self.measurement_dir = FilePath(config.measurements_directory)
+
+ def _parse_log_entry(self, in_file, measurement_id):
+ entry = json.load(in_file)
+ entry['last_update'] = datetime.strptime(entry['last_update'],
+ self._date_format)
+ entry['measurements_path'] = self.measurement_dir.child(
+ measurement_id).child('measurements.njson').path
+ entry['measurement_id'] = measurement_id
+ return entry
+
+ def _lock_for_report_log(self, measurement_id):
+ lock_file = self.measurement_dir.child(measurement_id).child("report_log.lock")
+ return defer.DeferredFilesystemLock(lock_file.path)
+
+ def _get_report_log_file(self, measurement_id):
+ report_log_file = self.measurement_dir.child(measurement_id).child("report_log.json")
+ return report_log_file
+
+ @defer.inlineCallbacks
+ def get_report_log(self, measurement_id):
+ lock = self._lock_for_report_log(measurement_id)
+ yield lock.deferUntilLocked()
+
+ report_log_file = self._get_report_log_file(measurement_id)
+ if not report_log_file.exists():
lock.unlock()
- return r
+ raise NoReportLog
- def execute(_):
- d = defer.maybeDeferred(f, *arg, **kw)
- d.addBoth(unlockAndReturn)
- return d
+ with report_log_file.open('r') as in_file:
+ entry = self._parse_log_entry(in_file, measurement_id)
- d.addCallback(execute)
- return d
+ lock.unlock()
+
+ defer.returnValue(entry)
- def create_report_log(self):
- if not os.path.exists(self.file_name):
- with open(self.file_name, 'w+') as f:
- f.write(yaml.safe_dump({}))
-
- @contextmanager
- def edit_log(self):
- with open(self.file_name) as rfp:
- report = yaml.safe_load(rfp)
- # This should never happen.
- if report is None:
- report = {}
- with open(self.file_name, 'w+') as wfp:
+ @defer.inlineCallbacks
+ def get_report_log_entries(self):
+ entries = []
+ for measurement_id in self.measurement_dir.listdir():
try:
- yield report
- finally:
- wfp.write(yaml.safe_dump(report))
-
- def _not_created(self, report_file):
- with self.edit_log() as report:
- report[report_file] = {
- 'pid': os.getpid(),
- 'created_at': datetime.now(),
- 'status': 'not-created',
- 'collector': None
- }
+ entry = yield self.get_report_log(measurement_id)
+ entries.append(entry)
+ except NoReportLog:
+ continue
+ defer.returnValue(entries)
- def not_created(self, report_file):
- return self.run(self._not_created, report_file)
-
- def _created(self, report_file, collector_settings, report_id):
- with self.edit_log() as report:
- assert report_file is not None
- report[report_file] = {
- 'pid': os.getpid(),
- 'created_at': datetime.now(),
- 'status': 'created',
- 'collector': collector_settings,
- 'report_id': report_id
- }
- return report_id
-
- def created(self, report_file, collector_settings, report_id):
- return self.run(self._created, report_file,
- collector_settings, report_id)
-
- def _creation_failed(self, report_file, collector_settings):
- with self.edit_log() as report:
- report[report_file] = {
- 'pid': os.getpid(),
- 'created_at': datetime.now(),
- 'status': 'creation-failed',
- 'collector': collector_settings
- }
+ @defer.inlineCallbacks
+ def update_log(self, measurement_id, value):
+ lock = self._lock_for_report_log(measurement_id)
+ yield lock.deferUntilLocked()
- def creation_failed(self, report_file, collector_settings):
- return self.run(self._creation_failed, report_file,
- collector_settings)
+ report_log_file = self._get_report_log_file(measurement_id)
+ with report_log_file.open('w+') as out_file:
+ entry = value
+ entry['last_update'] = datetime.utcnow().strftime(self._date_format)
+ json.dump(entry, out_file)
- def _incomplete(self, report_file):
- with self.edit_log() as report:
- if report[report_file]['status'] != "created":
- raise errors.ReportNotCreated()
- report[report_file]['status'] = 'incomplete'
+ lock.unlock()
- def incomplete(self, report_file):
- return self.run(self._incomplete, report_file)
+ @defer.inlineCallbacks
+ def remove_log(self, measurement_id):
+ lock = self._lock_for_report_log(measurement_id)
+ yield lock.deferUntilLocked()
- def _closed(self, report_file):
- with self.edit_log() as report:
- rs = report[report_file]['status']
- if rs != "created" and rs != "incomplete":
- raise errors.ReportNotCreated()
- del report[report_file]
+ report_log_file = self._get_report_log_file(measurement_id)
+ try:
+ log.debug("Deleting log file")
+ report_log_file.remove()
+ except Exception as exc:
+ log.exception(exc)
+
+ lock.unlock()
+
+ @defer.inlineCallbacks
+ def get_incomplete(self):
+ incomplete_reports = []
+ all_entries = yield self.get_report_log_entries()
+ for entry in all_entries[:]:
+ if entry['status'] in ('created',):
+ try:
+ os.kill(entry['pid'], 0)
+ except OSError:
+ incomplete_reports.append(
+ (entry['measurements_path'], entry)
+ )
+ elif entry['status'] in ('incomplete',):
+ incomplete_reports.append(
+ (entry['measurements_path'], entry)
+ )
+ defer.returnValue(incomplete_reports)
- def closed(self, report_file):
- return self.run(self._closed, report_file)
+ @defer.inlineCallbacks
+ def get_in_progress(self):
+ in_progress_reports = []
+ all_entries = yield self.get_report_log_entries()
+ for entry in all_entries[:]:
+ if entry['status'] in ('created',):
+ try:
+ os.kill(entry['pid'], 0)
+ in_progress_reports.append(
+ (entry['measurements_path'], entry)
+ )
+ except OSError:
+ pass
+ defer.returnValue(in_progress_reports)
+
+ @defer.inlineCallbacks
+ def get_to_upload(self):
+ to_upload_reports = []
+ all_entries = yield self.get_report_log_entries()
+ for entry in all_entries[:]:
+ if entry['status'] in ('creation-failed', 'not-created'):
+ to_upload_reports.append(
+ (entry['measurements_path'], entry)
+ )
+ defer.returnValue(to_upload_reports)
+
+ def _update_status(self, measurement_id, status, collector_settings={}):
+ value = {
+ 'pid': os.getpid(),
+ 'status': status,
+ 'collector': collector_settings
+ }
+ return self.update_log(measurement_id, value)
+
+ def not_created(self, measurement_id):
+ return self._update_status(measurement_id, 'not-created')
+
+ def created(self, measurement_id, collector_settings):
+ return self._update_status(measurement_id, 'created',
+ collector_settings)
+
+
+ def creation_failed(self, measurement_id, collector_settings):
+ return self._update_status(measurement_id, 'creation-failed',
+ collector_settings)
+
+ def incomplete(self, measurement_id, collector_settings):
+ return self._update_status(measurement_id, 'incomplete',
+ collector_settings)
+
+ def closed(self, measurement_id):
+ return self.remove_log(measurement_id)
class Report(object):
@@ -517,7 +525,7 @@ class Report(object):
def __init__(self, test_details, report_filename,
reportEntryManager, collector_client=None,
- no_njson=False):
+ no_njson=False, measurement_id=None):
"""
This is an abstraction layer on top of all the configured reporters.
@@ -542,10 +550,12 @@ class Report(object):
"""
self.test_details = test_details
self.collector_client = collector_client
+
if report_filename is None:
report_filename = self.generateReportFilename()
self.report_filename = report_filename
+ self.measurement_id = measurement_id
self.report_log = OONIBReportLog()
self.njson_reporter = None
@@ -565,16 +575,17 @@ class Report(object):
def open_oonib_reporter(self):
def creation_failed(failure):
self.oonib_reporter = None
- return self.report_log.creation_failed(self.report_filename,
- self.collector_client.settings)
+ if self.measurement_id:
+ return self.report_log.creation_failed(self.measurement_id,
+ self.collector_client.settings)
def created(report_id):
if not self.oonib_reporter:
return
self.test_details['report_id'] = report_id
- return self.report_log.created(self.report_filename,
- self.collector_client.settings,
- report_id)
+ if self.measurement_id:
+ return self.report_log.created(self.measurement_id,
+ self.collector_client.settings)
d = self.oonib_reporter.createReport()
d.addErrback(creation_failed)
@@ -595,8 +606,8 @@ class Report(object):
if not self.no_njson:
self.njson_reporter = NJSONReporter(self.test_details,
self.report_filename)
- if not self.oonib_reporter:
- yield self.report_log.not_created(self.report_filename)
+ if not self.oonib_reporter and self.measurement_id:
+ yield self.report_log.not_created(self.measurement_id)
yield defer.maybeDeferred(self.njson_reporter.createReport)
defer.returnValue(self.reportId)
@@ -623,7 +634,9 @@ class Report(object):
d.errback(failure)
def oonib_report_failed(failure):
- return self.report_log.incomplete(self.report_filename)
+ if self.measurement_id:
+ return self.report_log.incomplete(self.measurement_id,
+ self.collector_client.settings)
def all_reports_written(_):
if not d.called:
@@ -662,7 +675,8 @@ class Report(object):
d.errback(failure)
def oonib_report_closed(result):
- return self.report_log.closed(self.report_filename)
+ if self.measurement_id:
+ return self.report_log.closed(self.measurement_id)
def oonib_report_failed(result):
log.exception(result)
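For reference, the report_log.json status values the code above moves between (as written by _update_status; closing a report deletes the log instead of recording a state):

not-created      measurement written locally only; eligible for later upload
created          the collector accepted createReport(); upload under way
creation-failed  createReport() failed; oonireport can retry the upload
incomplete       an entry failed to upload after creation; resume later
(closed)         upload finished; report_log.json is removed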
diff --git a/ooni/scripts/oonireport.py b/ooni/scripts/oonireport.py
index 8bcb1af..f59a843 100644
--- a/ooni/scripts/oonireport.py
+++ b/ooni/scripts/oonireport.py
@@ -2,6 +2,7 @@ from __future__ import print_function
import os
import sys
+import json
import yaml
from twisted.python import usage
@@ -21,7 +22,7 @@ def lookup_collector_client(report_header, bouncer):
oonib_client = BouncerClient(bouncer)
net_tests = [{
'test-helpers': [],
- 'input-hashes': report_header['input_hashes'],
+ 'input-hashes': [],
'name': report_header['test_name'],
'version': report_header['test_version'],
}]
@@ -33,36 +34,57 @@ def lookup_collector_client(report_header, bouncer):
)
defer.returnValue(collector_client)
+class NoIDFound(Exception):
+ pass
+
+def report_path_to_id(report_file):
+ measurement_dir = os.path.dirname(report_file)
+ measurement_id = os.path.basename(measurement_dir)
+ if os.path.dirname(measurement_dir) != config.measurements_directory:
+ raise NoIDFound
+ return measurement_id
+
@defer.inlineCallbacks
-def upload(report_file, collector=None, bouncer=None):
+def upload(report_file, collector=None, bouncer=None, measurement_id=None):
oonib_report_log = OONIBReportLog()
collector_client = None
if collector:
collector_client = CollectorClient(address=collector)
+ try:
+ # Try to guess the measurement_id from the file path
+ measurement_id = report_path_to_id(report_file)
+ except NoIDFound:
+ pass
+
log.msg("Attempting to upload %s" % report_file)
- with open(config.report_log_file) as f:
- report_log = yaml.safe_load(f)
+ if report_file.endswith(".njson"):
+ report = NJSONReportLoader(report_file)
+ else:
+ log.warn("Uploading of YAML formatted reports will be dropped in "
+ "future versions")
+ report = YAMLReportLoader(report_file)
- report = ReportLoader(report_file)
if bouncer and collector_client is None:
collector_client = yield lookup_collector_client(report.header,
bouncer)
if collector_client is None:
- try:
- collector_settings = report_log[report_file]['collector']
- if collector_settings is None:
- log.msg("Skipping uploading of %s since this measurement "
- "was run by specifying no collector." %
- report_file)
+ if measurement_id:
+ report_log = yield oonib_report_log.get_report_log(measurement_id)
+ collector_settings = report_log['collector']
+ print(collector_settings)
+ if collector_settings is None or len(collector_settings) == 0:
+ log.warn("Skipping uploading of %s since this measurement "
+ "was run by specifying no collector." %
+ report_file)
defer.returnValue(None)
elif isinstance(collector_settings, dict):
collector_client = CollectorClient(settings=collector_settings)
elif isinstance(collector_settings, str):
collector_client = CollectorClient(address=collector_settings)
- except KeyError:
+ else:
log.msg("Could not find %s in reporting.yaml. Looking up "
"collector with canonical bouncer." % report_file)
collector_client = yield lookup_collector_client(report.header,
@@ -73,51 +95,59 @@ def upload(report_file, collector=None, bouncer=None):
collector_client.settings))
report_id = yield oonib_reporter.createReport()
report.header['report_id'] = report_id
- yield oonib_report_log.created(report_file,
- collector_client.settings,
- report_id)
+ if measurement_id:
+ log.debug("Marking it as created")
+ yield oonib_report_log.created(measurement_id,
+ collector_client.settings)
log.msg("Writing report entries")
for entry in report:
yield oonib_reporter.writeReportEntry(entry)
- sys.stdout.write('.')
- sys.stdout.flush()
+ log.msg("Written entry")
log.msg("Closing report")
yield oonib_reporter.finish()
- yield oonib_report_log.closed(report_file)
+ if measurement_id:
+ log.debug("Closing log")
+ yield oonib_report_log.closed(measurement_id)
@defer.inlineCallbacks
-def upload_all(collector=None, bouncer=None):
+def upload_all(collector=None, bouncer=None, upload_incomplete=False):
oonib_report_log = OONIBReportLog()
- for report_file, value in oonib_report_log.reports_to_upload:
+ reports_to_upload = yield oonib_report_log.get_to_upload()
+ for report_file, value in reports_to_upload:
try:
- yield upload(report_file, collector, bouncer)
+ yield upload(report_file, collector, bouncer,
+ value['measurement_id'])
except Exception as exc:
log.exception(exc)
def print_report(report_file, value):
print("* %s" % report_file)
- print(" %s" % value['created_at'])
+ print(" %s" % value['last_update'])
+@defer.inlineCallbacks
def status():
oonib_report_log = OONIBReportLog()
+ reports_to_upload = yield oonib_report_log.get_to_upload()
print("Reports to be uploaded")
print("----------------------")
- for report_file, value in oonib_report_log.reports_to_upload:
+ for report_file, value in reports_to_upload:
print_report(report_file, value)
+ reports_in_progress = yield oonib_report_log.get_in_progress()
print("Reports in progress")
print("-------------------")
- for report_file, value in oonib_report_log.reports_in_progress:
+ for report_file, value in reports_in_progress:
print_report(report_file, value)
+ reports_incomplete = yield oonib_report_log.get_incomplete()
print("Incomplete reports")
print("------------------")
- for report_file, value in oonib_report_log.reports_incomplete:
+ for report_file, value in reports_incomplete:
print_report(report_file, value)
class ReportLoader(object):
@@ -125,24 +155,34 @@ class ReportLoader(object):
'probe_asn',
'probe_cc',
'probe_ip',
- 'start_time',
+ 'probe_city',
+ 'test_start_time',
'test_name',
'test_version',
'options',
'input_hashes',
'software_name',
- 'software_version'
+ 'software_version',
+ 'data_format_version',
+ 'report_id',
+ 'test_helpers',
+ 'annotations',
+ 'id'
)
+ def __iter__(self):
+ return self
+
+ def close(self):
+ self._fp.close()
+
+class YAMLReportLoader(ReportLoader):
def __init__(self, report_filename):
self._fp = open(report_filename)
self._yfp = yaml.safe_load_all(self._fp)
self.header = self._yfp.next()
- def __iter__(self):
- return self
-
def next(self):
try:
return self._yfp.next()
@@ -150,8 +190,30 @@ class ReportLoader(object):
self.close()
raise StopIteration
- def close(self):
- self._fp.close()
+class NJSONReportLoader(ReportLoader):
+ def __init__(self, report_filename):
+ self._fp = open(report_filename)
+ self.header = self._peek_header()
+
+ def _peek_header(self):
+ header = {}
+ first_entry = json.loads(next(self._fp))
+ for key in self._header_keys:
+ header[key] = first_entry.get(key, None)
+ self._fp.seek(0)
+ return header
+
+ def next(self):
+ try:
+ entry = json.loads(next(self._fp))
+ for key in self._header_keys:
+ entry.pop(key, None)
+ test_keys = entry.pop('test_keys')
+ entry.update(test_keys)
+ return entry
+ except StopIteration:
+ self.close()
+ raise StopIteration
class Options(usage.Options):
@@ -218,11 +280,13 @@ def oonireport(_reactor=reactor, _args=sys.argv[1:]):
options['bouncer'] = CANONICAL_BOUNCER_ONION
if options['command'] == "upload" and options['report_file']:
+ log.start()
tor_check()
return upload(options['report_file'],
options['collector'],
options['bouncer'])
elif options['command'] == "upload":
+ log.start()
tor_check()
return upload_all(options['collector'],
options['bouncer'])
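
NJSONReportLoader recovers the header by peeking at the first measurement entry and rewinding, since NJSON reports repeat the header keys in every entry. A minimal standalone sketch of that technique (header keys trimmed for brevity):

    import json

    def peek_header(path, header_keys=('test_name', 'test_version',
                                       'probe_cc', 'report_id')):
        # Read only the first NJSON line to extract header fields;
        # missing keys default to None, as in _peek_header above.
        with open(path) as fp:
            first_entry = json.loads(next(fp))
        return {key: first_entry.get(key, None) for key in header_keys}

The measurement_id, by contrast, is not stored in the report at all: report_path_to_id derives it from the name of the directory containing the report file and raises NoIDFound for files that live outside config.measurements_directory.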
diff --git a/ooni/tests/test_reporter.py b/ooni/tests/test_reporter.py
index 8f32733..cbfdaeb 100644
--- a/ooni/tests/test_reporter.py
+++ b/ooni/tests/test_reporter.py
@@ -2,11 +2,12 @@ import os
import yaml
import json
import time
-from mock import MagicMock
+import shutil
from twisted.internet import defer
from twisted.trial import unittest
+from ooni.tests.bases import ConfigTestCase
from ooni import errors as e
from ooni.tests.mocks import MockCollectorClient
from ooni.reporter import YAMLReporter, OONIBReporter, OONIBReportLog
@@ -114,65 +115,57 @@ class TestOONIBReporter(unittest.TestCase):
req = {'content': 'something'}
yield self.oonib_reporter.writeReportEntry(req)
-class TestOONIBReportLog(unittest.TestCase):
+class TestOONIBReportLog(ConfigTestCase):
def setUp(self):
- self.report_log = OONIBReportLog('report_log')
- self.report_log.create_report_log()
+ super(TestOONIBReportLog, self).setUp()
+ self.report_log = OONIBReportLog()
+ self.measurement_id = '20160727T182604Z-ZZ-AS0-dummy'
+ self.measurement_dir = os.path.join(
+ self.config.measurements_directory,
+ self.measurement_id
+ )
+ self.report_log_path = os.path.join(self.measurement_dir,
+ 'report_log.json')
+ os.mkdir(self.measurement_dir)
def tearDown(self):
- os.remove(self.report_log.file_name)
+ shutil.rmtree(self.measurement_dir)
+ super(TestOONIBReportLog, self).tearDown()
@defer.inlineCallbacks
def test_report_created(self):
- yield self.report_log.created("path_to_my_report.yaml",
- 'httpo://foo.onion',
- 'someid')
- with open(self.report_log.file_name) as f:
- report = yaml.safe_load(f)
- assert "path_to_my_report.yaml" in report
-
- @defer.inlineCallbacks
- def test_concurrent_edit(self):
- d1 = self.report_log.created("path_to_my_report1.yaml",
- 'httpo://foo.onion',
- 'someid1')
- d2 = self.report_log.created("path_to_my_report2.yaml",
- 'httpo://foo.onion',
- 'someid2')
- yield defer.DeferredList([d1, d2])
- with open(self.report_log.file_name) as f:
- report = yaml.safe_load(f)
- assert "path_to_my_report1.yaml" in report
- assert "path_to_my_report2.yaml" in report
+ yield self.report_log.created(self.measurement_id, {})
+ with open(self.report_log_path) as f:
+ report = json.load(f)
+ self.assertEqual(report['status'], 'created')
@defer.inlineCallbacks
def test_report_closed(self):
- yield self.report_log.created("path_to_my_report.yaml",
- 'httpo://foo.onion',
- 'someid')
- yield self.report_log.closed("path_to_my_report.yaml")
+ yield self.report_log.created(self.measurement_id, {})
+ yield self.report_log.closed(self.measurement_id)
- with open(self.report_log.file_name) as f:
- report = yaml.safe_load(f)
- assert "path_to_my_report.yaml" not in report
+ self.assertFalse(os.path.exists(self.report_log_path))
@defer.inlineCallbacks
def test_report_creation_failed(self):
- yield self.report_log.creation_failed("path_to_my_report.yaml",
- 'httpo://foo.onion')
- with open(self.report_log.file_name) as f:
- report = yaml.safe_load(f)
- assert "path_to_my_report.yaml" in report
- assert report["path_to_my_report.yaml"]["status"] == "creation-failed"
+ yield self.report_log.creation_failed(self.measurement_id, {})
+ with open(self.report_log_path) as f:
+ report = json.load(f)
+ self.assertEqual(report["status"], "creation-failed")
+
+ @defer.inlineCallbacks
+ def test_list_reports_in_progress(self):
+ yield self.report_log.created(self.measurement_id, {})
+ in_progress = yield self.report_log.get_in_progress()
+ incomplete = yield self.report_log.get_incomplete()
+ self.assertEqual(len(incomplete), 0)
+ self.assertEqual(len(in_progress), 1)
@defer.inlineCallbacks
- def test_list_reports(self):
- yield self.report_log.creation_failed("failed_report.yaml",
- 'httpo://foo.onion')
- yield self.report_log.created("created_report.yaml",
- 'httpo://foo.onion', 'XXXX')
-
- assert len(self.report_log.reports_in_progress) == 1
- assert len(self.report_log.reports_incomplete) == 0
- assert len(self.report_log.reports_to_upload) == 1
+ def test_list_reports_to_upload(self):
+ yield self.report_log.creation_failed(self.measurement_id, {})
+ incomplete = yield self.report_log.get_incomplete()
+ to_upload = yield self.report_log.get_to_upload()
+ self.assertEqual(len(incomplete), 0)
+ self.assertEqual(len(to_upload), 1)
diff --git a/ooni/utils/log.py b/ooni/utils/log.py
index c8a7360..982a353 100644
--- a/ooni/utils/log.py
+++ b/ooni/utils/log.py
@@ -114,7 +114,7 @@ class OONILogger(object):
else:
tw_log.err(msg, source="ooni")
- def warn(self, *arg, **kw):
+ def warn(self, msg, *arg, **kw):
text = log_encode(msg)
tw_log.msg(text, log_level=levels['WARNING'], source="ooni")
@@ -165,3 +165,5 @@ stop = oonilogger.stop
msg = oonilogger.msg
debug = oonilogger.debug
err = oonilogger.err
+warn = oonilogger.warn
+exception = oonilogger.exception
diff --git a/ooni/utils/onion.py b/ooni/utils/onion.py
index df9dfec..6e0d906 100644
--- a/ooni/utils/onion.py
+++ b/ooni/utils/onion.py
@@ -1,5 +1,6 @@
import os
import re
+import pwd
import string
import StringIO
import subprocess
@@ -12,6 +13,7 @@ from twisted.internet.endpoints import TCP4ClientEndpoint
from txtorcon import TorConfig, TorState, launch_tor, build_tor_connection
from txtorcon.util import find_tor_binary as tx_find_tor_binary
+from ooni.utils.net import randomFreePort
from ooni import constants
from ooni import errors
from ooni.utils import log
@@ -213,6 +215,53 @@ def get_client_transport(transport):
raise UninstalledTransport
+def get_tor_config():
+ tor_config = TorConfig()
+ if config.tor.control_port is None:
+ config.tor.control_port = int(randomFreePort())
+ if config.tor.socks_port is None:
+ config.tor.socks_port = int(randomFreePort())
+
+ tor_config.ControlPort = config.tor.control_port
+ tor_config.SocksPort = config.tor.socks_port
+
+ if config.tor.data_dir:
+ data_dir = os.path.expanduser(config.tor.data_dir)
+
+ if not os.path.exists(data_dir):
+ log.debug("%s does not exist. Creating it." % data_dir)
+ os.makedirs(data_dir)
+ tor_config.DataDirectory = data_dir
+
+ if config.tor.bridges:
+ tor_config.UseBridges = 1
+ if config.advanced.obfsproxy_binary:
+ tor_config.ClientTransportPlugin = (
+ 'obfs2,obfs3 exec %s managed' %
+ config.advanced.obfsproxy_binary
+ )
+ bridges = []
+ with open(config.tor.bridges) as f:
+ for bridge in f:
+ if 'obfs' in bridge:
+ if config.advanced.obfsproxy_binary:
+ bridges.append(bridge.strip())
+ else:
+ bridges.append(bridge.strip())
+ tor_config.Bridge = bridges
+
+ if config.tor.torrc:
+ for i in config.tor.torrc.keys():
+ setattr(tor_config, i, config.tor.torrc[i])
+
+ if os.geteuid() == 0:
+ tor_config.User = pwd.getpwuid(os.geteuid()).pw_name
+
+ tor_config.save()
+ log.debug("Setting control port as %s" % tor_config.ControlPort)
+ log.debug("Setting SOCKS port as %s" % tor_config.SocksPort)
+ return tor_config
+
class TorLauncherWithRetries(object):
def __init__(self, tor_config, timeout=config.tor.timeout):
self.retry_with = ["obfs4", "meek"]
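
get_tor_config fills in ControlPort and SocksPort with random free ports when they are unset. A sketch of what a randomFreePort helper can look like (the actual ooni.utils.net implementation may differ): bind to port 0 so the kernel assigns a free port, read the port back, then release the socket.

    import socket

    def random_free_port():
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind(('127.0.0.1', 0))  # port 0: let the kernel choose
        port = s.getsockname()[1]
        s.close()
        return port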
[ooni-probe/master] Put all deck related functions inside of deck module.
by art@torproject.org 19 Sep '16
commit 95a531d703321364e6c8759a78b6b287de8d4a23
Author: Arturo Filastò <arturo(a)filasto.net>
Date: Thu Jul 28 12:38:55 2016 +0200
Put all deck related functions inside of deck module.
Optimise txscapy function to avoid calling a function on every import.
---
ooni/agent/scheduler.py | 2 +-
ooni/backend_client.py | 22 +-
ooni/deck.py | 948 ---------------------------------------------
ooni/deck/__init__.py | 1 +
ooni/deck/backend.py | 191 +++++++++
ooni/deck/deck.py | 386 ++++++++++++++++++
ooni/deck/legacy.py | 65 ++++
ooni/deck/store.py | 128 ++++++
ooni/director.py | 2 +-
ooni/nettest.py | 43 +-
ooni/tests/test_deck.py | 31 +-
ooni/tests/test_txscapy.py | 3 -
ooni/ui/cli.py | 3 +-
ooni/utils/txscapy.py | 73 ++--
14 files changed, 885 insertions(+), 1013 deletions(-)
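
The txscapy change mentioned in the commit message replaces work done at import time with work done on first use. A generic sketch of that pattern, with expensive_lookup standing in for whatever used to run at module import (hypothetical name):

    _cache = None

    def get_value():
        # Compute once, on first call, instead of at module import.
        global _cache
        if _cache is None:
            _cache = expensive_lookup()  # hypothetical helper
        return _cache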
diff --git a/ooni/agent/scheduler.py b/ooni/agent/scheduler.py
index 71a7edb..1288d6c 100644
--- a/ooni/agent/scheduler.py
+++ b/ooni/agent/scheduler.py
@@ -6,7 +6,7 @@ from twisted.python.filepath import FilePath
from ooni import resources
from ooni.utils import log
-from ooni.deck import input_store
+from ooni.deck.store import input_store
from ooni.settings import config
from ooni.contrib import croniter
from ooni.geoip import probe_ip
diff --git a/ooni/backend_client.py b/ooni/backend_client.py
index 7721f6f..5298e7c 100644
--- a/ooni/backend_client.py
+++ b/ooni/backend_client.py
@@ -12,7 +12,7 @@ from twisted.python.versions import Version
from twisted import version as _twisted_version
_twisted_14_0_2_version = Version('twisted', 14, 0, 2)
-from ooni import errors as e
+from ooni import errors as e, constants
from ooni.settings import config
from ooni.utils import log, onion
from ooni.utils.net import BodyReceiver, StringProducer, Downloader
@@ -273,3 +273,23 @@ class WebConnectivityClient(OONIBClient):
'tcp_connect': tcp_connect
}
return self.queryBackend('POST', '/', query=request)
+
+
+def get_preferred_bouncer():
+ preferred_backend = config.advanced.get(
+ "preferred_backend", "onion"
+ )
+ bouncer_address = getattr(
+ constants, "CANONICAL_BOUNCER_{0}".format(
+ preferred_backend.upper()
+ )
+ )
+ if preferred_backend == "cloudfront":
+ return BouncerClient(
+ settings={
+ 'address': bouncer_address[0],
+ 'front': bouncer_address[1],
+ 'type': 'cloudfront'
+ })
+ else:
+ return BouncerClient(bouncer_address)
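
get_preferred_bouncer resolves the configured backend name to one of the CANONICAL_BOUNCER_* constants by composing the attribute name at runtime; for "cloudfront" the constant is expected to hold an (address, front) pair, which is why it is unpacked into a settings dict. A worked example of the lookup:

    preferred_backend = "https"
    name = "CANONICAL_BOUNCER_{0}".format(preferred_backend.upper())
    assert name == "CANONICAL_BOUNCER_HTTPS"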
diff --git a/ooni/deck.py b/ooni/deck.py
deleted file mode 100644
index 6844fda..0000000
--- a/ooni/deck.py
+++ /dev/null
@@ -1,948 +0,0 @@
-# -*- coding: utf-8 -*-
-import os
-import csv
-import json
-
-from copy import deepcopy
-
-import yaml
-
-from twisted.internet import defer
-from twisted.python.filepath import FilePath
-
-from ooni import constants
-from ooni import errors as e
-from ooni.backend_client import CollectorClient, BouncerClient
-from ooni.backend_client import WebConnectivityClient, guess_backend_type
-from ooni.nettest import NetTestLoader
-from ooni.otime import timestampNowISO8601UTC
-from ooni.resources import check_for_update
-from ooni.settings import config
-from ooni.utils import generate_filename
-from ooni.utils import log
-from ooni.geoip import probe_ip
-
-from ooni.results import generate_summary
-
-def nettest_to_path(path, allow_arbitrary_paths=False):
- """
- Takes as input either a path or a nettest name.
-
- The nettest name may either be prefixed by the category of the nettest (
- blocking, experimental, manipulation or third_party) or not.
-
- Args:
-
- allow_arbitrary_paths:
- allow also paths that are not relative to the nettest_directory.
-
- Returns:
-
- full path to the nettest file.
- """
- if allow_arbitrary_paths and os.path.exists(path):
- return path
-
- test_name = path.rsplit("/", 1)[-1]
- test_categories = [
- "blocking",
- "experimental",
- "manipulation",
- "third_party"
- ]
- nettest_dir = FilePath(config.nettest_directory)
- found_path = None
- for category in test_categories:
- p = nettest_dir.preauthChild(os.path.join(category, test_name) + '.py')
- if p.exists():
- if found_path is not None:
- raise Exception("Found two tests named %s" % test_name)
- found_path = p.path
-
- if not found_path:
- raise e.NetTestNotFound(path)
- return found_path
-
-
-def get_preferred_bouncer():
- preferred_backend = config.advanced.get(
- "preferred_backend", "onion"
- )
- bouncer_address = getattr(
- constants, "CANONICAL_BOUNCER_{0}".format(
- preferred_backend.upper()
- )
- )
- if preferred_backend == "cloudfront":
- return BouncerClient(
- settings={
- 'address': bouncer_address[0],
- 'front': bouncer_address[1],
- 'type': 'cloudfront'
- })
- else:
- return BouncerClient(bouncer_address)
-
-<<<<<<< d0fb4f37530aeb6b69fbc2985019464f8ec10312
-class Deck(InputFile):
- # this exists so we can mock it out in unittests
- _BouncerClient = BouncerClient
- _CollectorClient = CollectorClient
-
- def __init__(self, deck_hash=None,
- bouncer=None,
- decks_directory=config.decks_directory,
- no_collector=False):
- self.id = deck_hash
- self.no_collector = no_collector
-
- self.preferred_backend = config.advanced.get(
- "preferred_backend", "onion"
- )
- if self.preferred_backend not in ["onion", "https", "cloudfront"]:
- raise e.InvalidPreferredBackend
-
- if bouncer is None:
- bouncer_address = getattr(
- constants, "CANONICAL_BOUNCER_{0}".format(
- self.preferred_backend.upper()
- )
- )
- if self.preferred_backend == "cloudfront":
- self.bouncer = self._BouncerClient(settings={
- 'address': bouncer_address[0],
- 'front': bouncer_address[1],
- 'type': 'cloudfront'
- })
- else:
- self.bouncer = self._BouncerClient(bouncer_address)
- else:
- self.bouncer = self._BouncerClient(bouncer)
-
- self.requiresTor = False
-
- self.netTestLoaders = []
- self.inputs = []
-
- self.decksDirectory = os.path.abspath(decks_directory)
-
- @property
- def cached_file(self):
- return os.path.join(self.decksDirectory, self.id)
-
- @property
- def cached_descriptor(self):
- return self.cached_file + '.desc'
-
- def loadDeck(self, deckFile, global_options={}):
- with open(deckFile) as f:
- self.id = sha256(f.read()).hexdigest()
- f.seek(0)
- test_deck = yaml.safe_load(f)
-
- for test in test_deck:
- try:
- nettest_path = nettest_to_path(test['options']['test_file'])
- except e.NetTestNotFound:
- log.err("Could not find %s" % test['options']['test_file'])
- log.msg("Skipping...")
- continue
-
- annotations = test['options'].get('annotations', {})
- if global_options.get('annotations') is not None:
- annotations = global_options["annotations"]
-
- collector_address = test['options'].get('collector', None)
- if global_options.get('collector') is not None:
- collector_address = global_options['collector']
-
- net_test_loader = NetTestLoader(test['options']['subargs'],
- annotations=annotations,
- test_file=nettest_path)
- if collector_address is not None:
- net_test_loader.collector = CollectorClient(
- collector_address
- )
- if test['options'].get('bouncer', None) is not None:
- self.bouncer = self._BouncerClient(test['options']['bouncer'])
- if self.bouncer.backend_type == "onion":
- self.requiresTor = True
- self.insert(net_test_loader)
-
- def insert(self, net_test_loader):
- """ Add a NetTestLoader to this test deck """
- if (net_test_loader.collector is not None
- and net_test_loader.collector.backend_type == "onion"):
- self.requiresTor = True
- try:
- net_test_loader.checkOptions()
- if net_test_loader.requiresTor:
- self.requiresTor = True
- except e.MissingTestHelper:
- if self.preferred_backend == "onion":
- self.requiresTor = True
-
- self.netTestLoaders.append(net_test_loader)
-
- @defer.inlineCallbacks
- def setup(self):
- """ fetch and verify inputs for all NetTests in the deck """
- log.msg("Fetching required net test inputs...")
- for net_test_loader in self.netTestLoaders:
- # XXX figure out if we want to keep this or drop this.
- yield self.fetchAndVerifyNetTestInput(net_test_loader)
-
- if self.bouncer:
- log.msg("Looking up collector and test helpers with {0}".format(
- self.bouncer.base_address))
- yield lookup_collector_and_test_helpers(self.netTestLoaders,
- self.bouncer,
- self.preferred_backend,
- self.no_collector)
-
- @defer.inlineCallbacks
- def fetchAndVerifyNetTestInput(self, net_test_loader):
- """ fetch and verify a single NetTest's inputs """
- log.debug("Fetching and verifying inputs")
- for i in net_test_loader.inputFiles:
- if i['url']:
- log.debug("Downloading %s" % i['url'])
- oonibclient = self._CollectorClient(i['address'])
-
- try:
- input_file = yield oonibclient.downloadInput(i['hash'])
- except:
- raise e.UnableToLoadDeckInput
-
- try:
- input_file.verify()
- except AssertionError:
- raise e.UnableToLoadDeckInput
-
- i['test_options'][i['key']] = input_file.cached_file
-
-
-@defer.inlineCallbacks
-def lookup_collector_and_test_helpers(net_test_loaders,
- bouncer,
- preferred_backend,
- no_collector=False):
- required_nettests = []
-
- requires_test_helpers = False
- requires_collector = False
- for net_test_loader in net_test_loaders:
- nettest = {
- 'name': net_test_loader.testName,
- 'version': net_test_loader.testVersion,
- 'test-helpers': [],
- # XXX deprecate this very soon
- 'input-hashes': []
- }
- if not net_test_loader.collector and not no_collector:
- requires_collector = True
-
- if len(net_test_loader.missingTestHelpers) > 0:
- requires_test_helpers = True
- nettest['test-helpers'] += map(lambda x: x[1],
- net_test_loader.missingTestHelpers)
-
- required_nettests.append(nettest)
-
- if not requires_test_helpers and not requires_collector:
- defer.returnValue(None)
-
- response = yield bouncer.lookupTestCollector(required_nettests)
- try:
- provided_net_tests = yield get_reachable_test_helpers_and_collectors(
- response['net-tests'], preferred_backend)
- except e.NoReachableCollectors:
- log.err("Could not find any reachable collector")
- raise
- except e.NoReachableTestHelpers:
- log.err("Could not find any reachable test helpers")
- raise
-
- def find_collector_and_test_helpers(test_name, test_version):
- # input_files = [u""+x['hash'] for x in input_files]
- for net_test in provided_net_tests:
- if net_test['name'] != test_name:
- continue
- if net_test['version'] != test_version:
- continue
- # XXX remove the notion of policies based on input file hashes
- # if set(net_test['input-hashes']) != set(input_files):
- # continue
- return net_test['collector'], net_test['test-helpers']
-
- for net_test_loader in net_test_loaders:
- log.msg("Setting collector and test helpers for %s" %
- net_test_loader.testName)
-
- collector, test_helpers = \
- find_collector_and_test_helpers(
- test_name=net_test_loader.testName,
- test_version=net_test_loader.testVersion
- # input_files=net_test_loader.inputFiles
- )
-
- for option, name in net_test_loader.missingTestHelpers:
- test_helper_address_or_settings = test_helpers[name]
- net_test_loader.localOptions[option] = test_helper_address_or_settings
- net_test_loader.testHelpers[option] = test_helper_address_or_settings
-
- if not net_test_loader.collector and not no_collector:
- log.debug("Using collector {0}".format(collector))
- net_test_loader.collector = collector
-
-@defer.inlineCallbacks
-def get_reachable_test_helpers_and_collectors(net_tests, preferred_backend):
- for net_test in net_tests:
- primary_address = net_test['collector']
- alternate_addresses = net_test.get('collector-alternate', [])
- net_test['collector'] = yield get_reachable_collector(
- primary_address, alternate_addresses, preferred_backend)
-
- for test_helper_name, test_helper_address in net_test['test-helpers'].items():
- test_helper_alternate = \
- net_test.get('test-helpers-alternate', {}).get(test_helper_name, [])
- net_test['test-helpers'][test_helper_name] = \
- yield get_reachable_test_helper(test_helper_name,
- test_helper_address,
- test_helper_alternate,
- preferred_backend)
-
- defer.returnValue(net_tests)
-
-@defer.inlineCallbacks
-def get_reachable_collector(collector_address, collector_alternate,
- preferred_backend):
- # We prefer onion collector to https collector to cloudfront
- # collectors to plaintext collectors
- for collector_settings in sort_addresses_by_priority(
- collector_address,
- collector_alternate,
- preferred_backend):
- collector = CollectorClient(settings=collector_settings)
- if not collector.isSupported():
- log.err("Unsupported %s collector %s" % (
- collector_settings['type'],
- collector_settings['address']))
- continue
- reachable = yield collector.isReachable()
- if not reachable:
- log.err("Unreachable %s collector %s" % (
- collector_settings['type'],
- collector_settings['address']))
- continue
- defer.returnValue(collector)
-
- raise e.NoReachableCollectors
-
-
-@defer.inlineCallbacks
-def get_reachable_test_helper(test_helper_name, test_helper_address,
- test_helper_alternate, preferred_backend):
- # For the moment we look for alternate addresses only of
- # web_connectivity test helpers.
- if test_helper_name == 'web-connectivity':
- for web_connectivity_settings in sort_addresses_by_priority(
- test_helper_address, test_helper_alternate,
- preferred_backend):
- web_connectivity_test_helper = WebConnectivityClient(
- settings=web_connectivity_settings)
- if not web_connectivity_test_helper.isSupported():
- log.err("Unsupported %s web_connectivity test_helper "
- "%s" % (
- web_connectivity_settings['type'],
- web_connectivity_settings['address']
- ))
- continue
- reachable = yield web_connectivity_test_helper.isReachable()
- if not reachable:
- log.err("Unreachable %s web_connectivity test helper %s" % (
- web_connectivity_settings['type'],
- web_connectivity_settings['address']
- ))
- continue
- defer.returnValue(web_connectivity_settings)
- raise e.NoReachableTestHelpers
- else:
- defer.returnValue(test_helper_address.encode('ascii'))
-
-def sort_addresses_by_priority(priority_address,
- alternate_addresses,
- preferred_backend):
- prioritised_addresses = []
-
- backend_type = guess_backend_type(priority_address)
- priority_address = {
- 'address': priority_address,
- 'type': backend_type
- }
- address_priority = ['onion', 'https', 'cloudfront', 'http']
- address_priority.remove(preferred_backend)
- address_priority.insert(0, preferred_backend)
-
- def filter_by_type(collectors, collector_type):
- return filter(lambda x: x['type'] == collector_type, collectors)
-
- if (priority_address['type'] != preferred_backend):
- valid_alternatives = filter_by_type(alternate_addresses,
- preferred_backend)
- if len(valid_alternatives) > 0:
- alternate_addresses += [priority_address]
- priority_address = valid_alternatives[0]
- alternate_addresses.remove(priority_address)
-
- prioritised_addresses += [priority_address]
- for address_type in address_priority:
- prioritised_addresses += filter_by_type(alternate_addresses,
- address_type)
-
- return prioritised_addresses
-
-
-class InputNotFound(Exception):
- pass
-
-
-class InputStore(object):
- def __init__(self):
- self.path = FilePath(config.inputs_directory)
- self.resources = FilePath(config.resources_directory)
- self._cache_stale = True
- self._cache = {}
-
- @defer.inlineCallbacks
- def update_url_lists(self, country_code):
- countries = ["global"]
- if country_code != "ZZ":
- countries.append(country_code)
-
- for cc in countries:
- in_file = self.resources.child("citizenlab-test-lists").child("{0}.csv".format(cc))
- if not in_file.exists():
- yield check_for_update(country_code)
-
- if not in_file.exists():
- continue
-
- # XXX maybe move this to some utility function.
- # It's duplicated in oonideckgen.
- data_fname = "citizenlab-test-lists_{0}.txt".format(cc)
- desc_fname = "citizenlab-test-lists_{0}.desc".format(cc)
-
- out_file = self.path.child("data").child(data_fname)
- out_fh = out_file.open('w')
- with in_file.open('r') as in_fh:
- csvreader = csv.reader(in_fh)
- csvreader.next()
- for row in csvreader:
- out_fh.write("%s\n" % row[0])
- out_fh.close()
-
- desc_file = self.path.child("descriptors").child(desc_fname)
- with desc_file.open('w') as out_fh:
- if cc == "global":
- name = "List of globally accessed websites"
- else:
- # XXX resolve this to a human readable country name
- country_name = cc
- name = "List of websites for {0}".format(country_name)
- json.dump({
- "name": name,
- "filepath": out_file.path,
- "last_updated": timestampNowISO8601UTC(),
- "id": "citizenlab_{0}_urls".format(cc),
- "type": "file/url"
- }, out_fh)
- self._cache_stale = True
-
- @defer.inlineCallbacks
- def create(self, country_code=None):
- # XXX This is a hax to avoid race conditions in testing because this
- # object is a singleton and config can have a custom home directory
- # passed at runtime.
- self.path = FilePath(config.inputs_directory)
- self.resources = FilePath(config.resources_directory)
-
- self.path.child("descriptors").makedirs(ignoreExistingDirectory=True)
- self.path.child("data").makedirs(ignoreExistingDirectory=True)
- yield self.update_url_lists(country_code)
-
- @defer.inlineCallbacks
- def update(self, country_code=None):
- # XXX why do we make a difference between create and update?
- yield self.create(country_code)
-
- def _update_cache(self):
- descs = self.path.child("descriptors")
- if not descs.exists():
- self._cache = {}
- return
-
- for fn in descs.listdir():
- with descs.child(fn).open("r") as in_fh:
- input_desc = json.load(in_fh)
- self._cache[input_desc.pop("id")] = input_desc
- self._cache_stale = False
- return
-
- def list(self):
- if self._cache_stale:
- self._update_cache()
- return deepcopy(self._cache)
-
- def get(self, input_id):
- if self._cache_stale:
- self._update_cache()
- try:
- input_desc = self._cache[input_id]
- except KeyError:
- raise InputNotFound(input_id)
- return input_desc
-
- def getContent(self, input_id):
- input_desc = self.get(input_id)
- with open(input_desc["filepath"]) as fh:
- return fh.read()
-
-class DeckStore(object):
- def __init__(self):
- self.path = FilePath(config.decks_directory)
-
- def update(self):
- pass
-
- def get(self):
- pass
-
-def resolve_file_path(v, prepath=None):
- if v.startswith("$"):
- # This raises InputNotFound and we let it carry onto the caller
- return input_store.get(v[1:])["filepath"]
- elif prepath is not None and (not os.path.isabs(v)):
- return FilePath(prepath).preauthChild(v).path
- return v
-
-def options_to_args(options):
- args = []
- for k, v in options.items():
- if v is None:
- continue
- if v == False or v == 0:
- continue
- if (len(k)) == 1:
- args.append('-'+k)
- else:
- args.append('--'+k)
- if isinstance(v, bool) or isinstance(v, int):
- continue
- args.append(v)
- return args
-
-def normalize_options(options):
- """
- Takes some options that have a mixture of - and _ and returns the
- equivalent options with only '_'.
- """
- normalized_opts = {}
- for k, v in options.items():
- normalized_key = k.replace('-', '_')
- assert normalized_key not in normalized_opts, "The key {0} cannot be normalized".format(k)
- normalized_opts[normalized_key] = v
- return normalized_opts
-
-class UnknownTaskKey(Exception):
- pass
-
-class MissingTaskDataKey(Exception):
- pass
-
-class DeckTask(object):
- _metadata_keys = ["name"]
- _supported_tasks = ["ooni"]
-
- def __init__(self, data,
- parent_metadata={},
- global_options={},
- cwd=None,
- arbitrary_paths=False):
-
- self.parent_metadata = normalize_options(parent_metadata)
- self.global_options = global_options
- self.cwd = cwd
- self.data = deepcopy(data)
-
- self._skip = False
-
- self.id = ""
-
- self.type = None
- self.metadata = {}
- self.requires_tor = False
- self.requires_bouncer = False
-
- # If this is set to true a deck can specify any path. It should only
- # be run against trusted decks or when you create a deck
- # programmatically to run a test specified from the command line.
- self._arbitrary_paths = arbitrary_paths
-
- self.ooni = {
- 'bouncer_client': None,
- 'test_details': {}
- }
- self.output_path = None
-
- self._load(data)
-
- def _get_option(self, name, task_data, default=None):
- try:
- return self.global_options[name]
- except KeyError:
- return task_data.pop(name,
- self.parent_metadata.get(name, default))
-
- def _load_ooni(self, task_data):
- required_keys = ["test_name"]
- for required_key in required_keys:
- if required_key not in task_data:
- raise MissingTaskDataKey(required_key)
-
- # This raises e.NetTestNotFound, we let it go onto the caller
- nettest_path = nettest_to_path(task_data.pop("test_name"),
- self._arbitrary_paths)
-
- annotations = self._get_option('annotations', task_data, {})
- collector_address = self._get_option('collector', task_data, None)
-
- try:
- self.output_path = self.global_options['reportfile']
- except KeyError:
- self.output_path = task_data.pop('reportfile', None)
-
- if task_data.get('no-collector', False):
- collector_address = None
-
- net_test_loader = NetTestLoader(
- options_to_args(task_data),
- annotations=annotations,
- test_file=nettest_path
- )
-
- if isinstance(collector_address, dict):
- net_test_loader.collector = CollectorClient(
- settings=collector_address
- )
- elif collector_address is not None:
- net_test_loader.collector = CollectorClient(
- collector_address
- )
-
- if (net_test_loader.collector is not None and
- net_test_loader.collector.backend_type == "onion"):
- self.requires_tor = True
-
- try:
- net_test_loader.checkOptions()
- if net_test_loader.requiresTor:
- self.requires_tor = True
- except e.MissingTestHelper:
- self.requires_bouncer = True
-
- self.ooni['net_test_loader'] = net_test_loader
-
- @defer.inlineCallbacks
- def _setup_ooni(self):
- yield probe_ip.lookup()
- for input_file in self.ooni['net_test_loader'].inputFiles:
- file_path = resolve_file_path(input_file['filename'], self.cwd)
- input_file['test_options'][input_file['key']] = file_path
- self.ooni['test_details'] = self.ooni['net_test_loader'].getTestDetails()
- self.id = generate_filename(self.ooni['test_details'])
-
- def setup(self):
- return getattr(self, "_setup_"+self.type)()
-
- def _load(self, data):
- for key in self._metadata_keys:
- try:
- self.metadata[key] = data.pop(key)
- except KeyError:
- continue
-
- task_type, task_data = data.popitem()
- if task_type not in self._supported_tasks:
- raise UnknownTaskKey(task_type)
- self.type = task_type
- getattr(self, "_load_"+task_type)(task_data)
- assert len(data) == 0, "Got an unidentified key"
-
-class NotAnOption(Exception):
- pass
-
-def subargs_to_options(subargs):
- options = {}
-
- def parse_option_name(arg):
- if arg.startswith("--"):
- return arg[2:]
- elif arg.startswith("-"):
- return arg[1:]
- raise NotAnOption
-
- subargs = iter(reversed(subargs))
- for subarg in subargs:
- try:
- value = subarg
- name = parse_option_name(subarg)
- options[name] = True
- except NotAnOption:
- try:
- name = parse_option_name(subargs.next())
- options[name] = value
- except StopIteration:
- break
-
- return options
-
-def convert_legacy_deck(deck_data):
- """
- I take a legacy deck list and convert it to the new deck format.
-
- :param deck_data: in the legacy format
- :return: deck_data in the new format
- """
- assert isinstance(deck_data, list), "Legacy decks are lists"
- new_deck_data = {}
- new_deck_data["name"] = "Legacy deck"
- new_deck_data["description"] = "This is a legacy deck converted to the " \
- "new format"
- new_deck_data["bouncer"] = None
- new_deck_data["tasks"] = []
- for deck_item in deck_data:
- deck_task = {"ooni": {}}
-
- options = deck_item["options"]
- deck_task["ooni"]["test_name"] = options.pop("test_file")
- deck_task["ooni"]["annotations"] = options.pop("annotations", {})
- deck_task["ooni"]["collector"] = options.pop("collector", None)
-
- # XXX here we end up picking only the last not none bouncer_address
- bouncer_address = options.pop("bouncer", None)
- if bouncer_address is not None:
- new_deck_data["bouncer"] = bouncer_address
-
- subargs = options.pop("subargs", [])
- for name, value in subargs_to_options(subargs).items():
- deck_task["ooni"][name] = value
-
- for name, value in options.items():
- deck_task["ooni"][name] = value
-
- new_deck_data["tasks"].append(deck_task)
-
- return new_deck_data
-
-class NGDeck(object):
- def __init__(self,
- deck_data=None,
- deck_path=None,
- global_options={},
- no_collector=False,
- arbitrary_paths=False):
- # Used to resolve relative paths inside of decks.
- self.deck_directory = os.getcwd()
- self.requires_tor = False
- self.no_collector = no_collector
- self.name = ""
- self.description = ""
- self.schedule = None
-
- self.metadata = {}
- self.global_options = normalize_options(global_options)
- self.bouncer = None
-
- self._arbitrary_paths = arbitrary_paths
- self._is_setup = False
-
- self._measurement_path = FilePath(config.measurements_directory)
- self._tasks = []
- self.task_ids = []
-
- if deck_path is not None:
- self.open(deck_path)
- elif deck_data is not None:
- self.load(deck_data)
-
- def open(self, deck_path, global_options=None):
- with open(deck_path) as fh:
- deck_data = yaml.safe_load(fh)
- self.deck_directory = os.path.abspath(os.path.dirname(deck_path))
- self.load(deck_data, global_options)
-
- def load(self, deck_data, global_options=None):
- if global_options is not None:
- self.global_options = normalize_options(global_options)
-
- if isinstance(deck_data, list):
- deck_data = convert_legacy_deck(deck_data)
-
- self.name = deck_data.pop("name", "Un-named Deck")
- self.description = deck_data.pop("description", "No description")
-
- bouncer_address = self.global_options.get('bouncer',
- deck_data.pop("bouncer", None))
- if bouncer_address is None:
- self.bouncer = get_preferred_bouncer()
- elif isinstance(bouncer_address, dict):
- self.bouncer = BouncerClient(settings=bouncer_address)
- else:
- self.bouncer = BouncerClient(bouncer_address)
-
- self.schedule = deck_data.pop("schedule", None)
-
- tasks_data = deck_data.pop("tasks", [])
- for key, metadata in deck_data.items():
- self.metadata[key] = metadata
-
- # We override the task metadata with the global options if present
- self.metadata.update(self.global_options)
-
- for task_data in tasks_data:
- deck_task = DeckTask(
- data=task_data,
- parent_metadata=self.metadata,
- global_options=self.global_options,
- cwd=self.deck_directory,
- arbitrary_paths=self._arbitrary_paths
- )
- if deck_task.requires_tor:
- self.requires_tor = True
- if (deck_task.requires_bouncer and
- self.bouncer.backend_type == "onion"):
- self.requires_tor = True
- self._tasks.append(deck_task)
- self.task_ids.append(deck_task.id)
-
- if self.metadata.get('no_collector', False):
- self.no_collector = True
-
- @property
- def tasks(self):
- return self._tasks
-
- def write(self, fh):
- """
- Writes a properly formatted deck to the supplied file handle.
- :param fh: an open file handle
- :return:
- """
- deck_data = {
- "name": self.name,
- "description": self.description,
- "tasks": [task.data for task in self._tasks]
- }
- if self.schedule is not None:
- deck_data["schedule"] = self.schedule
- for key, value in self.metadata.items():
- deck_data[key] = value
-
- fh.write("---\n")
- yaml.safe_dump(deck_data, fh, default_flow_style=False)
-
- @defer.inlineCallbacks
- def query_bouncer(self):
- preferred_backend = config.advanced.get(
- "preferred_backend", "onion"
- )
- log.msg("Looking up collector and test helpers with {0}".format(
- self.bouncer.base_address)
- )
- net_test_loaders = []
- for task in self._tasks:
- if task.type == "ooni":
- net_test_loaders.append(task.ooni["net_test_loader"])
-
- yield lookup_collector_and_test_helpers(
- net_test_loaders,
- self.bouncer,
- preferred_backend,
- self.no_collector
- )
- defer.returnValue(net_test_loaders)
-
- def _measurement_completed(self, result, task):
- if not task.output_path:
- measurement_id = task.id
- measurement_dir = self._measurement_path.child(measurement_id)
- measurement_dir.child("measurements.njson.progress").moveTo(
- measurement_dir.child("measurements.njson")
- )
- generate_summary(
- measurement_dir.child("measurements.njson").path,
- measurement_dir.child("summary.json").path
- )
- measurement_dir.child("running.pid").remove()
-
- def _measurement_failed(self, failure, task):
- if not task.output_path:
- # XXX do we also want to delete measurements.njson.progress?
- measurement_id = task.id
- measurement_dir = self._measurement_path.child(measurement_id)
- measurement_dir.child("running.pid").remove()
- return failure
-
- def _run_ooni_task(self, task, director):
- net_test_loader = task.ooni["net_test_loader"]
- test_details = task.ooni["test_details"]
-
- report_filename = task.output_path
- if not task.output_path:
- measurement_id = task.id
-
- measurement_dir = self._measurement_path.child(measurement_id)
- measurement_dir.createDirectory()
-
- report_filename = measurement_dir.child("measurements.njson.progress").path
- pid_file = measurement_dir.child("running.pid")
-
- with pid_file.open('w') as out_file:
- out_file.write("{0}".format(os.getpid()))
-
- d = director.start_net_test_loader(
- net_test_loader,
- report_filename,
- collector_client=net_test_loader.collector,
- test_details=test_details
- )
- d.addCallback(self._measurement_completed, task)
- d.addErrback(self._measurement_failed, task)
- return d
-
- @defer.inlineCallbacks
- def setup(self):
- """
- This method needs to be called before you are able to run a deck.
- """
- for task in self._tasks:
- yield task.setup()
- self._is_setup = True
-
- @defer.inlineCallbacks
- def run(self, director):
- assert self._is_setup, "You must call setup() before you can run a " \
- "deck"
- if self.requires_tor:
- yield director.start_tor()
- yield self.query_bouncer()
- for task in self._tasks:
- if task._skip is True:
- log.msg("Skipping running {0}".format(task.name))
- continue
- if task.type == "ooni":
- yield self._run_ooni_task(task, director)
- self._is_setup = False
-
-input_store = InputStore()
diff --git a/ooni/deck/__init__.py b/ooni/deck/__init__.py
new file mode 100644
index 0000000..3b48d52
--- /dev/null
+++ b/ooni/deck/__init__.py
@@ -0,0 +1 @@
+from .deck import NGDeck
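
The package __init__ re-exports NGDeck so the import path used against the old single-module deck.py keeps working after the split. A sketch of the unchanged caller side (the deck path is illustrative):

    from ooni.deck import NGDeck  # same import path as before the refactor

    deck = NGDeck(deck_path='/path/to/some-deck.yaml', global_options={})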
diff --git a/ooni/deck/backend.py b/ooni/deck/backend.py
new file mode 100644
index 0000000..b2df9bc
--- /dev/null
+++ b/ooni/deck/backend.py
@@ -0,0 +1,191 @@
+from twisted.internet import defer
+
+from ooni import errors as e
+from ooni.backend_client import guess_backend_type, WebConnectivityClient, \
+ CollectorClient
+from ooni.utils import log
+
+
+def sort_addresses_by_priority(priority_address,
+ alternate_addresses,
+ preferred_backend):
+ prioritised_addresses = []
+
+ backend_type = guess_backend_type(priority_address)
+ priority_address = {
+ 'address': priority_address,
+ 'type': backend_type
+ }
+ address_priority = ['onion', 'https', 'cloudfront', 'http']
+ address_priority.remove(preferred_backend)
+ address_priority.insert(0, preferred_backend)
+
+ def filter_by_type(collectors, collector_type):
+ return filter(lambda x: x['type'] == collector_type, collectors)
+
+ if (priority_address['type'] != preferred_backend):
+ valid_alternatives = filter_by_type(alternate_addresses,
+ preferred_backend)
+ if len(valid_alternatives) > 0:
+ alternate_addresses += [priority_address]
+ priority_address = valid_alternatives[0]
+ alternate_addresses.remove(priority_address)
+
+ prioritised_addresses += [priority_address]
+ for address_type in address_priority:
+ prioritised_addresses += filter_by_type(alternate_addresses,
+ address_type)
+
+ return prioritised_addresses
+
+
+@defer.inlineCallbacks
+def get_reachable_test_helper(test_helper_name, test_helper_address,
+ test_helper_alternate, preferred_backend):
+ # For the moment we look for alternate addresses only of
+ # web_connectivity test helpers.
+ if test_helper_name == 'web-connectivity':
+ for web_connectivity_settings in sort_addresses_by_priority(
+ test_helper_address, test_helper_alternate,
+ preferred_backend):
+ web_connectivity_test_helper = WebConnectivityClient(
+ settings=web_connectivity_settings)
+ if not web_connectivity_test_helper.isSupported():
+ log.err("Unsupported %s web_connectivity test_helper "
+ "%s" % (
+ web_connectivity_settings['type'],
+ web_connectivity_settings['address']
+ ))
+ continue
+ reachable = yield web_connectivity_test_helper.isReachable()
+ if not reachable:
+ log.err("Unreachable %s web_connectivity test helper %s" % (
+ web_connectivity_settings['type'],
+ web_connectivity_settings['address']
+ ))
+ continue
+ defer.returnValue(web_connectivity_settings)
+ raise e.NoReachableTestHelpers
+ else:
+ defer.returnValue(test_helper_address.encode('ascii'))
+
+
+@defer.inlineCallbacks
+def get_reachable_collector(collector_address, collector_alternate,
+ preferred_backend):
+ # We prefer onion collector to https collector to cloudfront
+ # collectors to plaintext collectors
+ for collector_settings in sort_addresses_by_priority(
+ collector_address,
+ collector_alternate,
+ preferred_backend):
+ collector = CollectorClient(settings=collector_settings)
+ if not collector.isSupported():
+ log.err("Unsupported %s collector %s" % (
+ collector_settings['type'],
+ collector_settings['address']))
+ continue
+ reachable = yield collector.isReachable()
+ if not reachable:
+ log.err("Unreachable %s collector %s" % (
+ collector_settings['type'],
+ collector_settings['address']))
+ continue
+ defer.returnValue(collector)
+
+ raise e.NoReachableCollectors
+
+
+@defer.inlineCallbacks
+def get_reachable_test_helpers_and_collectors(net_tests, preferred_backend):
+ for net_test in net_tests:
+ primary_address = net_test['collector']
+ alternate_addresses = net_test.get('collector-alternate', [])
+ net_test['collector'] = yield get_reachable_collector(
+ primary_address, alternate_addresses, preferred_backend)
+
+ for test_helper_name, test_helper_address in net_test['test-helpers'].items():
+ test_helper_alternate = \
+ net_test.get('test-helpers-alternate', {}).get(test_helper_name, [])
+ net_test['test-helpers'][test_helper_name] = \
+ yield get_reachable_test_helper(test_helper_name,
+ test_helper_address,
+ test_helper_alternate,
+ preferred_backend)
+
+ defer.returnValue(net_tests)
+
+
+@defer.inlineCallbacks
+def lookup_collector_and_test_helpers(net_test_loaders,
+ bouncer,
+ preferred_backend,
+ no_collector=False):
+ required_nettests = []
+
+ requires_test_helpers = False
+ requires_collector = False
+ for net_test_loader in net_test_loaders:
+ nettest = {
+ 'name': net_test_loader.testName,
+ 'version': net_test_loader.testVersion,
+ 'test-helpers': [],
+ # XXX deprecate this very soon
+ 'input-hashes': []
+ }
+ if not net_test_loader.collector and not no_collector:
+ requires_collector = True
+
+ if len(net_test_loader.missingTestHelpers) > 0:
+ requires_test_helpers = True
+ nettest['test-helpers'] += map(lambda x: x[1],
+ net_test_loader.missingTestHelpers)
+
+ required_nettests.append(nettest)
+
+ if not requires_test_helpers and not requires_collector:
+ defer.returnValue(None)
+
+ print("Using bouncer %s" % bouncer)
+ response = yield bouncer.lookupTestCollector(required_nettests)
+ try:
+ provided_net_tests = yield get_reachable_test_helpers_and_collectors(
+ response['net-tests'], preferred_backend)
+ except e.NoReachableCollectors:
+ log.err("Could not find any reachable collector")
+ raise
+ except e.NoReachableTestHelpers:
+ log.err("Could not find any reachable test helpers")
+ raise
+
+ def find_collector_and_test_helpers(test_name, test_version):
+ # input_files = [u""+x['hash'] for x in input_files]
+ for net_test in provided_net_tests:
+ if net_test['name'] != test_name:
+ continue
+ if net_test['version'] != test_version:
+ continue
+ # XXX remove the notion of policies based on input file hashes
+ # if set(net_test['input-hashes']) != set(input_files):
+ # continue
+ return net_test['collector'], net_test['test-helpers']
+
+ for net_test_loader in net_test_loaders:
+ log.msg("Setting collector and test helpers for %s" %
+ net_test_loader.testName)
+
+ collector, test_helpers = \
+ find_collector_and_test_helpers(
+ test_name=net_test_loader.testName,
+ test_version=net_test_loader.testVersion
+ # input_files=net_test_loader.inputFiles
+ )
+
+ for option, name in net_test_loader.missingTestHelpers:
+ test_helper_address_or_settings = test_helpers[name]
+ net_test_loader.localOptions[option] = test_helper_address_or_settings
+ net_test_loader.testHelpers[option] = test_helper_address_or_settings
+
+ if not net_test_loader.collector and not no_collector:
+ log.debug("Using collector {0}".format(collector))
+ net_test_loader.collector = collector
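
A worked example of sort_addresses_by_priority (addresses are illustrative, and this assumes guess_backend_type classifies the httpo:// address as onion): with an onion primary, preferred_backend set to "https", and an https alternate available, the https alternate is promoted to the front, the onion primary is demoted into the alternates, and the result comes back ordered https, onion, cloudfront.

    primary = 'httpo://example0123456789ab.onion'
    alternates = [
        {'type': 'https', 'address': 'https://collector.example'},
        {'type': 'cloudfront', 'address': 'https://d123.cloudfront.net',
         'front': 'a0.example.net'},
    ]
    # sort_addresses_by_priority(primary, alternates, 'https') returns:
    #   [{'type': 'https', 'address': 'https://collector.example'},
    #    {'type': 'onion', 'address': 'httpo://example0123456789ab.onion'},
    #    {'type': 'cloudfront', 'address': ..., 'front': ...}]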
diff --git a/ooni/deck/deck.py b/ooni/deck/deck.py
new file mode 100644
index 0000000..b11e174
--- /dev/null
+++ b/ooni/deck/deck.py
@@ -0,0 +1,386 @@
+import os
+from copy import deepcopy
+
+import yaml
+from twisted.internet import defer
+from twisted.python.filepath import FilePath
+
+from ooni import errors as e
+from ooni.backend_client import BouncerClient, CollectorClient
+from ooni.backend_client import get_preferred_bouncer
+from ooni.deck.backend import lookup_collector_and_test_helpers
+from ooni.deck.legacy import convert_legacy_deck
+from ooni.deck.store import input_store
+from ooni.geoip import probe_ip
+from ooni.nettest import NetTestLoader, nettest_to_path
+from ooni.results import generate_summary
+from ooni.settings import config
+from ooni.utils import log, generate_filename
+
+
+def resolve_file_path(v, prepath=None):
+ if v.startswith("$"):
+ # This raises InputNotFound and we let it carry onto the caller
+ return input_store.get(v[1:])["filepath"]
+ elif prepath is not None and (not os.path.isabs(v)):
+ return FilePath(prepath).preauthChild(v).path
+ return v
+
+
+def options_to_args(options):
+ args = []
+ for k, v in options.items():
+ if v is None:
+ continue
+ if v == False or v == 0:
+ continue
+ if (len(k)) == 1:
+ args.append('-'+k)
+ else:
+ args.append('--'+k)
+ if isinstance(v, bool) or isinstance(v, int):
+ continue
+ args.append(v)
+ return args
+
+
+def normalize_options(options):
+ """
+ Takes some options that have a mixture of - and _ and returns the
+ equivalent options with only '_'.
+ """
+ normalized_opts = {}
+ for k, v in options.items():
+ normalized_key = k.replace('-', '_')
+ assert normalized_key not in normalized_opts, "The key {0} cannot be normalized".format(k)
+ normalized_opts[normalized_key] = v
+ return normalized_opts
+
+
+class UnknownTaskKey(Exception):
+ pass
+
+
+class MissingTaskDataKey(Exception):
+ pass
+
+
+class NGDeck(object):
+ def __init__(self,
+ deck_data=None,
+ deck_path=None,
+ global_options={},
+ no_collector=False,
+ arbitrary_paths=False):
+ # Used to resolve relative paths inside of decks.
+ self.deck_directory = os.getcwd()
+ self.requires_tor = False
+ self.no_collector = no_collector
+ self.name = ""
+ self.description = ""
+ self.schedule = None
+
+ self.metadata = {}
+ self.global_options = normalize_options(global_options)
+ self.bouncer = None
+
+ self._arbitrary_paths = arbitrary_paths
+ self._is_setup = False
+
+ self._measurement_path = FilePath(config.measurements_directory)
+ self._tasks = []
+ self.task_ids = []
+
+ if deck_path is not None:
+ self.open(deck_path)
+ elif deck_data is not None:
+ self.load(deck_data)
+
+ def open(self, deck_path, global_options=None):
+ with open(deck_path) as fh:
+ deck_data = yaml.safe_load(fh)
+ self.deck_directory = os.path.abspath(os.path.dirname(deck_path))
+ self.load(deck_data, global_options)
+
+ def load(self, deck_data, global_options=None):
+ if global_options is not None:
+ self.global_options = normalize_options(global_options)
+
+ if isinstance(deck_data, list):
+ deck_data = convert_legacy_deck(deck_data)
+
+ self.name = deck_data.pop("name", "Un-named Deck")
+ self.description = deck_data.pop("description", "No description")
+
+ bouncer_address = self.global_options.get('bouncer',
+ deck_data.pop("bouncer", None))
+ if bouncer_address is None:
+ self.bouncer = get_preferred_bouncer()
+ elif isinstance(bouncer_address, dict):
+ self.bouncer = BouncerClient(settings=bouncer_address)
+ else:
+ self.bouncer = BouncerClient(bouncer_address)
+
+ self.schedule = deck_data.pop("schedule", None)
+
+ tasks_data = deck_data.pop("tasks", [])
+ for key, metadata in deck_data.items():
+ self.metadata[key] = metadata
+
+ # We override the task metadata with the global options if present
+ self.metadata.update(self.global_options)
+
+ for task_data in tasks_data:
+ deck_task = DeckTask(
+ data=task_data,
+ parent_metadata=self.metadata,
+ global_options=self.global_options,
+ cwd=self.deck_directory,
+ arbitrary_paths=self._arbitrary_paths
+ )
+ if deck_task.requires_tor:
+ self.requires_tor = True
+ if (deck_task.requires_bouncer and
+ self.bouncer.backend_type == "onion"):
+ self.requires_tor = True
+ self._tasks.append(deck_task)
+ self.task_ids.append(deck_task.id)
+
+ if self.metadata.get('no_collector', False):
+ self.no_collector = True
+
+ @property
+ def tasks(self):
+ return self._tasks
+
+ def write(self, fh):
+ """
+ Writes a properly formatted deck to the supplied file handle.
+ :param fh: an open file handle
+ :return:
+ """
+ deck_data = {
+ "name": self.name,
+ "description": self.description,
+ "tasks": [task.data for task in self._tasks]
+ }
+ if self.schedule is not None:
+ deck_data["schedule"] = self.schedule
+ for key, value in self.metadata.items():
+ deck_data[key] = value
+
+ fh.write("---\n")
+ yaml.safe_dump(deck_data, fh, default_flow_style=False)
+
+ @defer.inlineCallbacks
+ def query_bouncer(self):
+ preferred_backend = config.advanced.get(
+ "preferred_backend", "onion"
+ )
+ log.msg("Looking up collector and test helpers with {0}".format(
+ self.bouncer.base_address)
+ )
+ net_test_loaders = []
+ for task in self._tasks:
+ if task.type == "ooni":
+ net_test_loaders.append(task.ooni["net_test_loader"])
+
+ yield lookup_collector_and_test_helpers(
+ net_test_loaders,
+ self.bouncer,
+ preferred_backend,
+ self.no_collector
+ )
+ defer.returnValue(net_test_loaders)
+
+ def _measurement_completed(self, result, task):
+ if not task.output_path:
+ measurement_id = task.id
+ measurement_dir = self._measurement_path.child(measurement_id)
+ measurement_dir.child("measurements.njson.progress").moveTo(
+ measurement_dir.child("measurements.njson")
+ )
+ generate_summary(
+ measurement_dir.child("measurements.njson").path,
+ measurement_dir.child("summary.json").path
+ )
+ measurement_dir.child("running.pid").remove()
+
+ def _measurement_failed(self, failure, task):
+ if not task.output_path:
+ # XXX do we also want to delete measurements.njson.progress?
+ measurement_id = task.id
+ measurement_dir = self._measurement_path.child(measurement_id)
+ measurement_dir.child("running.pid").remove()
+ return failure
+
+ def _run_ooni_task(self, task, director):
+ net_test_loader = task.ooni["net_test_loader"]
+ test_details = task.ooni["test_details"]
+
+ report_filename = task.output_path
+ if not task.output_path:
+ measurement_id = task.id
+
+ measurement_dir = self._measurement_path.child(measurement_id)
+ measurement_dir.createDirectory()
+
+ report_filename = measurement_dir.child("measurements.njson.progress").path
+ pid_file = measurement_dir.child("running.pid")
+
+ with pid_file.open('w') as out_file:
+ out_file.write("{0}".format(os.getpid()))
+
+ d = director.start_net_test_loader(
+ net_test_loader,
+ report_filename,
+ collector_client=net_test_loader.collector,
+ test_details=test_details
+ )
+ d.addCallback(self._measurement_completed, task)
+ d.addErrback(self._measurement_failed, task)
+ return d
+
+ @defer.inlineCallbacks
+ def setup(self):
+ """
+ This method needs to be called before you are able to run a deck.
+ """
+ for task in self._tasks:
+ yield task.setup()
+ self._is_setup = True
+
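+ # Typical driver flow (informal sketch; the command line client in
+ # ooni/ui/cli.py follows this pattern):
+ #   deck = NGDeck()
+ #   deck.open(deck_path)
+ #   yield deck.setup()
+ #   yield deck.run(director)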
+ @defer.inlineCallbacks
+ def run(self, director):
+ assert self._is_setup, "You must call setup() before you can run a " \
+ "deck"
+ if self.requires_tor:
+ yield director.start_tor()
+ yield self.query_bouncer()
+ for task in self._tasks:
+ if task._skip is True:
+ log.msg("Skipping running {0}".format(task.name))
+ continue
+ if task.type == "ooni":
+ yield self._run_ooni_task(task, director)
+ self._is_setup = False
+
+
+class DeckTask(object):
+ _metadata_keys = ["name"]
+ _supported_tasks = ["ooni"]
+
+ def __init__(self, data,
+ parent_metadata={},
+ global_options={},
+ cwd=None,
+ arbitrary_paths=False):
+
+ self.parent_metadata = normalize_options(parent_metadata)
+ self.global_options = global_options
+ self.cwd = cwd
+ self.data = deepcopy(data)
+
+ self._skip = False
+
+ self.id = ""
+
+ self.type = None
+ self.metadata = {}
+ self.requires_tor = False
+ self.requires_bouncer = False
+
+ # If this is set to True a deck can specify any path. It should only
+ # be used with trusted decks or when a deck is created
+ # programmatically to run a test specified from the command line.
+ self._arbitrary_paths = arbitrary_paths
+
+ self.ooni = {
+ 'bouncer_client': None,
+ 'test_details': {}
+ }
+ self.output_path = None
+
+ self._load(data)
+
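+ # Option lookup precedence used throughout this class: an explicit
+ # command line option (global_options) wins over the task's own
+ # data, which in turn wins over metadata inherited from the deck,
+ # with `default` as the final fallback.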
+ def _get_option(self, name, task_data, default=None):
+ try:
+ return self.global_options[name]
+ except KeyError:
+ return task_data.pop(name,
+ self.parent_metadata.get(name, default))
+
+ def _load_ooni(self, task_data):
+ required_keys = ["test_name"]
+ for required_key in required_keys:
+ if required_key not in task_data:
+ raise MissingTaskDataKey(required_key)
+
+ # This raises e.NetTestNotFound; we let it propagate to the caller
+ nettest_path = nettest_to_path(task_data.pop("test_name"),
+ self._arbitrary_paths)
+
+ annotations = self._get_option('annotations', task_data, {})
+ collector_address = self._get_option('collector', task_data, None)
+
+ try:
+ self.output_path = self.global_options['reportfile']
+ except KeyError:
+ self.output_path = task_data.pop('reportfile', None)
+
+ if task_data.get('no-collector', False):
+ collector_address = None
+
+ net_test_loader = NetTestLoader(
+ options_to_args(task_data),
+ annotations=annotations,
+ test_file=nettest_path
+ )
+
+ if isinstance(collector_address, dict):
+ net_test_loader.collector = CollectorClient(
+ settings=collector_address
+ )
+ elif collector_address is not None:
+ net_test_loader.collector = CollectorClient(
+ collector_address
+ )
+
+ if (net_test_loader.collector is not None and
+ net_test_loader.collector.backend_type == "onion"):
+ self.requires_tor = True
+
+ try:
+ net_test_loader.checkOptions()
+ if net_test_loader.requiresTor:
+ self.requires_tor = True
+ except e.MissingTestHelper:
+ self.requires_bouncer = True
+
+ self.ooni['net_test_loader'] = net_test_loader
+
+ @defer.inlineCallbacks
+ def _setup_ooni(self):
+ yield probe_ip.lookup()
+ for input_file in self.ooni['net_test_loader'].inputFiles:
+ file_path = resolve_file_path(input_file['filename'], self.cwd)
+ input_file['test_options'][input_file['key']] = file_path
+ self.ooni['test_details'] = self.ooni['net_test_loader'].getTestDetails()
+ self.id = generate_filename(self.ooni['test_details'])
+
+ def setup(self):
+ return getattr(self, "_setup_"+self.type)()
+
+ def _load(self, data):
+ for key in self._metadata_keys:
+ try:
+ self.metadata[key] = data.pop(key)
+ except KeyError:
+ continue
+
+ # After the metadata keys have been removed, a task must contain
+ # exactly one remaining key: the task type (currently only "ooni").
+ task_type, task_data = data.popitem()
+ if task_type not in self._supported_tasks:
+ raise UnknownTaskKey(task_type)
+ self.type = task_type
+ getattr(self, "_load_"+task_type)(task_data)
+ assert len(data) == 0, "Got an unidentified key"
diff --git a/ooni/deck/legacy.py b/ooni/deck/legacy.py
new file mode 100644
index 0000000..cf0a30b
--- /dev/null
+++ b/ooni/deck/legacy.py
@@ -0,0 +1,65 @@
+class NotAnOption(Exception):
+ pass
+
+def subargs_to_options(subargs):
+ options = {}
+
+ def parse_option_name(arg):
+ if arg.startswith("--"):
+ return arg[2:]
+ elif arg.startswith("-"):
+ return arg[1:]
+ raise NotAnOption
+
+ subargs = iter(reversed(subargs))
+ for subarg in subargs:
+ try:
+ value = subarg
+ name = parse_option_name(subarg)
+ options[name] = True
+ except NotAnOption:
+ try:
+ name = parse_option_name(subargs.next())
+ options[name] = value
+ except StopIteration:
+ break
+
+ return options
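+
+# A quick example of the reverse-and-pair strategy above (hypothetical
+# values):
+#   subargs_to_options(["-f", "urls.txt", "--verbose"])
+#   => {"f": "urls.txt", "verbose": True}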
+
+def convert_legacy_deck(deck_data):
+ """
+ I take a legacy deck list and convert it to the new deck format.
+
+ :param deck_data: in the legacy format
+ :return: deck_data in the new format
+ """
+ assert isinstance(deck_data, list), "Legacy decks are lists"
+ new_deck_data = {}
+ new_deck_data["name"] = "Legacy deck"
+ new_deck_data["description"] = "This is a legacy deck converted to the " \
+ "new format"
+ new_deck_data["bouncer"] = None
+ new_deck_data["tasks"] = []
+ for deck_item in deck_data:
+ deck_task = {"ooni": {}}
+
+ options = deck_item["options"]
+ deck_task["ooni"]["test_name"] = options.pop("test_file")
+ deck_task["ooni"]["annotations"] = options.pop("annotations", {})
+ deck_task["ooni"]["collector"] = options.pop("collector", None)
+
+ # XXX here we end up picking only the last non-None bouncer_address
+ bouncer_address = options.pop("bouncer", None)
+ if bouncer_address is not None:
+ new_deck_data["bouncer"] = bouncer_address
+
+ subargs = options.pop("subargs", [])
+ for name, value in subargs_to_options(subargs).items():
+ deck_task["ooni"][name] = value
+
+ for name, value in options.items():
+ deck_task["ooni"][name] = value
+
+ new_deck_data["tasks"].append(deck_task)
+
+ return new_deck_data
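+
+# Rough sketch of the conversion performed above (hypothetical input):
+#   [{"options": {"test_file": "manipulation/http_invalid_request_line",
+#                 "subargs": []}}]
+# becomes
+#   {"name": "Legacy deck", "description": "...", "bouncer": None,
+#    "tasks": [{"ooni": {"test_name":
+#        "manipulation/http_invalid_request_line",
+#        "annotations": {}, "collector": None}}]}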
diff --git a/ooni/deck/store.py b/ooni/deck/store.py
new file mode 100644
index 0000000..05c0b95
--- /dev/null
+++ b/ooni/deck/store.py
@@ -0,0 +1,128 @@
+import csv
+import json
+from copy import deepcopy
+
+from twisted.internet import defer
+from twisted.python.filepath import FilePath
+
+from ooni.otime import timestampNowISO8601UTC
+from ooni.resources import check_for_update
+from ooni.settings import config
+
+class InputNotFound(Exception):
+ pass
+
+class InputStore(object):
+ def __init__(self):
+ self.path = FilePath(config.inputs_directory)
+ self.resources = FilePath(config.resources_directory)
+ self._cache_stale = True
+ self._cache = {}
+
+ @defer.inlineCallbacks
+ def update_url_lists(self, country_code):
+ countries = ["global"]
+ if country_code != "ZZ":
+ countries.append(country_code)
+
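+ # For each country code two files are (re)generated under the
+ # inputs directory:
+ #   data/citizenlab-test-lists_<cc>.txt          one URL per line
+ #   descriptors/citizenlab-test-lists_<cc>.desc  JSON descriptor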
+ for cc in countries:
+ in_file = self.resources.child("citizenlab-test-lists").child("{0}.csv".format(cc))
+ if not in_file.exists():
+ yield check_for_update(country_code)
+
+ if not in_file.exists():
+ continue
+
+ # XXX maybe move this to some utility function.
+ # It's duplicated in oonideckgen.
+ data_fname = "citizenlab-test-lists_{0}.txt".format(cc)
+ desc_fname = "citizenlab-test-lists_{0}.desc".format(cc)
+
+ out_file = self.path.child("data").child(data_fname)
+ out_fh = out_file.open('w')
+ with in_file.open('r') as in_fh:
+ csvreader = csv.reader(in_fh)
+ csvreader.next()  # skip the CSV header row
+ for row in csvreader:
+ out_fh.write("%s\n" % row[0])
+ out_fh.close()
+
+ desc_file = self.path.child("descriptors").child(desc_fname)
+ with desc_file.open('w') as out_fh:
+ if cc == "global":
+ name = "List of globally accessed websites"
+ else:
+ # XXX resolve this to a human readable country name
+ country_name = cc
+ name = "List of websites for {0}".format(country_name)
+ json.dump({
+ "name": name,
+ "filepath": out_file.path,
+ "last_updated": timestampNowISO8601UTC(),
+ "id": "citizenlab_{0}_urls".format(cc),
+ "type": "file/url"
+ }, out_fh)
+ self._cache_stale = True
+
+ @defer.inlineCallbacks
+ def create(self, country_code=None):
+ # XXX This is a hack to avoid race conditions in testing, because this
+ # object is a singleton and config can have a custom home directory
+ # passed at runtime.
+ self.path = FilePath(config.inputs_directory)
+ self.resources = FilePath(config.resources_directory)
+
+ self.path.child("descriptors").makedirs(ignoreExistingDirectory=True)
+ self.path.child("data").makedirs(ignoreExistingDirectory=True)
+ yield self.update_url_lists(country_code)
+
+ @defer.inlineCallbacks
+ def update(self, country_code=None):
+ # XXX why do we distinguish between create and update?
+ yield self.create(country_code)
+
+ def _update_cache(self):
+ descs = self.path.child("descriptors")
+ if not descs.exists():
+ self._cache = {}
+ return
+
+ for fn in descs.listdir():
+ with descs.child(fn).open("r") as in_fh:
+ input_desc = json.load(in_fh)
+ self._cache[input_desc.pop("id")] = input_desc
+ self._cache_stale = False
+ return
+
+ def list(self):
+ if self._cache_stale:
+ self._update_cache()
+ return deepcopy(self._cache)
+
+ def get(self, input_id):
+ if self._cache_stale:
+ self._update_cache()
+ try:
+ input_desc = self._cache[input_id]
+ except KeyError:
+ raise InputNotFound(input_id)
+ return input_desc
+
+ def getContent(self, input_id):
+ input_desc = self.get(input_id)
+ with open(input_desc["filepath"]) as fh:
+ return fh.read()
+
+
+class DeckStore(object):
+ def __init__(self):
+ self.path = FilePath(config.decks_directory)
+
+ def update(self):
+ pass
+
+ def get(self):
+ pass
+
+
+input_store = InputStore()
diff --git a/ooni/director.py b/ooni/director.py
index f6311ac..e3df907 100644
--- a/ooni/director.py
+++ b/ooni/director.py
@@ -11,7 +11,7 @@ from ooni.utils.net import randomFreePort
from ooni.nettest import NetTest, getNetTestInformation
from ooni.settings import config
from ooni.nettest import normalizeTestName
-from ooni.deck import InputStore
+from ooni.deck.store import InputStore
from ooni.geoip import probe_ip
from ooni.agent.scheduler import run_system_tasks
diff --git a/ooni/nettest.py b/ooni/nettest.py
index 1978c68..4ea3329 100644
--- a/ooni/nettest.py
+++ b/ooni/nettest.py
@@ -4,10 +4,11 @@ import time
import sys
from twisted.internet import defer
+from twisted.python.filepath import FilePath
from twisted.trial.runner import filenameToModule
from twisted.python import usage, reflect
-from ooni import __version__ as ooniprobe_version
+from ooni import __version__ as ooniprobe_version, errors
from ooni import otime
from ooni.tasks import Measurement
from ooni.utils import log, sanitize_options, randomStr
@@ -838,3 +839,43 @@ class NetTestCase(object):
def __repr__(self):
return "<%s inputs=%s>" % (self.__class__, self.inputs)
+
+
+def nettest_to_path(path, allow_arbitrary_paths=False):
+ """
+ Takes as input either a path or a nettest name.
+
+ The nettest name may optionally be prefixed by the category of the
+ nettest (blocking, experimental, manipulation or third_party).
+
+ Args:
+
+ allow_arbitrary_paths:
+ also allow paths outside of the nettest_directory.
+
+ Returns:
+
+ full path to the nettest file.
+ """
+ if allow_arbitrary_paths and os.path.exists(path):
+ return path
+
+ test_name = path.rsplit("/", 1)[-1]
+ test_categories = [
+ "blocking",
+ "experimental",
+ "manipulation",
+ "third_party"
+ ]
+ nettest_dir = FilePath(config.nettest_directory)
+ found_path = None
+ for category in test_categories:
+ p = nettest_dir.preauthChild(os.path.join(category, test_name) + '.py')
+ if p.exists():
+ if found_path is not None:
+ raise Exception("Found two tests named %s" % test_name)
+ found_path = p.path
+
+ if not found_path:
+ raise errors.NetTestNotFound(path)
+ return found_path
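+
+# Example (sketch): nettest_to_path("web_connectivity") looks for
+# web_connectivity.py under each of the four category directories and,
+# assuming it exists only under "blocking", returns
+# <nettest_directory>/blocking/web_connectivity.py.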
diff --git a/ooni/tests/test_deck.py b/ooni/tests/test_deck.py
index 7c18647..1bc6097 100644
--- a/ooni/tests/test_deck.py
+++ b/ooni/tests/test_deck.py
@@ -5,16 +5,17 @@ from copy import deepcopy
import yaml
-from mock import patch, MagicMock
+from mock import patch
from twisted.internet import defer
from twisted.trial import unittest
from hashlib import sha256
from ooni import errors
-from ooni.deck import input_store, lookup_collector_and_test_helpers
-from ooni.deck import nettest_to_path, NGDeck
-from ooni.deck import convert_legacy_deck
+from ooni.deck.store import input_store
+from ooni.deck.backend import lookup_collector_and_test_helpers
+from ooni.deck.deck import nettest_to_path, NGDeck
+from ooni.deck.legacy import convert_legacy_deck
from ooni.tests.bases import ConfigTestCase
from ooni.tests.mocks import MockBouncerClient, MockCollectorClient
@@ -123,12 +124,12 @@ class TestDeck(BaseTestCase, ConfigTestCase):
global_options['collector'].replace("httpo://", "http://")
)
- @patch('ooni.deck.BouncerClient', MockBouncerClient)
- @patch('ooni.deck.CollectorClient', MockCollectorClient)
+ @patch('ooni.deck.deck.BouncerClient', MockBouncerClient)
+ @patch('ooni.deck.deck.CollectorClient', MockCollectorClient)
+ @patch('ooni.deck.backend.CollectorClient', MockCollectorClient)
@defer.inlineCallbacks
def test_lookup_test_helpers_and_collector(self):
deck = NGDeck()
- deck.bouncer = MockBouncerClient(FAKE_BOUNCER_ADDRESS)
deck.open(self.deck_file)
self.assertEqual(
@@ -139,7 +140,7 @@ class TestDeck(BaseTestCase, ConfigTestCase):
yield lookup_collector_and_test_helpers(
net_test_loaders=[deck.tasks[0].ooni['net_test_loader']],
preferred_backend='onion',
- bouncer=deck.bouncer
+ bouncer=MockBouncerClient()
)
self.assertEqual(
@@ -179,8 +180,9 @@ class TestDeck(BaseTestCase, ConfigTestCase):
nettest_to_path,
"invalid_test")
- @patch('ooni.deck.BouncerClient', MockBouncerClient)
- @patch('ooni.deck.CollectorClient', MockCollectorClient)
+ @patch('ooni.deck.deck.BouncerClient', MockBouncerClient)
+ @patch('ooni.deck.deck.CollectorClient', MockCollectorClient)
+ @patch('ooni.deck.backend.CollectorClient', MockCollectorClient)
@defer.inlineCallbacks
def test_lookup_test_helpers_and_collector_cloudfront(self):
self.config.advanced.preferred_backend = "cloudfront"
@@ -194,7 +196,7 @@ class TestDeck(BaseTestCase, ConfigTestCase):
yield lookup_collector_and_test_helpers(
net_test_loaders=net_test_loaders ,
preferred_backend='cloudfront',
- bouncer=deck.bouncer
+ bouncer=MockBouncerClient()
)
self.assertEqual(
@@ -211,8 +213,9 @@ class TestDeck(BaseTestCase, ConfigTestCase):
'127.0.0.1'
)
- @patch('ooni.deck.BouncerClient', MockBouncerClient)
- @patch('ooni.deck.CollectorClient', MockCollectorClient)
+ @patch('ooni.deck.deck.BouncerClient', MockBouncerClient)
+ @patch('ooni.deck.deck.CollectorClient', MockCollectorClient)
+ @patch('ooni.deck.backend.CollectorClient', MockCollectorClient)
@defer.inlineCallbacks
def test_lookup_test_helpers_and_collector_https(self):
self.config.advanced.preferred_backend = "https"
@@ -228,7 +231,7 @@ class TestDeck(BaseTestCase, ConfigTestCase):
yield lookup_collector_and_test_helpers(
net_test_loaders=net_test_loaders,
preferred_backend='https',
- bouncer=deck.bouncer
+ bouncer=MockBouncerClient()
)
self.assertEqual(
diff --git a/ooni/tests/test_txscapy.py b/ooni/tests/test_txscapy.py
index 0332fcf..613bbb5 100644
--- a/ooni/tests/test_txscapy.py
+++ b/ooni/tests/test_txscapy.py
@@ -18,9 +18,6 @@ class TestTxScapy(unittest.TestCase):
def tearDown(self):
self.scapy_factory.connectionLost(None)
- def test_pcapdnet_installed(self):
- assert txscapy.pcapdnet_installed() is True
-
def test_send_packet_no_answer(self):
from scapy.all import IP, TCP
diff --git a/ooni/ui/cli.py b/ooni/ui/cli.py
index 2f5c467..2b5d844 100644
--- a/ooni/ui/cli.py
+++ b/ooni/ui/cli.py
@@ -235,7 +235,8 @@ def setupCollector(global_options, collector_client):
return collector_client
def createDeck(global_options, url=None):
- from ooni.deck import NGDeck, subargs_to_options
+ from ooni.deck import NGDeck
+ from ooni.deck.legacy import subargs_to_options
if url:
log.msg("Creating deck for: %s" % (url))
diff --git a/ooni/utils/txscapy.py b/ooni/utils/txscapy.py
index 27b68bd..278023b 100644
--- a/ooni/utils/txscapy.py
+++ b/ooni/utils/txscapy.py
@@ -4,6 +4,7 @@ import random
from twisted.internet import fdesc
from twisted.internet import reactor
from twisted.internet import defer, abstract
+
from scapy.config import conf
from scapy.all import RandShort, IP, IPerror, ICMP, ICMPerror, TCP, TCPerror, UDP, UDPerror
@@ -15,58 +16,45 @@ from ooni.utils.net import getDefaultIface, getAddresses
from ooni.settings import config
-def pcapdnet_installed():
- """
- Checks to see if libdnet or libpcap are installed and set the according
- variables.
-
- Returns:
-
- True
- if pypcap and libdnet are installed
-
- False
- if one of the two is absent
- """
- # In debian libdnet is called dumbnet instead of dnet, but scapy is
- # expecting "dnet" so we try and import it under such name.
- try:
- import dumbnet
+# Check whether libdnet or libpcap are installed and set the
+# corresponding variables.
- sys.modules['dnet'] = dumbnet
- except ImportError:
- pass
+# In debian libdnet is called dumbnet instead of dnet, but scapy is
+# expecting "dnet" so we try and import it under such name.
+try:
+ import dumbnet
- try:
- conf.use_pcap = True
- conf.use_dnet = True
- from scapy.arch import pcapdnet
+ sys.modules['dnet'] = dumbnet
+except ImportError:
+ pass
- config.pcap_dnet = True
+try:
+ conf.use_pcap = True
+ conf.use_dnet = True
+ from scapy.arch import pcapdnet
- except ImportError as e:
- log.err(e.message + ". Pypcap or dnet are not properly installed. Certain tests may not work.")
- config.pcap_dnet = False
- conf.use_pcap = False
- conf.use_dnet = False
+ config.pcap_dnet = True
- # This is required for unix systems that are different than linux (OSX for
- # example) since scapy explicitly wants pcap and libdnet installed for it
- # to work.
- try:
- from scapy.arch import pcapdnet
- except ImportError:
- log.err("Your platform requires having libdnet and libpcap installed.")
- raise LibraryNotInstalledError
+except ImportError as e:
+ log.err(e.message + ". Pypcap or dnet are not properly installed. Certain tests may not work.")
+ config.pcap_dnet = False
+ conf.use_pcap = False
+ conf.use_dnet = False
- return config.pcap_dnet
+# This is required for Unix systems other than Linux (OS X, for
+# example), since scapy explicitly wants pcap and libdnet installed
+# for it to work.
+try:
+ from scapy.arch import pcapdnet
+except ImportError:
+ log.err("Your platform requires having libdnet and libpcap installed.")
+ raise LibraryNotInstalledError
+_PCAP_DNET_INSTALLED = config.pcap_dnet
-if pcapdnet_installed():
+if _PCAP_DNET_INSTALLED:
from scapy.all import PcapWriter
-
else:
-
class DummyPcapWriter:
def __init__(self, pcap_filename, *arg, **kw):
log.err("Initializing DummyPcapWriter. We will not actually write to a pcapfile")
@@ -79,7 +67,6 @@ else:
from scapy.all import Gen, SetGen, MTU
-
class ScapyFactory(abstract.FileDescriptor):
"""
Inspired by muxTCP scapyLink: