commit cee9d833edca688429e5323426a1b9ef43050d46 Author: Arturo Filastò arturo@filasto.net Date: Fri Jul 22 19:49:41 2016 +0200
Start outlining update mechanism for inputs
* Expose only simple options in web UI --- ooni/deck.py | 109 +++++++++++++++++++++-- ooni/director.py | 5 ++ ooni/nettest.py | 34 +++++--- ooni/nettests/blocking/web_connectivity.py | 6 ++ ooni/otime.py | 7 ++ ooni/resources/__init__.py | 19 +++- ooni/resources/update.py | 134 ++++++++++++++++++++++++++++- ooni/settings.py | 2 +- ooni/ui/cli.py | 1 - ooni/ui/web/client/index.html | 2 +- 10 files changed, 294 insertions(+), 25 deletions(-)
diff --git a/ooni/deck.py b/ooni/deck.py index 1e1d580..9f17530 100644 --- a/ooni/deck.py +++ b/ooni/deck.py @@ -1,21 +1,27 @@ # -*- coding: utf-8 -*- +import csv +import os +import yaml +import json
+from hashlib import sha256 +from datetime import datetime from ooni.backend_client import CollectorClient, BouncerClient from ooni.backend_client import WebConnectivityClient, guess_backend_type from ooni.nettest import NetTestLoader from ooni.settings import config -from ooni.utils import log, onion + +from ooni.otime import timestampNowISO8601UTC + +from ooni.resources.update import check_for_update + +from ooni.utils import log from ooni import constants from ooni import errors as e
from twisted.python.filepath import FilePath from twisted.internet import defer
-import os -import yaml -import json -from hashlib import sha256 -
class InputFile(object): def __init__(self, input_hash, base_path=config.inputs_directory): @@ -411,3 +417,94 @@ class Deck(InputFile): raise e.UnableToLoadDeckInput
i['test_options'][i['key']] = input_file.cached_file + + +class InputStore(object): + def __init__(self): + self.path = FilePath(config.inputs_directory) + self.resources = FilePath(config.resources_directory) + + @defer.inlineCallbacks + def update_url_lists(self, country_code): + countries = ["global"] + if country_code == "ZZ": + country_code = None + else: + countries.append(country_code) + + for cc in countries: + in_file = self.resources.child("citizenlab-test-lists").child("{0}.csv".format(cc)) + if not in_file.exists(): + yield check_for_update(country_code) + + if not in_file.exists(): + continue + + # XXX maybe move this to some utility function. + # It's duplicated in oonideckgen. + data_fname = "citizenlab-test-lists_{0}.txt".format(cc) + desc_fname = "citizenlab-test-lists_{0}.desc".format(cc) + + out_file = self.path.child("data").child(data_fname) + out_fh = out_file.open('w') + with in_file.open('r') as in_fh: + csvreader = csv.reader(in_fh) + csvreader.next() + for row in csvreader: + out_fh.write("%s\n" % row[0]) + out_fh.close() + + desc_file = self.path.child("descriptors").child(desc_fname) + with desc_file.open('w') as out_fh: + if cc == "global": + name = "List of globally accessed websites" + else: + # XXX resolve this to a human readable country name + country_name = cc + name = "List of websites for {0}".format(country_name) + json.dump({ + "name": name, + "filepath": out_file.path, + "last_updated": timestampNowISO8601UTC(), + "id": "citizenlab_test_lists_{0}_txt".format(cc), + "type": "file/url" + }, out_fh) + + @defer.inlineCallbacks + def create(self, country_code=None): + self.path.child("descriptors").makedirs(ignoreExistingDirectory=True) + self.path.child("data").makedirs(ignoreExistingDirectory=True) + yield self.update_url_lists(country_code) + + @defer.inlineCallbacks + def update(self, country_code=None): + yield self.update_url_lists(country_code) + + def list(self): + inputs = [] + descs = self.path.child("descriptors") + if 
not descs.exists(): + return inputs + + for fn in descs.listdir(): + with descs.child(fn).open("r") as in_fh: + inputs.append(json.load(in_fh)) + return inputs + +class DeckStore(object): + def __init__(self): + self.path = FilePath(config.decks_directory) + + def update(self): + pass + + def get(self): + pass + +class NGInput(object): + def __init__(self, input_name): + pass + +class NGDeck(object): + def __init__(self, deck_path): + pass diff --git a/ooni/director.py b/ooni/director.py index 9897ffd..d39a11b 100644 --- a/ooni/director.py +++ b/ooni/director.py @@ -9,6 +9,7 @@ from ooni.nettest import NetTest, getNetTestInformation from ooni.settings import config from ooni import errors from ooni.nettest import normalizeTestName +from ooni.deck import InputStore
from ooni.utils.onion import start_tor, connect_to_control_port
@@ -92,6 +93,8 @@ class Director(object): self.allTestsDone = defer.Deferred() self.sniffers = {}
+ self.input_store = InputStore() + def getNetTests(self): nettests = {}
@@ -144,6 +147,8 @@ class Director(object): else: yield config.probe_ip.lookup()
+ yield self.input_store.create(config.probe_ip.geodata["countrycode"]) + @property def measurementSuccessRatio(self): if self.totalMeasurements == 0: diff --git a/ooni/nettest.py b/ooni/nettest.py index 09b0202..d01cf7b 100644 --- a/ooni/nettest.py +++ b/ooni/nettest.py @@ -67,10 +67,12 @@ def getOption(opt_parameter, required_options, type='text'): else: required = False
- return {'description': description, - 'value': default, 'required': required, - 'type': type - } + return { + 'description': description, + 'value': default, + 'required': required, + 'type': type + }
def getArguments(test_class): @@ -119,13 +121,15 @@ def getNetTestInformation(net_test_file): test_class = getTestClassFromFile(net_test_file)
test_id = os.path.basename(net_test_file).replace('.py', '') - information = {'id': test_id, - 'name': test_class.name, - 'description': test_class.description, - 'version': test_class.version, - 'arguments': getArguments(test_class), - 'path': net_test_file, - } + information = { + 'id': test_id, + 'name': test_class.name, + 'description': test_class.description, + 'version': test_class.version, + 'arguments': getArguments(test_class), + 'simple_options': test_class.simpleOptions, + 'path': net_test_file + } return information
@@ -454,7 +458,11 @@ class NetTestState(object): (self.doneTasks, self.tasks)) if self.completedScheduling and \ self.doneTasks == self.tasks: - self.allTasksDone.callback(self.doneTasks) + if self.allTasksDone.called: + log.err("allTasksDone was already called. This is probably a bug.") + else: + self.allTasksDone.callback(self.doneTasks) +
def taskDone(self): """ @@ -706,6 +714,8 @@ class NetTestCase(object): requiresRoot = False requiresTor = False
+ simpleOptions = {} + localOptions = {}
@classmethod diff --git a/ooni/nettests/blocking/web_connectivity.py b/ooni/nettests/blocking/web_connectivity.py index b7deffe..dde6b6f 100644 --- a/ooni/nettests/blocking/web_connectivity.py +++ b/ooni/nettests/blocking/web_connectivity.py @@ -77,6 +77,12 @@ class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest): requiresTor = False followRedirects = True
+ # These are the options to be shown on the GUI + simpleOptions = [ + {"name": "url", "type": "text"}, + {"name": "file", "type": "file/url"} + ] + # Factor used to determine HTTP blockpage detection # the factor 0.7 comes from http://www3.cs.stonybrook.edu/~phillipa/papers/JLFG14.pdf factor = 0.7 diff --git a/ooni/otime.py b/ooni/otime.py index ffa95d0..804d558 100644 --- a/ooni/otime.py +++ b/ooni/otime.py @@ -18,3 +18,10 @@ def timestampNowLongUTC(): Coordinates. """ return datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + +def timestampNowISO8601UTC(): + """ + Returns a timestamp in the ISO 8601 format %Y-%m-%dT%H:%M:%SZ in Universal Time + Coordinates. + """ + return datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") diff --git a/ooni/resources/__init__.py b/ooni/resources/__init__.py index cc1da79..6550887 100644 --- a/ooni/resources/__init__.py +++ b/ooni/resources/__init__.py @@ -1,7 +1,22 @@ -from ooni import __resources_version__ as resources_version +import json + +from twisted.python.filepath import FilePath
-__version__ = "0.2.0" +from ooni import __resources_version__ as resources_version +from ooni.settings import config
ooni_resources_url = ("https://github.com/TheTorProject/ooni-probe/releases" "/download/v{}/" "ooni-resources.tar.gz").format(resources_version) + +def get_download_url(tag_name, filename): + return ("https://github.com/OpenObservatory/ooni-resources/releases" + "/download/{0}/{1}".format(tag_name, filename)) + +def get_current_version(): + manifest = FilePath(config.resources_directory).child("manifest.json") + if not manifest.exists(): + return 0 + with manifest.open("r") as f: + manifest = json.load(f) + return int(manifest["version"]) diff --git a/ooni/resources/update.py b/ooni/resources/update.py index 46bbdf8..b464920 100644 --- a/ooni/resources/update.py +++ b/ooni/resources/update.py @@ -1,13 +1,143 @@ import os +import json import tarfile import tempfile
from twisted.python.filepath import FilePath from twisted.internet import defer -from twisted.web.client import downloadPage +from twisted.web.client import downloadPage, getPage
+from ooni.utils import log from ooni.settings import config -from ooni.resources import ooni_resources_url +from ooni.resources import ooni_resources_url, get_download_url +from ooni.resources import get_current_version + +class UpdateFailure(Exception): + pass + +@defer.inlineCallbacks +def get_latest_version(): + """ + Fetches the latest version of the resources package. + :return: (int) the latest version number + """ + try: + version = yield getPage(get_download_url("latest", "version")) + except Exception as exc: + raise exc + defer.returnValue(int(version.strip())) + + +def get_out_of_date_resources(current_manifest, new_manifest, + country_code=None): + current_res = {} + new_res = {} + for r in current_manifest["resources"]: + current_res[r["path"]] = r + + for r in new_manifest["resources"]: + new_res[r["path"]] = r + + paths_to_delete = [ + current_res[path] for path in list(set(current_res.keys()) - + set(new_res.keys())) + ] + paths_to_update = [] + _resources = FilePath(config.resources_directory) + for path, info in new_res.items(): + if (country_code is not None and + info["country_code"] != "ALL" and + info["country_code"] != country_code): + continue + if current_res[path]["version"] < info["version"]: + paths_to_update.append(info) + else: + pre_path, filename = info["path"].split("/") + # Also perform an update when it doesn't exist on disk, although + # the manifest claims we have a more up to date version. + # This happens if an update by country_code happened and a new + # country code is now required. + if not _resources.child(pre_path).child(filename).exists(): + paths_to_update.append(info) + + return paths_to_update, paths_to_delete + +@defer.inlineCallbacks +def check_for_update(country_code=None): + """ + Checks if we need to update the resources. + If the country_code is specified then only the resources for that + country will be updated/downloaded. + :return: the latest version. 
+ """ + temporary_files = [] + def cleanup(): + # If we fail we need to delete all the temporary files + for _, src_file_path in temporary_files: + src_file_path.remove() + + current_version = get_current_version() + latest_version = yield get_latest_version() + + # We are already at the latest version + if current_version == latest_version: + defer.returnValue(latest_version) + + resources_dir = FilePath(config.resources_directory) + resources_dir.makedirs(ignoreExistingDirectory=True) + current_manifest = resources_dir.child("manifest.json") + + new_manifest = current_manifest.temporarySibling() + new_manifest.alwaysCreate = 0 + + temporary_files.append((current_manifest, new_manifest)) + + try: + yield downloadPage( + get_download_url(latest_version, "manifest.json"), + new_manifest.path + ) + except: + cleanup() + raise UpdateFailure("Failed to download manifest") + + new_manifest_data = json.loads(new_manifest.getContent()) + + to_update = new_manifest_data["resources"] + to_delete = [] + if current_manifest.exists(): + with current_manifest.open("r") as f: + current_manifest_data = json.loads(f) + to_update, to_delete = get_out_of_date_resources( + current_manifest_data, new_manifest_data, country_code) + + try: + for resource in to_update: + pre_path, filename = resource["path"].split("/") + dst_file = resources_dir.child(pre_path).child(filename) + dst_file.parent().makedirs(ignoreExistingDirectory=True) + src_file = dst_file.temporarySibling() + src_file.alwaysCreate = 0 + + temporary_files.append((dst_file, src_file)) + # The paths for the download require replacing "/" with "." 
+ download_url = get_download_url(latest_version, + resource["path"].replace("/", ".")) + print("Downloading {0}".format(download_url)) + yield downloadPage(download_url, src_file.path) + except Exception as exc: + cleanup() + log.exception(exc) + raise UpdateFailure("Failed to download resource {0}".format(resource["path"])) + + for dst_file, src_file in temporary_files: + log.msg("Moving {0} to {1}".format(src_file.path, + dst_file.path)) + src_file.moveTo(dst_file) + + for resource in to_delete: + log.msg("Deleting old resources") + resources_dir.child(resource["path"]).remove()
@defer.inlineCallbacks def download_resources(): diff --git a/ooni/settings.py b/ooni/settings.py index 5245451..0491f6e 100644 --- a/ooni/settings.py +++ b/ooni/settings.py @@ -108,7 +108,7 @@ class OConfig(object):
self.measurements_directory = os.path.join(self.ooni_home, 'measurements') - self.resources_directory = os.path.join(self.data_directory, + self.resources_directory = os.path.join(self.ooni_home, "resources") if self.advanced.report_log_file: self.report_log_file = self.advanced.report_log_file diff --git a/ooni/ui/cli.py b/ooni/ui/cli.py index 2b402c2..7a0036e 100644 --- a/ooni/ui/cli.py +++ b/ooni/ui/cli.py @@ -11,7 +11,6 @@ from twisted.python import usage from twisted.internet import defer
from ooni import errors, __version__ -from ooni.constants import CANONICAL_BOUNCER_ONION from ooni.settings import config from ooni.utils import log
diff --git a/ooni/ui/web/client/index.html b/ooni/ui/web/client/index.html index 3812461..cc45067 100644 --- a/ooni/ui/web/client/index.html +++ b/ooni/ui/web/client/index.html @@ -13,5 +13,5 @@ <app> Loading... </app> - <script type="text/javascript" src="app.bundle.js?06362599078bf7c36b72"></script></body> + <script type="text/javascript" src="app.bundle.js?27ae67e2c74ae4ae9a82"></script></body> </html>
tor-commits@lists.torproject.org