[tor-commits] [ooni-probe/master] Start outlining update mechanism for inputs

art at torproject.org art at torproject.org
Mon Sep 19 12:14:24 UTC 2016


commit cee9d833edca688429e5323426a1b9ef43050d46
Author: Arturo Filastò <arturo at filasto.net>
Date:   Fri Jul 22 19:49:41 2016 +0200

    Start outlining update mechanism for inputs
    
    * Expose only simple options in web UI
---
 ooni/deck.py                               | 109 +++++++++++++++++++++--
 ooni/director.py                           |   5 ++
 ooni/nettest.py                            |  34 +++++---
 ooni/nettests/blocking/web_connectivity.py |   6 ++
 ooni/otime.py                              |   7 ++
 ooni/resources/__init__.py                 |  19 +++-
 ooni/resources/update.py                   | 134 ++++++++++++++++++++++++++++-
 ooni/settings.py                           |   2 +-
 ooni/ui/cli.py                             |   1 -
 ooni/ui/web/client/index.html              |   2 +-
 10 files changed, 294 insertions(+), 25 deletions(-)

diff --git a/ooni/deck.py b/ooni/deck.py
index 1e1d580..9f17530 100644
--- a/ooni/deck.py
+++ b/ooni/deck.py
@@ -1,21 +1,27 @@
 # -*- coding: utf-8 -*-
+import csv
+import os
+import yaml
+import json
 
+from hashlib import sha256
+from datetime import datetime
 from ooni.backend_client import CollectorClient, BouncerClient
 from ooni.backend_client import WebConnectivityClient, guess_backend_type
 from ooni.nettest import NetTestLoader
 from ooni.settings import config
-from ooni.utils import log, onion
+
+from ooni.otime import timestampNowISO8601UTC
+
+from ooni.resources.update import check_for_update
+
+from ooni.utils import log
 from ooni import constants
 from ooni import errors as e
 
 from twisted.python.filepath import FilePath
 from twisted.internet import defer
 
-import os
-import yaml
-import json
-from hashlib import sha256
-
 
 class InputFile(object):
     def __init__(self, input_hash, base_path=config.inputs_directory):
@@ -411,3 +417,94 @@ class Deck(InputFile):
                     raise e.UnableToLoadDeckInput
 
                 i['test_options'][i['key']] = input_file.cached_file
+
+
+class InputStore(object):
+    def __init__(self):
+        self.path = FilePath(config.inputs_directory)
+        self.resources = FilePath(config.resources_directory)
+
+    @defer.inlineCallbacks
+    def update_url_lists(self, country_code):
+        countries = ["global"]
+        if country_code == "ZZ":
+            country_code = None
+        else:
+            countries.append(country_code)
+
+        for cc in countries:
+            in_file = self.resources.child("citizenlab-test-lists").child("{0}.csv".format(cc))
+            if not in_file.exists():
+                yield check_for_update(country_code)
+
+            if not in_file.exists():
+                continue
+
+            # XXX maybe move this to some utility function.
+            # It's duplicated in oonideckgen.
+            data_fname = "citizenlab-test-lists_{0}.txt".format(cc)
+            desc_fname = "citizenlab-test-lists_{0}.desc".format(cc)
+
+            out_file = self.path.child("data").child(data_fname)
+            out_fh = out_file.open('w')
+            with in_file.open('r') as in_fh:
+                csvreader = csv.reader(in_fh)
+                csvreader.next()
+                for row in csvreader:
+                    out_fh.write("%s\n" % row[0])
+            out_fh.close()
+
+            desc_file = self.path.child("descriptors").child(desc_fname)
+            with desc_file.open('w') as out_fh:
+                if cc == "global":
+                    name = "List of globally accessed websites"
+                else:
+                    # XXX resolve this to a human readable country name
+                    country_name = cc
+                    name = "List of websites for {0}".format(country_name)
+                json.dump({
+                    "name": name,
+                    "filepath": out_file.path,
+                    "last_updated": timestampNowISO8601UTC(),
+                    "id": "citizenlab_test_lists_{0}_txt".format(cc),
+                    "type": "file/url"
+                }, out_fh)
+
+    @defer.inlineCallbacks
+    def create(self, country_code=None):
+        self.path.child("descriptors").makedirs(ignoreExistingDirectory=True)
+        self.path.child("data").makedirs(ignoreExistingDirectory=True)
+        yield self.update_url_lists(country_code)
+
+    @defer.inlineCallbacks
+    def update(self, country_code=None):
+        yield self.update_url_lists(country_code)
+
+    def list(self):
+        inputs = []
+        descs = self.path.child("descriptors")
+        if not descs.exists():
+            return inputs
+
+        for fn in descs.listdir():
+            with descs.child(fn).open("r") as in_fh:
+                inputs.append(json.load(in_fh))
+        return inputs
+
+class DeckStore(object):
+    def __init__(self):
+        self.path = FilePath(config.decks_directory)
+
+    def update(self):
+        pass
+
+    def get(self):
+        pass
+
+class NGInput(object):
+    def __init__(self, input_name):
+        pass
+
+class NGDeck(object):
+    def __init__(self, deck_path):
+        pass
diff --git a/ooni/director.py b/ooni/director.py
index 9897ffd..d39a11b 100644
--- a/ooni/director.py
+++ b/ooni/director.py
@@ -9,6 +9,7 @@ from ooni.nettest import NetTest, getNetTestInformation
 from ooni.settings import config
 from ooni import errors
 from ooni.nettest import normalizeTestName
+from ooni.deck import InputStore
 
 from ooni.utils.onion import start_tor, connect_to_control_port
 
@@ -92,6 +93,8 @@ class Director(object):
         self.allTestsDone = defer.Deferred()
         self.sniffers = {}
 
+        self.input_store = InputStore()
+
     def getNetTests(self):
         nettests = {}
 
@@ -144,6 +147,8 @@ class Director(object):
         else:
             yield config.probe_ip.lookup()
 
+        yield self.input_store.create(config.probe_ip.geodata["countrycode"])
+
     @property
     def measurementSuccessRatio(self):
         if self.totalMeasurements == 0:
diff --git a/ooni/nettest.py b/ooni/nettest.py
index 09b0202..d01cf7b 100644
--- a/ooni/nettest.py
+++ b/ooni/nettest.py
@@ -67,10 +67,12 @@ def getOption(opt_parameter, required_options, type='text'):
     else:
         required = False
 
-    return {'description': description,
-            'value': default, 'required': required,
-            'type': type
-            }
+    return {
+        'description': description,
+        'value': default,
+        'required': required,
+        'type': type
+    }
 
 
 def getArguments(test_class):
@@ -119,13 +121,15 @@ def getNetTestInformation(net_test_file):
     test_class = getTestClassFromFile(net_test_file)
 
     test_id = os.path.basename(net_test_file).replace('.py', '')
-    information = {'id': test_id,
-                   'name': test_class.name,
-                   'description': test_class.description,
-                   'version': test_class.version,
-                   'arguments': getArguments(test_class),
-                   'path': net_test_file,
-                   }
+    information = {
+        'id': test_id,
+        'name': test_class.name,
+        'description': test_class.description,
+        'version': test_class.version,
+        'arguments': getArguments(test_class),
+        'simple_options': test_class.simpleOptions,
+        'path': net_test_file
+    }
     return information
 
 
@@ -454,7 +458,11 @@ class NetTestState(object):
                   (self.doneTasks, self.tasks))
         if self.completedScheduling and \
                 self.doneTasks == self.tasks:
-            self.allTasksDone.callback(self.doneTasks)
+            if self.allTasksDone.called:
+                log.err("allTasksDone was already called. This is probably a bug.")
+            else:
+                self.allTasksDone.callback(self.doneTasks)
+
 
     def taskDone(self):
         """
@@ -706,6 +714,8 @@ class NetTestCase(object):
     requiresRoot = False
     requiresTor = False
 
+    simpleOptions = {}
+
     localOptions = {}
 
     @classmethod
diff --git a/ooni/nettests/blocking/web_connectivity.py b/ooni/nettests/blocking/web_connectivity.py
index b7deffe..dde6b6f 100644
--- a/ooni/nettests/blocking/web_connectivity.py
+++ b/ooni/nettests/blocking/web_connectivity.py
@@ -77,6 +77,12 @@ class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest):
     requiresTor = False
     followRedirects = True
 
+    # These are the options to be shown on the GUI
+    simpleOptions = [
+        {"name": "url", "type": "text"},
+        {"name": "file", "type": "file/url"}
+    ]
+
     # Factor used to determine HTTP blockpage detection
     # the factor 0.7 comes from http://www3.cs.stonybrook.edu/~phillipa/papers/JLFG14.pdf
     factor = 0.7
diff --git a/ooni/otime.py b/ooni/otime.py
index ffa95d0..804d558 100644
--- a/ooni/otime.py
+++ b/ooni/otime.py
@@ -18,3 +18,10 @@ def timestampNowLongUTC():
     Coordinates.
     """
     return datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
+
+def timestampNowISO8601UTC():
+    """
+    Returns a timestamp in the format of %Y-%m-%dT%H:%M:%SZ in Universal Time
+    Coordinates.
+    """
+    return datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
diff --git a/ooni/resources/__init__.py b/ooni/resources/__init__.py
index cc1da79..6550887 100644
--- a/ooni/resources/__init__.py
+++ b/ooni/resources/__init__.py
@@ -1,7 +1,22 @@
-from ooni import __resources_version__ as resources_version
+import json
+
+from twisted.python.filepath import FilePath
 
-__version__ = "0.2.0"
+from ooni import __resources_version__ as resources_version
+from ooni.settings import config
 
 ooni_resources_url = ("https://github.com/TheTorProject/ooni-probe/releases"
                       "/download/v{}/"
                       "ooni-resources.tar.gz").format(resources_version)
+
+def get_download_url(tag_name, filename):
+    return ("https://github.com/OpenObservatory/ooni-resources/releases"
+            "/download/{0}/{1}".format(tag_name, filename))
+
+def get_current_version():
+    manifest = FilePath(config.resources_directory).child("manifest.json")
+    if not manifest.exists():
+        return 0
+    with manifest.open("r") as f:
+        manifest = json.load(f)
+    return int(manifest["version"])
diff --git a/ooni/resources/update.py b/ooni/resources/update.py
index 46bbdf8..b464920 100644
--- a/ooni/resources/update.py
+++ b/ooni/resources/update.py
@@ -1,13 +1,143 @@
 import os
+import json
 import tarfile
 import tempfile
 
 from twisted.python.filepath import FilePath
 from twisted.internet import defer
-from twisted.web.client import downloadPage
+from twisted.web.client import downloadPage, getPage
 
+from ooni.utils import log
 from ooni.settings import config
-from ooni.resources import ooni_resources_url
+from ooni.resources import ooni_resources_url, get_download_url
+from ooni.resources import get_current_version
+
+class UpdateFailure(Exception):
+    pass
+
+@defer.inlineCallbacks
+def get_latest_version():
+    """
+    Fetches the latest version of the resources package.
+    :return: (int) the latest version number
+    """
+    try:
+        version = yield getPage(get_download_url("latest", "version"))
+    except Exception as exc:
+        raise exc
+    defer.returnValue(int(version.strip()))
+
+
+def get_out_of_date_resources(current_manifest, new_manifest,
+                              country_code=None):
+    current_res = {}
+    new_res = {}
+    for r in current_manifest["resources"]:
+        current_res[r["path"]] = r
+
+    for r in new_manifest["resources"]:
+        new_res[r["path"]] = r
+
+    paths_to_delete = [
+        current_res[path] for path in list(set(current_res.keys()) -
+                                           set(new_res.keys()))
+        ]
+    paths_to_update = []
+    _resources = FilePath(config.resources_directory)
+    for path, info in new_res.items():
+        if (country_code is not None and
+                info["country_code"] != "ALL" and
+                info["country_code"] != country_code):
+            continue
+        if current_res[path]["version"] < info["version"]:
+            paths_to_update.append(info)
+        else:
+            pre_path, filename = info["path"].split("/")
+            # Also perform an update when it doesn't exist on disk, although
+            #  the manifest claims we have a more up to date version.
+            # This happens if an update by country_code happened and a new
+            # country code is now required.
+            if not _resources.child(pre_path).child(filename).exists():
+                paths_to_update.append(info)
+
+    return paths_to_update, paths_to_delete
+
+@defer.inlineCallbacks
+def check_for_update(country_code=None):
+    """
+    Checks if we need to update the resources.
+    If the country_code is specified then only the resources for that
+    country will be updated/downloaded.
+    :return: the latest version.
+    """
+    temporary_files = []
+    def cleanup():
+        # If we fail we need to delete all the temporary files
+        for _, src_file_path in temporary_files:
+            src_file_path.remove()
+
+    current_version = get_current_version()
+    latest_version = yield get_latest_version()
+
+    # We are already at the latest version
+    if current_version == latest_version:
+        defer.returnValue(latest_version)
+
+    resources_dir = FilePath(config.resources_directory)
+    resources_dir.makedirs(ignoreExistingDirectory=True)
+    current_manifest = resources_dir.child("manifest.json")
+
+    new_manifest = current_manifest.temporarySibling()
+    new_manifest.alwaysCreate = 0
+
+    temporary_files.append((current_manifest, new_manifest))
+
+    try:
+        yield downloadPage(
+            get_download_url(latest_version, "manifest.json"),
+            new_manifest.path
+        )
+    except:
+        cleanup()
+        raise UpdateFailure("Failed to download manifest")
+
+    new_manifest_data = json.loads(new_manifest.getContent())
+
+    to_update = new_manifest_data["resources"]
+    to_delete = []
+    if current_manifest.exists():
+        with current_manifest.open("r") as f:
+            current_manifest_data = json.loads(f)
+        to_update, to_delete = get_out_of_date_resources(
+            current_manifest_data, new_manifest_data, country_code)
+
+    try:
+        for resource in to_update:
+            pre_path, filename = resource["path"].split("/")
+            dst_file = resources_dir.child(pre_path).child(filename)
+            dst_file.parent().makedirs(ignoreExistingDirectory=True)
+            src_file = dst_file.temporarySibling()
+            src_file.alwaysCreate = 0
+
+            temporary_files.append((dst_file, src_file))
+            # The paths for the download require replacing "/" with "."
+            download_url = get_download_url(latest_version,
+                                            resource["path"].replace("/", "."))
+            print("Downloading {0}".format(download_url))
+            yield downloadPage(download_url, src_file.path)
+    except Exception as exc:
+        cleanup()
+        log.exception(exc)
+        raise UpdateFailure("Failed to download resource {0}".format(resource["path"]))
+
+    for dst_file, src_file in temporary_files:
+        log.msg("Moving {0} to {1}".format(src_file.path,
+                                           dst_file.path))
+        src_file.moveTo(dst_file)
+
+    for resource in to_delete:
+        log.msg("Deleting old resources")
+        resources_dir.child(resource["path"]).remove()
 
 @defer.inlineCallbacks
 def download_resources():
diff --git a/ooni/settings.py b/ooni/settings.py
index 5245451..0491f6e 100644
--- a/ooni/settings.py
+++ b/ooni/settings.py
@@ -108,7 +108,7 @@ class OConfig(object):
 
         self.measurements_directory = os.path.join(self.ooni_home,
                                                    'measurements')
-        self.resources_directory = os.path.join(self.data_directory,
+        self.resources_directory = os.path.join(self.ooni_home,
                                                 "resources")
         if self.advanced.report_log_file:
             self.report_log_file = self.advanced.report_log_file
diff --git a/ooni/ui/cli.py b/ooni/ui/cli.py
index 2b402c2..7a0036e 100644
--- a/ooni/ui/cli.py
+++ b/ooni/ui/cli.py
@@ -11,7 +11,6 @@ from twisted.python import usage
 from twisted.internet import defer
 
 from ooni import errors, __version__
-from ooni.constants import CANONICAL_BOUNCER_ONION
 from ooni.settings import config
 from ooni.utils import log
 
diff --git a/ooni/ui/web/client/index.html b/ooni/ui/web/client/index.html
index 3812461..cc45067 100644
--- a/ooni/ui/web/client/index.html
+++ b/ooni/ui/web/client/index.html
@@ -13,5 +13,5 @@
     <app>
       Loading...
     </app>
-  <script type="text/javascript" src="app.bundle.js?06362599078bf7c36b72"></script></body>
+  <script type="text/javascript" src="app.bundle.js?27ae67e2c74ae4ae9a82"></script></body>
 </html>





More information about the tor-commits mailing list