[tor-commits] [ooni-probe/master] Add support for downloading resources from mirror on github.

art at torproject.org art at torproject.org
Fri Apr 29 09:42:26 UTC 2016


commit 6a6e0ac21e9ee20db47df37a4fc9d4fb4e3be794
Author: Arturo Filastò <arturo at filasto.net>
Date:   Sat Apr 23 15:20:02 2016 +0200

    Add support for downloading resources from a mirror on GitHub.
    
    * Download resources from oonideckgen when they are missing
    * Remove dependency on GeoLiteCity database (16MB is just too much)
---
 ooni/deckgen/cli.py                              | 32 +++++++--
 ooni/deckgen/processors/citizenlab_test_lists.py |  6 +-
 ooni/geoip.py                                    | 11 ---
 ooni/resources/__init__.py                       | 59 +---------------
 ooni/resources/cli.py                            | 37 ++++------
 ooni/resources/update.py                         | 86 ++++++++++++++----------
 ooni/tests/test_geoip.py                         |  1 -
 7 files changed, 93 insertions(+), 139 deletions(-)

diff --git a/ooni/deckgen/cli.py b/ooni/deckgen/cli.py
index 1f618b7..b1a6b87 100644
--- a/ooni/deckgen/cli.py
+++ b/ooni/deckgen/cli.py
@@ -13,8 +13,9 @@ from ooni.geoip import ProbeIP
 from ooni.settings import config
 
 from ooni.deckgen import __version__
-from ooni.resources import inputs
-
+from ooni.deckgen.processors import citizenlab_test_lists
+from ooni.deckgen.processors import namebench_dns_servers
+from ooni.resources.update import download_resources
 
 class Options(usage.Options):
     synopsis = """%s [options]
@@ -70,11 +71,9 @@ class Deck(object):
 
 
 def generate_deck(options):
-    dns_servers_processor = inputs['namebench-dns-servers.csv']['processor']
-    url_lists_processor = inputs['citizenlab-test-lists.zip']['processor']
 
     try:
-        url_list_country = url_lists_processor.generate_country_input(
+        url_list_country = citizenlab_test_lists.generate_country_input(
             options['country-code'],
             options['output']
         )
@@ -84,10 +83,10 @@ def generate_deck(options):
         print "We will just use the global one."
         url_list_country = None
 
-    url_list_global = url_lists_processor.generate_global_input(
+    url_list_global = citizenlab_test_lists.generate_global_input(
         options['output']
     )
-    dns_servers = dns_servers_processor.generate_country_input(
+    dns_servers = namebench_dns_servers.generate_country_input(
         options['country-code'],
         options['output']
     )
@@ -124,6 +123,20 @@ def get_user_country_code():
     yield probe_ip.lookup()
     defer.returnValue(probe_ip.geodata['countrycode'])
 
+def resources_up_to_date():
+    if config.get_data_file_path("GeoIP/GeoIP.dat") is None:
+        return False
+
+    if config.get_data_file_path("resources/"
+                                 "namebench-dns-servers.csv") is None:
+        return False
+
+    if config.get_data_file_path("resources/"
+                                 "citizenlab-test-lists/"
+                                 "global.csv") is None:
+        return False
+
+    return True
 
 @defer.inlineCallbacks
 def run():
@@ -135,6 +148,11 @@ def run():
         print options
         sys.exit(1)
 
+    if not resources_up_to_date():
+        print("Resources for running ooniprobe are not up to date.")
+        print("Will update them now.")
+        yield download_resources()
+
     if not options['output']:
         options['output'] = os.getcwd()
 
diff --git a/ooni/deckgen/processors/citizenlab_test_lists.py b/ooni/deckgen/processors/citizenlab_test_lists.py
index 086b550..8a660cf 100644
--- a/ooni/deckgen/processors/citizenlab_test_lists.py
+++ b/ooni/deckgen/processors/citizenlab_test_lists.py
@@ -28,10 +28,9 @@ def generate_country_input(country_code, dst):
 
     input_list = config.get_data_file_path("resources/"
                                            "citizenlab-test-lists/"
-                                           "test-lists-master/lists/"
                                            + country_code + ".csv")
 
-    if not os.path.exists(input_list):
+    if not input_list:
         raise Exception("Could not find list for country %s" % country_code)
 
     load_input(input_list, filename)
@@ -44,12 +43,11 @@ def generate_global_input(dst):
 
     input_list = config.get_data_file_path("resources/"
                                            "citizenlab-test-lists/"
-                                           "test-lists-master/lists/"
                                            "global.csv")
 
     if not input_list:
         print("Could not find the global input list")
-        print("Perhaps you should run ooniresources --update-inputs")
+        print("Perhaps you should run ooniresources")
         raise Exception("Could not find the global input list")
 
     load_input(input_list, filename)
diff --git a/ooni/geoip.py b/ooni/geoip.py
index 11800b6..83f3648 100644
--- a/ooni/geoip.py
+++ b/ooni/geoip.py
@@ -31,7 +31,6 @@ class GeoIPDataFilesNotFound(Exception):
 def IPToLocation(ipaddr):
     from ooni.settings import config
 
-    city_file = config.get_data_file_path('GeoIP/GeoLiteCity.dat')
     country_file = config.get_data_file_path('GeoIP/GeoIP.dat')
     asn_file = config.get_data_file_path('GeoIP/GeoIPASNum.dat')
 
@@ -51,12 +50,6 @@ def IPToLocation(ipaddr):
         error()
 
     try:
-        city_dat = GeoIP(city_file)
-        location['city'] = city_dat.record_by_addr(ipaddr)['city']
-    except:
-        error()
-
-    try:
         asn_dat = GeoIP(asn_file)
         location['asn'] = asn_dat.org_by_addr(ipaddr).split(' ')[0]
     except:
@@ -75,10 +68,6 @@ def database_version():
         'GeoIPASNum': {
             'sha256': None,
             'timestamp': None
-        },
-        'GeoLiteCity': {
-            'sha256': None,
-            'timestamp': None
         }
     }
 
diff --git a/ooni/resources/__init__.py b/ooni/resources/__init__.py
index 94cd36e..8b7e63b 100644
--- a/ooni/resources/__init__.py
+++ b/ooni/resources/__init__.py
@@ -1,59 +1,6 @@
-import os
-
-from ooni.settings import config
-from ooni.utils import unzip, gunzip
-
-from ooni.deckgen.processors import citizenlab_test_lists
-from ooni.deckgen.processors import namebench_dns_servers
+from ooni import __version__ as ooniprobe_version
 
 __version__ = "0.1.0"
 
-if os.access(config.var_lib_path, os.W_OK):
-    resources_directory = os.path.join(config.var_lib_path,
-                                       "resources")
-    geoip_directory = os.path.join(config.var_lib_path,
-                                   "GeoIP")
-else:
-    resources_directory = os.path.join(config.ooni_home,
-                                       "resources")
-    geoip_directory = os.path.join(config.ooni_home,
-                                   "GeoIP")
-
-inputs = {
-    "namebench-dns-servers.csv": {
-        "url": "https://namebench.googlecode.com/svn/trunk/config/servers.csv",
-        "action": None,
-        "action_args": [],
-        "processor": namebench_dns_servers,
-    },
-    "citizenlab-test-lists.zip": {
-        "url": "https://github.com/citizenlab/test-lists/archive/master.zip",
-        "action": unzip,
-        "action_args": [resources_directory],
-        "processor": citizenlab_test_lists
-    }
-}
-
-geoip = {
-    "GeoLiteCity.dat.gz": {
-        "url": "https://geolite.maxmind.com/download/"
-               "geoip/database/GeoLiteCity.dat.gz",
-        "action": gunzip,
-        "action_args": [geoip_directory],
-        "processor": None
-    },
-    "GeoIPASNum.dat.gz": {
-        "url": "https://geolite.maxmind.com/download/"
-               "geoip/database/asnum/GeoIPASNum.dat.gz",
-        "action": gunzip,
-        "action_args": [geoip_directory],
-        "processor": None
-    },
-    "GeoIP.dat.gz": {
-        "url": "https://geolite.maxmind.com/"
-               "download/geoip/database/GeoLiteCountry/GeoIP.dat.gz",
-        "action": gunzip,
-        "action_args": [geoip_directory],
-        "processor": None
-    }
-}
+ooni_resources_url = ("https://github.com/TheTorProject/ooni-probe/releases"
+                      "/download/v{}/ooni-resources.tar.gz").format(ooniprobe_version)
diff --git a/ooni/resources/cli.py b/ooni/resources/cli.py
index cab9700..0d7832a 100644
--- a/ooni/resources/cli.py
+++ b/ooni/resources/cli.py
@@ -3,18 +3,23 @@ import sys
 from twisted.internet import defer
 from twisted.python import usage
 
-from ooni.utils import log
-
 from ooni.resources import __version__
 from ooni.resources import update
 
 
 class Options(usage.Options):
-    synopsis = """%s""" % sys.argv[0]
+    synopsis = """%s
+    This is used to update the resources required to run oonideckgen and
+    ooniprobe.
+    You just run this script with no arguments and it will update the
+    resources.
+    """ % sys.argv[0]
 
     optFlags = [
-        ["update-inputs", None, "Update the resources needed for inputs."],
-        ["update-geoip", None, "Update the geoip related resources."]
+        ["update-inputs", None, "(deprecated) update the resources needed for "
+                                "inputs."],
+        ["update-geoip", None, "(deprecated) Update the geoip related "
+                               "resources."]
     ]
     optParameters = []
 
@@ -33,23 +38,7 @@ def run():
         print "%s: Try --help for usage details." % (sys.argv[0])
         sys.exit(1)
 
-    if not any(options.values()):
-        print("%s: no command specified" % sys.argv[0])
-        print options
-        sys.exit(1)
+    if options['update-inputs'] or options['update-geoip']:
+        print("WARNING: Passing command line arguments is deprecated")
 
-    if options['update-inputs']:
-        print "Downloading inputs"
-        try:
-            yield update.download_inputs()
-        except Exception as exc:
-            log.err("failed to download geoip files")
-            log.exception(exc)
-
-    if options['update-geoip']:
-        print "Downloading geoip files"
-        try:
-            yield update.download_geoip()
-        except Exception as exc:
-            log.err("failed to download geoip files")
-            log.exception(exc)
+    yield update.download_resources()
diff --git a/ooni/resources/update.py b/ooni/resources/update.py
index 7a949c9..46bbdf8 100644
--- a/ooni/resources/update.py
+++ b/ooni/resources/update.py
@@ -1,43 +1,57 @@
 import os
+import tarfile
+import tempfile
 
+from twisted.python.filepath import FilePath
 from twisted.internet import defer
 from twisted.web.client import downloadPage
 
-from ooni.resources import inputs, geoip, resources_directory
-from ooni.utils import unzip, gunzip
-
+from ooni.settings import config
+from ooni.resources import ooni_resources_url
 
 @defer.inlineCallbacks
-def download_resource(resources):
-    for filename, resource in resources.items():
-        print "Downloading %s" % filename
-
-        if resource["action"] in [unzip, gunzip] and resource["action_args"]:
-                    dirname = resource["action_args"][0]
-                    filename = os.path.join(dirname, filename)
-        else:
-            filename = os.path.join(resources_directory, filename)
-        if not os.path.exists(filename):
-            directory = os.path.dirname(filename)
-            if not os.path.isdir(directory):
-                os.makedirs(directory)
-            f = open(filename, 'w')
-            f.close()
-        elif not os.path.isfile(filename):
-            print "[!] %s must be a file." % filename
-            defer.returnValue(False)
-        yield downloadPage(resource['url'], filename)
-
-        if resource['action'] is not None:
-            yield defer.maybeDeferred(resource['action'],
-                                      filename,
-                                      *resource['action_args'])
-        print "%s written." % filename
-
-
-def download_inputs():
-    return download_resource(inputs)
-
-
-def download_geoip():
-    return download_resource(geoip)
+def download_resources():
+    if os.access(config.var_lib_path, os.W_OK):
+        dst_directory = FilePath(config.var_lib_path)
+    else:
+        dst_directory = FilePath(config.ooni_home)
+
+    print("Downloading {} to {}".format(ooni_resources_url,
+                                        dst_directory.path))
+    tmp_download_directory = FilePath(tempfile.mkdtemp())
+    tmp_download_filename = tmp_download_directory.temporarySibling()
+
+
+    try:
+        yield downloadPage(ooni_resources_url, tmp_download_filename.path)
+        ooni_resources_tar_gz = tarfile.open(tmp_download_filename.path)
+        ooni_resources_tar_gz.extractall(tmp_download_directory.path)
+
+        if not tmp_download_directory.child('GeoIP').exists():
+            raise Exception("Could not find GeoIP data files in downloaded "
+                            "tar.")
+
+        if not tmp_download_directory.child('resources').exists():
+            raise Exception("Could not find resources data files in "
+                            "downloaded tar.")
+
+        geoip_dir = dst_directory.child('GeoIP')
+        resources_dir = dst_directory.child('resources')
+
+        if geoip_dir.exists():
+            geoip_dir.remove()
+        tmp_download_directory.child('GeoIP').moveTo(geoip_dir)
+
+        if resources_dir.exists():
+            resources_dir.remove()
+        tmp_download_directory.child('resources').moveTo(resources_dir)
+
+        print("Written GeoIP files to {}".format(geoip_dir.path))
+        print("Written resources files to {}".format(resources_dir.path))
+
+    except Exception as exc:
+        print("Failed to download resources!")
+        raise exc
+
+    finally:
+        tmp_download_directory.remove()
diff --git a/ooni/tests/test_geoip.py b/ooni/tests/test_geoip.py
index 5e8b67b..66ba13e 100644
--- a/ooni/tests/test_geoip.py
+++ b/ooni/tests/test_geoip.py
@@ -26,7 +26,6 @@ class TestGeoIP(bases.ConfigTestCase):
         version = geoip.database_version()
         assert 'GeoIP' in version.keys()
         assert 'GeoIPASNum' in version.keys()
-        assert 'GeoLiteCity' in version.keys()
 
         assert len(version['GeoIP']['sha256']) == 64
         assert isinstance(version['GeoIP']['timestamp'], float)





More information about the tor-commits mailing list