commit 6a6e0ac21e9ee20db47df37a4fc9d4fb4e3be794 Author: Arturo Filastò arturo@filasto.net Date: Sat Apr 23 15:20:02 2016 +0200
Add support for downloading resources from a mirror on GitHub.
* Download resources from oonideckgen when they are missing * Remove dependency on GeoLiteCity database (16 MB is just too much) --- ooni/deckgen/cli.py | 32 +++++++-- ooni/deckgen/processors/citizenlab_test_lists.py | 6 +- ooni/geoip.py | 11 --- ooni/resources/__init__.py | 59 +--------------- ooni/resources/cli.py | 37 ++++------ ooni/resources/update.py | 86 ++++++++++++++---------- ooni/tests/test_geoip.py | 1 - 7 files changed, 93 insertions(+), 139 deletions(-)
diff --git a/ooni/deckgen/cli.py b/ooni/deckgen/cli.py index 1f618b7..b1a6b87 100644 --- a/ooni/deckgen/cli.py +++ b/ooni/deckgen/cli.py @@ -13,8 +13,9 @@ from ooni.geoip import ProbeIP from ooni.settings import config
from ooni.deckgen import __version__ -from ooni.resources import inputs - +from ooni.deckgen.processors import citizenlab_test_lists +from ooni.deckgen.processors import namebench_dns_servers +from ooni.resources.update import download_resources
class Options(usage.Options): synopsis = """%s [options] @@ -70,11 +71,9 @@ class Deck(object):
def generate_deck(options): - dns_servers_processor = inputs['namebench-dns-servers.csv']['processor'] - url_lists_processor = inputs['citizenlab-test-lists.zip']['processor']
try: - url_list_country = url_lists_processor.generate_country_input( + url_list_country = citizenlab_test_lists.generate_country_input( options['country-code'], options['output'] ) @@ -84,10 +83,10 @@ def generate_deck(options): print "We will just use the global one." url_list_country = None
- url_list_global = url_lists_processor.generate_global_input( + url_list_global = citizenlab_test_lists.generate_global_input( options['output'] ) - dns_servers = dns_servers_processor.generate_country_input( + dns_servers = namebench_dns_servers.generate_country_input( options['country-code'], options['output'] ) @@ -124,6 +123,20 @@ def get_user_country_code(): yield probe_ip.lookup() defer.returnValue(probe_ip.geodata['countrycode'])
+def resources_up_to_date(): + if config.get_data_file_path("GeoIP/GeoIP.dat") is None: + return False + + if config.get_data_file_path("resources/" + "namebench-dns-servers.csv") is None: + return False + + if config.get_data_file_path("resources/" + "citizenlab-test-lists/" + "global.csv") is None: + return False + + return True
@defer.inlineCallbacks def run(): @@ -135,6 +148,11 @@ def run(): print options sys.exit(1)
+ if not resources_up_to_date(): + print("Resources for running ooniprobe are not up to date.") + print("Will update them now.") + yield download_resources() + if not options['output']: options['output'] = os.getcwd()
diff --git a/ooni/deckgen/processors/citizenlab_test_lists.py b/ooni/deckgen/processors/citizenlab_test_lists.py index 086b550..8a660cf 100644 --- a/ooni/deckgen/processors/citizenlab_test_lists.py +++ b/ooni/deckgen/processors/citizenlab_test_lists.py @@ -28,10 +28,9 @@ def generate_country_input(country_code, dst):
input_list = config.get_data_file_path("resources/" "citizenlab-test-lists/" - "test-lists-master/lists/" + country_code + ".csv")
- if not os.path.exists(input_list): + if not input_list: raise Exception("Could not find list for country %s" % country_code)
load_input(input_list, filename) @@ -44,12 +43,11 @@ def generate_global_input(dst):
input_list = config.get_data_file_path("resources/" "citizenlab-test-lists/" - "test-lists-master/lists/" "global.csv")
if not input_list: print("Could not find the global input list") - print("Perhaps you should run ooniresources --update-inputs") + print("Perhaps you should run ooniresources") raise Exception("Could not find the global input list")
load_input(input_list, filename) diff --git a/ooni/geoip.py b/ooni/geoip.py index 11800b6..83f3648 100644 --- a/ooni/geoip.py +++ b/ooni/geoip.py @@ -31,7 +31,6 @@ class GeoIPDataFilesNotFound(Exception): def IPToLocation(ipaddr): from ooni.settings import config
- city_file = config.get_data_file_path('GeoIP/GeoLiteCity.dat') country_file = config.get_data_file_path('GeoIP/GeoIP.dat') asn_file = config.get_data_file_path('GeoIP/GeoIPASNum.dat')
@@ -51,12 +50,6 @@ def IPToLocation(ipaddr): error()
try: - city_dat = GeoIP(city_file) - location['city'] = city_dat.record_by_addr(ipaddr)['city'] - except: - error() - - try: asn_dat = GeoIP(asn_file) location['asn'] = asn_dat.org_by_addr(ipaddr).split(' ')[0] except: @@ -75,10 +68,6 @@ def database_version(): 'GeoIPASNum': { 'sha256': None, 'timestamp': None - }, - 'GeoLiteCity': { - 'sha256': None, - 'timestamp': None } }
diff --git a/ooni/resources/__init__.py b/ooni/resources/__init__.py index 94cd36e..8b7e63b 100644 --- a/ooni/resources/__init__.py +++ b/ooni/resources/__init__.py @@ -1,59 +1,6 @@ -import os - -from ooni.settings import config -from ooni.utils import unzip, gunzip - -from ooni.deckgen.processors import citizenlab_test_lists -from ooni.deckgen.processors import namebench_dns_servers +from ooni import __version__ as ooniprobe_version
__version__ = "0.1.0"
-if os.access(config.var_lib_path, os.W_OK): - resources_directory = os.path.join(config.var_lib_path, - "resources") - geoip_directory = os.path.join(config.var_lib_path, - "GeoIP") -else: - resources_directory = os.path.join(config.ooni_home, - "resources") - geoip_directory = os.path.join(config.ooni_home, - "GeoIP") - -inputs = { - "namebench-dns-servers.csv": { - "url": "https://namebench.googlecode.com/svn/trunk/config/servers.csv", - "action": None, - "action_args": [], - "processor": namebench_dns_servers, - }, - "citizenlab-test-lists.zip": { - "url": "https://github.com/citizenlab/test-lists/archive/master.zip", - "action": unzip, - "action_args": [resources_directory], - "processor": citizenlab_test_lists - } -} - -geoip = { - "GeoLiteCity.dat.gz": { - "url": "https://geolite.maxmind.com/download/" - "geoip/database/GeoLiteCity.dat.gz", - "action": gunzip, - "action_args": [geoip_directory], - "processor": None - }, - "GeoIPASNum.dat.gz": { - "url": "https://geolite.maxmind.com/download/" - "geoip/database/asnum/GeoIPASNum.dat.gz", - "action": gunzip, - "action_args": [geoip_directory], - "processor": None - }, - "GeoIP.dat.gz": { - "url": "https://geolite.maxmind.com/" - "download/geoip/database/GeoLiteCountry/GeoIP.dat.gz", - "action": gunzip, - "action_args": [geoip_directory], - "processor": None - } -} +ooni_resources_url = ("https://github.com/TheTorProject/ooni-probe/releases" + "/download/v{}/ooni-resources.tar.gz").format(ooniprobe_version) diff --git a/ooni/resources/cli.py b/ooni/resources/cli.py index cab9700..0d7832a 100644 --- a/ooni/resources/cli.py +++ b/ooni/resources/cli.py @@ -3,18 +3,23 @@ import sys from twisted.internet import defer from twisted.python import usage
-from ooni.utils import log - from ooni.resources import __version__ from ooni.resources import update
class Options(usage.Options): - synopsis = """%s""" % sys.argv[0] + synopsis = """%s + This is used to update the resources required to run oonideckgen and + ooniprobe. + You just run this script with no arguments and it will update the + resources. + """ % sys.argv[0]
optFlags = [ - ["update-inputs", None, "Update the resources needed for inputs."], - ["update-geoip", None, "Update the geoip related resources."] + ["update-inputs", None, "(deprecated) update the resources needed for " + "inputs."], + ["update-geoip", None, "(deprecated) Update the geoip related " + "resources."] ] optParameters = []
@@ -33,23 +38,7 @@ def run(): print "%s: Try --help for usage details." % (sys.argv[0]) sys.exit(1)
- if not any(options.values()): - print("%s: no command specified" % sys.argv[0]) - print options - sys.exit(1) + if options['update-inputs'] or options['update-geoip']: + print("WARNING: Passing command line arguments is deprecated")
- if options['update-inputs']: - print "Downloading inputs" - try: - yield update.download_inputs() - except Exception as exc: - log.err("failed to download geoip files") - log.exception(exc) - - if options['update-geoip']: - print "Downloading geoip files" - try: - yield update.download_geoip() - except Exception as exc: - log.err("failed to download geoip files") - log.exception(exc) + yield update.download_resources() diff --git a/ooni/resources/update.py b/ooni/resources/update.py index 7a949c9..46bbdf8 100644 --- a/ooni/resources/update.py +++ b/ooni/resources/update.py @@ -1,43 +1,57 @@ import os +import tarfile +import tempfile
+from twisted.python.filepath import FilePath from twisted.internet import defer from twisted.web.client import downloadPage
-from ooni.resources import inputs, geoip, resources_directory -from ooni.utils import unzip, gunzip - +from ooni.settings import config +from ooni.resources import ooni_resources_url
@defer.inlineCallbacks -def download_resource(resources): - for filename, resource in resources.items(): - print "Downloading %s" % filename - - if resource["action"] in [unzip, gunzip] and resource["action_args"]: - dirname = resource["action_args"][0] - filename = os.path.join(dirname, filename) - else: - filename = os.path.join(resources_directory, filename) - if not os.path.exists(filename): - directory = os.path.dirname(filename) - if not os.path.isdir(directory): - os.makedirs(directory) - f = open(filename, 'w') - f.close() - elif not os.path.isfile(filename): - print "[!] %s must be a file." % filename - defer.returnValue(False) - yield downloadPage(resource['url'], filename) - - if resource['action'] is not None: - yield defer.maybeDeferred(resource['action'], - filename, - *resource['action_args']) - print "%s written." % filename - - -def download_inputs(): - return download_resource(inputs) - - -def download_geoip(): - return download_resource(geoip) +def download_resources(): + if os.access(config.var_lib_path, os.W_OK): + dst_directory = FilePath(config.var_lib_path) + else: + dst_directory = FilePath(config.ooni_home) + + print("Downloading {} to {}".format(ooni_resources_url, + dst_directory.path)) + tmp_download_directory = FilePath(tempfile.mkdtemp()) + tmp_download_filename = tmp_download_directory.temporarySibling() + + + try: + yield downloadPage(ooni_resources_url, tmp_download_filename.path) + ooni_resources_tar_gz = tarfile.open(tmp_download_filename.path) + ooni_resources_tar_gz.extractall(tmp_download_directory.path) + + if not tmp_download_directory.child('GeoIP').exists(): + raise Exception("Could not find GeoIP data files in downloaded " + "tar.") + + if not tmp_download_directory.child('resources').exists(): + raise Exception("Could not find resources data files in " + "downloaded tar.") + + geoip_dir = dst_directory.child('GeoIP') + resources_dir = dst_directory.child('resources') + + if geoip_dir.exists(): + geoip_dir.remove() + 
tmp_download_directory.child('GeoIP').moveTo(geoip_dir) + + if resources_dir.exists(): + resources_dir.remove() + tmp_download_directory.child('resources').moveTo(resources_dir) + + print("Written GeoIP files to {}".format(geoip_dir.path)) + print("Written resources files to {}".format(resources_dir.path)) + + except Exception as exc: + print("Failed to download resources!") + raise exc + + finally: + tmp_download_directory.remove() diff --git a/ooni/tests/test_geoip.py b/ooni/tests/test_geoip.py index 5e8b67b..66ba13e 100644 --- a/ooni/tests/test_geoip.py +++ b/ooni/tests/test_geoip.py @@ -26,7 +26,6 @@ class TestGeoIP(bases.ConfigTestCase): version = geoip.database_version() assert 'GeoIP' in version.keys() assert 'GeoIPASNum' in version.keys() - assert 'GeoLiteCity' in version.keys()
assert len(version['GeoIP']['sha256']) == 64 assert isinstance(version['GeoIP']['timestamp'], float)