[tor-commits] [ooni-probe/master] Move probe_ip singleton to the geoip module.

art at torproject.org art at torproject.org
Mon Sep 19 12:14:24 UTC 2016


commit 10a802e72c955d450a6a1b56fcb71b4e60d46934
Author: Arturo Filastò <arturo at filasto.net>
Date:   Wed Jul 27 14:24:51 2016 +0200

    Move probe_ip singleton to the geoip module.
    
    Make it cache the probe_ip for 10 minutes and support waiting if another
    probe_ip lookup is in progress already.
    
    Put a probe ip lookup also inside of the deck
---
 ooni/agent/scheduler.py                    | 25 +++++-----------------
 ooni/deck.py                               |  8 ++++++--
 ooni/director.py                           |  7 +++++--
 ooni/geoip.py                              | 33 ++++++++++++++++++++++++++++--
 ooni/nettest.py                            |  9 ++++----
 ooni/nettests/blocking/web_connectivity.py |  8 ++++----
 ooni/resources.py                          |  5 ++++-
 ooni/scripts/oonideckgen.py                |  3 +--
 ooni/settings.py                           |  3 +--
 ooni/templates/httpt.py                    |  5 +++--
 ooni/templates/process.py                  |  7 ++++---
 ooni/tests/test_geoip.py                   |  2 +-
 ooni/ui/cli.py                             |  2 +-
 ooni/ui/web/client/index.html              |  2 +-
 ooni/ui/web/server.py                      | 25 ++++++++++++++--------
 15 files changed, 89 insertions(+), 55 deletions(-)

diff --git a/ooni/agent/scheduler.py b/ooni/agent/scheduler.py
index 3a9da20..9784f8a 100644
--- a/ooni/agent/scheduler.py
+++ b/ooni/agent/scheduler.py
@@ -9,6 +9,7 @@ from ooni.utils import log
 from ooni.deck import input_store
 from ooni.settings import config
 from ooni.contrib import croniter
+from ooni.geoip import probe_ip
 
 class ScheduledTask(object):
     _time_format = "%Y-%m-%dT%H:%M:%SZ"
@@ -73,19 +74,9 @@ class UpdateInputsAndResources(ScheduledTask):
     @defer.inlineCallbacks
     def task(self):
         log.debug("Updating the inputs")
-        yield resources.check_for_update(config.probe_ip.geodata['countrycode'])
-        yield input_store.update(config.probe_ip.geodata['countrycode'])
-
-class UpdateProbeIp(ScheduledTask):
-    identifier = "ooni-update-probe-ip"
-    schedule = "@hourly"
-    # XXX we need to ensure this is always run the first time ooniprobe or
-    # ooniprobe-agent is started or implement on disk caching of the users
-    # IP address.
-
-    def task(self):
-        log.debug("Updating the probe IP")
-        return config.probe_ip.lookup()
+        yield probe_ip.lookup()
+        yield resources.check_for_update(probe_ip.geodata['countrycode'])
+        yield input_store.update(probe_ip.geodata['countrycode'])
 
 class CleanupInProgressReports(ScheduledTask):
     identifier = 'ooni-cleanup-reports'
@@ -97,18 +88,13 @@ class UploadMissingReports(ScheduledTask):
 
 # Order mattters
 SYSTEM_TASKS = [
-    UpdateProbeIp,
     UpdateInputsAndResources
 ]
 
 @defer.inlineCallbacks
-def run_system_tasks(no_geoip=False, no_input_store=False):
+def run_system_tasks(no_input_store=False):
     task_classes = SYSTEM_TASKS[:]
 
-    if no_geoip:
-        log.debug("Not updating probe IP")
-        task_classes.pop(UpdateProbeIp)
-
     if no_input_store:
         log.debug("Not updating the inputs")
         task_classes.pop(UpdateInputsAndResources)
@@ -157,7 +143,6 @@ class SchedulerService(service.MultiService):
     def startService(self):
         service.MultiService.startService(self)
 
-        self.schedule(UpdateProbeIp())
         self.schedule(UpdateInputsAndResources())
 
         self._looping_call.start(self.interval)
diff --git a/ooni/deck.py b/ooni/deck.py
index 0434d81..7976e5e 100644
--- a/ooni/deck.py
+++ b/ooni/deck.py
@@ -20,6 +20,7 @@ from ooni.resources import check_for_update
 from ooni.settings import config
 from ooni.utils import generate_filename
 from ooni.utils import log
+from ooni.geoip import probe_ip
 
 from ooni.results import generate_summary
 
@@ -652,7 +653,9 @@ class DeckTask(object):
 
         self.ooni['net_test_loader'] = net_test_loader
 
+    @defer.inlineCallbacks
     def _setup_ooni(self):
+        yield probe_ip.lookup()
         for input_file in self.ooni['net_test_loader'].inputFiles:
             file_path = resolve_file_path(input_file['filename'], self.cwd)
             input_file['test_options'][input_file['key']] = file_path
@@ -660,7 +663,7 @@ class DeckTask(object):
         self.id = generate_filename(self.ooni['test_details'])
 
     def setup(self):
-        getattr(self, "_setup_"+self.type)()
+        return getattr(self, "_setup_"+self.type)()
 
     def _load(self, data):
         for key in self._metadata_keys:
@@ -918,12 +921,13 @@ class NGDeck(object):
         d.addErrback(self._measurement_failed, task)
         return d
 
+    @defer.inlineCallbacks
     def setup(self):
         """
         This method needs to be called before you are able to run a deck.
         """
         for task in self._tasks:
-            task.setup()
+            yield task.setup()
         self._is_setup = True
 
     @defer.inlineCallbacks
diff --git a/ooni/director.py b/ooni/director.py
index 1ab076b..ad6d1e2 100644
--- a/ooni/director.py
+++ b/ooni/director.py
@@ -12,6 +12,7 @@ from ooni.nettest import NetTest, getNetTestInformation
 from ooni.settings import config
 from ooni.nettest import normalizeTestName
 from ooni.deck import InputStore
+from ooni.geoip import probe_ip
 
 from ooni.agent.scheduler import run_system_tasks
 from ooni.utils.onion import start_tor, connect_to_control_port
@@ -199,11 +200,13 @@ class Director(object):
                 aux = map(lambda x: x in annotations, ["city", "country", "asn"])
             if not all(aux):
                 log.msg("You should add annotations for the country, city and ASN")
+        else:
+            yield probe_ip.lookup()
+            self.notify(DirectorEvent("success", "Looked up probe IP"))
 
         self.notify(DirectorEvent("success",
                                   "Running system tasks"))
-        yield run_system_tasks(no_geoip=no_geoip,
-                               no_input_store=not create_input_store)
+        yield run_system_tasks(no_input_store=not create_input_store)
         self.notify(DirectorEvent("success",
                                   "Ran system tasks"))
 
diff --git a/ooni/geoip.py b/ooni/geoip.py
index 2a7ec92..f271790 100644
--- a/ooni/geoip.py
+++ b/ooni/geoip.py
@@ -2,6 +2,7 @@ from __future__ import absolute_import
 import re
 import os
 import json
+import time
 import random
 
 from hashlib import sha256
@@ -28,7 +29,7 @@ except ImportError:
 class GeoIPDataFilesNotFound(Exception):
     pass
 
-def IPToLocation(ipaddr):
+def ip_to_location(ipaddr):
     from ooni.settings import config
 
     country_file = config.get_data_file_path(
@@ -152,9 +153,14 @@ class DuckDuckGoGeoIP(HTTPGeoIPLookupper):
         probe_ip = re.search(regexp, j['Answer']).group(1)
         return probe_ip
 
+INITIAL = 0
+IN_PROGRESS = 1
+
 class ProbeIP(object):
     strategy = None
     address = None
+    # How long should we consider geoip results valid?
+    _expire_in = 10*60
 
     def __init__(self):
         self.geoIPServices = {
@@ -168,10 +174,23 @@ class ProbeIP(object):
             'ip': '127.0.0.1'
         }
 
+        self._last_lookup = 0
+        self._reset_state()
+
+    def _reset_state(self):
+        self._state = INITIAL
+        self._looking_up = defer.Deferred()
+        self._looking_up.addCallback(self._looked_up)
+
+    def _looked_up(self, result):
+        self._last_lookup = time.time()
+        self._reset_state()
+        return result
+
     def resolveGeodata(self):
         from ooni.settings import config
 
-        self.geodata = IPToLocation(self.address)
+        self.geodata = ip_to_location(self.address)
         self.geodata['ip'] = self.address
         if not config.privacy.includeasn:
             self.geodata['asn'] = 'AS0'
@@ -182,13 +201,20 @@ class ProbeIP(object):
 
     @defer.inlineCallbacks
     def lookup(self):
+        if self._state == IN_PROGRESS:
+            yield self._looking_up
+        elif self._last_lookup < time.time() - self._expire_in:
+            self.address = None
+
         if self.address:
             defer.returnValue(self.address)
         else:
+            self._state = IN_PROGRESS
             try:
                 yield self.askTor()
                 log.msg("Found your IP via Tor")
                 self.resolveGeodata()
+                self._looking_up.callback(self.address)
                 defer.returnValue(self.address)
             except errors.TorStateNotFound:
                 log.debug("Tor is not running. Skipping IP lookup via Tor.")
@@ -199,6 +225,7 @@ class ProbeIP(object):
                 yield self.askGeoIPService()
                 log.msg("Found your IP via a GeoIP service")
                 self.resolveGeodata()
+                self._looking_up.callback(self.address)
                 defer.returnValue(self.address)
             except Exception:
                 log.msg("Unable to lookup the probe IP via GeoIPService")
@@ -241,3 +268,5 @@ class ProbeIP(object):
             return d
         else:
             raise errors.TorStateNotFound
+
+probe_ip = ProbeIP()
diff --git a/ooni/nettest.py b/ooni/nettest.py
index 88e4953..566c391 100644
--- a/ooni/nettest.py
+++ b/ooni/nettest.py
@@ -13,6 +13,7 @@ from ooni.tasks import Measurement
 from ooni.utils import log, sanitize_options, randomStr
 from ooni.utils.net import hasRawSocketPermission
 from ooni.settings import config
+from ooni.geoip import probe_ip
 
 from ooni import errors as e
 
@@ -192,10 +193,10 @@ class NetTestLoader(object):
 
     def getTestDetails(self):
         return {
-            'probe_asn': config.probe_ip.geodata['asn'],
-            'probe_cc': config.probe_ip.geodata['countrycode'],
-            'probe_ip': config.probe_ip.geodata['ip'],
-            'probe_city': config.probe_ip.geodata['city'],
+            'probe_asn': probe_ip.geodata['asn'],
+            'probe_cc': probe_ip.geodata['countrycode'],
+            'probe_ip': probe_ip.geodata['ip'],
+            'probe_city': probe_ip.geodata['city'],
             'software_name': 'ooniprobe',
             'software_version': ooniprobe_version,
             # XXX only sanitize the input files
diff --git a/ooni/nettests/blocking/web_connectivity.py b/ooni/nettests/blocking/web_connectivity.py
index dde6b6f..d8d539f 100644
--- a/ooni/nettests/blocking/web_connectivity.py
+++ b/ooni/nettests/blocking/web_connectivity.py
@@ -350,10 +350,10 @@ class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest):
         if len(control_addrs.intersection(experiment_addrs)) > 0:
             return True
 
-        experiment_asns = set(map(lambda x: geoip.IPToLocation(x)['asn'],
-                              experiment_addrs))
-        control_asns = set(map(lambda x: geoip.IPToLocation(x)['asn'],
-                           control_addrs))
+        experiment_asns = set(map(lambda x: geoip.ip_to_location(x)['asn'],
+                                  experiment_addrs))
+        control_asns = set(map(lambda x: geoip.ip_to_location(x)['asn'],
+                               control_addrs))
 
         # Remove the instance of AS0 when we fail to find the ASN
         control_asns.discard('AS0')
diff --git a/ooni/resources.py b/ooni/resources.py
index 8a0a57f..47ebf86 100644
--- a/ooni/resources.py
+++ b/ooni/resources.py
@@ -4,7 +4,10 @@ import shutil
 
 from twisted.python.filepath import FilePath
 from twisted.internet import defer
-from twisted.web.client import downloadPage, getPage
+from twisted.web.client import downloadPage, getPage, HTTPClientFactory
+
+# Disable logs of HTTPClientFactory
+HTTPClientFactory.noisy = False
 
 from ooni.utils import log, gunzip, rename
 from ooni.settings import config
diff --git a/ooni/scripts/oonideckgen.py b/ooni/scripts/oonideckgen.py
index 10f8673..9b087f9 100644
--- a/ooni/scripts/oonideckgen.py
+++ b/ooni/scripts/oonideckgen.py
@@ -10,7 +10,7 @@ from twisted.python import usage
 
 from ooni.otime import prettyDateNowUTC
 from ooni import errors
-from ooni.geoip import ProbeIP
+from ooni.geoip import probe_ip
 from ooni.resources import check_for_update
 from ooni.settings import config
 from ooni.deck import NGDeck
@@ -86,7 +86,6 @@ def generate_deck(options):
 @defer.inlineCallbacks
 def get_user_country_code():
     config.privacy.includecountry = True
-    probe_ip = ProbeIP()
     yield probe_ip.lookup()
     defer.returnValue(probe_ip.geodata['countrycode'])
 
diff --git a/ooni/settings.py b/ooni/settings.py
index 36670ac..acb1895 100644
--- a/ooni/settings.py
+++ b/ooni/settings.py
@@ -23,8 +23,7 @@ class OConfig(object):
         self.reports = Storage()
         self.scapyFactory = None
         self.tor_state = None
-        # This is used to store the probes IP address obtained via Tor
-        self.probe_ip = geoip.ProbeIP()
+
         self.logging = True
         self.basic = Storage()
         self.advanced = Storage()
diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py
index 857392e..cba8702 100644
--- a/ooni/templates/httpt.py
+++ b/ooni/templates/httpt.py
@@ -19,6 +19,7 @@ from ooni.common.txextra import TrueHeaders
 from ooni.common.txextra import FixedRedirectAgent, TrueHeadersAgent
 from ooni.common.http_utils import representBody
 from ooni.errors import handleAllFailures
+from ooni.geoip import probe_ip
 
 class InvalidSocksProxyOption(Exception):
     pass
@@ -159,9 +160,9 @@ class HTTPTest(NetTestCase):
             else:
                 response_body = ''
             # Attempt to redact the IP address of the probe from the responses
-            if (config.privacy.includeip is False and config.probe_ip.address is not None and
+            if (config.privacy.includeip is False and probe_ip.address is not None and
                     (isinstance(response_body, str) or isinstance(response_body, unicode))):
-                response_body = response_body.replace(config.probe_ip.address, "[REDACTED]")
+                response_body = response_body.replace(probe_ip.address, "[REDACTED]")
             if (getattr(response, 'request', None) and
                     getattr(response.request, 'absoluteURI', None)):
                 session['request']['url'] = response.request.absoluteURI
diff --git a/ooni/templates/process.py b/ooni/templates/process.py
index faf0a66..56fe0fd 100644
--- a/ooni/templates/process.py
+++ b/ooni/templates/process.py
@@ -3,6 +3,7 @@ from twisted.internet import protocol, defer, reactor
 from ooni.settings import config
 from ooni.nettest import NetTestCase
 from ooni.utils import log
+from ooni.geoip import probe_ip
 
 
 class ProcessDirector(protocol.ProcessProtocol):
@@ -108,9 +109,9 @@ class ProcessTest(NetTestCase):
             self.report['commands'] = []
 
         # Attempt to redact the IP address of the probe from the standard output
-        if config.privacy.includeip is False and config.probe_ip.address is not None:
-            result['stdout'] = result['stdout'].replace(config.probe_ip.address, "[REDACTED]")
-            result['stderr'] = result['stderr'].replace(config.probe_ip.address, "[REDACTED]")
+        if config.privacy.includeip is False and probe_ip.address is not None:
+            result['stdout'] = result['stdout'].replace(probe_ip.address, "[REDACTED]")
+            result['stderr'] = result['stderr'].replace(probe_ip.address, "[REDACTED]")
 
         self.report['commands'].append({
             'command_name': ' '.join(command),
diff --git a/ooni/tests/test_geoip.py b/ooni/tests/test_geoip.py
index 66ba13e..8eb964d 100644
--- a/ooni/tests/test_geoip.py
+++ b/ooni/tests/test_geoip.py
@@ -7,7 +7,7 @@ from ooni import geoip
 
 class TestGeoIP(bases.ConfigTestCase):
     def test_ip_to_location(self):
-        location = geoip.IPToLocation('8.8.8.8')
+        location = geoip.ip_to_location('8.8.8.8')
         assert 'countrycode' in location
         assert 'asn' in location
         assert 'city' in location
diff --git a/ooni/ui/cli.py b/ooni/ui/cli.py
index d8a4a8f..51ed3b9 100644
--- a/ooni/ui/cli.py
+++ b/ooni/ui/cli.py
@@ -313,7 +313,7 @@ def runTestWithDirector(director, global_options, url=None,
     @defer.inlineCallbacks
     def post_director_start(_):
         try:
-            deck.setup()
+            yield deck.setup()
             yield deck.run(director)
         except errors.UnableToLoadDeckInput as error:
             raise defer.failure.Failure(error)
diff --git a/ooni/ui/web/client/index.html b/ooni/ui/web/client/index.html
index 7859216..c6d83b7 100644
--- a/ooni/ui/web/client/index.html
+++ b/ooni/ui/web/client/index.html
@@ -13,5 +13,5 @@
     <app>
       Loading...
     </app>
-  <script type="text/javascript" src="app.bundle.js?de2d27ce59f4cee8dd96"></script></body>
+  <script type="text/javascript" src="app.bundle.js?16bac0b4c21c5b120b04"></script></body>
 </html>
diff --git a/ooni/ui/web/server.py b/ooni/ui/web/server.py
index 0a3d1ca..f14f6b8 100644
--- a/ooni/ui/web/server.py
+++ b/ooni/ui/web/server.py
@@ -18,6 +18,7 @@ from ooni.settings import config
 from ooni.utils import log
 from ooni.director import DirectorEvent
 from ooni.results import generate_summary
+from ooni.geoip import probe_ip
 
 config.advanced.debug = True
 
@@ -82,8 +83,8 @@ class WebUIAPI(object):
         self.status = {
             "software_version": ooniprobe_version,
             "software_name": "ooniprobe",
-            "asn": config.probe_ip.geodata['asn'],
-            "country_code": config.probe_ip.geodata['countrycode'],
+            "asn": probe_ip.geodata['asn'],
+            "country_code": probe_ip.geodata['countrycode'],
             "director_started": False
         }
 
@@ -108,8 +109,8 @@ class WebUIAPI(object):
 
     def director_started(self, _):
         self.status['director_started'] = True
-        self.status["asn"] = config.probe_ip.geodata['asn']
-        self.status["country_code"] = config.probe_ip.geodata['countrycode']
+        self.status["asn"] = probe_ip.geodata['asn']
+        self.status["country_code"] = probe_ip.geodata['countrycode']
 
     @app.handle_errors(NotFound)
     def not_found(self, request, _):
@@ -168,10 +169,15 @@ class WebUIAPI(object):
 
         return self.render_json({"command": "deck-list"}, request)
 
+    @defer.inlineCallbacks
     def run_deck(self, deck):
-        deck.setup()
-        # Here there is a dangling deferred
-        deck.run(self.director)
+        # These are dangling deferreds
+        try:
+            yield deck.setup()
+            yield deck.run(self.director)
+        except:
+            self.director_event_poller.notify(DirectorEvent("error",
+                                                            "Failed to start deck"))
 
     @app.route('/api/nettest/<string:test_name>/start', methods=["POST"])
     def api_nettest_start(self, request, test_name):
@@ -219,7 +225,10 @@ class WebUIAPI(object):
 
     @app.route('/api/input', methods=["GET"])
     def api_input_list(self, request):
-        return self.render_json(self.director.input_store.list(), request)
+        input_store_list = self.director.input_store.list()
+        for key, value in input_store_list.items():
+            value.pop('filepath')
+        return self.render_json(input_store_list, request)
 
     @app.route('/api/input/<string:input_id>/content', methods=["GET"])
     def api_input_content(self, request, input_id):





More information about the tor-commits mailing list