[tor-commits] [bridgedb/master] Fix broken download of Tor exit relays.

phw at torproject.org phw at torproject.org
Tue Aug 20 16:56:57 UTC 2019


commit 0d5ed52e5906260e142ef9cfa12752810fd1ffa2
Author: Philipp Winter <phw at nymity.ch>
Date:   Mon Aug 12 13:52:10 2019 -0700

    Fix broken download of Tor exit relays.
    
    The periodic download of Tor exit relays was broken for a number of
    reasons.  Here's what we're doing to fix this issue:
    
    1. We're moving the proxies variable outside of state.  The problem is
       that the state object is written to disk and reloaded every 30
       minutes (a cron job is triggering this reload by running
       ~/bridgedb-admin/reload-bridgedb).  The reload causes state.proxies
       to be at a different memory address than before the reload, which
       breaks the looping call that fetches new exit relays every three
       hours.  This looping call expects to write exit relays to the same
       memory address each time, but after BridgeDB's first reload the
       memory address has changed, so exit relays are no longer updated.
       There's no need to keep our proxies in BridgeDB's state.  We fetch
       them continuously anyway, and also right after BridgeDB starts.
    
    2. We're adding the method replaceExitRelays().  Once we have a new
       batch of exit relay addresses, this method allows us to completely
       overwrite the previous batch instead of only adding to it, as
       addExitRelays() does.
    
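    3. We're adding the argument "setStdout=False" to the call to
       startLogging() because otherwise Twisted redirects sys.stdout into
       the log and we're missing the download script's output, which it
       writes to stdout (it is invoked with --stdout).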
---
 bridgedb/main.py      |  7 +++----
 bridgedb/proxy.py     | 15 +++++++++++++--
 scripts/get-tor-exits |  2 +-
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/bridgedb/main.py b/bridgedb/main.py
index c1e6250..1a617b2 100644
--- a/bridgedb/main.py
+++ b/bridgedb/main.py
@@ -359,7 +359,6 @@ def run(options, reactor=reactor):
     moatDistributor = None
 
     # Save our state
-    state.proxies = proxies
     state.key = key
     state.save()
 
@@ -411,13 +410,13 @@ def run(options, reactor=reactor):
         logging.info("Reloading the list of open proxies...")
         for proxyfile in cfg.PROXY_LIST_FILES:
             logging.info("Loading proxies from: %s" % proxyfile)
-            proxy.loadProxiesFromFile(proxyfile, state.proxies, removeStale=True)
+            proxy.loadProxiesFromFile(proxyfile, proxies, removeStale=True)
 
         logging.info("Reparsing bridge descriptors...")
         (hashring,
          emailDistributorTmp,
          ipDistributorTmp,
-         moatDistributorTmp) = createBridgeRings(cfg, state.proxies, key)
+         moatDistributorTmp) = createBridgeRings(cfg, proxies, key)
         logging.info("Bridges loaded: %d" % len(hashring))
 
         # Initialize our DB.
@@ -483,7 +482,7 @@ def run(options, reactor=reactor):
         if config.TASKS['GET_TOR_EXIT_LIST']:
             tasks['GET_TOR_EXIT_LIST'] = task.LoopingCall(
                 proxy.downloadTorExits,
-                state.proxies,
+                proxies,
                 config.SERVER_PUBLIC_EXTERNAL_IP)
 
         if config.TASKS.get('DELETE_UNPARSEABLE_DESCRIPTORS'):
diff --git a/bridgedb/proxy.py b/bridgedb/proxy.py
index 39cd109..6d93c48 100644
--- a/bridgedb/proxy.py
+++ b/bridgedb/proxy.py
@@ -51,7 +51,7 @@ def downloadTorExits(proxyList, ipaddress, port=443, protocol=None):
     """
     proto = ExitListProtocol() if protocol is None else protocol()
     args = [proto.script, '--stdout', '-a', ipaddress, '-p', str(port)]
-    proto.deferred.addCallback(proxyList.addExitRelays)
+    proto.deferred.addCallback(proxyList.replaceExitRelays)
     proto.deferred.addErrback(logging.exception)
     transport = reactor.spawnProcess(proto, proto.script, args=args, env={})
     return proto.deferred
@@ -76,7 +76,7 @@ def loadProxiesFromFile(filename, proxySet=None, removeStale=False):
     :returns: A list of all the proxies listed in the **files* (regardless of
         whether they were added or removed).
     """
-    logging.info("Reloading proxy lists...")
+    logging.info("Reloading proxy lists from file %s" % filename)
 
     addresses = []
 
@@ -256,6 +256,17 @@ class ProxySet(MutableSet):
         logging.info("Loading exit relays into proxy list...")
         [self.add(x, self._exitTag) for x in relays]
 
+    def replaceExitRelays(self, relays):
+        existingExitRelays = self.getAllWithTag(self._exitTag)
+        logging.debug("Replacing %d existing with %d new exit relays." %
+                      (len(existingExitRelays), len(relays)))
+
+        for relay in existingExitRelays:
+            self.discard(relay)
+
+        self.addExitRelays(relays)
+
+
     def getTag(self, ip):
         """Get the tag for an **ip** in this ``ProxySet``, if available.
 
diff --git a/scripts/get-tor-exits b/scripts/get-tor-exits
index fcd9fff..6ab2201 100755
--- a/scripts/get-tor-exits
+++ b/scripts/get-tor-exits
@@ -36,7 +36,7 @@ from twisted.internet.error import DNSLookupError
 from twisted.internet.error import TimeoutError
 
 
-log.startLogging(sys.stderr)
+log.startLogging(sys.stderr, setStdout=False)
 
 
 def backupFile(filename):





More information about the tor-commits mailing list