commit 0d5ed52e5906260e142ef9cfa12752810fd1ffa2
Author: Philipp Winter <phw@nymity.ch>
Date: Mon Aug 12 13:52:10 2019 -0700
Fix broken download of Tor exit relays.
The periodic download of Tor exit relays was broken for a number of reasons. Here's what we're doing to fix this issue:
1. We're moving the proxies variable outside of state. The problem is that the state object is written to disk and reloaded every 30 minutes (a cron job triggers this reload by running ~/bridgedb-admin/reload-bridgedb). The reload causes state.proxies to be at a different memory address than before the reload, which breaks the looping call that fetches new exit relays every three hours. This looping call expects to write exit relays to the same memory address each time, but after BridgeDB's first reload, the memory address changes, so exit relays are no longer updated. There's no need to keep our proxies in BridgeDB's state. We fetch them continuously anyway, and also right after BridgeDB starts.
2. We're adding the method replaceExitRelays(). Once we have a new batch of exit relay addresses, this method allows us to completely overwrite the past batch.
3. We're adding the argument "setStdout=False" to the call to startLogging() because otherwise we're missing the download script's output.
---
 bridgedb/main.py      |  7 +++----
 bridgedb/proxy.py     | 15 +++++++++++++--
 scripts/get-tor-exits |  2 +-
 3 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/bridgedb/main.py b/bridgedb/main.py index c1e6250..1a617b2 100644 --- a/bridgedb/main.py +++ b/bridgedb/main.py @@ -359,7 +359,6 @@ def run(options, reactor=reactor): moatDistributor = None
# Save our state - state.proxies = proxies state.key = key state.save()
@@ -411,13 +410,13 @@ def run(options, reactor=reactor): logging.info("Reloading the list of open proxies...") for proxyfile in cfg.PROXY_LIST_FILES: logging.info("Loading proxies from: %s" % proxyfile) - proxy.loadProxiesFromFile(proxyfile, state.proxies, removeStale=True) + proxy.loadProxiesFromFile(proxyfile, proxies, removeStale=True)
logging.info("Reparsing bridge descriptors...") (hashring, emailDistributorTmp, ipDistributorTmp, - moatDistributorTmp) = createBridgeRings(cfg, state.proxies, key) + moatDistributorTmp) = createBridgeRings(cfg, proxies, key) logging.info("Bridges loaded: %d" % len(hashring))
# Initialize our DB. @@ -483,7 +482,7 @@ def run(options, reactor=reactor): if config.TASKS['GET_TOR_EXIT_LIST']: tasks['GET_TOR_EXIT_LIST'] = task.LoopingCall( proxy.downloadTorExits, - state.proxies, + proxies, config.SERVER_PUBLIC_EXTERNAL_IP)
if config.TASKS.get('DELETE_UNPARSEABLE_DESCRIPTORS'): diff --git a/bridgedb/proxy.py b/bridgedb/proxy.py index 39cd109..6d93c48 100644 --- a/bridgedb/proxy.py +++ b/bridgedb/proxy.py @@ -51,7 +51,7 @@ def downloadTorExits(proxyList, ipaddress, port=443, protocol=None): """ proto = ExitListProtocol() if protocol is None else protocol() args = [proto.script, '--stdout', '-a', ipaddress, '-p', str(port)] - proto.deferred.addCallback(proxyList.addExitRelays) + proto.deferred.addCallback(proxyList.replaceExitRelays) proto.deferred.addErrback(logging.exception) transport = reactor.spawnProcess(proto, proto.script, args=args, env={}) return proto.deferred @@ -76,7 +76,7 @@ def loadProxiesFromFile(filename, proxySet=None, removeStale=False): :returns: A list of all the proxies listed in the **files* (regardless of whether they were added or removed). """ - logging.info("Reloading proxy lists...") + logging.info("Reloading proxy lists from file %s" % filename)
addresses = []
@@ -256,6 +256,17 @@ class ProxySet(MutableSet): logging.info("Loading exit relays into proxy list...") [self.add(x, self._exitTag) for x in relays]
+ def replaceExitRelays(self, relays): + existingExitRelays = self.getAllWithTag(self._exitTag) + logging.debug("Replacing %d existing with %d new exit relays." % + (len(existingExitRelays), len(relays))) + + for relay in existingExitRelays: + self.discard(relay) + + self.addExitRelays(relays) + + def getTag(self, ip): """Get the tag for an **ip** in this ``ProxySet``, if available.
diff --git a/scripts/get-tor-exits b/scripts/get-tor-exits index fcd9fff..6ab2201 100755 --- a/scripts/get-tor-exits +++ b/scripts/get-tor-exits @@ -36,7 +36,7 @@ from twisted.internet.error import DNSLookupError from twisted.internet.error import TimeoutError
-log.startLogging(sys.stderr) +log.startLogging(sys.stderr, setStdout=False)
def backupFile(filename):
tor-commits@lists.torproject.org