[bridgedb/develop] Clean up *.unparseable descriptor files more than 24 hours old.

commit b6c740f79a4b9263e49dfe9f14314425b496aa40 Author: Isis Lovecruft <isis@torproject.org> Date: Mon May 2 14:03:21 2016 +0000 Clean up *.unparseable descriptor files more than 24 hours old. * ADD a config option for a new DELETE_UNPARSEABLE_DESCRIPTORS task, which, by default, runs once every 24 hours and removes *.unparseable descriptor files more than 24 hours old. * ADD file deletion utility `bridgedb.util.deleteFilesOlderThan` and a scheduled function, `bridgedb.runner.cleanupUnparseableDescriptors`, which calls the former. * ADD unittests for `bridgedb.util.deleteFilesOlderThan`. * FIXES #18237: https://bugs.torproject.org/18237 --- bridgedb.conf | 2 ++ bridgedb/Main.py | 12 ++++++++++++ bridgedb/runner.py | 29 +++++++++++++++++++++++++++++ bridgedb/util.py | 21 +++++++++++++++++++++ test/test_util.py | 31 +++++++++++++++++++++++++++++++ 5 files changed, 95 insertions(+) diff --git a/bridgedb.conf b/bridgedb.conf index 7805e15..52a8ca7 100644 --- a/bridgedb.conf +++ b/bridgedb.conf @@ -260,6 +260,8 @@ TASKS = { # scripts/get-exit-list) and add those exit relays to the list of proxies # loaded from the PROXY_LIST_FILES: 'GET_TOR_EXIT_LIST': 3 * 60 * 60, + # Delete *.unparseable descriptor files which are more than 24 hours old: + 'DELETE_UNPARSEABLE_DESCRIPTORS': 24 * 60 * 60, } # SUPPORTED_TRANSPORTS is a dictionary mapping Pluggable Transport methodnames diff --git a/bridgedb/Main.py b/bridgedb/Main.py index b281d21..bf4c213 100644 --- a/bridgedb/Main.py +++ b/bridgedb/Main.py @@ -23,6 +23,7 @@ from twisted.internet import task from bridgedb import crypto from bridgedb import persistent from bridgedb import proxy +from bridgedb import runner from bridgedb import util from bridgedb.bridges import MalformedBridgeInfo from bridgedb.bridges import MissingServerDescriptorDigest @@ -453,6 +454,17 @@ def run(options, reactor=reactor): state.proxies, config.SERVER_PUBLIC_EXTERNAL_IP) + if config.TASKS.get('DELETE_UNPARSEABLE_DESCRIPTORS'): + delUnparseableSecs = 
config.TASKS['DELETE_UNPARSEABLE_DESCRIPTORS'] + else: + delUnparseableSecs = 24 * 60 * 60 # Default to 24 hours + + # We use the directory name of STATUS_FILE, since that directory + # is where the *.unparseable descriptor files will be written to. + tasks['DELETE_UNPARSEABLE_DESCRIPTORS'] = task.LoopingCall( + runner.cleanupUnparseableDescriptors, + os.path.dirname(config.STATUS_FILE), delUnparseableSecs) + # Schedule all configured repeating tasks: for name, seconds in config.TASKS.items(): if seconds: diff --git a/bridgedb/runner.py b/bridgedb/runner.py index 6ac069f..597b1b2 100644 --- a/bridgedb/runner.py +++ b/bridgedb/runner.py @@ -17,12 +17,41 @@ from __future__ import print_function +import glob import logging import sys import os from twisted.python import procutils +from bridgedb import util + + +def cleanupUnparseableDescriptors(directory, seconds): + """Delete any ``*.unparseable`` descriptor files in ``directory`` with + mtimes more than ``seconds`` ago. + + The :func:`bridgedb.parsers._copyUnparseableDescriptors` function + will make copies of any files we attempt to parse which contain + unparseable descriptors. This function should run on a timer to + clean them up. + + :param str directory: The directory in which to search for unparseable + descriptors. + :param int olderThan: If a file's mtime is more than this number + (in seconds), it will be deleted. + """ + files = [] + + for pattern in ["*.unparseable", "*.unparseable.xz"]: + files.extend(glob.glob(os.sep.join([directory, pattern]))) + + if files: + logging.info("Deleting old unparseable descriptor files...") + logging.debug("Considered for deletion: %s" % "\n".join(files)) + + deleted = util.deleteFilesOlderThan(files, seconds) + logging.info("Deleted %d unparseable descriptor files." % len(deleted)) def find(filename): """Find the executable ``filename``. 
diff --git a/bridgedb/util.py b/bridgedb/util.py index 4c558c4..42e4664 100644 --- a/bridgedb/util.py +++ b/bridgedb/util.py @@ -18,6 +18,7 @@ import logging import logging.config import logging.handlers import os +import time from twisted.python import components @@ -144,6 +145,26 @@ def configureLogging(cfg): logging.info("Level: %s", logLevel) logging.info("Safe Logging: %sabled" % ("En" if safelogging else "Dis")) +def deleteFilesOlderThan(files, seconds): + """Delete any file in ``files`` with an mtime more than ``seconds`` ago. + + :param list files: A list of paths to files which should be + considered for deletion. + :param int seconds: If a file's mtime is more than this number (in + seconds), it will be deleted. + :rtype: list + :returns: A list of the deleted files. + """ + deleted = [] + now = int(time.time()) + + for fn in files: + if (now - os.stat(fn).st_mtime) > seconds: + os.unlink(fn) + deleted.append(fn) + + return deleted + def levenshteinDistance(s1, s2, len1=None, len2=None, offset1=0, offset2=0, memo=None): """Compute the Levenstein Distance between two strings. 
diff --git a/test/test_util.py b/test/test_util.py index da4ddf4..848ce12 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -17,6 +17,7 @@ from __future__ import unicode_literals import logging import os +import time from twisted.mail.smtp import Address from twisted.trial import unittest @@ -71,6 +72,36 @@ class MiscLoggingUtilTests(unittest.TestCase): util.logging.info("BridgeDB's email address: bridges@torproject.org") +class FileUtilityTests(unittest.TestCase): + """Unittests for `bridgedb.util.deleteFilesOlderThan`.""" + + def setUp(self): + self._directory = self.id() + self.newfile = os.sep.join([self._directory, "newfile"]) + self.oldfile = os.sep.join([self._directory, "oldfile"]) + self.testfiles = [self.newfile, self.oldfile] + os.mkdir(self._directory) + + now = time.time() + + for fn in self.testfiles: + with open(fn, "w") as fd: + fd.flush() + + # Change the mtime of the "oldfile" to be two days old: + os.utime(self.oldfile, (now, now - (48 * 60 * 60))) + + def test_deleteFilesOlderThan_deletes_old_files(self): + """The function should delete appropriate files.""" + deleted = util.deleteFilesOlderThan(self.testfiles ,24 * 60 * 60) + self.assertIn(self.oldfile, deleted) + + def test_deleteFilesOlderThan_keeps_new_files(self): + """The function should delete appropriate files.""" + deleted = util.deleteFilesOlderThan(self.testfiles ,24 * 60 * 60) + self.assertNotIn(self.newfile, deleted) + + class LevenshteinDistanceTests(unittest.TestCase): """Unittests for `bridgedb.util.levenshteinDistance."""
participants (1)
-
isis@torproject.org