[tor-commits] [bridgedb/master] Clean up *.unparseable descriptor files more than 24 hours old.

isis at torproject.org isis at torproject.org
Thu Jul 28 16:41:11 UTC 2016


commit b6c740f79a4b9263e49dfe9f14314425b496aa40
Author: Isis Lovecruft <isis at torproject.org>
Date:   Mon May 2 14:03:21 2016 +0000

    Clean up *.unparseable descriptor files more than 24 hours old.
    
     * ADD a config option for a new DELETE_UNPARSEABLE_DESCRIPTORS task, which,
       by default, runs once every 24 hours and removes *.unparseable descriptor
       files more than 24 hours old.
     * ADD file deletion utility `bridgedb.util.deleteFilesOlderThan` and a
       scheduled function, `bridgedb.runner.cleanupUnparseableDescriptors`, which
       calls the former.
     * ADD unittests for `bridgedb.util.deleteFilesOlderThan`.
     * FIXES #18237: https://bugs.torproject.org/18237
---
 bridgedb.conf      |  2 ++
 bridgedb/Main.py   | 12 ++++++++++++
 bridgedb/runner.py | 29 +++++++++++++++++++++++++++++
 bridgedb/util.py   | 21 +++++++++++++++++++++
 test/test_util.py  | 31 +++++++++++++++++++++++++++++++
 5 files changed, 95 insertions(+)

diff --git a/bridgedb.conf b/bridgedb.conf
index 7805e15..52a8ca7 100644
--- a/bridgedb.conf
+++ b/bridgedb.conf
@@ -260,6 +260,8 @@ TASKS = {
     # scripts/get-exit-list) and add those exit relays to the list of proxies
     # loaded from the PROXY_LIST_FILES:
     'GET_TOR_EXIT_LIST': 3 * 60 * 60,
+    # Delete *.unparseable descriptor files which are more than 24 hours old:
+    'DELETE_UNPARSEABLE_DESCRIPTORS': 24 * 60 * 60,
 }
 
 # SUPPORTED_TRANSPORTS is a dictionary mapping Pluggable Transport methodnames
diff --git a/bridgedb/Main.py b/bridgedb/Main.py
index b281d21..bf4c213 100644
--- a/bridgedb/Main.py
+++ b/bridgedb/Main.py
@@ -23,6 +23,7 @@ from twisted.internet import task
 from bridgedb import crypto
 from bridgedb import persistent
 from bridgedb import proxy
+from bridgedb import runner
 from bridgedb import util
 from bridgedb.bridges import MalformedBridgeInfo
 from bridgedb.bridges import MissingServerDescriptorDigest
@@ -453,6 +454,17 @@ def run(options, reactor=reactor):
                 state.proxies,
                 config.SERVER_PUBLIC_EXTERNAL_IP)
 
+        if config.TASKS.get('DELETE_UNPARSEABLE_DESCRIPTORS'):
+            delUnparseableSecs = config.TASKS['DELETE_UNPARSEABLE_DESCRIPTORS']
+        else:
+            delUnparseableSecs = 24 * 60 * 60  # Default to 24 hours
+
+        # We use the directory name of STATUS_FILE, since that directory
+        # is where the *.unparseable descriptor files will be written to.
+        tasks['DELETE_UNPARSEABLE_DESCRIPTORS'] = task.LoopingCall(
+            runner.cleanupUnparseableDescriptors,
+            os.path.dirname(config.STATUS_FILE), delUnparseableSecs)
+
         # Schedule all configured repeating tasks:
         for name, seconds in config.TASKS.items():
             if seconds:
diff --git a/bridgedb/runner.py b/bridgedb/runner.py
index 6ac069f..597b1b2 100644
--- a/bridgedb/runner.py
+++ b/bridgedb/runner.py
@@ -17,12 +17,41 @@
 
 from __future__ import print_function
 
+import glob
 import logging
 import sys
 import os
 
 from twisted.python import procutils
 
+from bridgedb import util
+
+
+def cleanupUnparseableDescriptors(directory, seconds):
+    """Delete any ``*.unparseable`` descriptor files in ``directory`` with
+    mtimes more than ``seconds`` ago.
+
+    The :func:`bridgedb.parsers._copyUnparseableDescriptors` function
+    will make copies of any files we attempt to parse which contain
+    unparseable descriptors.  This function should run on a timer to
+    clean them up.
+
+    :param str directory: The directory in which to search for
+        ``*.unparseable`` and ``*.unparseable.xz`` files.
+    :param int seconds: If a file's mtime is more than this many seconds
+        in the past, it will be deleted.
+    """
+    files = []
+
+    for pattern in ["*.unparseable", "*.unparseable.xz"]:
+        files.extend(glob.glob(os.sep.join([directory, pattern])))
+
+    if files:
+        logging.info("Deleting old unparseable descriptor files...")
+        logging.debug("Considered for deletion: %s" % "\n".join(files))
+
+        deleted = util.deleteFilesOlderThan(files, seconds)
+        logging.info("Deleted %d unparseable descriptor files." % len(deleted))
 
 def find(filename):
     """Find the executable ``filename``.
diff --git a/bridgedb/util.py b/bridgedb/util.py
index 4c558c4..42e4664 100644
--- a/bridgedb/util.py
+++ b/bridgedb/util.py
@@ -18,6 +18,7 @@ import logging
 import logging.config
 import logging.handlers
 import os
+import time
 
 from twisted.python import components
 
@@ -144,6 +145,26 @@ def configureLogging(cfg):
     logging.info("Level: %s", logLevel)
     logging.info("Safe Logging: %sabled" % ("En" if safelogging else "Dis"))
 
+def deleteFilesOlderThan(files, seconds):
+    """Delete any file in ``files`` with an mtime more than ``seconds`` ago.
+
+    :param list files: A list of paths to files which should be
+        considered for deletion.
+    :param int seconds: If a file's mtime is more than this many seconds
+        in the past, it will be deleted.
+    :rtype: list
+    :returns: A list of the deleted files.
+    """
+    deleted = []
+    now = int(time.time())
+
+    for fn in files:
+        if (now - os.stat(fn).st_mtime) > seconds:
+            os.unlink(fn)
+            deleted.append(fn)
+
+    return deleted
+
 def levenshteinDistance(s1, s2, len1=None, len2=None,
                         offset1=0, offset2=0, memo=None):
     """Compute the Levenstein Distance between two strings.
diff --git a/test/test_util.py b/test/test_util.py
index da4ddf4..848ce12 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -17,6 +17,7 @@ from __future__ import unicode_literals
 
 import logging
 import os
+import time
 
 from twisted.mail.smtp import Address
 from twisted.trial import unittest
@@ -71,6 +72,36 @@ class MiscLoggingUtilTests(unittest.TestCase):
         util.logging.info("BridgeDB's email address: bridges at torproject.org")
 
 
+class FileUtilityTests(unittest.TestCase):
+    """Unittests for `bridgedb.util.deleteFilesOlderThan`."""
+
+    def setUp(self):
+        self._directory = self.id()
+        self.newfile = os.sep.join([self._directory, "newfile"])
+        self.oldfile = os.sep.join([self._directory, "oldfile"])
+        self.testfiles = [self.newfile, self.oldfile]
+        os.mkdir(self._directory)
+
+        now = time.time()
+
+        for fn in self.testfiles:
+            with open(fn, "w") as fd:
+                fd.flush()
+
+        # Change the mtime of the "oldfile" to be two days old:
+        os.utime(self.oldfile, (now, now - (48 * 60 * 60)))
+
+    def test_deleteFilesOlderThan_deletes_old_files(self):
+        """The function should delete appropriate files."""
+        deleted = util.deleteFilesOlderThan(self.testfiles ,24 * 60 * 60)
+        self.assertIn(self.oldfile, deleted)
+
+    def test_deleteFilesOlderThan_keeps_new_files(self):
+        """The function should not delete files newer than the cutoff."""
+        deleted = util.deleteFilesOlderThan(self.testfiles ,24 * 60 * 60)
+        self.assertNotIn(self.newfile, deleted)
+
+
 class LevenshteinDistanceTests(unittest.TestCase):
     """Unittests for `bridgedb.util.levenshteinDistance."""
 





More information about the tor-commits mailing list