[tor-commits] [sbws/master] Revert "Refactor to clean v3bw files too"

pastly at torproject.org pastly at torproject.org
Thu Aug 9 14:21:19 UTC 2018


commit 359f87a3dcec921f08af5a2c6b71a0913c8c0b2c
Author: juga0 <juga at riseup.net>
Date:   Tue Jul 17 16:37:31 2018 +0000

    Revert "Refactor to clean v3bw files too"
    
    This reverts commit 380461e56bd87bd2709abe591b6f4b11e1481e35.
---
 sbws/core/cleanup.py | 87 +++++++++++-----------------------------------------
 1 file changed, 18 insertions(+), 69 deletions(-)

diff --git a/sbws/core/cleanup.py b/sbws/core/cleanup.py
index 014b85d..149cdd2 100644
--- a/sbws/core/cleanup.py
+++ b/sbws/core/cleanup.py
@@ -10,8 +10,6 @@ import shutil
 import logging
 import time
 
-from sbws.util.timestamp import unixts_to_dt_obj
-
 log = logging.getLogger(__name__)
 
 
@@ -29,21 +27,17 @@ def gen_parser(sub):
     p.add_argument('--dry-run', action='store_true',
                    help='Don\'t actually compress or delete anything')
 
-    p.add_argument('--v3bw', action='store_true', help='Clean also v3bw files')
-
 
-def _get_older_files_than(dname, time_delta, extensions, is_v3bw=False):
+def _get_older_files_than(dname, num_days_ago, extensions):
     assert os.path.isdir(dname)
-    assert isinstance(time_delta, int)
+    assert isinstance(num_days_ago, int)
     assert isinstance(extensions, list)
     for ext in extensions:
         assert isinstance(ext, str)
         assert ext[0] == '.'
     # Determine oldest allowed date
     today = datetime.utcfromtimestamp(time.time())
-    oldest_day = today - timedelta(days=time_delta)
-    if is_v3bw:
-        oldest = today - timedelta(minutes=time_delta)
+    oldest_day = today - timedelta(days=num_days_ago)
     # Compile a regex that can extract a date from a file name that looks like
     # /path/to/foo/YYYY-MM-DD*.extension
     extensions = [re.escape(e) for e in extensions]
@@ -56,52 +50,38 @@ def _get_older_files_than(dname, time_delta, extensions, is_v3bw=False):
     for root, dirs, files in os.walk(dname):
         for f in files:
             fname = os.path.join(root, f)
-            if is_v3bw:  # or (v3bw_ext not in fname)
-                # not forcing files to have correct names just the extension
-                _, ext = os.path.splitext(fname)
-                if ext not in ['.v3bw']:
-                    log.debug('Ignoring %s because it doesn\'t have extension '
-                              '%s', fname, ext)
-                    continue
-                dt = unixts_to_dt_obj(os.path.getmtime(fname))
-                if dt < oldest and os.path.splitext:
-                    yield fname
-            else:
-                match = regex.match(fname)
-                if not match:
-                    log.debug('Ignoring %s because it doesn\'t look like '
-                              'YYYY-MM-DD', fname)
-                    continue
-                d = datetime(*[int(n) for n in match.group(1).split('-')])
-                if d < oldest_day:
-                    yield fname
-
-
-def _remove_rotten_files(datadir, rotten_days, dry_run=True, is_v3bw=False):
+            match = regex.match(fname)
+            if not match:
+                log.debug('Ignoring %s because it doesn\'t look like '
+                          'YYYY-MM-DD', fname)
+                continue
+            d = datetime(*[int(n) for n in match.group(1).split('-')])
+            if d < oldest_day:
+                yield fname
+
+
+def _remove_rotten_files(datadir, rotten_days, dry_run=True):
     assert os.path.isdir(datadir)
     assert isinstance(rotten_days, int)
     # Hold the lock for basically the entire time just in case someone else
     # moves files between when we get the list of files and when we try to
     # delete them.
-    exts = ['.txt', '.txt.gz'] if not is_v3bw else ['.v3bw']
     with DirectoryLock(datadir):
-        for fname in _get_older_files_than(datadir, rotten_days, exts,
-                                           is_v3bw):
+        for fname in _get_older_files_than(datadir, rotten_days,
+                                           ['.txt', '.txt.gz']):
             log.info('Deleting %s', fname)
             if not dry_run:
                 os.remove(fname)
 
 
-def _compress_stale_files(datadir, stale_days, dry_run=True, is_v3bw=False):
+def _compress_stale_files(datadir, stale_days, dry_run=True):
     assert os.path.isdir(datadir)
     assert isinstance(stale_days, int)
     # Hold the lock for basically the entire time just in case someone else
     # moves files between when we get the list of files and when we try to
     # compress them.
-    exts = ['.txt', '.txt.gz'] if not is_v3bw else ['.v3bw']
     with DirectoryLock(datadir):
-        for fname in _get_older_files_than(datadir, stale_days, exts,
-                                           is_v3bw):
+        for fname in _get_older_files_than(datadir, stale_days, ['.txt']):
             log.info('Compressing %s', fname)
             if dry_run:
                 continue
@@ -112,24 +92,6 @@ def _compress_stale_files(datadir, stale_days, dry_run=True, is_v3bw=False):
             os.remove(fname)
 
 
-def _check_validity_periods(valid, stale, rotten):
-    if stale - 2 < valid:
-        fail_hard('For safetly, cleanup/stale_* (%d) must be at least 2 '
-                  'days larger than general/data_period or general/valid_ * '
-                  '(%d)', stale, valid)
-    if rotten < stale:
-        fail_hard('cleanup/rotten_* (%d) must be the same or larger than '
-                  'cleanup/stale_* (%d)', rotten, stale)
-
-    if stale / 2 < valid:
-        log.warning(
-            'cleanup/stale_ (%d) is less than twice '
-            'general/data_period or general/valid_*(%d). '
-            'For ease of parsing older results '
-            'if necessary, it is recommended to make stale at least '
-            'twice the data_period.', stale, valid)
-
-
 def main(args, conf):
     '''
     Main entry point in to the cleanup command.
@@ -164,16 +126,3 @@ def main(args, conf):
 
     _remove_rotten_files(datadir, rotten_days, dry_run=args.dry_run)
     _compress_stale_files(datadir, stale_days, dry_run=args.dry_run)
-
-    if args.v3bw:
-        v3bw_dir = conf['paths']['v3bw_dname']
-        if not os.path.isdir(datadir):
-            fail_hard('%s does not exist', v3bw_dir)
-        valid = conf.getint('general', 'valid_mins_v3bw_files')
-        stale = conf.getint('cleanup', 'stale_mins_v3bw_files')
-        rotten = conf.getint('cleanup', 'rotten_mins_v3bw_files')
-        _check_validity_periods(valid, stale, rotten)
-        _remove_rotten_files(v3bw_dir, rotten, dry_run=args.dry_run,
-                             is_v3bw=True)
-        _compress_stale_files(v3bw_dir, stale, dry_run=args.dry_run,
-                              is_v3bw=True)





More information about the tor-commits mailing list