commit 359f87a3dcec921f08af5a2c6b71a0913c8c0b2c
Author: juga0 <juga@riseup.net>
Date:   Tue Jul 17 16:37:31 2018 +0000
Revert "Refactor to clean v3bw files too"
This reverts commit 380461e56bd87bd2709abe591b6f4b11e1481e35.
---
 sbws/core/cleanup.py | 87 +++++++++++----------------------------------------
 1 file changed, 18 insertions(+), 69 deletions(-)
diff --git a/sbws/core/cleanup.py b/sbws/core/cleanup.py index 014b85d..149cdd2 100644 --- a/sbws/core/cleanup.py +++ b/sbws/core/cleanup.py @@ -10,8 +10,6 @@ import shutil import logging import time
-from sbws.util.timestamp import unixts_to_dt_obj - log = logging.getLogger(__name__)
@@ -29,21 +27,17 @@ def gen_parser(sub): p.add_argument('--dry-run', action='store_true', help='Don\'t actually compress or delete anything')
- p.add_argument('--v3bw', action='store_true', help='Clean also v3bw files') -
-def _get_older_files_than(dname, time_delta, extensions, is_v3bw=False): +def _get_older_files_than(dname, num_days_ago, extensions): assert os.path.isdir(dname) - assert isinstance(time_delta, int) + assert isinstance(num_days_ago, int) assert isinstance(extensions, list) for ext in extensions: assert isinstance(ext, str) assert ext[0] == '.' # Determine oldest allowed date today = datetime.utcfromtimestamp(time.time()) - oldest_day = today - timedelta(days=time_delta) - if is_v3bw: - oldest = today - timedelta(minutes=time_delta) + oldest_day = today - timedelta(days=num_days_ago) # Compile a regex that can extract a date from a file name that looks like # /path/to/foo/YYYY-MM-DD*.extension extensions = [re.escape(e) for e in extensions] @@ -56,52 +50,38 @@ def _get_older_files_than(dname, time_delta, extensions, is_v3bw=False): for root, dirs, files in os.walk(dname): for f in files: fname = os.path.join(root, f) - if is_v3bw: # or (v3bw_ext not in fname) - # not forcing files to have correct names just the extension - _, ext = os.path.splitext(fname) - if ext not in ['.v3bw']: - log.debug('Ignoring %s because it doesn't have extension ' - '%s', fname, ext) - continue - dt = unixts_to_dt_obj(os.path.getmtime(fname)) - if dt < oldest and os.path.splitext: - yield fname - else: - match = regex.match(fname) - if not match: - log.debug('Ignoring %s because it doesn't look like ' - 'YYYY-MM-DD', fname) - continue - d = datetime(*[int(n) for n in match.group(1).split('-')]) - if d < oldest_day: - yield fname - - -def _remove_rotten_files(datadir, rotten_days, dry_run=True, is_v3bw=False): + match = regex.match(fname) + if not match: + log.debug('Ignoring %s because it doesn't look like ' + 'YYYY-MM-DD', fname) + continue + d = datetime(*[int(n) for n in match.group(1).split('-')]) + if d < oldest_day: + yield fname + + +def _remove_rotten_files(datadir, rotten_days, dry_run=True): assert os.path.isdir(datadir) assert isinstance(rotten_days, int) # Hold the lock for 
basically the entire time just in case someone else # moves files between when we get the list of files and when we try to # delete them. - exts = ['.txt', '.txt.gz'] if not is_v3bw else ['.v3bw'] with DirectoryLock(datadir): - for fname in _get_older_files_than(datadir, rotten_days, exts, - is_v3bw): + for fname in _get_older_files_than(datadir, rotten_days, + ['.txt', '.txt.gz']): log.info('Deleting %s', fname) if not dry_run: os.remove(fname)
-def _compress_stale_files(datadir, stale_days, dry_run=True, is_v3bw=False): +def _compress_stale_files(datadir, stale_days, dry_run=True): assert os.path.isdir(datadir) assert isinstance(stale_days, int) # Hold the lock for basically the entire time just in case someone else # moves files between when we get the list of files and when we try to # compress them. - exts = ['.txt', '.txt.gz'] if not is_v3bw else ['.v3bw'] with DirectoryLock(datadir): - for fname in _get_older_files_than(datadir, stale_days, exts, - is_v3bw): + for fname in _get_older_files_than(datadir, stale_days, ['.txt']): log.info('Compressing %s', fname) if dry_run: continue @@ -112,24 +92,6 @@ def _compress_stale_files(datadir, stale_days, dry_run=True, is_v3bw=False): os.remove(fname)
-def _check_validity_periods(valid, stale, rotten): - if stale - 2 < valid: - fail_hard('For safetly, cleanup/stale_* (%d) must be at least 2 ' - 'days larger than general/data_period or general/valid_ * ' - '(%d)', stale, valid) - if rotten < stale: - fail_hard('cleanup/rotten_* (%d) must be the same or larger than ' - 'cleanup/stale_* (%d)', rotten, stale) - - if stale / 2 < valid: - log.warning( - 'cleanup/stale_ (%d) is less than twice ' - 'general/data_period or general/valid_*(%d). ' - 'For ease of parsing older results ' - 'if necessary, it is recommended to make stale at least ' - 'twice the data_period.', stale, valid) - - def main(args, conf): ''' Main entry point in to the cleanup command. @@ -164,16 +126,3 @@ def main(args, conf):
_remove_rotten_files(datadir, rotten_days, dry_run=args.dry_run) _compress_stale_files(datadir, stale_days, dry_run=args.dry_run) - - if args.v3bw: - v3bw_dir = conf['paths']['v3bw_dname'] - if not os.path.isdir(datadir): - fail_hard('%s does not exist', v3bw_dir) - valid = conf.getint('general', 'valid_mins_v3bw_files') - stale = conf.getint('cleanup', 'stale_mins_v3bw_files') - rotten = conf.getint('cleanup', 'rotten_mins_v3bw_files') - _check_validity_periods(valid, stale, rotten) - _remove_rotten_files(v3bw_dir, rotten, dry_run=args.dry_run, - is_v3bw=True) - _compress_stale_files(v3bw_dir, stale, dry_run=args.dry_run, - is_v3bw=True)
tor-commits@lists.torproject.org