commit 3a1789bddf7aecd74a9ed784f38ebb1a9f45f2de
Author: Matt Traudt <sirmatt@ksu.edu>
Date:   Wed Aug 1 19:35:53 2018 -0400
    Use juga's code to cleanup result files too
---
 sbws/core/cleanup.py | 132 +++++++++++++++++----------------------------------
 1 file changed, 43 insertions(+), 89 deletions(-)
diff --git a/sbws/core/cleanup.py b/sbws/core/cleanup.py
index ad5b286..7dc7e78 100644
--- a/sbws/core/cleanup.py
+++ b/sbws/core/cleanup.py
@@ -7,7 +7,6 @@ from sbws.util.timestamp import unixts_to_dt_obj
 from argparse import ArgumentDefaultsHelpFormatter
 from datetime import datetime
 from datetime import timedelta
-import re
 import os
 import gzip
 import shutil
@@ -64,38 +63,6 @@ def _get_files_mtime_older_than(dname, days_delta, extensions):
                 yield fname
-def _get_older_files_than(dname, num_days_ago, extensions):
-    assert os.path.isdir(dname)
-    assert isinstance(num_days_ago, int)
-    assert isinstance(extensions, list)
-    for ext in extensions:
-        assert isinstance(ext, str)
-        assert ext[0] == '.'
-    # Determine oldest allowed date
-    today = datetime.utcfromtimestamp(time.time())
-    oldest_day = today - timedelta(days=num_days_ago)
-    # Compile a regex that can extract a date from a file name that looks like
-    # /path/to/foo/YYYY-MM-DD*.extension
-    extensions = [re.escape(e) for e in extensions]
-    day_part = '[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]'
-    regex = re.compile(r'^.*/({}).*({})$'
-                       .format(day_part, '|'.join(extensions)))
-    # Walk through all files in the given dname, find files that match the
-    # regex, and yield the ones that contain a date in the file name that is
-    # too old.
-    for root, dirs, files in os.walk(dname):
-        for f in files:
-            fname = os.path.join(root, f)
-            match = regex.match(fname)
-            if not match:
-                log.debug('Ignoring %s because it doesn\'t look like '
-                          'YYYY-MM-DD', fname)
-                continue
-            d = datetime(*[int(n) for n in match.group(1).split('-')])
-            if d < oldest_day:
-                yield fname
-
-
 def _delete_files(dname, files, dry_run=True):
     """Delete the files passed as argument."""
     assert os.path.isdir(dname)
@@ -108,20 +75,6 @@ def _delete_files(dname, files, dry_run=True):
             os.remove(fname)
-def _remove_rotten_files(datadir, rotten_days, dry_run=True):
-    assert os.path.isdir(datadir)
-    assert isinstance(rotten_days, int)
-    # Hold the lock for basically the entire time just in case someone else
-    # moves files between when we get the list of files and when we try to
-    # delete them.
-    with DirectoryLock(datadir):
-        for fname in _get_older_files_than(datadir, rotten_days,
-                                           ['.txt', '.txt.gz']):
-            log.info('Deleting %s', fname)
-            if not dry_run:
-                os.remove(fname)
-
-
 def _compress_files(dname, files, dry_run=True):
     """Compress the files passed as argument."""
     assert os.path.isdir(dname)
@@ -139,24 +92,6 @@ def _compress_files(dname, files, dry_run=True):
             os.remove(fname)
-def _compress_stale_files(datadir, stale_days, dry_run=True):
-    assert os.path.isdir(datadir)
-    assert isinstance(stale_days, int)
-    # Hold the lock for basically the entire time just in case someone else
-    # moves files between when we get the list of files and when we try to
-    # compress them.
-    with DirectoryLock(datadir):
-        for fname in _get_older_files_than(datadir, stale_days, ['.txt']):
-            log.info('Compressing %s', fname)
-            if dry_run:
-                continue
-            with open(fname, 'rt') as in_fd:
-                out_fname = fname + '.gz'
-                with gzip.open(out_fname, 'wt') as out_fd:
-                    shutil.copyfileobj(in_fd, out_fd)
-            os.remove(fname)
-
-
 def _check_validity_periods_v3bw(compress_after_days, delete_after_days):
     if 1 <= compress_after_days and compress_after_days < delete_after_days:
         return True
@@ -164,6 +99,26 @@ def _check_validity_periods_v3bw(compress_after_days, delete_after_days):
               "after a bigger number of days.")
+def _check_validity_periods_results(
+        data_period, compress_after_days, delete_after_days):
+    if compress_after_days - 2 < data_period:
+        fail_hard(
+            'For safety, cleanup/stale_days (%d) must be at least 2 days '
+            'larger than general/data_period (%d)', compress_after_days,
+            data_period)
+    if delete_after_days < compress_after_days:
+        fail_hard(
+            'cleanup/rotten_days (%d) must be the same or larger than '
+            'cleanup/stale_days (%d)', delete_after_days, compress_after_days)
+    if compress_after_days / 2 < data_period:
+        log.warning(
+            'cleanup/stale_days (%d) is less than twice '
+            'general/data_period (%d). For ease of parsing older results '
+            'if necessary, it is recommended to make stale_days at least '
+            'twice the data_period.', compress_after_days, data_period)
+    return True
+
+
 def _clean_v3bw_files(args, conf):
     v3bw_dname = conf['paths']['v3bw_dname']
     if not os.path.isdir(v3bw_dname):
@@ -186,6 +141,28 @@ def _clean_v3bw_files(args, conf):
     _compress_files(v3bw_dname, files_to_compress, dry_run=args.dry_run)
+def _clean_result_files(args, conf):
+    datadir = conf['paths']['datadir']
+    if not os.path.isdir(datadir):
+        fail_hard('%s does not exist', datadir)
+    data_period = conf.getint('general', 'data_period')
+    compress_after_days = conf.getint('cleanup', 'stale_days')
+    delete_after_days = conf.getint('cleanup', 'rotten_days')
+    _check_validity_periods_results(
+        data_period, compress_after_days, delete_after_days)
+
+    # first delete so that the files to be deleted are not compressed first
+    files_to_delete = _get_files_mtime_older_than(
+        datadir, delete_after_days, ['.txt', '.gz'])
+    _delete_files(datadir, files_to_delete, dry_run=args.dry_run)
+
+    # when dry_run is true, compress will also show all the files that
+    # would have been deleted, since they are not really deleted
+    files_to_compress = _get_files_mtime_older_than(
+        datadir, compress_after_days, ['.txt'])
+    _compress_files(datadir, files_to_compress, dry_run=args.dry_run)
+
+
 def main(args, conf):
     ''' Main entry point in to the cleanup command.
@@ -200,30 +177,7 @@ def main(args, conf):
         fail_hard('Nothing to clean.')
     if not args.no_results:
-        datadir = conf['paths']['datadir']
-        if not os.path.isdir(datadir):
-            fail_hard('%s does not exist', datadir)
-
-        fresh_days = conf.getint('general', 'data_period')
-        stale_days = conf.getint('cleanup', 'stale_days')
-        rotten_days = conf.getint('cleanup', 'rotten_days')
-        if stale_days - 2 < fresh_days:
-            fail_hard('For safety, cleanup/stale_days (%d) must be at least '
-                      '2 days larger than general/data_period (%d)',
-                      stale_days, fresh_days)
-        if rotten_days < stale_days:
-            fail_hard('cleanup/rotten_days (%d) must be the same or larger '
-                      'than cleanup/stale_days (%d)', rotten_days, stale_days)
-
-        if stale_days / 2 < fresh_days:
-            log.warning(
-                'cleanup/stale_days (%d) is less than twice '
-                'general/data_period (%d). For ease of parsing older results '
-                'if necessary, it is recommended to make stale_days at least '
-                'twice the data_period.', stale_days, fresh_days)
-
-        _remove_rotten_files(datadir, rotten_days, dry_run=args.dry_run)
-        _compress_stale_files(datadir, stale_days, dry_run=args.dry_run)
+        _clean_result_files(args, conf)
     if not args.no_v3bw:
         _clean_v3bw_files(args, conf)
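The new _clean_result_files() relies on _get_files_mtime_older_than(), which this diff only shows in hunk headers. Going by its name, its signature, and the call sites above, a minimal sketch of such a helper could look like the following; the body here is an assumption for illustration, not the code shipped in sbws:

    import os
    from datetime import datetime, timedelta

    def _get_files_mtime_older_than(dname, days_delta, extensions):
        """Yield files under dname with one of the given extensions whose
        modification time is more than days_delta days in the past."""
        assert os.path.isdir(dname)
        oldest_day = datetime.utcnow() - timedelta(days=days_delta)
        for root, _, files in os.walk(dname):
            for f in files:
                fname = os.path.join(root, f)
                # select by suffix, e.g. '.txt' or '.gz' (note that '.gz'
                # also matches '.txt.gz' files)
                if not any(fname.endswith(ext) for ext in extensions):
                    continue
                # the key difference from the removed _get_older_files_than():
                # compare the filesystem mtime instead of parsing a
                # YYYY-MM-DD date out of the file name
                mtime = datetime.utcfromtimestamp(os.stat(fname).st_mtime)
                if mtime < oldest_day:
                    yield fname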
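The period checks in _check_validity_periods_results() encode three constraints: stale_days must be at least data_period + 2, rotten_days must be at least stale_days, and stale_days is recommended to be at least twice data_period. A hypothetical set of values that satisfies all three (illustrative numbers, not sbws defaults):

    # illustrative config values only, not sbws defaults
    data_period = 5     # general/data_period
    stale_days = 10     # cleanup/stale_days: compress .txt results this old
    rotten_days = 30    # cleanup/rotten_days: delete .txt/.gz results this old

    assert not (stale_days - 2 < data_period)   # 8 >= 5, so no fail_hard
    assert not (rotten_days < stale_days)       # 30 >= 10, so no fail_hard
    assert not (stale_days / 2 < data_period)   # 5.0 >= 5, so no warning

Deleting before compressing, as the comment in _clean_result_files() notes, likely matters precisely because selection is now mtime-based: compressing a rotten .txt would write a fresh .gz whose mtime restarts the clock, letting it dodge deletion for another rotten_days.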