commit f0b985e4e1b2a20c1968014e25126f874de684c6
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date:   Mon May 11 10:16:36 2020 +0200
Integrate reprocessing mode into analysis mode.
Tweaked by acute to carry the -s and -t arguments to reprocessing.
Implements #34142.
---
 onionperf/onionperf                  | 106 +++++++++--------------------------
 onionperf/reprocessing.py            |  13 +++--
 onionperf/tests/test_reprocessing.py |   6 +-
 3 files changed, 37 insertions(+), 88 deletions(-)
diff --git a/onionperf/onionperf b/onionperf/onionperf index 536d6e2..cb1899c 100755 --- a/onionperf/onionperf +++ b/onionperf/onionperf @@ -63,18 +63,12 @@ The standard way to run this subcommand is to give the path to a TGen and/or a TorCtl file (e.g., those produced with the `measure` subcommand) using the `--tgen` and `--torctl` options, and the statistics file resulting from the analysis will be dumped to `onionperf.analysis.json.xz`. -(See https://collector.torproject.org/#type-torperf.) +Another way to run this subcommand is to give two paths to directories +containing TGen and TorCtl files to have files matched by filename and analysis +files dumped to `%Y-%m-%d.onionperf.analysis.json.xz`. +(See https://collector.torproject.org/#type-onionperf.) Stats files in the default Torperf format can also be exported. """ -DESC_REPROCESS = """ -Reprocesses results in bulk from the TGen traffic generator and Tor. - -This subcommand scans for TGen and Tor log files in the given paths, -matches them by filename and then runs the analysis command on each pair, -reproducing all analysis results. - -This is useful when reprocessing logs in bulk, for example -"""
HELP_ANALYZE = """ Analyze Tor and TGen output @@ -86,9 +80,6 @@ and plots various interesting performance metrics to PDF files. HELP_VISUALIZE = """ Visualize OnionPerf analysis results """ -HELP_REPROCESS = """ -Reprocesses all OnionPerf log files in the given paths -"""
logging.basicConfig(format='%(asctime)s %(created)f [onionperf] [%(levelname)s] %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S') logging.getLogger("stem").setLevel(logging.WARN) @@ -240,13 +231,13 @@ built-in Torperf (50KiB, 1MiB, 5MiB) traffic model""", analyze_parser.set_defaults(func=analyze, formatter_class=my_formatter_class)
analyze_parser.add_argument('--tgen', - help="""a file PATH to a TGen logfile""", + help="""a file or directory PATH to a TGen logfile or logfile directory""", metavar="PATH", type=type_str_path_in, action="store", dest="tgen_logpath", default=None)
analyze_parser.add_argument('--torctl', - help="""a file PATH to a TorCtl logfile (in the format output by the monitor subcommmand)""", + help="""a file or directory PATH to a TorCtl logfile or logfile directory (in the format output by the monitor subcommmand)""", metavar="PATH", type=type_str_path_in, action="store", dest="torctl_logpath", default=None) @@ -355,43 +346,6 @@ files generated by this script will be written""", action="store", dest="lineformats", default=util.LINEFORMATS)
- # reprocess - reprocess_parser = sub_parser.add_parser('reprocess', description=DESC_REPROCESS, help=HELP_REPROCESS, - formatter_class=my_formatter_class) - reprocess_parser.set_defaults(func=reprocess, formatter_class=my_formatter_class) - - reprocess_parser.add_argument('--tgen-dir', - help="""a dictory PATH to a TGen logfile directory""", - metavar="PATH", type=type_str_path_in, - action="store", dest="tgen_dirpath", - default=None) - - reprocess_parser.add_argument('--torctl-dir', - help="""a directory PATH to a TorCtl logfile directory""", - metavar="PATH", type=type_str_path_in, - action="store", dest="torctl_dirpath", - default=None) - - reprocess_parser.add_argument('-p', '--prefix', - help="""A directory PATH prefix where the processed data -files generated by this script will be written""", - metavar="PATH", type=type_str_dir_path_out, - action="store", dest="prefix", - default=os.getcwd()) - - reprocess_parser.add_argument('-d', '--date-filter', - help="""a DATE string in the form YYYY-MM-DD, all log messages that did not occur on this date will be filtered out of the analysis""", - metavar="DATE", type=type_str_date_in, - action="store", dest="date_filter", - default=None) - - reprocess_parser.add_argument('-n', '--nickname', - help="""a nickname STRING that identifies the machine where the input logfiles were produced""", - metavar="STRING", type=str, - action="store", dest="nickname", - default=None) - - # get args and call the command handler for the chosen mode args = main_parser.parse_args() args.func(args) @@ -450,23 +404,32 @@ def measure(args): logging.info("Please fix path errors to continue")
def analyze(args): - from onionperf.analysis import Analysis
if args.tgen_logpath is None and args.torctl_logpath is None: logging.warning("No logfile paths were given, nothing will be analyzed") - return - - analysis = Analysis(nickname=args.nickname, ip_address=args.ip_address)
- if args.tgen_logpath is not None: - analysis.add_tgen_file(args.tgen_logpath) - if args.torctl_logpath is not None: - analysis.add_torctl_file(args.torctl_logpath) + elif (args.tgen_logpath is None or os.path.isfile(args.tgen_logpath)) and (args.torctl_logpath is None or os.path.isfile(args.torctl_logpath)): + from onionperf.analysis import Analysis + analysis = Analysis(nickname=args.nickname, ip_address=args.ip_address) + if args.tgen_logpath is not None: + analysis.add_tgen_file(args.tgen_logpath) + if args.torctl_logpath is not None: + analysis.add_torctl_file(args.torctl_logpath) + analysis.analyze(args.do_simple, date_filter=args.date_filter) + analysis.save(output_prefix=args.prefix) + if args.save_torperf: + analysis.export_torperf_version_1_1(output_prefix=args.prefix, do_compress=False) + + elif args.tgen_logpath is not None and os.path.isdir(args.tgen_logpath) and args.torctl_logpath is not None and os.path.isdir(args.torctl_logpath): + from onionperf import reprocessing + tgen_logs = reprocessing.collect_logs(args.tgen_logpath, '*tgen.log*') + torctl_logs = reprocessing.collect_logs(args.torctl_logpath, '*torctl.log*') + log_pairs = reprocessing.match(tgen_logs, torctl_logs, args.date_filter) + logging.info("Found {0} matching log pairs to be reprocessed".format(len(log_pairs))) + reprocessing.multiprocess_logs(log_pairs, args.prefix, args.nickname, args.save_torperf, args.do_simple)
- analysis.analyze(args.do_simple, date_filter=args.date_filter) - analysis.save(output_prefix=args.prefix) - if args.save_torperf: - analysis.export_torperf_version_1_1(output_prefix=args.prefix, do_compress=False) + else: + logging.error("Given paths were an unrecognized mix of file and directory paths, nothing will be analyzed")
def visualize(args): from onionperf.visualization import TGenVisualization, TorVisualization @@ -489,21 +452,6 @@ def visualize(args): tgen_viz.plot_all(args.prefix) tor_viz.plot_all(args.prefix)
-def reprocess(args): - from onionperf import reprocessing - if args.tgen_dirpath is None or args.torctl_dirpath is None: - logging.error("Required directory paths were not given - nothing will be reprocessed") - return - elif not os.path.isdir(args.tgen_dirpath) or not os.path.isdir(args.torctl_dirpath): - logging.error("One or more given paths do not exist or are not directories - nothing will be reprocessed") - return - else: - tgen_logs = reprocessing.collect_logs(args.tgen_dirpath, '*tgen.log') - torctl_logs = reprocessing.collect_logs(args.torctl_dirpath, '*torctl.log') - log_pairs = reprocessing.match(tgen_logs, torctl_logs, args.date_filter) - logging.info("Found {0} matching log pairs to be reprocessed".format(len(log_pairs))) - reprocessing.multiprocess_logs(log_pairs, args.prefix, args.nickname) - def type_nonnegative_integer(value): i = int(value) if i < 0: raise argparse.ArgumentTypeError("'%s' is an invalid non-negative int value" % value) diff --git a/onionperf/reprocessing.py b/onionperf/reprocessing.py index 7acf539..48f67bb 100644 --- a/onionperf/reprocessing.py +++ b/onionperf/reprocessing.py @@ -46,23 +46,24 @@ def match(tgen_logs, tor_logs, date_filter): return log_pairs
-def analyze_func(prefix, nick, pair): +def analyze_func(prefix, nick, save_torperf, do_simple, pair): analysis = Analysis(nickname=nick) logging.info('Analysing pair for date {0}'.format(pair[2])) analysis.add_tgen_file(pair[0]) analysis.add_torctl_file(pair[1]) - analysis.analyze(do_simple=False, date_filter=pair[2]) + analysis.analyze(do_simple=do_simple, date_filter=pair[2]) analysis.save(output_prefix=prefix) - analysis.export_torperf_version_1_1( - output_prefix=prefix, do_compress=False) + if save_torperf: + analysis.export_torperf_version_1_1( + output_prefix=prefix, do_compress=False) return 1
-def multiprocess_logs(log_pairs, prefix, nick=None): +def multiprocess_logs(log_pairs, prefix, nick=None, save_torperf=False, do_simple=False): pool = Pool(cpu_count()) analyses = None try: - func = partial(analyze_func, prefix, nick) + func = partial(analyze_func, prefix, nick, save_torperf, do_simple) mr = pool.map_async(func, log_pairs) pool.close() while not mr.ready(): diff --git a/onionperf/tests/test_reprocessing.py b/onionperf/tests/test_reprocessing.py index efacc5f..a120587 100644 --- a/onionperf/tests/test_reprocessing.py +++ b/onionperf/tests/test_reprocessing.py @@ -61,7 +61,7 @@ def test_log_match_with_wrong_filter_date(): def test_analyze_func_json(): pair = (DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.tgen.log', DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.torctl.log', datetime.datetime(2019, 1, 10, 0, 0)) work_dir = tempfile.mkdtemp() - reprocessing.analyze_func(work_dir, None, pair) + reprocessing.analyze_func(work_dir, None, True, False, pair) json_file = os.path.join(work_dir, "2019-01-10.onionperf.analysis.json.xz") assert(os.path.exists(json_file)) for i in ['51200', '5242880', '1048576']: @@ -72,7 +72,7 @@ def test_analyze_func_json(): def test_multiprocess_logs(): pairs = [(DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.tgen.log', DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.torctl.log', datetime.datetime(2019, 1, 10, 0, 0))] work_dir = tempfile.mkdtemp() - reprocessing.multiprocess_logs(pairs, work_dir) + reprocessing.multiprocess_logs(pairs, work_dir, save_torperf=True) json_file = os.path.join(work_dir, "2019-01-10.onionperf.analysis.json.xz") assert(os.path.exists(json_file)) for i in ['51200', '5242880', '1048576']: @@ -85,7 +85,7 @@ def test_end_to_end(): torctl_logs = reprocessing.collect_logs(DATA_DIR, '*torctl.log') log_pairs = reprocessing.match(tgen_logs, torctl_logs, None) work_dir = tempfile.mkdtemp() - reprocessing.multiprocess_logs(log_pairs, work_dir) + reprocessing.multiprocess_logs(log_pairs, work_dir, 
save_torperf=True) json_file = os.path.join(work_dir, "2019-01-10.onionperf.analysis.json.xz") assert(os.path.exists(json_file)) for i in ['51200', '5242880', '1048576']: