[tor-commits] [onionperf/master] Integrate reprocessing mode into analysis mode.

karsten at torproject.org
Tue May 12 10:27:07 UTC 2020


commit f0b985e4e1b2a20c1968014e25126f874de684c6
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon May 11 10:16:36 2020 +0200

    Integrate reprocessing mode into analysis mode.
    
    Tweaked by acute to carry the -s and -t arguments to reprocessing.
    
    Implements #34142.
---
 onionperf/onionperf                  | 106 +++++++++--------------------------
 onionperf/reprocessing.py            |  13 +++--
 onionperf/tests/test_reprocessing.py |   6 +-
 3 files changed, 37 insertions(+), 88 deletions(-)

diff --git a/onionperf/onionperf b/onionperf/onionperf
index 536d6e2..cb1899c 100755
--- a/onionperf/onionperf
+++ b/onionperf/onionperf
@@ -63,18 +63,12 @@ The standard way to run this subcommand is to give the path to a TGen and/or
 a TorCtl file (e.g., those produced with the `measure` subcommand) using the
 `--tgen` and `--torctl` options, and the statistics file resulting from the
 analysis will be dumped to `onionperf.analysis.json.xz`.
-(See https://collector.torproject.org/#type-torperf.)
+Another way to run this subcommand is to give two paths to directories
+containing TGen and TorCtl files to have files matched by filename and analysis
+files dumped to `%Y-%m-%d.onionperf.analysis.json.xz`.
+(See https://collector.torproject.org/#type-onionperf.)
 Stats files in the default Torperf format can also be exported.
 """
-DESC_REPROCESS = """
-Reprocesses results in bulk from the TGen traffic generator and Tor.
-
-This subcommand scans for TGen and Tor log files in the given paths,
-matches them by filename and then runs the analysis command on each pair,
-reproducing all analysis results.
-
-This is useful when reprocessing logs in bulk, for example
-"""
 
 HELP_ANALYZE = """
 Analyze Tor and TGen output
@@ -86,9 +80,6 @@ and plots various interesting performance metrics to PDF files.
 HELP_VISUALIZE = """
 Visualize OnionPerf analysis results
 """
-HELP_REPROCESS = """
-Reprocesses all OnionPerf log files in the given paths
-"""
 
 logging.basicConfig(format='%(asctime)s %(created)f [onionperf] [%(levelname)s] %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')
 logging.getLogger("stem").setLevel(logging.WARN)
@@ -240,13 +231,13 @@ built-in Torperf (50KiB, 1MiB, 5MiB) traffic model""",
     analyze_parser.set_defaults(func=analyze, formatter_class=my_formatter_class)
 
     analyze_parser.add_argument('--tgen',
-        help="""a file PATH to a TGen logfile""",
+        help="""a file or directory PATH to a TGen logfile or logfile directory""",
         metavar="PATH", type=type_str_path_in,
         action="store", dest="tgen_logpath",
         default=None)
 
     analyze_parser.add_argument('--torctl',
-        help="""a file PATH to a TorCtl logfile (in the format output by the monitor subcommmand)""",
+        help="""a file or directory PATH to a TorCtl logfile or logfile directory (in the format output by the monitor subcommmand)""",
         metavar="PATH", type=type_str_path_in,
         action="store", dest="torctl_logpath",
         default=None)
@@ -355,43 +346,6 @@ files generated by this script will be written""",
         action="store", dest="lineformats",
         default=util.LINEFORMATS)
 
-    # reprocess
-    reprocess_parser = sub_parser.add_parser('reprocess', description=DESC_REPROCESS, help=HELP_REPROCESS,
-        formatter_class=my_formatter_class)
-    reprocess_parser.set_defaults(func=reprocess, formatter_class=my_formatter_class)
-
-    reprocess_parser.add_argument('--tgen-dir',
-        help="""a dictory PATH to a TGen logfile directory""",
-        metavar="PATH", type=type_str_path_in,
-        action="store", dest="tgen_dirpath",
-        default=None)
-
-    reprocess_parser.add_argument('--torctl-dir',
-        help="""a directory PATH to a TorCtl logfile directory""",
-        metavar="PATH", type=type_str_path_in,
-        action="store", dest="torctl_dirpath",
-        default=None)
-
-    reprocess_parser.add_argument('-p', '--prefix',
-        help="""A directory PATH prefix where the processed data
-files generated by this script will be written""",
-        metavar="PATH", type=type_str_dir_path_out,
-        action="store", dest="prefix",
-        default=os.getcwd())
-
-    reprocess_parser.add_argument('-d', '--date-filter',
-        help="""a DATE string in the form YYYY-MM-DD, all log messages that did not occur on this date will be filtered out of the analysis""",
-        metavar="DATE", type=type_str_date_in,
-        action="store", dest="date_filter",
-        default=None)
-
-    reprocess_parser.add_argument('-n', '--nickname',
-        help="""a nickname STRING that identifies the machine where the input logfiles were produced""",
-        metavar="STRING", type=str,
-        action="store", dest="nickname",
-        default=None)
-
-
     # get args and call the command handler for the chosen mode
     args = main_parser.parse_args()
     args.func(args)
@@ -450,23 +404,32 @@ def measure(args):
         logging.info("Please fix path errors to continue")
 
 def analyze(args):
-    from onionperf.analysis import Analysis
 
     if args.tgen_logpath is None and args.torctl_logpath is None:
         logging.warning("No logfile paths were given, nothing will be analyzed")
-        return
-
-    analysis = Analysis(nickname=args.nickname, ip_address=args.ip_address)
 
-    if args.tgen_logpath is not None:
-        analysis.add_tgen_file(args.tgen_logpath)
-    if args.torctl_logpath is not None:
-        analysis.add_torctl_file(args.torctl_logpath)
+    elif (args.tgen_logpath is None or os.path.isfile(args.tgen_logpath)) and (args.torctl_logpath is None or os.path.isfile(args.torctl_logpath)):
+        from onionperf.analysis import Analysis
+        analysis = Analysis(nickname=args.nickname, ip_address=args.ip_address)
+        if args.tgen_logpath is not None:
+            analysis.add_tgen_file(args.tgen_logpath)
+        if args.torctl_logpath is not None:
+            analysis.add_torctl_file(args.torctl_logpath)
+        analysis.analyze(args.do_simple, date_filter=args.date_filter)
+        analysis.save(output_prefix=args.prefix)
+        if args.save_torperf:
+            analysis.export_torperf_version_1_1(output_prefix=args.prefix, do_compress=False)
+
+    elif args.tgen_logpath is not None and os.path.isdir(args.tgen_logpath) and args.torctl_logpath is not None and os.path.isdir(args.torctl_logpath):
+        from onionperf import reprocessing
+        tgen_logs = reprocessing.collect_logs(args.tgen_logpath, '*tgen.log*')
+        torctl_logs = reprocessing.collect_logs(args.torctl_logpath, '*torctl.log*')
+        log_pairs = reprocessing.match(tgen_logs, torctl_logs, args.date_filter)
+        logging.info("Found {0} matching log pairs to be reprocessed".format(len(log_pairs)))
+        reprocessing.multiprocess_logs(log_pairs, args.prefix, args.nickname, args.save_torperf, args.do_simple)
 
-    analysis.analyze(args.do_simple, date_filter=args.date_filter)
-    analysis.save(output_prefix=args.prefix)
-    if args.save_torperf:
-        analysis.export_torperf_version_1_1(output_prefix=args.prefix, do_compress=False)
+    else:
+        logging.error("Given paths were an unrecognized mix of file and directory paths, nothing will be analyzed")
 
 def visualize(args):
     from onionperf.visualization import TGenVisualization, TorVisualization
@@ -489,21 +452,6 @@ def visualize(args):
     tgen_viz.plot_all(args.prefix)
     tor_viz.plot_all(args.prefix)
 
-def reprocess(args):
-    from onionperf import reprocessing
-    if args.tgen_dirpath is None or args.torctl_dirpath is None:
-        logging.error("Required directory paths were not given - nothing will be reprocessed")
-        return
-    elif not os.path.isdir(args.tgen_dirpath) or not os.path.isdir(args.torctl_dirpath):
-        logging.error("One or more given paths do not exist or are not directories - nothing will be reprocessed")
-        return
-    else:
-        tgen_logs = reprocessing.collect_logs(args.tgen_dirpath, '*tgen.log')
-        torctl_logs = reprocessing.collect_logs(args.torctl_dirpath, '*torctl.log')
-        log_pairs = reprocessing.match(tgen_logs, torctl_logs, args.date_filter)
-        logging.info("Found {0} matching log pairs to be reprocessed".format(len(log_pairs)))
-        reprocessing.multiprocess_logs(log_pairs, args.prefix, args.nickname)
-
 def type_nonnegative_integer(value):
     i = int(value)
     if i < 0: raise argparse.ArgumentTypeError("'%s' is an invalid non-negative int value" % value)
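
For reference, the directory mode added to the analyze subcommand above boils down
to the following flow through the reprocessing helpers. This is a rough, illustrative
sketch: the directory paths, output prefix, and the save_torperf/do_simple values
below are placeholders, not defaults of the tool.

    from onionperf import reprocessing

    # Placeholder inputs; in the tool these come from --tgen, --torctl and --prefix.
    tgen_dir = "/srv/onionperf/tgen-logs"
    torctl_dir = "/srv/onionperf/torctl-logs"
    out_prefix = "/srv/onionperf/analysis"

    # Collect per-day log files and pair them up by filename (date filter optional).
    tgen_logs = reprocessing.collect_logs(tgen_dir, '*tgen.log*')
    torctl_logs = reprocessing.collect_logs(torctl_dir, '*torctl.log*')
    log_pairs = reprocessing.match(tgen_logs, torctl_logs, None)

    # Analyze each pair in parallel; the new keyword arguments carry the analyze
    # subcommand's save_torperf and do_simple settings into every worker.
    reprocessing.multiprocess_logs(log_pairs, out_prefix, nick=None,
                                   save_torperf=True, do_simple=False)
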
diff --git a/onionperf/reprocessing.py b/onionperf/reprocessing.py
index 7acf539..48f67bb 100644
--- a/onionperf/reprocessing.py
+++ b/onionperf/reprocessing.py
@@ -46,23 +46,24 @@ def match(tgen_logs, tor_logs, date_filter):
     return log_pairs
 
 
-def analyze_func(prefix, nick, pair):
+def analyze_func(prefix, nick, save_torperf, do_simple, pair):
     analysis = Analysis(nickname=nick)
     logging.info('Analysing pair for date {0}'.format(pair[2]))
     analysis.add_tgen_file(pair[0])
     analysis.add_torctl_file(pair[1])
-    analysis.analyze(do_simple=False, date_filter=pair[2])
+    analysis.analyze(do_simple=do_simple, date_filter=pair[2])
     analysis.save(output_prefix=prefix)
-    analysis.export_torperf_version_1_1(
-        output_prefix=prefix, do_compress=False)
+    if save_torperf:
+        analysis.export_torperf_version_1_1(
+            output_prefix=prefix, do_compress=False)
     return 1
 
 
-def multiprocess_logs(log_pairs, prefix, nick=None):
+def multiprocess_logs(log_pairs, prefix, nick=None, save_torperf=False, do_simple=False):
     pool = Pool(cpu_count())
     analyses = None
     try:
-        func = partial(analyze_func, prefix, nick)
+        func = partial(analyze_func, prefix, nick, save_torperf, do_simple)
         mr = pool.map_async(func, log_pairs)
         pool.close()
         while not mr.ready():
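
The extra save_torperf and do_simple parameters reach each worker through
functools.partial, which binds the fixed arguments up front so the pool only has to
supply the varying log pair. A minimal, self-contained sketch of that pattern, with
a stand-in worker rather than OnionPerf's own analyze_func:

    from functools import partial
    from multiprocessing import Pool, cpu_count

    def analyze_one(prefix, nick, save_torperf, do_simple, pair):
        # Stand-in worker: process a single (tgen_log, torctl_log, date) tuple.
        return 1

    if __name__ == "__main__":
        pairs = [("a.tgen.log", "a.torctl.log", None)]
        # Bind everything except the pair, mirroring the partial() call above.
        func = partial(analyze_one, "/tmp/out", None, True, False)
        with Pool(cpu_count()) as pool:
            print(pool.map(func, pairs))
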
diff --git a/onionperf/tests/test_reprocessing.py b/onionperf/tests/test_reprocessing.py
index efacc5f..a120587 100644
--- a/onionperf/tests/test_reprocessing.py
+++ b/onionperf/tests/test_reprocessing.py
@@ -61,7 +61,7 @@ def test_log_match_with_wrong_filter_date():
 def test_analyze_func_json():
     pair = (DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.tgen.log', DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.torctl.log', datetime.datetime(2019, 1, 10, 0, 0))
     work_dir = tempfile.mkdtemp()
-    reprocessing.analyze_func(work_dir, None, pair)
+    reprocessing.analyze_func(work_dir, None, True, False, pair)
     json_file = os.path.join(work_dir, "2019-01-10.onionperf.analysis.json.xz")
     assert(os.path.exists(json_file))
     for i in ['51200',  '5242880', '1048576']: 
@@ -72,7 +72,7 @@ def test_analyze_func_json():
 def test_multiprocess_logs():
     pairs = [(DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.tgen.log', DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.torctl.log', datetime.datetime(2019, 1, 10, 0, 0))]
     work_dir = tempfile.mkdtemp()
-    reprocessing.multiprocess_logs(pairs, work_dir)
+    reprocessing.multiprocess_logs(pairs, work_dir, save_torperf=True)
     json_file = os.path.join(work_dir, "2019-01-10.onionperf.analysis.json.xz")
     assert(os.path.exists(json_file))
     for i in ['51200',  '5242880', '1048576']: 
@@ -85,7 +85,7 @@ def test_end_to_end():
     torctl_logs = reprocessing.collect_logs(DATA_DIR, '*torctl.log')
     log_pairs =  reprocessing.match(tgen_logs, torctl_logs, None)
     work_dir = tempfile.mkdtemp()
-    reprocessing.multiprocess_logs(log_pairs, work_dir)
+    reprocessing.multiprocess_logs(log_pairs, work_dir, save_torperf=True)
     json_file = os.path.join(work_dir, "2019-01-10.onionperf.analysis.json.xz")
     assert(os.path.exists(json_file))
     for i in ['51200',  '5242880', '1048576']: 


