[tor-commits] [onionperf/develop] Take out Tor summaries and the do_complete switch.

karsten at torproject.org
Thu Jul 16 07:56:03 UTC 2020


commit 95a2f8b482ee882b4ec898a7bbc76018a6d98659
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue Jul 14 10:14:22 2020 +0200

    Take out Tor summaries and the do_complete switch.
    
    Implements tpo/metrics/onionperf#40005.
---
 onionperf/analysis.py                | 70 ++++++++++++------------------------
 onionperf/measurement.py             |  2 +-
 onionperf/onionperf                  |  9 ++---
 onionperf/reprocessing.py            |  8 ++---
 onionperf/tests/test_reprocessing.py |  2 +-
 5 files changed, 30 insertions(+), 61 deletions(-)
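
In short: OPAnalysis.analyze() drops its do_complete parameter, the parsers always collect full per-circuit and per-stream data, and the precomputed Tor summaries disappear from the output. For callers, a minimal before/after sketch (log paths hypothetical, assuming OnionPerf is importable):

    from onionperf.analysis import OPAnalysis

    analysis = OPAnalysis(nickname="op-test")          # nickname is optional
    analysis.add_tgen_file("onionperf.tgen.log")       # hypothetical log paths
    analysis.add_torctl_file("onionperf.torctl.log")
    # before this commit: analysis.analyze(do_complete=True, date_filter=None)
    analysis.analyze(date_filter=None)                 # complete parsing, always
    analysis.save(output_prefix=".")
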

diff --git a/onionperf/analysis.py b/onionperf/analysis.py
index 19c0192..26b00b2 100644
--- a/onionperf/analysis.py
+++ b/onionperf/analysis.py
@@ -29,13 +29,7 @@ class OPAnalysis(Analysis):
     def add_torctl_file(self, filepath):
         self.torctl_filepaths.append(filepath)
 
-    def get_tor_bandwidth_summary(self, node, direction):
-        try:
-            return self.json_db['data'][node]['tor']['bandwidth_summary'][direction]
-        except:
-            return None
-
-    def analyze(self, do_complete=False, date_filter=None):
+    def analyze(self, date_filter=None):
         if self.did_analysis:
             return
 
@@ -47,7 +41,7 @@ class OPAnalysis(Analysis):
             if len(filepaths) > 0:
                 for filepath in filepaths:
                     logging.info("parsing log file at {0}".format(filepath))
-                    parser.parse(util.DataSource(filepath), do_complete=do_complete)
+                    parser.parse(util.DataSource(filepath), do_complete=True)
 
                 if self.nickname is None:
                     parsed_name = parser.get_name()
@@ -134,7 +128,7 @@ class OPAnalysis(Analysis):
 
 class Parser(object, metaclass=ABCMeta):
     @abstractmethod
-    def parse(self, source, do_complete):
+    def parse(self, source, do_complete=True):
         pass
     @abstractmethod
     def get_data(self):
@@ -270,14 +264,10 @@ class TorCtlParser(Parser):
 
     def __init__(self, date_filter=None):
         ''' date_filter should be given in UTC '''
-        self.do_complete = False
-        self.bandwidth_summary = {'bytes_read':{}, 'bytes_written':{}}
         self.circuits_state = {}
         self.circuits = {}
-        self.circuits_summary = {'buildtimes':[], 'lifetimes':[]}
         self.streams_state = {}
         self.streams = {}
-        self.streams_summary = {'lifetimes':{}}
         self.name = None
         self.boot_succeeded = False
         self.build_timeout_last = None
@@ -320,15 +310,10 @@ class TorCtlParser(Parser):
 
                 data = circ.get_data()
                 if data is not None:
-                    if built is not None and started is not None and len(data['path']) == 3:
-                        self.circuits_summary['buildtimes'].append(built - started)
-                    if ended is not None and started is not None:
-                        self.circuits_summary['lifetimes'].append(ended - started)
-                    if self.do_complete:
-                        self.circuits[cid] = data
+                    self.circuits[cid] = data
                 self.circuits_state.pop(cid)
 
-        elif self.do_complete and isinstance(event, CircMinorEvent):
+        elif isinstance(event, CircMinorEvent):
             if event.purpose != event.old_purpose or event.event != CircEvent.PURPOSE_CHANGED:
                 key = "{0}:{1}".format(event.event, event.purpose)
                 circ.add_event(key, arrival_dt)
@@ -364,15 +349,9 @@ class TorCtlParser(Parser):
 
             data = strm.get_data()
             if data is not None:
-                if self.do_complete:
-                    self.streams[sid] = data
-                self.streams_summary['lifetimes'].setdefault(stream_type, []).append(ended - started)
+                self.streams[sid] = data
             self.streams_state.pop(sid)
 
-    def __handle_bw(self, event, arrival_dt):
-        self.bandwidth_summary['bytes_read'][int(arrival_dt)] = event.read
-        self.bandwidth_summary['bytes_written'][int(arrival_dt)] = event.written
-
     def __handle_buildtimeout(self, event, arrival_dt):
         self.build_timeout_last = event.timeout
         self.build_quantile_last = event.quantile
@@ -382,8 +361,6 @@ class TorCtlParser(Parser):
             self.__handle_circuit(event, arrival_dt)
         elif isinstance(event, StreamEvent):
             self.__handle_stream(event, arrival_dt)
-        elif isinstance(event, BandwidthEvent):
-            self.__handle_bw(event, arrival_dt)
         elif isinstance(event, BuildTimeoutSetEvent):
             self.__handle_buildtimeout(event, arrival_dt)
 
@@ -408,27 +385,26 @@ class TorCtlParser(Parser):
             elif re.search("BOOTSTRAP", line) is not None and re.search("PROGRESS=100", line) is not None:
                 self.boot_succeeded = True
 
-        if self.do_complete or (self.do_complete is False and re.search("650\sBW", line) is not None):
-            # parse with stem
-            timestamps, sep, raw_event_str = line.partition(" 650 ")
-            if sep == '':
-                return True
+        # parse with stem
+        timestamps, sep, raw_event_str = line.partition(" 650 ")
+        if sep == '':
+            return True
+
+        # event.arrived_at is also available but at worse granularity
+        unix_ts = float(timestamps.strip().split()[2])
 
-            # event.arrived_at is also available but at worse granularity
-            unix_ts = float(timestamps.strip().split()[2])
+        # check if we should ignore the line
+        line_date = datetime.datetime.utcfromtimestamp(unix_ts).date()
+        if not self.__is_date_valid(line_date):
+            return True
 
-            # check if we should ignore the line
-            line_date = datetime.datetime.utcfromtimestamp(unix_ts).date()
-            if not self.__is_date_valid(line_date):
-                return True
+        event = ControlMessage.from_str("{0} {1}".format(sep.strip(), raw_event_str))
+        convert('EVENT', event)
+        self.__handle_event(event, unix_ts)
 
-            event = ControlMessage.from_str("{0} {1}".format(sep.strip(), raw_event_str))
-            convert('EVENT', event)
-            self.__handle_event(event, unix_ts)
         return True
 
-    def parse(self, source, do_complete=False):
-        self.do_complete = do_complete
+    def parse(self, source, do_complete=True):
         source.open(newline='\r\n')
         for line in source:
             # ignore line parsing errors
@@ -442,9 +418,7 @@ class TorCtlParser(Parser):
         source.close()
 
     def get_data(self):
-        return {'circuits': self.circuits, 'circuits_summary': self.circuits_summary,
-                'streams':self.streams, 'streams_summary': self.streams_summary,
-                'bandwidth_summary': self.bandwidth_summary}
+        return {'circuits': self.circuits, 'streams': self.streams}
 
     def get_name(self):
         return self.name
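
One consequence for downstream consumers: get_data() now returns only the full 'circuits' and 'streams' dictionaries, so the 'circuits_summary', 'streams_summary', and 'bandwidth_summary' keys vanish from the torctl section of the analysis JSON, and anything that needs summary statistics has to derive them from the complete data. A minimal sketch of inspecting the new layout (file name hypothetical; the 'data' -> node -> 'tor' nesting matches the json_db lookup removed above):

    import json
    import lzma

    # open a compressed analysis file as written by OPAnalysis.save()
    with lzma.open("2019-01-10.onionperf.analysis.json.xz") as f:
        db = json.load(f)
    for node, record in db.get("data", {}).items():
        tor = record.get("tor") or {}
        print(node, len(tor.get("circuits", {})), "circuits,",
              len(tor.get("streams", {})), "streams")
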
diff --git a/onionperf/measurement.py b/onionperf/measurement.py
index 9d84d4e..823f37a 100644
--- a/onionperf/measurement.py
+++ b/onionperf/measurement.py
@@ -157,7 +157,7 @@ def logrotate_thread_task(writables, tgen_writable, torctl_writable, docroot, ni
                         anal.add_torctl_file(torctl_writable.rotate_file(filename_datetime=next_midnight))
 
                     # run the analysis, i.e. parse the files
-                    anal.analyze(do_simple=False)
+                    anal.analyze()
 
                     # save the results in onionperf json format in the www docroot
                     anal.save(output_prefix=docroot, do_compress=True, date_prefix=next_midnight.date())
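
Worth noting: the removed call passed do_simple=False, a keyword that the analyze() signature shown in the analysis.py hunk above (do_complete, date_filter) never accepted, so this hunk also removes a latent TypeError in the log-rotation path. A runnable sketch of why the old call could not have worked (nickname hypothetical):

    from onionperf.analysis import OPAnalysis

    anal = OPAnalysis(nickname="op-test")
    try:
        anal.analyze(do_simple=False)   # the call removed in this hunk
    except TypeError as err:
        # keyword binding fails before any parsing even starts
        print(err)
    # the corrected call is simply: anal.analyze()
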
diff --git a/onionperf/onionperf b/onionperf/onionperf
index 5e2c2fb..36520ad 100755
--- a/onionperf/onionperf
+++ b/onionperf/onionperf
@@ -279,11 +279,6 @@ files generated by this script will be written""",
         action="store", dest="date_prefix",
         default=None)
 
-    analyze_parser.add_argument('-s', '--do-simple-parse',
-        help="""parse and export only summary statistics rather than full transfer/circuit/stream data""",
-        action="store_false", dest="do_complete",
-        default=True)
-
     # visualize
     visualize_parser = sub_parser.add_parser('visualize', description=DESC_VISUALIZE, help=HELP_VISUALIZE,
         formatter_class=my_formatter_class)
@@ -387,7 +382,7 @@ def analyze(args):
             analysis.add_tgen_file(args.tgen_logpath)
         if args.torctl_logpath is not None:
             analysis.add_torctl_file(args.torctl_logpath)
-        analysis.analyze(args.do_complete, date_filter=args.date_filter)
+        analysis.analyze(date_filter=args.date_filter)
         analysis.save(output_prefix=args.prefix, date_prefix=args.date_prefix)
 
     elif args.tgen_logpath is not None and os.path.isdir(args.tgen_logpath) and args.torctl_logpath is not None and os.path.isdir(args.torctl_logpath):
@@ -396,7 +391,7 @@ def analyze(args):
         torctl_logs = reprocessing.collect_logs(args.torctl_logpath, '*torctl.log*')
         log_pairs = reprocessing.match(tgen_logs, torctl_logs, args.date_filter)
         logging.info("Found {0} matching log pairs to be reprocessed".format(len(log_pairs)))
-        reprocessing.multiprocess_logs(log_pairs, args.prefix, args.nickname, args.do_complete)
+        reprocessing.multiprocess_logs(log_pairs, args.prefix, args.nickname)
 
     else:
         logging.error("Given paths were an unrecognized mix of file and directory paths, nothing will be analyzed")
diff --git a/onionperf/reprocessing.py b/onionperf/reprocessing.py
index 8d8dcd4..d949dca 100644
--- a/onionperf/reprocessing.py
+++ b/onionperf/reprocessing.py
@@ -46,21 +46,21 @@ def match(tgen_logs, tor_logs, date_filter):
     return log_pairs
 
 
-def analyze_func(prefix, nick, do_complete, pair):
+def analyze_func(prefix, nick, pair):
     analysis = OPAnalysis(nickname=nick)
     logging.info('Analysing pair for date {0}'.format(pair[2]))
     analysis.add_tgen_file(pair[0])
     analysis.add_torctl_file(pair[1])
-    analysis.analyze(do_complete=do_complete, date_filter=pair[2])
+    analysis.analyze(date_filter=pair[2])
     analysis.save(output_prefix=prefix)
     return 1
 
 
-def multiprocess_logs(log_pairs, prefix, nick=None, do_complete=False):
+def multiprocess_logs(log_pairs, prefix, nick=None):
     pool = Pool(cpu_count())
     analyses = None
     try:
-        func = partial(analyze_func, prefix, nick, do_complete)
+        func = partial(analyze_func, prefix, nick)
         mr = pool.map_async(func, log_pairs)
         pool.close()
         while not mr.ready():
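
The worker loses its third positional parameter, so the partial() binding shrinks accordingly; each pool worker still receives one (tgen_log, torctl_log, date) tuple. A sketch of the equivalence (file names hypothetical):

    from functools import partial
    from onionperf import reprocessing

    pair = ("onionperf.tgen.log", "onionperf.torctl.log", None)       # hypothetical
    func = partial(reprocessing.analyze_func, "analysis-out/", None)  # prefix, nick
    # func(pair) is now equivalent to:
    #     reprocessing.analyze_func("analysis-out/", None, pair)
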
diff --git a/onionperf/tests/test_reprocessing.py b/onionperf/tests/test_reprocessing.py
index 5e758d2..b51d154 100644
--- a/onionperf/tests/test_reprocessing.py
+++ b/onionperf/tests/test_reprocessing.py
@@ -61,7 +61,7 @@ def test_log_match_with_wrong_filter_date():
 def test_analyze_func_json():
     pair = (DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.tgen.log', DATA_DIR + 'logs/onionperf_2019-01-10_23:59:59.torctl.log', datetime.datetime(2019, 1, 10, 0, 0))
     work_dir = tempfile.mkdtemp()
-    reprocessing.analyze_func(work_dir, None, False, pair)
+    reprocessing.analyze_func(work_dir, None, pair)
     json_file = os.path.join(work_dir, "2019-01-10.onionperf.analysis.json.xz")
     assert(os.path.exists(json_file))
     shutil.rmtree(work_dir)
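
The updated test exercises the new three-argument analyze_func directly. Assuming pytest is available (OnionPerf's own test setup may differ), it can be run in isolation from the repository root:

    import pytest

    # run only the updated test
    raise SystemExit(pytest.main(["onionperf/tests/test_reprocessing.py",
                                  "-k", "test_analyze_func_json"]))
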




