commit 95b749a8fc690825c0a828b8473c58faea7ad912
Author: Ana Custura <ana@netstat.org.uk>
Date:   Thu Sep 10 01:51:54 2020 +0100

    Move filters and filter metadata to analysis files
---
 onionperf/filtering.py     | 25 ++++++++++++++++++-------
 onionperf/onionperf        |  9 +--------
 onionperf/visualization.py | 14 +++++++++-----
 3 files changed, 28 insertions(+), 20 deletions(-)
diff --git a/onionperf/filtering.py b/onionperf/filtering.py index 1b614d6..c008c03 100644 --- a/onionperf/filtering.py +++ b/onionperf/filtering.py @@ -7,6 +7,7 @@
import re from onionperf.analysis import OPAnalysis +from collections import defaultdict
class Filtering(object):
@@ -14,9 +15,11 @@ class Filtering(object): self.fingerprints_to_include = None self.fingerprints_to_exclude = None self.fingerprint_pattern = re.compile("$?([0-9a-fA-F]{40})") + self.filters = defaultdict(list)
def include_fingerprints(self, path): self.fingerprints_to_include = [] + self.fingerprints_to_include_path = path with open(path, 'rt') as f: for line in f: fingerprint_match = self.fingerprint_pattern.match(line) @@ -26,6 +29,7 @@ class Filtering(object):
def exclude_fingerprints(self, path): self.fingerprints_to_exclude = [] + self.fingerprints_to_exclude_path = path with open(path, 'rt') as f: for line in f: fingerprint_match = self.fingerprint_pattern.match(line) @@ -33,12 +37,16 @@ class Filtering(object): fingerprint = fingerprint_match.group(1).upper() self.fingerprints_to_exclude.append(fingerprint)
- def apply_filters(self, input_path, output_dir, output_file): - self.analysis = OPAnalysis.load(filename=input_path) + def filter_tor_circuits(self, analysis): if self.fingerprints_to_include is None and self.fingerprints_to_exclude is None: return - for source in self.analysis.get_nodes(): - tor_circuits = self.analysis.get_tor_circuits(source) + self.filters["tor/circuits"] = [] + if self.fingerprints_to_include: + self.filters["tor/circuits"].append({"name": "include_fingerprints", "filepath": self.fingerprints_to_include_path }) + if self.fingerprints_to_exclude: + self.filters["tor/circuits"].append({"name": "exclude_fingerprints", "filepath": self.fingerprints_to_exclude_path }) + for source in analysis.get_nodes(): + tor_circuits = analysis.get_tor_circuits(source) filtered_circuit_ids = [] for circuit_id, tor_circuit in tor_circuits.items(): keep = False @@ -56,8 +64,11 @@ class Filtering(object): keep = False break if not keep: - filtered_circuit_ids.append(circuit_id) - for circuit_id in filtered_circuit_ids: - del(tor_circuits[circuit_id]) + tor_circuits[circuit_id]["filtered"] = True + + def apply_filters(self, input_path, output_dir, output_file): + self.analysis = OPAnalysis.load(filename=input_path) + self.filter_tor_circuits(self.analysis) + self.analysis.json_db["filters"] = self.filters self.analysis.save(filename=output_file, output_prefix=output_dir, sort_keys=False)
diff --git a/onionperf/onionperf b/onionperf/onionperf index 1efa8cb..108af4e 100755 --- a/onionperf/onionperf +++ b/onionperf/onionperf @@ -342,13 +342,6 @@ files generated by this script will be written""", required="True", action=PathStringArgsAction, dest="datasets")
- visualize_parser.add_argument('--outer-join', - help="""Include measurements without an existing mapping between TGen - transfers/streams and Tor streams/circuits, which is the - equivalent of an outer join in the database sense""", - action="store_true", dest="outer_join", - default=False) - visualize_parser.add_argument('-p', '--prefix', help="a STRING filename prefix for graphs we generate", metavar="STRING", type=str, @@ -489,7 +482,7 @@ def visualize(args): if analysis is not None: analyses.append(analysis) tgen_viz.add_dataset(analyses, label) - tgen_viz.plot_all(args.prefix, outer_join=args.outer_join) + tgen_viz.plot_all(args.prefix)
def type_nonnegative_integer(value): i = int(value) diff --git a/onionperf/visualization.py b/onionperf/visualization.py index 0f69879..f5bc03f 100644 --- a/onionperf/visualization.py +++ b/onionperf/visualization.py @@ -31,11 +31,11 @@ class Visualization(object, metaclass=ABCMeta):
class TGenVisualization(Visualization):
- def plot_all(self, output_prefix, outer_join=False): + def plot_all(self, output_prefix): if len(self.datasets) > 0: prefix = output_prefix + '.' if output_prefix is not None else '' ts = time.strftime("%Y-%m-%d_%H:%M:%S") - self.__extract_data_frame(outer_join) + self.__extract_data_frame() self.data.to_csv("{0}onionperf.viz.{1}.csv".format(prefix, ts)) sns.set_context("paper") self.page = PdfPages("{0}onionperf.viz.{1}.pdf".format(prefix, ts)) @@ -51,7 +51,7 @@ class TGenVisualization(Visualization): self.__plot_errors_time() self.page.close()
- def __extract_data_frame(self, outer_join=False): + def __extract_data_frame(self): streams = [] for (analyses, label) in self.datasets: for analysis in analyses: @@ -145,8 +145,12 @@ class TGenVisualization(Visualization): if "failure_reason_remote" in tor_stream: error_code_parts.append(tor_stream["failure_reason_remote"]) stream["error_code"] = "/".join(error_code_parts) - if tor_circuit or outer_join: - streams.append(stream) + + if "filters" in analysis.json_db.keys() and analysis.json_db["filters"]["tor/circuits"]: + if tor_circuit and "filtered" not in tor_circuit.keys(): + streams.append(stream) + else: + streams.append(stream) self.data = pd.DataFrame.from_records(streams, index="id")
def __plot_firstbyte_ecdf(self):