[metrics-tasks/master] Add probability graphs (#1854).

commit ad05f28662ddb22c44c88111315ae5b3f2ae7f66 Author: Karsten Loesing <karsten.loesing@gmx.net> Date: Mon Nov 26 20:41:29 2012 -0500 Add probability graphs (#1854). --- task-1854/plot-entropy.R | 22 ++++++++++++++- task-1854/pyextract.py | 10 +++++- task-1854/pylinf.py | 66 +++++++++++++++++++++++++++++++++------------ 3 files changed, 77 insertions(+), 21 deletions(-) diff --git a/task-1854/plot-entropy.R b/task-1854/plot-entropy.R index 95b8b18..d62657e 100644 --- a/task-1854/plot-entropy.R +++ b/task-1854/plot-entropy.R @@ -2,7 +2,27 @@ library(ggplot2) library(reshape) library(scales) -e <- read.csv("extracted.csv", header = FALSE, +p <- read.csv("prob-extracted.csv", header = FALSE, + col.names = c("validafter", "minadvbw", "advbw", "cumprob"), + stringsAsFactor = FALSE) +p <- p[p$minadvbw >= 20480, ] +c <- data.frame(x = p$advbw, y = p$cumprob, + colour = as.factor(p$minadvbw)) +ggplot(c, aes(x = x, y = y, colour = colour)) + +geom_line() + +scale_x_log10(name = "\nAdvertised bandwidth in B/s (log scale)") + +scale_y_continuous(name = "Cumulated probability\n") + +scale_colour_hue(name = "Adv. bw. cutoff in B/s") + +opts(legend.position = "top") + +ggplot(c, aes(x = x, y = y, colour = colour)) + +geom_line() + +scale_x_log10(name = "\nAdvertised bandwidth in B/s (log scale)") + +scale_y_log10(name = "Cumulated probability (log scale)\n") + +scale_colour_hue(name = "Adv. bw. 
cutoff in B/s") + +opts(legend.position = "top") + +e <- read.csv("linf-extracted.csv", header = FALSE, col.names = c("validafter", "min_adv_bw", "relays", "linf", "excl_adv_bw", "graph"), stringsAsFactor = FALSE) diff --git a/task-1854/pyextract.py b/task-1854/pyextract.py index 33614e2..bd11ea4 100644 --- a/task-1854/pyextract.py +++ b/task-1854/pyextract.py @@ -2,11 +2,11 @@ import os import sys def main(): - out_file = open('extracted.csv', 'w') + out_file = open('linf-extracted.csv', 'w') prev_validafter, max_validafter = '', '' max_lines = [] prev_relays, prev_min_adv_bw = 0, 0 - for line in open('entropy.csv'): + for line in open('linf.csv'): parts = line.strip().split(',') validafter = parts[0] min_adv_bw = int(parts[1]) @@ -32,6 +32,12 @@ def main(): prev_min_adv_bw = min_adv_bw for line in max_lines: out_file.write(line + ",last\n") + out_file.close() + prob_out_file = open('prob-extracted.csv', 'w') + for line in open('prob.csv'): + if line.startswith(max_validafter): + prob_out_file.write(line.strip() + '\n') + prob_out_file.close() if __name__ == '__main__': main() diff --git a/task-1854/pylinf.py b/task-1854/pylinf.py index 88cc773..3bdd8a2 100644 --- a/task-1854/pylinf.py +++ b/task-1854/pylinf.py @@ -102,9 +102,12 @@ def load_server_desc(tar_file_path): tar_fh.close() def run(data): + """ Return tuple of two strings, one string containing linf values for + all possible advertised bandwidth cutoffs, and one containing + probability distributions for predefined cutoffs. 
""" routers = [] router = None - result_string = [] + linf_string, prob_string = [], [] Wed, Wee, Wgd, Wgg = 1, 1, 1, 1 # parse consensus @@ -151,6 +154,8 @@ def run(data): omitted_routers = 0 min_adv_bw = routers[0].advertised_bw + cutoffs = [10240, 20480, 51200, 102400, 204800, 512000, 1048576] + while(omitted_routers<len(routers)): total_bw = 0 @@ -170,11 +175,29 @@ def run(data): diff = abs(new_prob - router.prob) prob_diff.append(diff) - result_string.append(','.join([valid_after, + linf_string.append(','.join([valid_after, str(min_adv_bw), str(len(routers)-omitted_routers), str(max(prob_diff))])) + while len(cutoffs) > 0 and min_adv_bw > cutoffs[0]: + cumulated_prob = 0.0 + prev_advertised_bw = 0 + for router in routers: + if router.advertised_bw > cutoffs[0] and \ + prev_advertised_bw != router.advertised_bw: + prob_string.append(','.join([valid_after, + str(cutoffs[0]), + str(prev_advertised_bw), + str(cumulated_prob)])) + prev_advertised_bw = router.advertised_bw + cumulated_prob += float(router.bandwidth)/float(total_bw) + prob_string.append(','.join([valid_after, + str(cutoffs[0]), + str(prev_advertised_bw), + str(cumulated_prob)])) + cutoffs.pop(0) + # remove routers with min adv_bw for router in routers: if router.advertised_bw == min_adv_bw: @@ -184,7 +207,7 @@ def run(data): min_adv_bw = router.advertised_bw break - return '\n'.join(result_string) + return ('\n'.join(linf_string), '\n'.join(prob_string)) def parse_args(): usage = "Usage - python pyentropy.py [options]" @@ -196,8 +219,10 @@ def parse_args(): help="Input AS GeoIP database") parser.add_option("-s", "--server_desc", dest="server_desc", default=False, help="Server descriptors directory") - parser.add_option("-o", "--output", dest="output", default="entropy.csv", - help="Output filename") + parser.add_option("-l", "--linf-output", dest="linf", default="linf.csv", + help="linf output filename") + parser.add_option("-r", "--prob-output", dest="prob", default="prob.csv", + help="Probabilities 
output filename") parser.add_option("-c", "--consensus", dest="consensus", default="in/consensus", help="Input consensus dir") parser.add_option("-p", "--pickled_data", dest="pickled_data", default=False, @@ -227,16 +252,21 @@ if __name__ == "__main__": with open('data.pkl', 'wb') as output: pickle.dump(descriptors, output) - with open(options.output, 'w') as out_fh: - for file_name in os.listdir(options.consensus): - file_path = os.path.join(options.consensus, file_name) - tar_fh = tarfile.open(file_path) - for member in tar_fh: - if not member.isfile(): - continue - tar_file_data=tar_fh.extractfile(member) - data=tar_file_data.read() - output_string = run(data) - if output_string: - out_fh.write("%s\n" % (output_string)) - tar_fh.close() + linf_fh = open(options.linf, 'w') + prob_fh = open(options.prob, 'w') + for file_name in os.listdir(options.consensus): + file_path = os.path.join(options.consensus, file_name) + tar_fh = tarfile.open(file_path) + for member in tar_fh: + if not member.isfile(): + continue + tar_file_data=tar_fh.extractfile(member) + data=tar_file_data.read() + (linf_string, prob_string) = run(data) + if linf_string: + linf_fh.write("%s\n" % (linf_string)) + if prob_string: + prob_fh.write("%s\n" % (prob_string)) + tar_fh.close() + linf_fh.close() + prob_fh.close()
participants (1)
- karsten@torproject.org