commit b7a73f0ce0605e4d0f017cb0696cc924a75c986e
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date:   Wed Apr 4 09:15:30 2012 +0200

    Commit graphing code for #3574.

    The ticket has been closed for 7 months now; I just found the code
    on my disk.
---
 task-3574/.gitignore                |    6 +++
 task-3574/AggregatePerDay.java      |   66 +++++++++++++++++++++++++++++++++++
 task-3574/OneLinePerDescriptor.java |   20 ++++++++++
 task-3574/README                    |   23 ++++++++++++
 task-3574/eval.R                    |   12 ++++++
 5 files changed, 127 insertions(+), 0 deletions(-)
diff --git a/task-3574/.gitignore b/task-3574/.gitignore
new file mode 100644
index 0000000..fae3c24
--- /dev/null
+++ b/task-3574/.gitignore
@@ -0,0 +1,6 @@
+*.txt
+extra-infos/
+*.csv
+*.class
+*.pdf
+
diff --git a/task-3574/AggregatePerDay.java b/task-3574/AggregatePerDay.java
new file mode 100644
index 0000000..cf625a6
--- /dev/null
+++ b/task-3574/AggregatePerDay.java
@@ -0,0 +1,66 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+public class AggregatePerDay {
+  public static void main(String[] args) throws Exception {
+    SortedMap<String, long[]> byteHistory = new TreeMap<String, long[]>();
+    BufferedReader br = new BufferedReader(new FileReader(
+        "bridge-bandwidth-histories-sorted.txt"));
+    String line;
+    SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    SimpleDateFormat dateFormat = new SimpleDateFormat(
+        "yyyy-MM-dd");
+    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    SimpleDateFormat timeFormat = new SimpleDateFormat(
+        "HH:mm:ss");
+    timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    while ((line = br.readLine()) != null) {
+      String[] parts = line.split(" ");
+      if (parts.length < 3) {
+        continue;
+      }
+      String fingerprint = parts[2];
+      while (line.contains("-history")) {
+        line = line.substring(line.indexOf("-history") - 4);
+        boolean isReadHistory = line.startsWith("read");
+        line = line.substring(5);
+        parts = line.split(" ");
+        if (parts.length >= 6 && parts[5].length() > 0 && !parts[5].contains("history")) {
+          String[] bytes = parts[5].split(",");
+          long intervalEnd = dateTimeFormat.parse(parts[1] + " " + parts[2]).getTime();
+          for (int i = bytes.length - 1; i >= 0; i--) {
+            String key = fingerprint + ","
+                + dateFormat.format(intervalEnd)
+                + (isReadHistory ? ",read" : ",write");
+            long timeIndex = timeFormat.parse(
+                dateTimeFormat.format(intervalEnd).split(" ")[1]).getTime()
+                / (15L * 60L * 1000L);
+            long value = Long.parseLong(bytes[i]);
+            if (!byteHistory.containsKey(key)) {
+              byteHistory.put(key, new long[96]);
+            }
+            byteHistory.get(key)[(int) timeIndex] = value + 1L;
+            intervalEnd -= 15L * 60L * 1000L;
+          }
+        }
+      }
+    }
+    br.close();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "bridge-bandwidth-per-day.csv"));
+    for (Map.Entry<String, long[]> e : byteHistory.entrySet()) {
+      long total = 0L, count = 0L;
+      for (long val : e.getValue()) {
+        if (val > 0L) {
+          total += val - 1L;
+          count += 1L;
+        }
+      }
+      bw.write(e.getKey() + "," + total + "," + count + "\n");
+    }
+    bw.close();
+  }
+}
+
diff --git a/task-3574/OneLinePerDescriptor.java b/task-3574/OneLinePerDescriptor.java
new file mode 100644
index 0000000..78c83b1
--- /dev/null
+++ b/task-3574/OneLinePerDescriptor.java
@@ -0,0 +1,20 @@
+import java.io.*;
+public class OneLinePerDescriptor {
+  public static void main(String[] args) throws Exception {
+    BufferedReader br = new BufferedReader(new FileReader(
+        "bridge-bandwidth-histories-raw.txt"));
+    String line;
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        "bridge-bandwidth-histories-by-fingerprint.txt"));
+    while ((line = br.readLine()) != null) {
+      if (line.startsWith("extra-info ")) {
+        bw.write("\n" + line);
+      } else {
+        bw.write(" " + line);
+      }
+    }
+    bw.close();
+    br.close();
+  }
+}
+
diff --git a/task-3574/README b/task-3574/README
new file mode 100644
index 0000000..0151eb4
--- /dev/null
+++ b/task-3574/README
@@ -0,0 +1,23 @@
+Extract bridge fingerprints and byte histories from extra-info
+descriptors:
+
+  $ grep -hRE "^extra-info|^write-history|^read-history" extra-infos >
+    bridge-bandwidth-histories-raw.txt
+
+Convert the output into something we can sort by fingerprint:
+
+  $ javac OneLinePerDescriptor.java && java OneLinePerDescriptor
+
+Sort by fingerprint and filter out duplicates:
+
+  $ sort bridge-bandwidth-histories-by-fingerprint.txt | uniq >
+    bridge-bandwidth-histories-sorted.txt
+
+Aggregate bytes per day:
+
+  $ javac AggregatePerDay.java && java AggregatePerDay
+
+Plot graphs:
+
+  $ R --slave -f eval.R
+
diff --git a/task-3574/eval.R b/task-3574/eval.R
new file mode 100644
index 0000000..ac36880
--- /dev/null
+++ b/task-3574/eval.R
@@ -0,0 +1,12 @@
+library(ggplot2)
+data <- read.csv("bridge-bandwidth-per-day.csv", stringsAsFactors = FALSE, col.names = c("fingerprint", "date", "operation", "bytes", "intervals"))
+d <- aggregate(list(bytes = data$bytes, intervals = data$intervals), by = list(fingerprint = data$fingerprint), sum)
+ggplot(d, aes(x = sort(bytes) / 2^30, y = (1:length(bytes)) / length(bytes))) +
+geom_line() +
+scale_x_continuous(name = "\nTotal read and written GiB per month", limits = c(0, 100)) +
+scale_y_continuous(name = "Fraction of bridges\n", formatter = "percent", limits = c(0, 1))
+ggplot(d, aes(x = sort(bytes / intervals) / (15 * 60 * 2^10), y = (1:length(bytes)) / length(bytes))) +
+geom_line() +
+scale_x_continuous(name = "\nMean read and written KiB per second", limits = c(0, 10)) +
+scale_y_continuous(name = "Fraction of bridges\n", formatter = "percent", limits = c(0, 1))
+
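For anyone picking this code up later, here is a minimal standalone sketch
(not part of the commit) of how AggregatePerDay.java interprets a single
bandwidth-history line. It assumes the usual extra-info descriptor format,
"read-history YYYY-MM-DD HH:MM:SS (900 s) NUM,NUM,...", where the timestamp
marks the end of the most recent 15-minute interval and the values run from
oldest to newest; the class name HistoryLineSketch and the sample numbers
are made up for illustration.

  import java.text.SimpleDateFormat;
  import java.util.TimeZone;

  /* Standalone sketch, not part of the commit: walks one made-up
   * "read-history" line and prints the 15-minute intervals it covers,
   * mirroring the inner loop of AggregatePerDay.java. */
  public class HistoryLineSketch {
    public static void main(String[] args) throws Exception {
      String line = "read-history 2012-04-03 12:45:00 (900 s) 1024,2048,4096";
      String[] parts = line.split(" ");
      SimpleDateFormat dateTimeFormat =
          new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
      dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
      /* parts[1] and parts[2] hold the date and time of the last interval
       * end; parts[5] holds the comma-separated byte counts. */
      long intervalEnd = dateTimeFormat.parse(
          parts[1] + " " + parts[2]).getTime();
      String[] bytes = parts[5].split(",");
      /* Walk the values from newest to oldest, stepping back one
       * 15-minute interval per value. */
      for (int i = bytes.length - 1; i >= 0; i--) {
        System.out.println(dateTimeFormat.format(
            new java.util.Date(intervalEnd)) + " " + bytes[i] + " bytes");
        intervalEnd -= 15L * 60L * 1000L;
      }
    }
  }

Compile and run it like the other tools ($ javac HistoryLineSketch.java &&
java HistoryLineSketch); it prints one interval end and byte count per line,
which is the per-interval view that AggregatePerDay folds into the 96
15-minute slots of each fingerprint/date/direction key.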