commit a3fd963839d80da97d2e67384bd1c16021a82ac9 Author: Karsten Loesing karsten.loesing@gmx.net Date: Mon Apr 16 15:20:46 2012 +0200
Update #3261 code.
We now parse bridge network statuses and append "running " lines to files tempDirectory/$date/$fingerprint-$date for later processing by fingerprint and date. We then add *running columns to the .csv file which contain fractions by bridges with the Running flag. --- task-3261/.gitignore | 1 + task-3261/AnalyzeStatsCoverage.java | 182 ++++++++++++++++++++++++++++------- task-3261/README | 4 +- task-3261/stats-coverage.R | 22 +++-- 4 files changed, 163 insertions(+), 46 deletions(-)
diff --git a/task-3261/.gitignore b/task-3261/.gitignore index 1e3dcc5..2bfd23b 100644 --- a/task-3261/.gitignore +++ b/task-3261/.gitignore @@ -4,4 +4,5 @@ *.csv in/ temp/ +*.jar
diff --git a/task-3261/AnalyzeStatsCoverage.java b/task-3261/AnalyzeStatsCoverage.java index 21d791e..4688bde 100644 --- a/task-3261/AnalyzeStatsCoverage.java +++ b/task-3261/AnalyzeStatsCoverage.java @@ -1,6 +1,9 @@ import java.io.*; import java.text.*; import java.util.*; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.binary.Hex; public class AnalyzeStatsCoverage { public static void main(String[] args) throws Exception { File inDirectory = new File("in"); @@ -26,6 +29,9 @@ public class AnalyzeStatsCoverage { while (!dirs.isEmpty()) { File file = dirs.pop(); if (file.isDirectory()) { + if (file.getName().equals("statuses")) { + continue; + } for (File f : file.listFiles()) { dirs.add(f); } @@ -179,22 +185,103 @@ public class AnalyzeStatsCoverage { } }
+ /* Parse bridge network statuses and append "running " lines to + * files tempDirectory/$date/$fingerprint-$date for later processing + * by fingerprint and date. */ + SimpleDateFormat statusFormat = + new SimpleDateFormat("yyyyMMdd-HHmmss"); + statusFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + if (inDirectory.exists() && inDirectory.isDirectory()) { + System.out.println("Parsing statuses in '" + + inDirectory.getAbsolutePath() + "'."); + long started = System.currentTimeMillis(); + tempDirectory.mkdirs(); + Stack<File> dirs = new Stack<File>(); + SortedSet<File> files = new TreeSet<File>(); + dirs.add(inDirectory); + while (!dirs.isEmpty()) { + File file = dirs.pop(); + if (file.isDirectory()) { + if (file.getName().equals("extra-infos")) { + continue; + } + for (File f : file.listFiles()) { + dirs.add(f); + } + } else { + files.add(file); + } + } + int totalFiles = files.size(), fileNumber = 0; + for (File file : files) { + if (++fileNumber % (totalFiles / 1000) == 0) { + int numberLength = String.valueOf(totalFiles).length(); + long minutesLeft = (((System.currentTimeMillis() - started) + * (totalFiles - fileNumber)) / fileNumber) / (60L * 1000L); + System.out.printf("Parsed %" + numberLength + "d of %" + + numberLength + "d statuses (%3d %%) %d minutes left%n", + fileNumber, totalFiles, (fileNumber * 100) / totalFiles, + minutesLeft); + } + long statusPublishedMillis = statusFormat.parse( + file.getName().substring(0, "YYYYMMdd-HHmmss".length())). + getTime(); + SortedSet<String> statusPublishedDates = new TreeSet<String>(); + String statusPublishedString = dateTimeFormat.format( + statusPublishedMillis); + statusPublishedDates.add(dateFormat.format( + statusPublishedMillis)); + statusPublishedDates.add(dateFormat.format( + statusPublishedMillis + 15L * 60L * 1000L)); + BufferedReader br = new BufferedReader(new FileReader(file)); + String line, rLine = null; + while ((line = br.readLine()) != null) { + if (line.startsWith("r ")) { + rLine = line; + } else if (line.startsWith("s ") && line.contains(" Running") && + rLine != null) { + String[] parts = rLine.split(" "); + if (parts.length != 9) { + System.out.println("Illegal line '" + rLine + "' in " + + file.getAbsolutePath() + ". Skipping this line."); + continue; + } + String fingerprint = Hex.encodeHexString(Base64.decodeBase64( + parts[2] + "==")); + for (String date : statusPublishedDates) { + File outputFile = new File(tempDirectory, date + "/" + + fingerprint.toUpperCase() + "-" + date); + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFile, true)); + bw.write("running " + statusPublishedString + "\n"); + bw.close(); + } + } + } + } + } + /* Parse relevant lines by fingerprint and date. The result will be * how many bytes that relay or bridge read/wrote in total, and how * many bytes were included in the different reported statistics. - * Another result is the number of seconds for which this relay or - * bridge reported byte histories and other statistics. */ + * Other results are the number of seconds for which this relay or + * bridge reported byte histories and other statistics, either based + * on self-reported bandwidth histories or based on the Running flag + * in bridge network statuses. */ if (tempDirectory.exists() && tempDirectory.isDirectory()) { System.out.println("Evaluating previously parsed descriptors in '" + tempDirectory.getAbsolutePath() + "'."); BufferedWriter bw = new BufferedWriter(new FileWriter(outFile)); bw.write("fingerprint,date,totalwritten,totalread,totalseconds," - + "dirreqwritten,dirreqread,dirreqseconds,entrywritten," - + "entryread,entryseconds,exitwritten,exitread,exitseconds," - + "cellwritten,cellread,cellseconds,connbidirectwritten," - + "connbidirectread,connbidirectseconds,bridgewritten," - + "bridgeread,bridgeseconds,geoipwritten,geoipread," - + "geoipseconds\n"); + + "totalrunning,dirreqwritten,dirreqread,dirreqseconds," + + "dirreqrunning,entrywritten,entryread,entryseconds," + + "entryrunning,exitwritten,exitread,exitseconds,exitrunning," + + "cellwritten,cellread,cellseconds,cellrunning," + + "connbidirectwritten,connbidirectread,connbidirectseconds," + + "connbidirectrunning,bridgewritten,bridgeread,bridgeseconds," + + "bridgerunning,geoipwritten,geoipread,geoipseconds," + + "geoiprunning\n"); Stack<File> dirs = new Stack<File>(); SortedSet<File> files = new TreeSet<File>(); dirs.add(tempDirectory); @@ -221,7 +308,8 @@ public class AnalyzeStatsCoverage { long dateStartMillis = dateFormat.parse(date).getTime(); long dateEndMillis = dateStartMillis + 24L * 60L * 60L * 1000L; long[] writeHistory = new long[96], readHistory = new long[96]; - boolean[] running = new boolean[96], + boolean[] upBridge = new boolean[96], + upStatus = new boolean[96], dirreqStats = new boolean[96], entryStats = new boolean[96], exitStats = new boolean[96], @@ -232,7 +320,17 @@ public class AnalyzeStatsCoverage { BufferedReader br = new BufferedReader(new FileReader(file)); String line; while ((line = br.readLine()) != null) { - if (line.startsWith("write-history ") || + if (line.startsWith("running ")) { + long statusPublishedMillis = dateTimeFormat.parse( + line.substring("running ".length())).getTime(); + int j = (int) ((statusPublishedMillis - dateStartMillis) + / (900L * 1000L)); + for (int i = 0; i < 2; i++) { + if (j + i >= 0 && j + i < 96) { + upStatus[j + i] = true; + } + } + } else if (line.startsWith("write-history ") || line.startsWith("read-history ")) { long[] history = line.startsWith("write-history ") ? writeHistory : readHistory; @@ -255,7 +353,7 @@ public class AnalyzeStatsCoverage { System.exit(1); } history[j] = Long.parseLong(historyValues[i]); - running[j] = true; + upBridge[j] = true; } currentMillis += 15L * 60L * 1000L; } @@ -312,52 +410,66 @@ public class AnalyzeStatsCoverage { br.close(); bw.write(fingerprint + "," + date + ","); long totalWritten = 0L, totalRead = 0L, totalSeconds = 0L, - dirreqWritten = 0L, dirreqRead = 0L, dirreqSeconds = 0L, - entryWritten = 0L, entryRead = 0L, entrySeconds = 0L, + totalRunning = 0L, dirreqWritten = 0L, dirreqRead = 0L, + dirreqSeconds = 0L, dirreqRunning = 0L, entryWritten = 0L, + entryRead = 0L, entrySeconds = 0L, entryRunning = 0L, exitWritten = 0L, exitRead = 0L, exitSeconds = 0L, - cellWritten = 0L, cellRead = 0L, cellSeconds = 0L, - connBiDirectWritten = 0L, connBiDirectRead = 0L, - connBiDirectSeconds = 0L, bridgeWritten = 0L, bridgeRead = 0L, - bridgeSeconds = 0L, geoipWritten = 0L, geoipRead = 0L, - geoipSeconds = 0L; + exitRunning = 0L, cellWritten = 0L, cellRead = 0L, + cellSeconds = 0L, cellRunning = 0L, connBiDirectWritten = 0L, + connBiDirectRead = 0L, connBiDirectSeconds = 0L, + connBiDirectRunning = 0L, bridgeWritten = 0L, bridgeRead = 0L, + bridgeSeconds = 0L, bridgeRunning = 0L, geoipWritten = 0L, + geoipRead = 0L, geoipSeconds = 0L, geoipRunning = 0L; for (int i = 0; i < 96; i++) { totalWritten += writeHistory[i]; totalRead += readHistory[i]; - totalSeconds += running[i] ? 900L : 0L; + totalSeconds += upBridge[i] ? 900L : 0L; + totalRunning += upStatus[i] ? 900L : 0L; dirreqWritten += dirreqStats[i] ? writeHistory[i] : 0L; dirreqRead += dirreqStats[i] ? readHistory[i] : 0L; - dirreqSeconds += dirreqStats[i] && running[i] ? 900L : 0L; + dirreqSeconds += dirreqStats[i] && upBridge[i] ? 900L : 0L; + dirreqRunning += dirreqStats[i] && upStatus[i] ? 900L : 0L; entryWritten += entryStats[i] ? writeHistory[i] : 0L; entryRead += entryStats[i] ? readHistory[i] : 0L; - entrySeconds += entryStats[i] && running[i] ? 900L : 0L; + entrySeconds += entryStats[i] && upBridge[i] ? 900L : 0L; + entryRunning += entryStats[i] && upStatus[i] ? 900L : 0L; exitWritten += exitStats[i] ? writeHistory[i] : 0L; exitRead += exitStats[i] ? readHistory[i] : 0L; - exitSeconds += exitStats[i] && running[i] ? 900L : 0L; + exitSeconds += exitStats[i] && upBridge[i] ? 900L : 0L; + exitRunning += exitStats[i] && upStatus[i] ? 900L : 0L; cellWritten += cellStats[i] ? writeHistory[i] : 0L; cellRead += cellStats[i] ? readHistory[i] : 0L; - cellSeconds += cellStats[i] && running[i] ? 900L : 0L; + cellSeconds += cellStats[i] && upBridge[i] ? 900L : 0L; + cellRunning += cellStats[i] && upStatus[i] ? 900L : 0L; connBiDirectWritten += connBiDirectStats[i] ? writeHistory[i] : 0L; connBiDirectRead += connBiDirectStats[i] ? readHistory[i] : 0L; - connBiDirectSeconds += connBiDirectStats[i] && running[i] ? 900L - : 0L; + connBiDirectSeconds += connBiDirectStats[i] && upBridge[i] + ? 900L : 0L; + connBiDirectRunning += connBiDirectStats[i] && upStatus[i] + ? 900L : 0L; bridgeWritten += bridgeStats[i] ? writeHistory[i] : 0L; bridgeRead += bridgeStats[i] ? readHistory[i] : 0L; - bridgeSeconds += bridgeStats[i] && running[i] ? 900L : 0L; + bridgeSeconds += bridgeStats[i] && upBridge[i] ? 900L : 0L; + bridgeRunning += bridgeStats[i] && upStatus[i] ? 900L : 0L; geoipWritten += geoipStats[i] ? writeHistory[i] : 0L; geoipRead += geoipStats[i] ? readHistory[i] : 0L; - geoipSeconds += geoipStats[i] && running[i] ? 900L : 0L; + geoipSeconds += geoipStats[i] && upBridge[i] ? 900L : 0L; + geoipRunning += geoipStats[i] && upStatus[i] ? 900L : 0L; } bw.write(totalWritten + "," + totalRead + "," + totalSeconds + "," - + dirreqWritten + "," + dirreqRead + "," + dirreqSeconds + "," - + entryWritten + "," + entryRead + "," + entrySeconds + "," - + exitWritten + "," + exitRead + "," + exitSeconds + "," - + cellWritten + "," + cellRead + "," + cellSeconds + "," - + connBiDirectWritten + "," + connBiDirectRead + "," - + connBiDirectSeconds + "," + bridgeWritten + "," - + bridgeRead + "," + bridgeSeconds + "," + geoipWritten + "," - + geoipRead + "," + geoipSeconds + "\n"); + + totalRunning + "," + dirreqWritten + "," + dirreqRead + "," + + dirreqSeconds + "," + dirreqRunning + "," + entryWritten + + "," + entryRead + "," + entrySeconds + "," + entryRunning + + "," + exitWritten + "," + exitRead + "," + exitSeconds + "," + + exitRunning + "," + cellWritten + "," + cellRead + "," + + cellSeconds + "," + cellRunning + "," + connBiDirectWritten + + "," + connBiDirectRead + "," + connBiDirectSeconds + "," + + connBiDirectRunning + "," + bridgeWritten + "," + bridgeRead + + "," + bridgeSeconds + "," + bridgeRunning + "," + + geoipWritten + "," + geoipRead + "," + geoipSeconds + "," + + geoipRunning + "\n"); } bw.close(); } diff --git a/task-3261/README b/task-3261/README index 1b95743..cb430ac 100644 --- a/task-3261/README +++ b/task-3261/README @@ -1,4 +1,4 @@ -$ javac AnalyzeStatsCoverage.java -$ java -Xmx4g AnalyzeStatsCoverage +$ javac -cp commons-codec-1.4.jar AnalyzeStatsCoverage.java +$ java -cp commons-codec-1.4.jar.: -Xmx4g AnalyzeStatsCoverage $ R --slave -f stats-coverage.R
diff --git a/task-3261/stats-coverage.R b/task-3261/stats-coverage.R index d0794b9..aef63f2 100644 --- a/task-3261/stats-coverage.R +++ b/task-3261/stats-coverage.R @@ -1,21 +1,25 @@ library(ggplot2) +library(scales) b <- read.csv("stats-coverage.csv") b <- aggregate(list( totalwritten = b$totalwritten, totalseconds = b$totalseconds, - bridgewritten = b$bridgewritten, bridgeseconds = b$bridgeseconds, - geoipwritten = b$geoipwritten, geoipseconds = b$geoipseconds), - by = list(date = as.Date(b$date)), sum) + totalrunning = b$totalrunning, bridgewritten = b$bridgewritten, + bridgeseconds = b$bridgeseconds, bridgerunning = b$bridgerunning, + geoipwritten = b$geoipwritten, geoipseconds = b$geoipseconds, + geoiprunning = b$geoiprunning), by = list(date = as.Date(b$date)), sum) b <- rbind(data.frame(date = b$date, variable = "by written bytes", value = (b$bridgewritten + b$geoipwritten) / b$totalwritten), - data.frame(date = b$date, variable = "by uptime seconds", - value = (b$bridgeseconds + b$geoipseconds) / b$totalseconds)) + data.frame(date = b$date, variable = "by uptime (bandwidth history)", + value = (b$bridgeseconds + b$geoipseconds) / b$totalseconds), + data.frame(date = b$date, variable = "by uptime (Running flag)", + value = (b$bridgerunning + b$geoiprunning) / b$totalrunning)) +b <- b[b$date >= as.Date("2010-10-01") & b$date < as.Date("2012-04-01"), ] ggplot(b, aes(x = date, y = value)) + geom_line() + facet_grid(variable ~ .) + -scale_x_date(name = "", major = "3 months", minor = "1 month", - format = "%b %Y", limits = as.Date(c("2010-10-01", "2011-09-30"))) + -scale_y_continuous(name = "", limits = c(0, 1), formatter = "percent") + +scale_x_date(name = "") + +scale_y_continuous(name = "", limits = c(0, 1), labels = percent) + scale_colour_hue(name = "") + opts(title = "Fraction of bridges reporting statistics\n") -ggsave("stats-coverage-bridges.png", width = 8, height = 6, dpi = 72) +ggsave("stats-coverage-bridges.png", width = 8, height = 7, dpi = 72)
tor-commits@lists.torproject.org