[metrics-tasks/master] Add bridge stats coverage analysis (#3261).
 
            commit 051ad27436a82861df92db088916184174609d3a
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date:   Sat Oct 8 11:47:20 2011 +0200

    Add bridge stats coverage analysis (#3261).
---
 task-3261/.gitignore                |    4 +
 task-3261/AnalyzeStatsCoverage.java |  358 +++++++++++++++++++++++++++++++++++
 task-3261/README                    |    4 +
 task-3261/stats-coverage.R          |   15 ++
 4 files changed, 381 insertions(+), 0 deletions(-)

diff --git a/task-3261/.gitignore b/task-3261/.gitignore
new file mode 100644
index 0000000..2f247e9
--- /dev/null
+++ b/task-3261/.gitignore
@@ -0,0 +1,4 @@
+*.class
+*.png
+*.csv
+
diff --git a/task-3261/AnalyzeStatsCoverage.java b/task-3261/AnalyzeStatsCoverage.java
new file mode 100644
index 0000000..9d6cc84
--- /dev/null
+++ b/task-3261/AnalyzeStatsCoverage.java
@@ -0,0 +1,358 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+/**
+ * Analyzes how much of the traffic reported in the read and write
+ * histories of extra-info descriptors is covered by the statistics
+ * contained in those descriptors.  Descriptors are read from the
+ * directory "in", relevant lines are sorted into files
+ * temp/$date/$fingerprint-$date, and these files are then evaluated
+ * in 96 slots of 15 minutes per UTC day.  Results are written to
+ * stats-coverage.csv with one line per fingerprint and date.
+ */
+public class AnalyzeStatsCoverage {
+  public static void main(String[] args) throws Exception {
+    File inDirectory = new File("in");
+    File tempDirectory = new File("temp");
+    File outFile = new File("stats-coverage.csv");
+
+    /* Extract relevant lines from extra-info descriptors in inDirectory
+     * and write them to files tempDirectory/$date/$fingerprint-$date for
+     * later processing by fingerprint and date. */
+    SimpleDateFormat dateTimeFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    if (inDirectory.exists() && inDirectory.isDirectory()) {
+      System.out.println("Parsing descriptors in '"
+          + inDirectory.getAbsolutePath() + "'.");
+      tempDirectory.mkdirs();
+      Stack<File> dirs = new Stack<File>();
+      SortedSet<File> files = new TreeSet<File>();
+      dirs.add(inDirectory);
+      while (!dirs.isEmpty()) {
+        File file = dirs.pop();
+        if (file.isDirectory()) {
+          for (File f : file.listFiles()) {
+            dirs.add(f);
+          }
+        } else {
+          files.add(file);
+        }
+      }
+      int totalFiles = files.size(), fileNumber = 0;
+      for (File file : files) {
+        /* Print progress about every 0.1 % of files; the step is at
+         * least 1 to avoid a division by zero below 1000 files. */
+        if (++fileNumber % Math.max(1, totalFiles / 1000) == 0) {
+          int numberLength = String.valueOf(totalFiles).length();
+          System.out.printf("Parsed %" + numberLength + "d of %"
+              + numberLength + "d descriptors (%3d %%)%n", fileNumber,
+              totalFiles, (fileNumber * 100) / totalFiles);
+        }
+        BufferedReader br = new BufferedReader(new FileReader(file));
+        String line, fingerprint = null, publishedLine = null;
+        SortedMap<String, SortedSet<String>> linesByDate =
+            new TreeMap<String, SortedSet<String>>();
+        while ((line = br.readLine()) != null) {
+          if (line.startsWith("extra-info ")) {
+            fingerprint = line.split(" ")[2];
+          } else if (line.startsWith("write-history ") ||
+              line.startsWith("read-history ")) {
+            String[] parts = line.split(" ");
+            if (parts.length < 6) {
+              continue;
+            }
+            String historyEndDate = parts[1];
+            long historyEndMillis = dateTimeFormat.parse(parts[1] + " "
+                + parts[2]).getTime();
+            long intervalLength = Long.parseLong(parts[3].substring(1));
+            if (intervalLength != 900L) {
+              System.out.println("Non-standard interval length in "
+                  + "line '" + line + "' in file "
+                  + file.getAbsolutePath() + ". Skipping this line.");
+              continue;
+            }
+            int intervals = parts[5].split(",").length;
+            long historyStartMillis = historyEndMillis
+                - (intervals * intervalLength * 1000L);
+            long currentMillis = historyStartMillis;
+            String currentDate;
+            while ((currentDate = dateFormat.format(currentMillis)).
+                compareTo(historyEndDate) <= 0) {
+              if (!linesByDate.containsKey(currentDate)) {
+                linesByDate.put(currentDate, new TreeSet<String>());
+              }
+              linesByDate.get(currentDate).add(line);
+              currentMillis += 24L * 60L * 60L * 1000L;
+            }
+          } else if (line.startsWith("dirreq-stats-end ") ||
+              line.startsWith("entry-stats-end ") ||
+              line.startsWith("exit-stats-end ") ||
+              line.startsWith("cell-stats-end ") ||
+              line.startsWith("conn-bi-direct ") ||
+              line.startsWith("bridge-stats-end ")) {
+            String[] parts = line.split(" ");
+            if (parts.length < 5) {
+              System.out.println("Malformed line '" + line + "' in "
+                  + "file " + file.getAbsolutePath() + ". Skipping "
+                  + "this line.");
+              continue;
+            }
+            String statsEndDate = parts[1];
+            long statsEndMillis = dateTimeFormat.parse(parts[1] + " "
+                + parts[2]).getTime();
+            long intervalLength = Long.parseLong(parts[3].substring(1));
+            long statsStartMillis = statsEndMillis
+                - intervalLength * 1000L;
+            long currentMillis = statsStartMillis;
+            String currentDate;
+            while ((currentDate = dateFormat.format(currentMillis)).
+                compareTo(statsEndDate) <= 0) {
+              if (!linesByDate.containsKey(currentDate)) {
+                linesByDate.put(currentDate, new TreeSet<String>());
+              }
+              linesByDate.get(currentDate).add(line);
+              currentMillis += 24L * 60L * 60L * 1000L;
+            }
+          } else if (line.startsWith("published ")) {
+            publishedLine = line;
+          } else if (line.startsWith("geoip-start-time ")) {
+            if (publishedLine == null) {
+              System.out.println("Missing published line in file "
+                  + file.getAbsolutePath() + ". Skipping "
+                  + "geoip-start-time line.");
+              continue;
+            }
+            String[] publishedParts = publishedLine.split(" ");
+            if (publishedParts.length < 3) {
+              System.out.println("Malformed line '" + publishedLine
+                  + "' in file " + file.getAbsolutePath() + ". "
+                  + "Skipping geoip-start-time line.");
+              continue;
+            }
+            String[] parts = line.split(" ");
+            if (parts.length < 3) {
+              System.out.println("Malformed line '" + line + "' in "
+                  + "file " + file.getAbsolutePath() + ". Skipping "
+                  + "this line.");
+              continue;
+            }
+            /* The geoip stats interval ends at the descriptor's
+             * publication time, so file the rewritten line under every
+             * date up to the published date, not only the start date. */
+            String statsEndDate = publishedParts[1];
+            long statsEndMillis = dateTimeFormat.parse(
+                publishedParts[1] + " " + publishedParts[2]).getTime();
+            long statsStartMillis = dateTimeFormat.parse(parts[1] + " "
+                + parts[2]).getTime();
+            long intervalLength = (statsEndMillis - statsStartMillis)
+                / 1000L;
+            String rewrittenLine = "geoip-stats-end "
+                + publishedParts[1] + " " + publishedParts[2] + " ("
+                + intervalLength + " s)";
+            long currentMillis = statsStartMillis;
+            String currentDate;
+            while ((currentDate = dateFormat.format(currentMillis)).
+                compareTo(statsEndDate) <= 0) {
+              if (!linesByDate.containsKey(currentDate)) {
+                linesByDate.put(currentDate, new TreeSet<String>());
+              }
+              linesByDate.get(currentDate).add(rewrittenLine);
+              currentMillis += 24L * 60L * 60L * 1000L;
+            }
+          }
+        }
+        br.close();
+        for (Map.Entry<String, SortedSet<String>> e :
+            linesByDate.entrySet()) {
+          String date = e.getKey();
+          SortedSet<String> lines = e.getValue();
+          File outputFile = new File(tempDirectory, date + "/"
+              + fingerprint + "-" + date);
+          if (outputFile.exists()) {
+            br = new BufferedReader(new FileReader(outputFile));
+            while ((line = br.readLine()) != null) {
+              lines.add(line);
+            }
+            br.close();
+          }
+          outputFile.getParentFile().mkdirs();
+          BufferedWriter bw = new BufferedWriter(new FileWriter(
+              outputFile));
+          for (String l : lines) {
+            bw.write(l + "\n");
+          }
+          bw.close();
+        }
+      }
+    }
+
+    /* Parse relevant lines by fingerprint and date.  The result will be
+     * how many bytes that relay or bridge read/wrote in total, and how
+     * many bytes were included in the different reported statistics. */
+    if (tempDirectory.exists() && tempDirectory.isDirectory()) {
+      System.out.println("Evaluating previously parsed descriptors in '"
+          + tempDirectory.getAbsolutePath() + "'.");
+      BufferedWriter bw = new BufferedWriter(new FileWriter(outFile));
+      bw.write("fingerprint,date,totalwritten,totalread,dirreqwritten,"
+          + "dirreqread,entrywritten,entryread,exitwritten,exitread,"
+          + "cellwritten,cellread,connbidirectwritten,connbidirectread,"
+          + "bridgewritten,bridgeread,geoipwritten,geoipread\n");
+      Stack<File> dirs = new Stack<File>();
+      SortedSet<File> files = new TreeSet<File>();
+      dirs.add(tempDirectory);
+      while (!dirs.isEmpty()) {
+        File file = dirs.pop();
+        if (file.isDirectory()) {
+          for (File f : file.listFiles()) {
+            dirs.add(f);
+          }
+        } else {
+          files.add(file);
+        }
+      }
+      int totalFiles = files.size(), fileNumber = 0;
+      for (File file : files) {
+        /* Print progress about every 0.1 % of files; the step is at
+         * least 1 to avoid a division by zero below 1000 files. */
+        if (++fileNumber % Math.max(1, totalFiles / 1000) == 0) {
+          int numberLength = String.valueOf(totalFiles).length();
+          System.out.printf("Evaluated %" + numberLength + "d of %"
+              + numberLength + "d descriptors (%3d %%)%n", fileNumber,
+              totalFiles, (fileNumber * 100) / totalFiles);
+        }
+        String fingerprint = file.getName().substring(0, 40);
+        String date = file.getName().substring(41);
+        long dateStartMillis = dateFormat.parse(date).getTime();
+        long dateEndMillis = dateStartMillis + 24L * 60L * 60L * 1000L;
+        long[] writeHistory = new long[96], readHistory = new long[96];
+        boolean[] dirreqStats = new boolean[96],
+            entryStats = new boolean[96],
+            exitStats = new boolean[96],
+            cellStats = new boolean[96],
+            connBiDirectStats = new boolean[96],
+            bridgeStats = new boolean[96],
+            geoipStats = new boolean[96];
+        BufferedReader br = new BufferedReader(new FileReader(file));
+        String line;
+        while ((line = br.readLine()) != null) {
+          if (line.startsWith("write-history ") ||
+              line.startsWith("read-history ")) {
+            long[] history = line.startsWith("write-history ")
+                ? writeHistory : readHistory;
+            String[] parts = line.split(" ");
+            long historyEndMillis = dateTimeFormat.parse(parts[1] + " "
+                + parts[2]).getTime();
+            String[] historyValues = parts[5].split(",");
+            long historyStartMillis = historyEndMillis
+                - (historyValues.length * 900L * 1000L);
+            long currentMillis = historyStartMillis;
+            for (int i = 0; i < historyValues.length; i++) {
+              if (currentMillis >= dateStartMillis &&
+                  currentMillis < dateEndMillis) {
+                int j = (int) ((currentMillis - dateStartMillis)
+                    / (900L * 1000L));
+                if (j < 0 || j >= 96) {
+                  System.out.println("Internal error when processing "
+                      + "line '" + line + "'. Index = " + j
+                      + ". Exiting.");
+                  System.exit(1);
+                }
+                history[j] = Long.parseLong(historyValues[i]);
+              }
+              currentMillis += 15L * 60L * 1000L;
+            }
+          } else if (line.startsWith("dirreq-stats-end ") ||
+              line.startsWith("entry-stats-end ") ||
+              line.startsWith("exit-stats-end ") ||
+              line.startsWith("cell-stats-end ") ||
+              line.startsWith("conn-bi-direct ") ||
+              line.startsWith("bridge-stats-end ") ||
+              line.startsWith("geoip-stats-end ")) {
+            boolean[] stats = null;
+            if (line.startsWith("dirreq-stats-end ")) {
+              stats = dirreqStats;
+            } else if (line.startsWith("entry-stats-end ")) {
+              stats = entryStats;
+            } else if (line.startsWith("exit-stats-end ")) {
+              stats = exitStats;
+            } else if (line.startsWith("cell-stats-end ")) {
+              stats = cellStats;
+            } else if (line.startsWith("conn-bi-direct ")) {
+              stats = connBiDirectStats;
+            } else if (line.startsWith("bridge-stats-end ")) {
+              stats = bridgeStats;
+            } else if (line.startsWith("geoip-stats-end ")) {
+              stats = geoipStats;
+            } else {
+              System.out.println("Internal error when processing line '"
+                  + line + "'. Exiting.");
+              System.exit(1);
+            }
+            String[] parts = line.split(" ");
+            long statsEndMillis = dateTimeFormat.parse(parts[1] + " "
+                + parts[2]).getTime();
+            long intervalLength = Long.parseLong(parts[3].substring(1));
+            long statsStartMillis = statsEndMillis
+                - intervalLength * 1000L;
+            long currentMillis = statsStartMillis;
+            /* Only mark slots within the stats interval instead of
+             * running on until the end of the day. */
+            while (currentMillis < Math.min(statsEndMillis, dateEndMillis)) {
+              if (currentMillis >= dateStartMillis) {
+                int j = (int) ((currentMillis - dateStartMillis)
+                    / (900L * 1000L));
+                if (j < 0 || j >= 96) {
+                  System.out.println("Internal error when processing "
+                      + "line '" + line + "'. Index = " + j
+                      + ". Exiting.");
+                  System.exit(1);
+                }
+                stats[j] = true;
+              }
+              currentMillis += 15L * 60L * 1000L;
+            }
+          }
+        }
+        br.close();
+        bw.write(fingerprint + "," + date + ",");
+        long totalWritten = 0L, totalRead = 0L, dirreqWritten = 0L,
+            dirreqRead = 0L, entryWritten = 0L, entryRead = 0L,
+            exitWritten = 0L, exitRead = 0L, cellWritten = 0L,
+            cellRead = 0L, connBiDirectWritten = 0L,
+            connBiDirectRead = 0L, bridgeWritten = 0L, bridgeRead = 0L,
+            geoipWritten = 0L, geoipRead = 0L;
+        for (int i = 0; i < 96; i++) {
+          totalWritten += writeHistory[i];
+          totalRead += readHistory[i];
+          dirreqWritten += dirreqStats[i] ? writeHistory[i] : 0L;
+          dirreqRead += dirreqStats[i] ? readHistory[i] : 0L;
+          entryWritten += entryStats[i] ? writeHistory[i] : 0L;
+          entryRead += entryStats[i] ? readHistory[i] : 0L;
+          exitWritten += exitStats[i] ? writeHistory[i] : 0L;
+          exitRead += exitStats[i] ? readHistory[i] : 0L;
+          cellWritten += cellStats[i] ? writeHistory[i] : 0L;
+          cellRead += cellStats[i] ? readHistory[i] : 0L;
+          connBiDirectWritten += connBiDirectStats[i] ? writeHistory[i]
+              : 0L;
+          connBiDirectRead += connBiDirectStats[i] ? readHistory[i]
+              : 0L;
+          bridgeWritten += bridgeStats[i] ? writeHistory[i] : 0L;
+          bridgeRead += bridgeStats[i] ? readHistory[i] : 0L;
+          geoipWritten += geoipStats[i] ? writeHistory[i] : 0L;
+          geoipRead += geoipStats[i] ? readHistory[i] : 0L;
+        }
+        bw.write(totalWritten + "," + totalRead + "," + dirreqWritten
+            + "," + dirreqRead + "," + entryWritten + "," + entryRead
+            + "," + exitWritten + "," + exitRead + "," + cellWritten
+            + "," + cellRead + "," + connBiDirectWritten + ","
+            + connBiDirectRead + "," + bridgeWritten + "," + bridgeRead
+            + "," + geoipWritten + "," + geoipRead + "\n");
+      }
+      bw.close();
+    }
+  }
+}
+
diff --git a/task-3261/README b/task-3261/README
new file mode 100644
index 0000000..1796e15
--- /dev/null
+++ b/task-3261/README
@@ -0,0 +1,4 @@
+$ javac AnalyzeStatsCoverage.java
+$ java -Xmx4g AnalyzeStatsCoverage
+$ R --slave -f stats-coverage.R
+
diff --git a/task-3261/stats-coverage.R b/task-3261/stats-coverage.R
new file mode 100644
index 0000000..b9c1810
--- /dev/null
+++ b/task-3261/stats-coverage.R
@@ -0,0 +1,15 @@
+library(ggplot2)
+b <- read.csv("stats-coverage.csv")
+b <- aggregate(list(totalwritten = b$totalwritten,
+  bridgewritten = b$bridgewritten, geoipwritten = b$geoipwritten),
+  by = list(date = b$date), sum)
+b <- data.frame(date = b$date, value = b$bridgewritten / b$totalwritten)
+ggplot(b, aes(x = as.Date(date), y = value)) +
+geom_line() +
+scale_x_date(name = "", major = "3 months", minor = "1 month",
+  format = "%b %Y", limits = as.Date(c("2010-10-01", "2011-09-30"))) +
+scale_y_continuous(name = "", limits = c(0, 1), formatter = "percent") +
+scale_colour_hue(name = "") +
+opts(title = "Fraction of bridges reporting statistics\n")
+ggsave("stats-coverage-bridges.png", width = 8, height = 6, dpi = 72)
+
participants (1)
- 
                 karsten@torproject.org karsten@torproject.org