commit 0dddee5316aebf046db58858af81220bdf3fcb6a
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date:   Fri Apr 27 08:50:31 2012 +0200
    Update analysis code for #3261.
---
 task-3261/.gitignore                  |   2 +
 task-3261/AggregateStats.java         | 122 +++++++++
 task-3261/AnalyzeDescriptorParts.java | 315 ++++++++++++++++++++++
 task-3261/AnalyzeStatsCoverage.java   | 478 ---------------------------------
 task-3261/ExtractDescriptorParts.java | 172 ++++++++++++
 task-3261/README                      |  38 +++-
 task-3261/plot.R                      |  65 +++++
 task-3261/stats-coverage.R            |  25 --
 8 files changed, 711 insertions(+), 506 deletions(-)
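The rewritten pipeline has four stages: ExtractDescriptorParts parses
the tarballs in in/ and writes the relevant descriptor parts to one
file per bridge in temp/, plus status publication times to
bridge-network-statuses; AnalyzeDescriptorParts turns those part files
into half-hourly per-bridge observations in eval-out.csv;
AggregateStats rolls these up into daily statistics in aggregated.csv;
and plot.R draws the charts.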
diff --git a/task-3261/.gitignore b/task-3261/.gitignore index 2bfd23b..5f2b4dc 100644 --- a/task-3261/.gitignore +++ b/task-3261/.gitignore @@ -2,6 +2,8 @@ *.png *.pdf *.csv +bridge-network-statuses +parse-history in/ temp/ *.jar diff --git a/task-3261/AggregateStats.java b/task-3261/AggregateStats.java new file mode 100755 index 0000000..73f7279 --- /dev/null +++ b/task-3261/AggregateStats.java @@ -0,0 +1,122 @@ +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.text.SimpleDateFormat; +import java.util.HashMap; +import java.util.Map; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.TreeSet; + +/* Aggregate half-hourly per-bridge data to daily statistics. */ +public class AggregateStats { + public static void main(String[] args) throws Exception { + + /* Read file containing publication times of bridge statuses and count + * statuses per day. */ + SortedMap<String, Long> publishedStatuses = + new TreeMap<String, Long>(); + File statusFile = new File("bridge-network-statuses"); + if (!statusFile.exists()) { + System.err.println(statusFile.getAbsolutePath() + " does not " + + "exist. Exiting."); + System.exit(1); + } else { + BufferedReader br = new BufferedReader(new FileReader(statusFile)); + String line; + while ((line = br.readLine()) != null) { + String date = line.split(" ")[0]; + if (publishedStatuses.containsKey(date)) { + publishedStatuses.put(date, publishedStatuses.get(date) + 1L); + } else { + publishedStatuses.put(date, 1L); + } + } + } + + /* Aggregate single observations in memory. */ + SortedMap<String, Map<String, Long>> aggregatedStats = + new TreeMap<String, Map<String, Long>>(); + SortedSet<String> allKeys = new TreeSet<String>(); + File evalOutFile = new File("eval-out.csv"); + if (!evalOutFile.exists()) { + System.err.println(evalOutFile.getAbsolutePath() + " does not " + + "exist. Exiting."); + System.exit(1); + } else { + BufferedReader ebr = new BufferedReader(new FileReader(evalOutFile)); + String line; + while ((line = ebr.readLine()) != null) { + String[] parts = line.split(","); + String date = parts[0].split(" ")[0]; + String key = parts[2] + "," + parts[3] + "," + parts[4]; + allKeys.add(key); + Map<String, Long> stats = null; + if (aggregatedStats.containsKey(date)) { + stats = aggregatedStats.get(date); + } else { + stats = new HashMap<String, Long>(); + aggregatedStats.put(date, stats); + } + if (stats.containsKey(key)) { + stats.put(key, stats.get(key) + 1L); + } else { + stats.put(key, 1L); + } + } + ebr.close(); + } + + /* Write aggregated statistics to aggregated.csv. 
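+   * Each output row has the form date,reported,discarded,reason,
+   * bridges,statuses, where bridges is the average number of bridges
+   * per status that fall into this class (integer division), or NA on
+   * days with fewer than 40 statuses or without data for the two
+   * following days.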
*/ + File aggregatedFile = new File("aggregated.csv"); + BufferedWriter abw = new BufferedWriter(new FileWriter( + aggregatedFile)); + abw.write("date,reported,discarded,reason,bridges,statuses\n"); + long previousDateMillis = -1L; + final long DAY = 24L * 60L * 60L * 1000L; + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); + dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + for (Map.Entry<String, Map<String, Long>> e : + aggregatedStats.entrySet()) { + String date = e.getKey(); + long currentDateMillis = dateFormat.parse(date).getTime(); + while (previousDateMillis > -1L && + currentDateMillis - previousDateMillis > DAY) { + previousDateMillis += DAY; + String tempDate = dateFormat.format(previousDateMillis); + for (String key : allKeys) { + abw.write(tempDate + "," + key + ",NA,0\n"); + } + } + previousDateMillis = currentDateMillis; + String nextDate = dateFormat.format(currentDateMillis + DAY); + String nextPlusOneDate = dateFormat.format(currentDateMillis + + 2 * DAY); + long statuses = publishedStatuses.containsKey(date) ? + publishedStatuses.get(date) : 0L; + Map<String, Long> stats = e.getValue(); + if (!aggregatedStats.containsKey(nextDate) || + !aggregatedStats.containsKey(nextPlusOneDate) || + statuses < 40) { + for (String key : allKeys) { + abw.write(date + "," + key + ",NA," + statuses + "\n"); + } + } else { + for (String key : allKeys) { + if (stats.containsKey(key)) { + abw.write(date + "," + key + "," + (stats.get(key) / statuses) + + "," + statuses + "\n"); + } else { + abw.write(date + "," + key + ",0," + statuses + "\n"); + } + } + } + } + abw.close(); + } +} + diff --git a/task-3261/AnalyzeDescriptorParts.java b/task-3261/AnalyzeDescriptorParts.java new file mode 100755 index 0000000..7f4bbc4 --- /dev/null +++ b/task-3261/AnalyzeDescriptorParts.java @@ -0,0 +1,315 @@ +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.text.SimpleDateFormat; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.TreeSet; + +/* Analyze descriptors parts bridge by bridge and determine whether a + * bridge reported usage statistics at a given time, and if not, find out + * why not. */ +public class AnalyzeDescriptorParts { + public static void main(String[] args) throws Exception { + + /* Define paths: we read descriptor part files from temp/ and append + * statistics on half hour detail to eval-out.csv. */ + File tempDirectory = new File("temp"); + File evalOutFile = new File("eval-out.csv"); + + /* Parse descriptor part files bridge by bridge. */ + SimpleDateFormat dateTimeFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + final long HALF_HOUR = 30L * 60L * 1000L; + BufferedWriter ebw = new BufferedWriter(new FileWriter(evalOutFile)); + for (File tempFile : tempDirectory.listFiles()) { + String fingerprint = tempFile.getName(); + BufferedReader br = new BufferedReader(new FileReader(tempFile)); + String line; + + /* For each bridge, determine when it was first seen as relay. All + * timestamps are in half hours since 1970-01-01 00:00:00 UTC. 
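+       * (A millisecond timestamp t falls into half hour
+       * t / (30 * 60 * 1000); a half-hour value is turned back into a
+       * timestamp by multiplying with the same constant.)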
*/ + long firstRunningRelay = Long.MAX_VALUE; + + /* For each time the bridge was listed in a bridge network status as + * Running, remember the status publication time and referenced + * descriptor digest. */ + SortedMap<Long, String> runningBridgeHalfHours = + new TreeMap<Long, String>(); + + /* For each descriptor published by the bridge, remember seven + * timestamps in an array: + * 0: when the bridge was started due to the descriptor publication + * time and reported uptime, + * 1: when the descriptor was published, + * 2: when the descriptor was first referenced in a status, + * 3: when the descriptor was last referenced in status, + * 4: when the first descriptor in the same uptime session was first + * referenced in a status, + * 5: when the last descriptor in the same uptime session was last + * referenced in a status, and + * 6: when the last descriptor in the same uptime session was + * published. */ + Map<String, long[]> descriptorSessions = + new HashMap<String, long[]>(); + + /* For each descriptor, remember the platform string. */ + Map<String, String> descriptorPlatforms = + new HashMap<String, String>(); + + /* For each bridge-stats or geoip-stats line, remember a long[] with + * two timestamps and a boolean: + * 0: when the statistics interval started, + * 1: when the statistics interval ended, + * 2: whether the bridge reported its geoip file digest (only + * 0.2.3.x or higher). */ + SortedMap<Long, long[]> bridgeStats = new TreeMap<Long, long[]>(), + geoipStats = new TreeMap<Long, long[]>(); + + /* Parse the file in temp/ line by line. */ + while ((line = br.readLine()) != null) { + + /* Remember when a descriptor was published and which platform + * string it contained. */ + if (line.startsWith("server-descriptor ")) { + String[] parts = line.split(" "); + long publishedMillis = dateTimeFormat.parse(parts[1] + " " + + parts[2]).getTime(); + long publishedHalfHour = publishedMillis / HALF_HOUR + 1L; + String descriptor = parts[3]; + long startedHalfHour = (publishedMillis + - Long.parseLong(parts[4]) * 1000L) / HALF_HOUR + 1L; + long[] descriptorSession; + if (descriptorSessions.containsKey(descriptor)) { + descriptorSession = descriptorSessions.get(descriptor); + } else { + descriptorSession = new long[7]; + descriptorSessions.put(descriptor, descriptorSession); + } + if (descriptorSession[0] == 0) { + descriptorSession[0] = startedHalfHour; + descriptorSession[1] = publishedHalfHour; + } + String platform = line.substring(line.indexOf("Tor ")); + descriptorPlatforms.put(descriptor, platform); + + /* Remember when a descriptor was first and last referenced from a + * bridge network status. 
*/ + } else if (line.startsWith("running-bridge ")) { + String[] parts = line.split(" "); + long publishedMillis = dateTimeFormat.parse(parts[1] + " " + + parts[2]).getTime(); + long publishedHalfHour = publishedMillis / HALF_HOUR; + String descriptor = parts[3]; + long[] descriptorSession; + if (descriptorSessions.containsKey(descriptor)) { + descriptorSession = descriptorSessions.get(descriptor); + if (descriptorSession[2] == 0 || + publishedHalfHour < descriptorSession[2]) { + descriptorSession[2] = publishedHalfHour; + } + if (publishedHalfHour > descriptorSession[3]) { + descriptorSession[3] = publishedHalfHour; + } + } else { + descriptorSession = new long[7]; + descriptorSession[2] = publishedHalfHour; + descriptorSession[3] = publishedHalfHour; + descriptorSessions.put(descriptor, descriptorSession); + } + runningBridgeHalfHours.put(publishedHalfHour, descriptor); + + /* Remember the start and end of a bridge-stats or geoip-stats + * interval, and remember whether the extra-info descriptor + * contained a geoip-db-digest line. */ + } else if (line.startsWith("bridge-stats ") || + line.startsWith("geoip-stats ")) { + String parts[] = line.split(" "); + long statsEndMillis = dateTimeFormat.parse(parts[1] + " " + + parts[2]).getTime(); + long statsEnd = statsEndMillis / HALF_HOUR; + long statsStart = (statsEndMillis - Long.parseLong(parts[3]) + * 1000L) / HALF_HOUR; + boolean hasGeoipFile = !parts[4].equals("NA"); + long[] stats = new long[3]; + stats[0] = statsStart; + stats[1] = statsEnd; + stats[2] = hasGeoipFile ? 1L : 0L; + if (line.startsWith("bridge-stats ")) { + bridgeStats.put(statsStart, stats); + } else { + geoipStats.put(statsStart, stats); + } + + /* Remember when this bridge was first seen as a relay in the + * consensus. */ + } else if (line.startsWith("running-relay ")) { + long runningRelayMillis = dateTimeFormat.parse(line.substring( + "running-relay ".length())).getTime() / HALF_HOUR; + firstRunningRelay = Math.min(firstRunningRelay, + runningRelayMillis); + } + } + br.close(); + + /* Sort descriptors by their first reference in a bridge network + * status. */ + SortedMap<Long, String> descriptorsByFirstReferenced = + new TreeMap<Long, String>(); + for (Map.Entry<String, long[]> e : descriptorSessions.entrySet()) { + if (e.getValue()[2] == 0) { + continue; + } + descriptorsByFirstReferenced.put(e.getValue()[2], e.getKey()); + } + if (descriptorsByFirstReferenced.isEmpty()) { + continue; + } + + /* Go through list of descriptors and see if two or more of them + * belong to the same bridge uptime session. Two descriptors are + * considered as part of the same uptime session if a) they are + * referenced from two subsequent statuses and b) the start time in + * the second descriptor lies before the publication time of the + * first descriptor. First make a list of all descriptors of a + * session and then update their long[] values to contain session + * information. 
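+       * For example, if descriptor A was last referenced in the status
+       * of half hour n, descriptor B was first referenced in the status
+       * of half hour n + 1, and B's reported start time does not lie
+       * after A's publication time, then A and B belong to the same
+       * session.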
*/ + long[] previousDescriptorTimestamps = null; + long firstStatusInSession = Long.MAX_VALUE, + lastStatusInSession = -1L, lastDescriptorPublished = -1L; + Set<String> descriptorsInSession = new HashSet<String>(); + for (String descriptor : descriptorsByFirstReferenced.values()) { + long[] currentDescriptorTimestamps = + descriptorSessions.get(descriptor); + String currentDescriptor = descriptor; + if (previousDescriptorTimestamps != null) { + boolean sameSession = + previousDescriptorTimestamps[3] + 1L == + currentDescriptorTimestamps[2] && + currentDescriptorTimestamps[0] <= + previousDescriptorTimestamps[1]; + if (!sameSession) { + for (String descriptorInSession : descriptorsInSession) { + long[] descriptorTimestamps = descriptorSessions.get( + descriptorInSession); + descriptorTimestamps[4] = firstStatusInSession; + descriptorTimestamps[5] = lastStatusInSession; + descriptorTimestamps[6] = lastDescriptorPublished; + } + firstStatusInSession = Long.MAX_VALUE; + lastStatusInSession = lastDescriptorPublished = -1L; + descriptorsInSession.clear(); + } + } + firstStatusInSession = Math.min(firstStatusInSession, + currentDescriptorTimestamps[2]); + lastStatusInSession = Math.max(lastStatusInSession, + currentDescriptorTimestamps[3]); + lastDescriptorPublished = Math.max(lastDescriptorPublished, + currentDescriptorTimestamps[1]); + descriptorsInSession.add(currentDescriptor); + previousDescriptorTimestamps = currentDescriptorTimestamps; + } + for (String descriptorInSession : descriptorsInSession) { + long[] descriptorTimestamps = descriptorSessions.get( + descriptorInSession); + descriptorTimestamps[4] = firstStatusInSession; + descriptorTimestamps[5] = lastStatusInSession; + descriptorTimestamps[6] = lastDescriptorPublished; + } + + /* Go through all statuses listing this bridge as Running, determine + * if it reported usage statistics and if they were considered for + * aggregation, and find out possible reasons for the bridge not + * reporting usage statistics. */ + for (Map.Entry<Long, String> e : + runningBridgeHalfHours.entrySet()) { + long statusPublished = e.getKey(); + String descriptor = e.getValue(); + String platform = descriptorPlatforms.get(descriptor); + boolean reported = false, discarded = false; + String reason = "none"; + if (firstRunningRelay <= statusPublished) { + /* The bridge was running as a relay before. */ + discarded = true; + reason = "runasrelay"; + } + if (!geoipStats.headMap(statusPublished + 1).isEmpty()) { + long[] stats = geoipStats.get(geoipStats.headMap(statusPublished + + 1).lastKey()); + if (stats[0] <= statusPublished && stats[1] > statusPublished) { + /* Status publication time falls into stats interval. */ + reported = true; + if (platform != null && platform.compareTo("Tor 0.2.2") > 0) { + /* geoip stats published by versions 0.2.2.x or higher are + * buggy and therefore discarded. */ + discarded = true; + reason = "geoip022"; + } + } + } + if (!bridgeStats.headMap(statusPublished + 1).isEmpty()) { + long[] stats = bridgeStats.get(bridgeStats.headMap( + statusPublished + 1).lastKey()); + if (stats[0] <= statusPublished && stats[1] > statusPublished) { + /* Status publication time falls into stats interval. */ + reported = true; + if (platform != null && platform.compareTo("Tor 0.2.3") > 0 && + stats[2] == 0) { + /* The bridge running version 0.2.3.x did not have a geoip + * file and therefore published bad bridge statistics. 
*/ + discarded = true; + reason = "nogeoipfile"; + } + } + } + if (!reported) { + /* The bridge didn't report statistics, so it doesn't matter + * whether we'd have discarded them. */ + discarded = false; + if (!descriptorSessions.containsKey(descriptor)) { + /* The descriptor referenced in the bridge network status is + * unavailable, which means we cannot make any statement why the + * bridge did not report usage statistics. */ + reason = "noserverdesc"; + } else { + long[] descriptorTimestamps = descriptorSessions.get(descriptor); + long sessionStart = descriptorTimestamps[4], + sessionEnd = descriptorTimestamps[5], + lastDescPubl = descriptorTimestamps[6]; + long currentStatsEnd = sessionStart + + 48 * ((statusPublished - sessionStart) / 48 + 1); + if (sessionEnd <= currentStatsEnd) { + /* The current uptime session ends before the 24-hour statistics + * interval. */ + reason = "lessthan24h"; + } else if (currentStatsEnd > lastDescPubl) { + /* The current uptime session ended after the 24-hour statistics + * interval, but the bridge didn't publish a descriptor + * containing the statistics. */ + reason = "publdelay"; + } else { + /* There is some other reason why the bridge did not report + * statistics. */ + reason = "other"; + } + } + } + ebw.write(dateTimeFormat.format(statusPublished * HALF_HOUR) + "," + + fingerprint + "," + reported + "," + discarded + "," + + reason + "\n"); + } + } + ebw.close(); + } +} + diff --git a/task-3261/AnalyzeStatsCoverage.java b/task-3261/AnalyzeStatsCoverage.java deleted file mode 100644 index 4688bde..0000000 --- a/task-3261/AnalyzeStatsCoverage.java +++ /dev/null @@ -1,478 +0,0 @@ -import java.io.*; -import java.text.*; -import java.util.*; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.codec.binary.Hex; -public class AnalyzeStatsCoverage { - public static void main(String[] args) throws Exception { - File inDirectory = new File("in"); - File tempDirectory = new File("temp"); - File outFile = new File("stats-coverage.csv"); - - /* Extract relevant lines from extra-info descriptors in inDirectory - * and write them to files tempDirectory/$date/$fingerprint-$date for - * later processing by fingerprint and date. 
*/ - SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); - dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - if (inDirectory.exists() && inDirectory.isDirectory()) { - System.out.println("Parsing descriptors in '" - + inDirectory.getAbsolutePath() + "'."); - long started = System.currentTimeMillis(); - tempDirectory.mkdirs(); - Stack<File> dirs = new Stack<File>(); - SortedSet<File> files = new TreeSet<File>(); - dirs.add(inDirectory); - while (!dirs.isEmpty()) { - File file = dirs.pop(); - if (file.isDirectory()) { - if (file.getName().equals("statuses")) { - continue; - } - for (File f : file.listFiles()) { - dirs.add(f); - } - } else { - files.add(file); - } - } - int totalFiles = files.size(), fileNumber = 0; - for (File file : files) { - if (++fileNumber % (totalFiles / 1000) == 0) { - int numberLength = String.valueOf(totalFiles).length(); - long minutesLeft = (((System.currentTimeMillis() - started) - * (totalFiles - fileNumber)) / fileNumber) / (60L * 1000L); - System.out.printf("Parsed %" + numberLength + "d of %" - + numberLength + "d descriptors (%3d %%) %d minutes left%n", - fileNumber, totalFiles, (fileNumber * 100) / totalFiles, - minutesLeft); - } - BufferedReader br = new BufferedReader(new FileReader(file)); - String line, fingerprint = null, publishedLine = null; - SortedMap<String, SortedSet<String>> linesByDate = - new TreeMap<String, SortedSet<String>>(); - while ((line = br.readLine()) != null) { - if (line.startsWith("extra-info ")) { - fingerprint = line.split(" ")[2]; - } else if (line.startsWith("write-history ") || - line.startsWith("read-history ")) { - String[] parts = line.split(" "); - if (parts.length < 6) { - continue; - } - String historyEndDate = parts[1]; - long historyEndMillis = dateTimeFormat.parse(parts[1] + " " - + parts[2]).getTime(); - long intervalLength = Long.parseLong(parts[3].substring(1)); - if (intervalLength != 900L) { - System.out.println("Non-standard interval length in " - + "line '" + line + "' in file " - + file.getAbsolutePath() + ". Skipping this line."); - continue; - } - int intervals = parts[5].split(",").length; - long historyStartMillis = historyEndMillis - - (intervals * intervalLength * 1000L); - long currentMillis = historyStartMillis; - String currentDate; - while ((currentDate = dateFormat.format(currentMillis)). - compareTo(historyEndDate) <= 0) { - if (!linesByDate.containsKey(currentDate)) { - linesByDate.put(currentDate, new TreeSet<String>()); - } - linesByDate.get(currentDate).add(line); - currentMillis += 24L * 60L * 60L * 1000L; - } - } else if (line.startsWith("dirreq-stats-end ") || - line.startsWith("entry-stats-end ") || - line.startsWith("exit-stats-end ") || - line.startsWith("cell-stats-end ") || - line.startsWith("conn-bi-direct ") || - line.startsWith("bridge-stats-end ")) { - String[] parts = line.split(" "); - if (parts.length < 5) { - System.out.println("Malformed line '" + line + "' in " - + "file " + file.getAbsolutePath() + ". Skipping " - + "this line."); - continue; - } - String statsEndDate = parts[1]; - long statsEndMillis = dateTimeFormat.parse(parts[1] + " " - + parts[2]).getTime(); - long intervalLength = Long.parseLong(parts[3].substring(1)); - long statsStartMillis = statsEndMillis - - intervalLength * 1000L; - long currentMillis = statsStartMillis; - String currentDate; - while ((currentDate = dateFormat.format(currentMillis)). 
- compareTo(statsEndDate) <= 0) { - if (!linesByDate.containsKey(currentDate)) { - linesByDate.put(currentDate, new TreeSet<String>()); - } - linesByDate.get(currentDate).add(line); - currentMillis += 24L * 60L * 60L * 1000L; - } - } else if (line.startsWith("published ")) { - publishedLine = line; - } else if (line.startsWith("geoip-start-time ")) { - if (publishedLine == null) { - System.out.println("Missing published line in file " - + file.getAbsolutePath() + ". Skipping " - + "geoip-start-time line."); - continue; - } - String[] publishedParts = publishedLine.split(" "); - if (publishedParts.length < 3) { - System.out.println("Malformed line '" + publishedLine - + "' in file " + file.getAbsolutePath() + ". " - + "Skipping geoip-start-time line."); - continue; - } - String[] parts = line.split(" "); - if (parts.length < 3) { - System.out.println("Malformed line '" + line + "' in " - + "file " + file.getAbsolutePath() + ". Skipping " - + "this line."); - continue; - } - String statsEndDate = parts[1]; - long statsEndMillis = dateTimeFormat.parse( - publishedParts[1] + " " + publishedParts[2]).getTime(); - long statsStartMillis = dateTimeFormat.parse(parts[1] + " " - + parts[2]).getTime(); - long intervalLength = (statsEndMillis - statsStartMillis) - / 1000L; - String rewrittenLine = "geoip-stats-end " - + publishedParts[1] + " " + publishedParts[2] + " (" - + intervalLength + " s)"; - long currentMillis = statsStartMillis; - String currentDate; - while ((currentDate = dateFormat.format(currentMillis)). - compareTo(statsEndDate) <= 0) { - if (!linesByDate.containsKey(currentDate)) { - linesByDate.put(currentDate, new TreeSet<String>()); - } - linesByDate.get(currentDate).add(rewrittenLine); - currentMillis += 24L * 60L * 60L * 1000L; - } - } - } - br.close(); - for (Map.Entry<String, SortedSet<String>> e : - linesByDate.entrySet()) { - String date = e.getKey(); - SortedSet<String> lines = e.getValue(); - File outputFile = new File(tempDirectory, date + "/" - + fingerprint + "-" + date); - if (outputFile.exists()) { - br = new BufferedReader(new FileReader(outputFile)); - while ((line = br.readLine()) != null) { - lines.add(line); - } - br.close(); - } - outputFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - outputFile)); - for (String l : lines) { - bw.write(l + "\n"); - } - bw.close(); - } - } - } - - /* Parse bridge network statuses and append "running " lines to - * files tempDirectory/$date/$fingerprint-$date for later processing - * by fingerprint and date. 
*/ - SimpleDateFormat statusFormat = - new SimpleDateFormat("yyyyMMdd-HHmmss"); - statusFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - if (inDirectory.exists() && inDirectory.isDirectory()) { - System.out.println("Parsing statuses in '" - + inDirectory.getAbsolutePath() + "'."); - long started = System.currentTimeMillis(); - tempDirectory.mkdirs(); - Stack<File> dirs = new Stack<File>(); - SortedSet<File> files = new TreeSet<File>(); - dirs.add(inDirectory); - while (!dirs.isEmpty()) { - File file = dirs.pop(); - if (file.isDirectory()) { - if (file.getName().equals("extra-infos")) { - continue; - } - for (File f : file.listFiles()) { - dirs.add(f); - } - } else { - files.add(file); - } - } - int totalFiles = files.size(), fileNumber = 0; - for (File file : files) { - if (++fileNumber % (totalFiles / 1000) == 0) { - int numberLength = String.valueOf(totalFiles).length(); - long minutesLeft = (((System.currentTimeMillis() - started) - * (totalFiles - fileNumber)) / fileNumber) / (60L * 1000L); - System.out.printf("Parsed %" + numberLength + "d of %" - + numberLength + "d statuses (%3d %%) %d minutes left%n", - fileNumber, totalFiles, (fileNumber * 100) / totalFiles, - minutesLeft); - } - long statusPublishedMillis = statusFormat.parse( - file.getName().substring(0, "YYYYMMdd-HHmmss".length())). - getTime(); - SortedSet<String> statusPublishedDates = new TreeSet<String>(); - String statusPublishedString = dateTimeFormat.format( - statusPublishedMillis); - statusPublishedDates.add(dateFormat.format( - statusPublishedMillis)); - statusPublishedDates.add(dateFormat.format( - statusPublishedMillis + 15L * 60L * 1000L)); - BufferedReader br = new BufferedReader(new FileReader(file)); - String line, rLine = null; - while ((line = br.readLine()) != null) { - if (line.startsWith("r ")) { - rLine = line; - } else if (line.startsWith("s ") && line.contains(" Running") && - rLine != null) { - String[] parts = rLine.split(" "); - if (parts.length != 9) { - System.out.println("Illegal line '" + rLine + "' in " - + file.getAbsolutePath() + ". Skipping this line."); - continue; - } - String fingerprint = Hex.encodeHexString(Base64.decodeBase64( - parts[2] + "==")); - for (String date : statusPublishedDates) { - File outputFile = new File(tempDirectory, date + "/" - + fingerprint.toUpperCase() + "-" + date); - outputFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - outputFile, true)); - bw.write("running " + statusPublishedString + "\n"); - bw.close(); - } - } - } - } - } - - /* Parse relevant lines by fingerprint and date. The result will be - * how many bytes that relay or bridge read/wrote in total, and how - * many bytes were included in the different reported statistics. - * Other results are the number of seconds for which this relay or - * bridge reported byte histories and other statistics, either based - * on self-reported bandwidth histories or based on the Running flag - * in bridge network statuses. 
*/ - if (tempDirectory.exists() && tempDirectory.isDirectory()) { - System.out.println("Evaluating previously parsed descriptors in '" - + tempDirectory.getAbsolutePath() + "'."); - BufferedWriter bw = new BufferedWriter(new FileWriter(outFile)); - bw.write("fingerprint,date,totalwritten,totalread,totalseconds," - + "totalrunning,dirreqwritten,dirreqread,dirreqseconds," - + "dirreqrunning,entrywritten,entryread,entryseconds," - + "entryrunning,exitwritten,exitread,exitseconds,exitrunning," - + "cellwritten,cellread,cellseconds,cellrunning," - + "connbidirectwritten,connbidirectread,connbidirectseconds," - + "connbidirectrunning,bridgewritten,bridgeread,bridgeseconds," - + "bridgerunning,geoipwritten,geoipread,geoipseconds," - + "geoiprunning\n"); - Stack<File> dirs = new Stack<File>(); - SortedSet<File> files = new TreeSet<File>(); - dirs.add(tempDirectory); - while (!dirs.isEmpty()) { - File file = dirs.pop(); - if (file.isDirectory()) { - for (File f : file.listFiles()) { - dirs.add(f); - } - } else { - files.add(file); - } - } - int totalFiles = files.size(), fileNumber = 0; - for (File file : files) { - if (++fileNumber % (totalFiles / 1000) == 0) { - int numberLength = String.valueOf(totalFiles).length(); - System.out.printf("Evaluated %" + numberLength + "d of %" - + numberLength + "d descriptors/days (%3d %%)%n", - fileNumber, totalFiles, (fileNumber * 100) / totalFiles); - } - String fingerprint = file.getName().substring(0, 40); - String date = file.getName().substring(41); - long dateStartMillis = dateFormat.parse(date).getTime(); - long dateEndMillis = dateStartMillis + 24L * 60L * 60L * 1000L; - long[] writeHistory = new long[96], readHistory = new long[96]; - boolean[] upBridge = new boolean[96], - upStatus = new boolean[96], - dirreqStats = new boolean[96], - entryStats = new boolean[96], - exitStats = new boolean[96], - cellStats = new boolean[96], - connBiDirectStats = new boolean[96], - bridgeStats = new boolean[96], - geoipStats = new boolean[96]; - BufferedReader br = new BufferedReader(new FileReader(file)); - String line; - while ((line = br.readLine()) != null) { - if (line.startsWith("running ")) { - long statusPublishedMillis = dateTimeFormat.parse( - line.substring("running ".length())).getTime(); - int j = (int) ((statusPublishedMillis - dateStartMillis) - / (900L * 1000L)); - for (int i = 0; i < 2; i++) { - if (j + i >= 0 && j + i < 96) { - upStatus[j + i] = true; - } - } - } else if (line.startsWith("write-history ") || - line.startsWith("read-history ")) { - long[] history = line.startsWith("write-history ") - ? writeHistory : readHistory; - String[] parts = line.split(" "); - long historyEndMillis = dateTimeFormat.parse(parts[1] + " " - + parts[2]).getTime(); - String[] historyValues = parts[5].split(","); - long historyStartMillis = historyEndMillis - - (historyValues.length * 900L * 1000L); - long currentMillis = historyStartMillis; - for (int i = 0; i < historyValues.length; i++) { - if (currentMillis >= dateStartMillis && - currentMillis < dateEndMillis) { - int j = (int) ((currentMillis - dateStartMillis) - / (900L * 1000L)); - if (j < 0 || j >= 96) { - System.out.println("Internal error when processing " - + "line '" + line + "'. Index = " + j - + ". 
Exiting."); - System.exit(1); - } - history[j] = Long.parseLong(historyValues[i]); - upBridge[j] = true; - } - currentMillis += 15L * 60L * 1000L; - } - } else if (line.startsWith("dirreq-stats-end ") || - line.startsWith("entry-stats-end ") || - line.startsWith("exit-stats-end ") || - line.startsWith("cell-stats-end ") || - line.startsWith("conn-bi-direct ") || - line.startsWith("bridge-stats-end ") || - line.startsWith("geoip-stats-end ")) { - boolean[] stats = null; - if (line.startsWith("dirreq-stats-end ")) { - stats = dirreqStats; - } else if (line.startsWith("entry-stats-end ")) { - stats = entryStats; - } else if (line.startsWith("exit-stats-end ")) { - stats = exitStats; - } else if (line.startsWith("cell-stats-end ")) { - stats = cellStats; - } else if (line.startsWith("conn-bi-direct ")) { - stats = connBiDirectStats; - } else if (line.startsWith("bridge-stats-end ")) { - stats = bridgeStats; - } else if (line.startsWith("geoip-stats-end ")) { - stats = geoipStats; - } else { - System.out.println("Internal error when processing line '" - + line + "'. Exiting."); - System.exit(1); - } - String[] parts = line.split(" "); - long statsEndMillis = dateTimeFormat.parse(parts[1] + " " - + parts[2]).getTime(); - long intervalLength = Long.parseLong(parts[3].substring(1)); - long statsStartMillis = statsEndMillis - - intervalLength * 1000L; - long currentMillis = statsStartMillis; - while (currentMillis < dateEndMillis) { - if (currentMillis >= dateStartMillis) { - int j = (int) ((currentMillis - dateStartMillis) - / (900L * 1000L)); - if (j < 0 || j >= 96) { - System.out.println("Internal error when processing " - + "line '" + line + "'. Index = " + j - + ". Exiting."); - System.exit(1); - } - stats[j] = true; - } - currentMillis += 15L * 60L * 1000L; - } - } - } - br.close(); - bw.write(fingerprint + "," + date + ","); - long totalWritten = 0L, totalRead = 0L, totalSeconds = 0L, - totalRunning = 0L, dirreqWritten = 0L, dirreqRead = 0L, - dirreqSeconds = 0L, dirreqRunning = 0L, entryWritten = 0L, - entryRead = 0L, entrySeconds = 0L, entryRunning = 0L, - exitWritten = 0L, exitRead = 0L, exitSeconds = 0L, - exitRunning = 0L, cellWritten = 0L, cellRead = 0L, - cellSeconds = 0L, cellRunning = 0L, connBiDirectWritten = 0L, - connBiDirectRead = 0L, connBiDirectSeconds = 0L, - connBiDirectRunning = 0L, bridgeWritten = 0L, bridgeRead = 0L, - bridgeSeconds = 0L, bridgeRunning = 0L, geoipWritten = 0L, - geoipRead = 0L, geoipSeconds = 0L, geoipRunning = 0L; - for (int i = 0; i < 96; i++) { - totalWritten += writeHistory[i]; - totalRead += readHistory[i]; - totalSeconds += upBridge[i] ? 900L : 0L; - totalRunning += upStatus[i] ? 900L : 0L; - dirreqWritten += dirreqStats[i] ? writeHistory[i] : 0L; - dirreqRead += dirreqStats[i] ? readHistory[i] : 0L; - dirreqSeconds += dirreqStats[i] && upBridge[i] ? 900L : 0L; - dirreqRunning += dirreqStats[i] && upStatus[i] ? 900L : 0L; - entryWritten += entryStats[i] ? writeHistory[i] : 0L; - entryRead += entryStats[i] ? readHistory[i] : 0L; - entrySeconds += entryStats[i] && upBridge[i] ? 900L : 0L; - entryRunning += entryStats[i] && upStatus[i] ? 900L : 0L; - exitWritten += exitStats[i] ? writeHistory[i] : 0L; - exitRead += exitStats[i] ? readHistory[i] : 0L; - exitSeconds += exitStats[i] && upBridge[i] ? 900L : 0L; - exitRunning += exitStats[i] && upStatus[i] ? 900L : 0L; - cellWritten += cellStats[i] ? writeHistory[i] : 0L; - cellRead += cellStats[i] ? readHistory[i] : 0L; - cellSeconds += cellStats[i] && upBridge[i] ? 
900L : 0L; - cellRunning += cellStats[i] && upStatus[i] ? 900L : 0L; - connBiDirectWritten += connBiDirectStats[i] ? writeHistory[i] - : 0L; - connBiDirectRead += connBiDirectStats[i] ? readHistory[i] - : 0L; - connBiDirectSeconds += connBiDirectStats[i] && upBridge[i] - ? 900L : 0L; - connBiDirectRunning += connBiDirectStats[i] && upStatus[i] - ? 900L : 0L; - bridgeWritten += bridgeStats[i] ? writeHistory[i] : 0L; - bridgeRead += bridgeStats[i] ? readHistory[i] : 0L; - bridgeSeconds += bridgeStats[i] && upBridge[i] ? 900L : 0L; - bridgeRunning += bridgeStats[i] && upStatus[i] ? 900L : 0L; - geoipWritten += geoipStats[i] ? writeHistory[i] : 0L; - geoipRead += geoipStats[i] ? readHistory[i] : 0L; - geoipSeconds += geoipStats[i] && upBridge[i] ? 900L : 0L; - geoipRunning += geoipStats[i] && upStatus[i] ? 900L : 0L; - } - bw.write(totalWritten + "," + totalRead + "," + totalSeconds + "," - + totalRunning + "," + dirreqWritten + "," + dirreqRead + "," - + dirreqSeconds + "," + dirreqRunning + "," + entryWritten - + "," + entryRead + "," + entrySeconds + "," + entryRunning - + "," + exitWritten + "," + exitRead + "," + exitSeconds + "," - + exitRunning + "," + cellWritten + "," + cellRead + "," - + cellSeconds + "," + cellRunning + "," + connBiDirectWritten - + "," + connBiDirectRead + "," + connBiDirectSeconds + "," - + connBiDirectRunning + "," + bridgeWritten + "," + bridgeRead - + "," + bridgeSeconds + "," + bridgeRunning + "," - + geoipWritten + "," + geoipRead + "," + geoipSeconds + "," - + geoipRunning + "\n"); - } - bw.close(); - } - } -} - diff --git a/task-3261/ExtractDescriptorParts.java b/task-3261/ExtractDescriptorParts.java new file mode 100755 index 0000000..544022d --- /dev/null +++ b/task-3261/ExtractDescriptorParts.java @@ -0,0 +1,172 @@ +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.text.SimpleDateFormat; +import java.util.Iterator; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeSet; + +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; +import org.torproject.descriptor.BridgeNetworkStatus; +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.ExtraInfoDescriptor; +import org.torproject.descriptor.NetworkStatusEntry; +import org.torproject.descriptor.RelayNetworkStatusConsensus; +import org.torproject.descriptor.ServerDescriptor; + +/* Extract the relevant parts from bridge descriptors and consensuses that + * are required to answer what fraction of bridges are not reporting + * bridge usage statistics. */ +public class ExtractDescriptorParts { + public static void main(String[] args) throws Exception { + + /* Define paths: we parse descriptor (tarballs) from in/, store the + * parse history to parse-history, write relevant parts per bridge to + * temp/, and write publication times of bridge network statuses to + * bridge-network-statuses. */ + File inDirectory = new File("in"); + File parseHistoryFile = new File("parse-history"); + File tempDirectory = new File("temp"); + File statusFile = new File("bridge-network-statuses"); + + /* Read descriptors. 
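+     * The part files written to temp/ contain lines of the forms
+     * "bridge-stats <end> <seconds> <geoip-db-digest|NA>",
+     * "geoip-stats <end> <seconds> <geoip-db-digest|NA>",
+     * "running-bridge <published> <descriptor-digest>",
+     * "server-descriptor <published> <digest> <uptime> <platform>", and
+     * "running-relay <valid-after>".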
*/ + SimpleDateFormat dateTimeFormat = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + DescriptorReader reader = + DescriptorSourceFactory.createDescriptorReader(); + reader.addDirectory(inDirectory); + reader.setExcludeFiles(parseHistoryFile); + Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + if (descriptorFile.getDescriptors() != null) { + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + + /* Extract bridge-stats and geoip-stats from bridge extra-info + * descriptors. */ + if (descriptor instanceof ExtraInfoDescriptor) { + System.out.print("e"); + SortedSet<String> lines = new TreeSet<String>(); + ExtraInfoDescriptor extraInfoDescriptor = + (ExtraInfoDescriptor) descriptor; + if (extraInfoDescriptor.getBridgeStatsEndMillis() > 0) { + lines.add("bridge-stats " + dateTimeFormat.format( + extraInfoDescriptor.getBridgeStatsEndMillis()) + " " + + extraInfoDescriptor.getBridgeStatsIntervalLength() + + " " + (extraInfoDescriptor.getGeoipDbDigest() == null + ? "NA" : extraInfoDescriptor.getGeoipDbDigest())); + } + if (extraInfoDescriptor.getGeoipStartTimeMillis() > 0) { + long intervalLength = + (extraInfoDescriptor.getPublishedMillis() + - extraInfoDescriptor.getGeoipStartTimeMillis()) + / 1000L; + String geoipStatsEnd = dateTimeFormat.format( + extraInfoDescriptor.getPublishedMillis()); + lines.add("geoip-stats " + geoipStatsEnd + " " + + intervalLength + " " + + (extraInfoDescriptor.getGeoipDbDigest() == null + ? "NA" : extraInfoDescriptor.getGeoipDbDigest())); + } + if (!lines.isEmpty()) { + File outputFile = new File(tempDirectory, + extraInfoDescriptor.getFingerprint().toUpperCase()); + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFile, true)); + for (String l : lines) { + bw.write(l + "\n"); + } + bw.close(); + } + + /* Extract all bridges with the Running flag from bridge network + * statuses. Also extract the status publication time. */ + } else if (descriptor instanceof BridgeNetworkStatus) { + System.out.print("n"); + BridgeNetworkStatus status = (BridgeNetworkStatus) descriptor; + String published = dateTimeFormat.format( + status.getPublishedMillis()); + if (status.getStatusEntries() != null) { + for (NetworkStatusEntry entry : + status.getStatusEntries().values()) { + if (entry.getFlags().contains("Running")) { + File outputFile = new File(tempDirectory, + entry.getFingerprint().toUpperCase()); + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFile, true)); + String digest = entry.getDescriptor().toUpperCase(); + bw.write("running-bridge " + published + " " + digest + + "\n"); + bw.close(); + } + } + BufferedWriter bw = new BufferedWriter(new FileWriter( + statusFile, true)); + bw.write(published + "\n"); + bw.close(); + } + + /* Extract publication time, digest, uptime, and platform string + * from bridge server descriptors. */ + } else if (descriptor instanceof ServerDescriptor) { + System.out.print("s"); + ServerDescriptor serverDescriptor = + (ServerDescriptor) descriptor; + String published = dateTimeFormat.format( + serverDescriptor.getPublishedMillis()); + String digest = descriptorFile.getFileName().substring( + descriptorFile.getFileName().lastIndexOf("/") + 1). + toUpperCase(); + String uptime = serverDescriptor.getUptime() == null ? 
"-1" + : String.valueOf(serverDescriptor.getUptime()); + String platform = serverDescriptor.getPlatform() == null + ? "NA" : serverDescriptor.getPlatform(); + File outputFile = new File(tempDirectory, + serverDescriptor.getFingerprint().toUpperCase()); + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFile, true)); + bw.write("server-descriptor " + published + " " + + digest + " " + uptime + " " + platform + "\n"); + bw.close(); + + /* Extract hashed fingerprints of all relays with the Running + * flag from relay network status consensuses. */ + } else if (descriptor instanceof RelayNetworkStatusConsensus) { + System.out.print("r"); + RelayNetworkStatusConsensus status = + (RelayNetworkStatusConsensus) descriptor; + if (status.getStatusEntries() != null) { + for (NetworkStatusEntry entry : + status.getStatusEntries().values()) { + if (entry.getFlags().contains("Running")) { + String hashedFingerprint = Hex.encodeHexString( + DigestUtils.sha(Hex.decodeHex( + entry.getFingerprint().toCharArray()))). + toUpperCase(); + File outputFile = new File(tempDirectory, + hashedFingerprint); + outputFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + outputFile, true)); + bw.write("running-relay " + dateTimeFormat.format( + status.getValidAfterMillis()) + "\n"); + bw.close(); + } + } + } + } + } + } + } + } +} + diff --git a/task-3261/README b/task-3261/README old mode 100644 new mode 100755 index cb430ac..43ef208 --- a/task-3261/README +++ b/task-3261/README @@ -1,4 +1,36 @@ -$ javac -cp commons-codec-1.4.jar AnalyzeStatsCoverage.java -$ java -cp commons-codec-1.4.jar.: -Xmx4g AnalyzeStatsCoverage -$ R --slave -f stats-coverage.R +What fraction of our bridges are not reporting usage statistics? +================================================================ + +Usage: + +1. Put metrics tarballs into a directory called in/. The best parsing + performance can be achieved by decompressing tarballs without + extracting them. The bridge-descriptors-* and consensuses-* tarballs + are required for this analysis. + +2. Clone metrics-lib.git, build descriptor.jar, and put it in this + directory. + +3. Download Apache Commons Codec and Compress and put the .jar files in + this directory. + +4. Parse descriptors and write all relevant parts to one file per bridge: + $ javac + -cp commons-codec-1.4.jar:commons-compress-1.3.jar:descriptor.jar + ExtractDescriptorParts.java + $ java + -cp commons-codec-1.4.jar:commons-compress-1.3.jar:descriptor.jar:. + ExtractDescriptorParts + +5. Analyze descriptors parts bridge by bridge and determine whether it + reported bridge stats at a given time, and if not, find out why not: + $ javac AnalyzeDescriptorParts.java + $ java AnalyzeDescriptorParts + +6. Aggregate daily statistics that can be plotted: + $ javac AggregateStats.java + $ java AggregateStats + +7. Plot results: + $ R --slave -f plot.R
diff --git a/task-3261/plot.R b/task-3261/plot.R
new file mode 100644
index 0000000..8a3808c
--- /dev/null
+++ b/task-3261/plot.R
@@ -0,0 +1,65 @@
+library(ggplot2)
+library(scales)
+library(reshape)
+a <- read.csv("aggregated.csv", stringsAsFactors = FALSE)
+
+e <- a
+e <- data.frame(date = as.Date(e$date), case = ifelse(
+  e$reported == "true", ifelse(e$discarded == "false", "case1", "case2"),
+  "case3"), bridges = e$bridges)
+e <- aggregate(list(bridges = e$bridges),
+  by = list(date = e$date, case = e$case), FUN = sum)
+e <- cast(e, date ~ case)
+sums <- e$case1 + e$case2 + e$case3
+e <- data.frame(date = e$date, case1 = e$case1 / sums,
+  case2 = e$case2 / sums, case3 = e$case3 / sums, stringsAsFactors = FALSE)
+e <- melt(e, "date")
+e <- data.frame(date = e$date, variable = ifelse(e$variable == "case1",
+  "reported and used", ifelse(e$variable == "case2",
+  "reported and discarded", "not reported")), value = e$value)
+ggplot(e, aes(x = as.Date(date), y = value)) +
+geom_line() +
+facet_grid(variable ~ .) +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", labels = percent) +
+opts(title = "Fraction of bridge usage statistics that were...\n")
+ggsave("reported-bridge-statistics.png", width = 8, height = 6, dpi = 120)
+
+d <- a
+d <- d[d$reported == "false", ]
+d <- data.frame(date = d$date, reason = d$reason, value = d$bridges)
+d <- cast(d, date ~ reason)
+d <- data.frame(date = d$date, case1 = d$lessthan24h / sums,
+  case2 = d$publdelay / sums, case3 = d$other / sums)
+d <- melt(d, "date")
+d <- data.frame(date = d$date, variable = ifelse(d$variable == "case1",
+  "Less than 24h uptime", ifelse(d$variable == "case2",
+  "Publication delay", "Other reason")), value = d$value)
+ggplot(d, aes(x = as.Date(date), y = value)) +
+geom_line() +
+facet_grid(variable ~ .) +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", labels = percent) +
+opts(title = "Reasons for bridges not reporting usage statistics\n")
+ggsave("bridge-statistics-nonreported.png", width = 8, height = 6,
+  dpi = 120)
+
+b <- a
+b <- b[b$discarded == "true", ]
+b <- data.frame(date = b$date, reason = b$reason, value = b$bridges)
+b <- cast(b, date ~ reason)
+b <- data.frame(date = b$date, case1 = b$geoip022 / sums,
+  case2 = b$nogeoipfile / sums, case3 = b$runasrelay / sums)
+b <- melt(b, "date")
+b <- data.frame(date = b$date, variable = ifelse(b$variable == "case1",
+  "0.2.2.x geoip-stats bug", ifelse(b$variable == "case2",
+  "missing geoip file", "Run as non-bridge relay")), value = b$value)
+ggplot(b, aes(x = as.Date(date), y = value)) +
+geom_line() +
+facet_grid(variable ~ .) +
+scale_x_date(name = "") +
+scale_y_continuous(name = "", labels = percent) +
+opts(title = "Reasons for discarding reported usage statistics\n")
+ggsave("bridge-statistics-discarded.png", width = 8, height = 6,
+  dpi = 120)
+

diff --git a/task-3261/stats-coverage.R b/task-3261/stats-coverage.R
deleted file mode 100644
index aef63f2..0000000
--- a/task-3261/stats-coverage.R
+++ /dev/null
@@ -1,25 +0,0 @@
-library(ggplot2)
-library(scales)
-b <- read.csv("stats-coverage.csv")
-b <- aggregate(list(
-  totalwritten = b$totalwritten, totalseconds = b$totalseconds,
-  totalrunning = b$totalrunning, bridgewritten = b$bridgewritten,
-  bridgeseconds = b$bridgeseconds, bridgerunning = b$bridgerunning,
-  geoipwritten = b$geoipwritten, geoipseconds = b$geoipseconds,
-  geoiprunning = b$geoiprunning), by = list(date = as.Date(b$date)), sum)
-b <- rbind(data.frame(date = b$date, variable = "by written bytes",
-  value = (b$bridgewritten + b$geoipwritten) / b$totalwritten),
-  data.frame(date = b$date, variable = "by uptime (bandwidth history)",
-  value = (b$bridgeseconds + b$geoipseconds) / b$totalseconds),
-  data.frame(date = b$date, variable = "by uptime (Running flag)",
-  value = (b$bridgerunning + b$geoiprunning) / b$totalrunning))
-b <- b[b$date >= as.Date("2010-10-01") & b$date < as.Date("2012-04-01"), ]
-ggplot(b, aes(x = date, y = value)) +
-geom_line() +
-facet_grid(variable ~ .) +
-scale_x_date(name = "") +
-scale_y_continuous(name = "", limits = c(0, 1), labels = percent) +
-scale_colour_hue(name = "") +
-opts(title = "Fraction of bridges reporting statistics\n")
-ggsave("stats-coverage-bridges.png", width = 8, height = 7, dpi = 72)
-
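To double-check aggregated.csv outside of R, here is a minimal sketch
(hypothetical helper, not part of this commit) that computes the
overall fraction of bridge observations corresponding to the "reported
and used" case in plot.R, skipping NA rows:

import java.io.BufferedReader;
import java.io.FileReader;

/* Compute the fraction of bridge observations that were reported and
 * not discarded, from aggregated.csv with columns
 * date,reported,discarded,reason,bridges,statuses. */
public class MeanReportedFraction {
  public static void main(String[] args) throws Exception {
    double used = 0.0, total = 0.0;
    BufferedReader br = new BufferedReader(
        new FileReader("aggregated.csv"));
    String line = br.readLine(); /* skip the header line */
    while ((line = br.readLine()) != null) {
      String[] parts = line.split(",");
      if (parts[4].equals("NA")) {
        continue;
      }
      double bridges = Double.parseDouble(parts[4]);
      total += bridges;
      if (parts[1].equals("true") && parts[2].equals("false")) {
        used += bridges;
      }
    }
    br.close();
    System.out.println(used / total);
  }
}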