[metrics-tasks/master] Add parsing and graphing code for #4147.

commit 2e8049a8fdc0c72ba8136a9886ed3af75896cadc
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date: Sat Mar 17 08:10:34 2012 +0100

    Add parsing and graphing code for #4147.
---
 task-4147/.gitignore | 13 ++
 task-4147/AnalyzeDifferentExitAddress.java | 246 ++++++++++++++++++++++++++++
 task-4147/README | 37 ++++
 task-4147/different-exit-address-1.R | 12 ++
 task-4147/different-exit-address-2.R | 39 +++++
 5 files changed, 347 insertions(+), 0 deletions(-)

diff --git a/task-4147/.gitignore b/task-4147/.gitignore
new file mode 100644
index 0000000..7e9e868
--- /dev/null
+++ b/task-4147/.gitignore
@@ -0,0 +1,13 @@
+in/
+out/
+status/
+*.jar
+*.class
+.classpath
+.project
+.settings/
+*.csv
+*.pdf
+*.png
+*.swp
+
diff --git a/task-4147/AnalyzeDifferentExitAddress.java b/task-4147/AnalyzeDifferentExitAddress.java
new file mode 100644
index 0000000..44984a8
--- /dev/null
+++ b/task-4147/AnalyzeDifferentExitAddress.java
@@ -0,0 +1,246 @@
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.torproject.descriptor.BandwidthHistory;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExitList;
+import org.torproject.descriptor.ExitListEntry;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+
+/* Answer the question what fraction of bytes written by relays with the
+ * Exit flag could have used a different address for exiting than the
+ * relay used for registering in the Tor network. */
+public class AnalyzeDifferentExitAddress {
+  public static void main(String[] args) throws Exception {
+
+    System.out.println(new Date() + " Starting.");
+
+    /* Iterate over extra-info descriptors to learn about bandwidth
+     * histories. Append 15-minute intervals of written bytes to
+     * status/written-bytes/$fingerprint. */
+    System.out.println(new Date() + " Reading in/extra-infos/* ...");
+    DescriptorReader extraInfoReader = DescriptorSourceFactory
+        .createDescriptorReader();
+    extraInfoReader.addDirectory(new File("in/extra-infos"));
+    extraInfoReader.setExcludeFiles(new File(
+        "status/extra-info-history"));
+    Iterator<DescriptorFile> extraInfoFiles =
+        extraInfoReader.readDescriptors();
+    while (extraInfoFiles.hasNext()) {
+      DescriptorFile extraInfoFile = extraInfoFiles.next();
+      if (extraInfoFile.getDescriptors() != null) {
+        for (Descriptor descriptor : extraInfoFile.getDescriptors()) {
+          ExtraInfoDescriptor extraInfoDescriptor =
+              (ExtraInfoDescriptor) descriptor;
+          BandwidthHistory writeHistory = extraInfoDescriptor.
+              getWriteHistory();
+          if (writeHistory == null) {
+            continue;
+          }
+          String fingerprint = extraInfoDescriptor.getFingerprint();
+          File writtenBytesFile = new File("status/written-bytes/"
+              + fingerprint);
+          writtenBytesFile.getParentFile().mkdirs();
+          BufferedWriter bw = new BufferedWriter(new FileWriter(
+              writtenBytesFile, true));
+          for (Map.Entry<Long, Long> e :
+              writeHistory.getBandwidthValues().entrySet()) {
+            long intervalEndMillis = e.getKey();
+            long bytesWritten = e.getValue();
+            bw.write(String.valueOf(intervalEndMillis) + " "
+                + String.valueOf(bytesWritten) + "\n");
+          }
+          bw.close();
+        }
+      }
+    }
+
+    /* Iterate over exit lists to learn about exit IP addresses. Append
+     * lines to status/exit-addresses/$fingerprint. */
+    System.out.println(new Date() + " Reading in/exit-lists/* ...");
+    DescriptorReader exitListReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    exitListReader.addDirectory(new File("in/exit-lists"));
+    exitListReader.setExcludeFiles(new File("status/exit-list-history"));
+    Iterator<DescriptorFile> exitListFiles =
+        exitListReader.readDescriptors();
+    while (exitListFiles.hasNext()) {
+      DescriptorFile exitListFile = exitListFiles.next();
+      if (exitListFile.getDescriptors() != null) {
+        for (Descriptor descriptor : exitListFile.getDescriptors()) {
+          ExitList exitList = (ExitList) descriptor;
+          if (exitList.getExitListEntries() == null) {
+            continue;
+          }
+          for (ExitListEntry exitListEntry :
+              exitList.getExitListEntries()) {
+            String fingerprint = exitListEntry.getFingerprint();
+            File exitAddressesFile = new File("status/exit-addresses/"
+                + fingerprint);
+            exitAddressesFile.getParentFile().mkdirs();
+            long scanMillis = exitListEntry.getScanMillis();
+            String address = exitListEntry.getExitAddress();
+            BufferedWriter bw = new BufferedWriter(new FileWriter(
+                exitAddressesFile, true));
+            bw.write(String.valueOf(scanMillis) + " " + address + "\n");
+            bw.close();
+          }
+        }
+      }
+    }
+
+    /* Iterate over consensuses to learn about OR addresses of relays with
+     * the Exit flag. Append lines to
+     * status/or-addresses/$fingerprint. */
+    System.out.println(new Date() + " Reading in/consensuses/* ...");
+    DescriptorReader consensusReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    consensusReader.addDirectory(new File("in/consensuses"));
+    consensusReader.setExcludeFiles(new File("status/consensus-history"));
+    Iterator<DescriptorFile> consensusFiles =
+        consensusReader.readDescriptors();
+    while (consensusFiles.hasNext()) {
+      DescriptorFile consensusFile = consensusFiles.next();
+      if (consensusFile.getDescriptors() != null) {
+        for (Descriptor descriptor : consensusFile.getDescriptors()) {
+          RelayNetworkStatusConsensus consensus =
+              (RelayNetworkStatusConsensus) descriptor;
+          if (consensus.getStatusEntries() == null) {
+            continue;
+          }
+          long validAfterMillis = consensus.getValidAfterMillis();
+          for (NetworkStatusEntry statusEntry :
+              consensus.getStatusEntries().values()) {
+            if (!statusEntry.getFlags().contains("Exit")) {
+              continue;
+            }
+            String fingerprint = statusEntry.getFingerprint();
+            File orAddressesFile = new File("status/or-addresses/"
+                + fingerprint);
+            orAddressesFile.getParentFile().mkdirs();
+            String address = statusEntry.getAddress();
+            BufferedWriter bw = new BufferedWriter(new FileWriter(
+                orAddressesFile, true));
+            bw.write(String.valueOf(validAfterMillis) + " " + address
+                + "\n");
+            bw.close();
+          }
+        }
+      }
+    }
+
+    /* Make sure not to overwrite existing results, and prepare writing
+     * results otherwise. */
+    File differentExitAddressFile = new File(
+        "out/different-exit-address.csv");
+    if (differentExitAddressFile.exists()) {
+      return;
+    } else {
+      differentExitAddressFile.getParentFile().mkdirs();
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          differentExitAddressFile));
+      bw.write("timestamp,differentaddress,writtenbytes\n");
+      bw.close();
+    }
+
+    /* Iterate over OR addresses of relays with the Exit flag. */
+    System.out.println(new Date() + " Writing "
+        + "out/different-exit-address.csv ...");
+    for (File orAddressesFile :
+        new File("status/or-addresses").listFiles()) {
+      String fingerprint = orAddressesFile.getName();
+
+      /* For every relay, read OR addresses, bandwidth histories, and
+       * exit addresses to memory. */
+      SortedMap<Long, String> orAddresses = new TreeMap<Long, String>();
+      SortedMap<Long, Long> writtenBytes = new TreeMap<Long, Long>();
+      SortedMap<Long, String> exitAddresses = new TreeMap<Long, String>();
+      String line;
+      BufferedReader br = new BufferedReader(new FileReader(
+          orAddressesFile));
+      while ((line = br.readLine()) != null) {
+        String[] parts = line.split(" ");
+        long validAfterMillis = Long.parseLong(parts[0]);
+        String address = parts[1];
+        orAddresses.put(validAfterMillis, address);
+      }
+      br.close();
+      File writtenBytesFile = new File("status/written-bytes/"
+          + fingerprint);
+      if (!writtenBytesFile.exists()) {
+        continue;
+      }
+      br = new BufferedReader(new FileReader(writtenBytesFile));
+      while ((line = br.readLine()) != null) {
+        String[] parts = line.split(" ");
+        long intervalEndMillis = Long.parseLong(parts[0]);
+        long bytes = Long.parseLong(parts[1]);
+        writtenBytes.put(intervalEndMillis, bytes);
+      }
+      br.close();
+      File exitAddressesFile = new File("status/exit-addresses/"
+          + fingerprint);
+      if (exitAddressesFile.exists()) {
+        br = new BufferedReader(new FileReader(exitAddressesFile));
+        while ((line = br.readLine()) != null) {
+          String[] parts = line.split(" ");
+          long scanMillis = Long.parseLong(parts[0]);
+          String address = parts[1];
+          exitAddresses.put(scanMillis, address);
+        }
+        br.close();
+      }
+
+      /* Go through consensuses containing this relay as Exit relay in
+       * chronological order, sum up written bytes in the hour after the
+       * consensuses' valid-after time, and look up any exit addresses
+       * found up to 23 hours before up to 1 hour after the valid-after
+       * time. */
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          differentExitAddressFile, true));
+      for (Map.Entry<Long, String> e : orAddresses.entrySet()) {
+        long validAfterMillis = e.getKey();
+        String currentOrAddress = e.getValue();
+        long currentWrittenBytes = 0L;
+        for (long currentBytes : writtenBytes.tailMap(validAfterMillis).
+            headMap(validAfterMillis + 1L * 60L * 60L * 1000L).values()) {
+          currentWrittenBytes += currentBytes;
+        }
+        if (currentWrittenBytes < 1L) {
+          continue;
+        }
+        Set<String> currentExitAddresses = new HashSet<String>();
+        for (String currentExitAddress : exitAddresses.tailMap(
+            validAfterMillis - 23L * 60L * 60L * 1000L).headMap(
+            validAfterMillis + 1L * 60L * 60L * 1000L).values()) {
+          if (!currentExitAddress.equals(currentOrAddress)) {
+            currentExitAddresses.add(currentExitAddress);
+          }
+        }
+        boolean usedOtherAddress = !currentExitAddresses.isEmpty();
+        bw.write(String.valueOf(validAfterMillis / 1000L) + ","
+            + (usedOtherAddress ? "TRUE" : "FALSE") + ","
+            + currentWrittenBytes + "\n");
+      }
+      bw.close();
+    }
+
+    System.out.println(new Date() + " Terminating.");
+  }
+}
+
diff --git a/task-4147/README b/task-4147/README
new file mode 100644
index 0000000..1af3f3a
--- /dev/null
+++ b/task-4147/README
@@ -0,0 +1,37 @@
+Answer the question what fraction of bytes written by relays with the Exit
+flag could have used a different address for exiting than the relay used
+for registering in the Tor network.
+==========================================================================
+
+Clone the metrics-lib repository, create the descriptor.jar file, and put
+it in this directory.
+
+Obtain the Apache Commons Codec 1.4 .jar file commons-codec-1.4.jar and
+put it in this directory.
+
+Download the metrics tarballs containing consensuses, extra-info
+descriptors, and exit lists for a common time period. Note that the first
+and last 2 days of the period won't be usable. Extract the tarballs and
+put them in in/consensuses/, in/extra-infos/, and in/exit-lists/ in this
+directory.
+
+Compile the Java class:
+
+  $ javac -cp descriptor.jar AnalyzeDifferentExitAddress.java
+
+Run the Java class:
+
+  $ java -cp descriptor.jar:commons-codec-1.4.jar:. \
+      AnalyzeDifferentExitAddress
+
+In order to re-run parts of the analysis, delete files in status/ or the
+results file in out/.
+
+Aggregate the results using R and ggplot2:
+
+  $ R --slave -f different-exit-address-1.R
+
+Draw graphs using R and ggplot2:
+
+  $ R --slave -f different-exit-address-2.R
+
diff --git a/task-4147/different-exit-address-1.R b/task-4147/different-exit-address-1.R
new file mode 100644
index 0000000..b5f172a
--- /dev/null
+++ b/task-4147/different-exit-address-1.R
@@ -0,0 +1,12 @@
+library(ggplot2)
+d <- read.csv("out/different-exit-address.csv", stringsAsFactors = FALSE)
+d <- data.frame(
+  date = as.Date(as.POSIXct(d$timestamp, origin = "1970-01-01 00:00:00")),
+  differentaddress = d$differentaddress,
+  writtenbytes = d$writtenbytes)
+d <- aggregate(list(writtenbytes = d$writtenbytes),
+  by = list(date = d$date, differentaddress = d$differentaddress),
+  FUN = sum)
+write.csv(d, file = "different-exit-address-aggregate.csv", quote = FALSE,
+  row.names = FALSE)
+
diff --git a/task-4147/different-exit-address-2.R b/task-4147/different-exit-address-2.R
new file mode 100644
index 0000000..15f097e
--- /dev/null
+++ b/task-4147/different-exit-address-2.R
@@ -0,0 +1,39 @@
+library(ggplot2)
+d <- read.csv("different-exit-address-aggregate.csv",
+  stringsAsFactors = FALSE)
+
+# Cut off dates before 2012-02-14, because exit lists were stale
+# Cut off dates after 2012-02-27, because we only imported February data
+# Leaves us with 2 weeks of data; should be fine
+d <- d[d$date >= "2012-02-14" & d$date <= "2012-02-27", ]
+
+a <- aggregate(x = list(writtenbytes = d$writtenbytes),
+  by = list(date = d$date), FUN = sum)
+ggplot(a, aes(x = as.Date(date), y = writtenbytes / 2^20 / 86400)) +
+geom_line() +
+scale_x_date(name = "") +
+scale_y_continuous(name = "MiB/s\n",
+  limits = c(0, max(a$writtenbytes) / 2^20 / 86400)) +
+opts(title = "Bytes written by all relays with the Exit flag\n")
+
+ggplot(d, aes(x = as.Date(date), y = writtenbytes / 2^20 / 86400,
+  colour = differentaddress)) +
+geom_line() +
+scale_x_date(name = "") +
+scale_y_continuous(name = "MiB/s\n",
+  limits = c(0, max(d$writtenbytes / 2^20 / 86400))) +
+opts(title = "Bytes written by all relays with the Exit flag\n")
+
+s <- cast(d, date ~ differentaddress)
+s <- data.frame(date = s$date, fracdifferent = s[, "TRUE"] / s[, "FALSE"])
+ggplot(s, aes(x = as.Date(date), y = fracdifferent)) +
+geom_line() +
+scale_x_date(name = "", format = "%Y-%m-%d") +
+scale_y_continuous(name = "", formatter = "percent", limits = c(0, 1)) +
+opts(title =
+  paste("Fraction of bytes written by relays with the Exit flag\n",
+    "which could have used a different address for exiting\n",
+    "than the relay used for registering in the Tor network\n",
+    sep = ""))
+ggsave("different-exit-address.png", width = 8, height = 5, dpi = 72)
+
participants (1)
-
karsten@torproject.org