commit e0862169e12b9a687d5d05b6dd32688cfbfe05f7
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date:   Tue Apr 14 17:36:03 2015 +0200
    Investigate lifetime of introduction points (#15513).
---
 task-15513/.gitignore                |   8 +
 task-15513/README.md                 |  24 ++
 task-15513/plot.R                    |  81 +++++++
 task-15513/src/ParseDescriptors.java | 420 ++++++++++++++++++++++++++++++++++
 4 files changed, 533 insertions(+)
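For context, these are the only fields that ParseDescriptors.java below
consumes from each hidden-service descriptor; all other fields are
ignored, and the timestamp shown is a made-up example:

  permanent-key
  -----BEGIN RSA PUBLIC KEY-----
  <base64-encoded key material>
  -----END RSA PUBLIC KEY-----
  publication-time 2015-04-01 12:00:00
  introduction-points
  -----BEGIN MESSAGE-----
  <base64-encoded introduction-point list>
  -----END MESSAGE-----

Inside the decoded message, each entry starts with an
"introduction-point <identifier>" line and carries a "service-key"
block; the parser stores each pair as one string of the form
"<identifier>-<16-character service-key digest>".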
diff --git a/task-15513/.gitignore b/task-15513/.gitignore
new file mode 100644
index 0000000..3f98e38
--- /dev/null
+++ b/task-15513/.gitignore
@@ -0,0 +1,8 @@
+.classpath
+.project
+descriptors/
+/bin
+*.csv
+lifetimes-verbose.txt
+Rplots.pdf
+
diff --git a/task-15513/README.md b/task-15513/README.md
new file mode 100644
index 0000000..f1bdd55
--- /dev/null
+++ b/task-15513/README.md
@@ -0,0 +1,24 @@
+Investigate lifetime of introduction points on hidden services
+==============================================================
+
+Extract hidden-service descriptors to descriptors/.
+
+Obtain commons-codec-1.6.jar and place it in /usr/share/java/.
+
+Compile and run the Java class:
+
+    $ javac -d bin/ -cp /usr/share/java/commons-codec-1.6.jar \
+        src/ParseDescriptors.java
+
+    $ java -cp bin/:/usr/share/java/commons-codec-1.6.jar ParseDescriptors
+
+Find verbose logs in lifetimes-verbose.txt.
+
+Make sure you have R 3.0.2 with ggplot2 0.9.3.1 installed.
+
+Plot graphs:
+
+    $ R --slave -f plot.R
+
+View output in Rplots.pdf.
+
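Note that the Java step has to run before the R step: plot.R reads the
CSV files that ParseDescriptors writes, namely

  lifetimes.csv           service,lifetime_millis
  published-descs.csv     service,publication,descs
  established-intros.csv  service,publication,intros
  intros-per-desc.csv     service,publication,introsperdesc
  intros-per-relay.csv    service,intros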
"xdndo2okt43cjx44")) + +ggplot(data) + + geom_step(aes(x = x, y = y, colour = as.factor(colour))) + + scale_x_continuous("\nIntroduction points established on same relay", + limits = c(0, max(data$x))) + + scale_y_continuous(paste("Cumulative number of relays used for", + "introduction points\n")) + + scale_colour_hue("Service") + + ggtitle(paste("Number of introduction points established on\nthe same", + "relay (in the measurement period)\n")) + +data <- data[as.character(data$colour) != "agorahooawayyfoe", ] +ggplot(data) + + geom_step(aes(x = x, y = y, colour = as.factor(colour))) + + scale_x_continuous("\nIntroduction points established on same relay", + limits = c(0, max(data$x, na.rm = TRUE))) + + scale_y_continuous(paste("Cumulative number of relays used for", + "introduction points\n")) + + scale_colour_hue("Service") + + ggtitle(paste("Number of introduction points established on\nthe same", + "relay (in the measurement period)\n")) + diff --git a/task-15513/src/ParseDescriptors.java b/task-15513/src/ParseDescriptors.java new file mode 100644 index 0000000..ef4d77f --- /dev/null +++ b/task-15513/src/ParseDescriptors.java @@ -0,0 +1,420 @@ +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.StringReader; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TimeZone; +import java.util.TreeMap; + +import javax.xml.bind.DatatypeConverter; + +import org.apache.commons.codec.binary.Base32; +import org.apache.commons.codec.digest.DigestUtils; + +public class ParseDescriptors { + public static void main(String[] args) throws IOException, + ParseException { + /* Parse all hidden-service descriptors in descriptors/ and extract + * permanent keys, publication times, and relay identities of + * introduction points. */ + SortedMap<String, /* <- service name */ + SortedMap<Long, /* <- publication time */ + List< /* <- list of descriptors */ + Set<String>>>> /* <- fingerprints and client keys */ + parsedDescriptors = parseDescriptors(new File("descriptors/")); + + /* Print verbose log. */ + printVerboseLog(parsedDescriptors, new File("lifetimes-verbose.txt")); + + /* Calculate lifetimes of introduction points, but exclude those that + * were contained in the first or last known descriptor of a + * service. */ + writeLifetimes(parsedDescriptors, new File("lifetimes.csv"), + "service,lifetime_millis"); + + /* Calculate the number of published descriptors per hour, including + * descriptor replicas. */ + writePublishedDescriptors(parsedDescriptors, + new File("published-descs.csv"), "service,publication,descs"); + + /* Calculate the number of distinct established introduction points + * per hour. 
*/ + writeEstablishedIntroductionPoints(parsedDescriptors, + new File("established-intros.csv"), "service,publication,intros"); + + writeIntroPointsPerDescriptor(parsedDescriptors, + new File("intros-per-desc.csv"), + "service,publication,introsperdesc"); + + writeIntroductionPointsPerRelay(parsedDescriptors, + new File("intros-per-relay.csv"), "service,intros"); + } + + private static SortedMap<String, SortedMap<Long, List<Set<String>>>> + parseDescriptors(File descriptorsDirectory) throws IOException, + ParseException { + SortedMap<String, SortedMap<Long, List<Set<String>>>> + parsedDescriptors = new TreeMap<String, SortedMap<Long, + List<Set<String>>>>(); + for (File descriptorFile : descriptorsDirectory.listFiles()) { + String permanentKeyDigest = null; + long publicationMillis = -1L; + Set<String> introductionPoints = null; + BufferedReader br = new BufferedReader(new FileReader( + descriptorFile)); + String line; + DateFormat dateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setLenient(false); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + while ((line = br.readLine()) != null) { + if (line.equals("permanent-key")) { + String permanentKey = appendLines(br, + "-----BEGIN RSA PUBLIC KEY-----", + "-----END RSA PUBLIC KEY-----"); + permanentKeyDigest = toDigest(permanentKey); + } else if (line.startsWith("publication-time ")) { + publicationMillis = dateTimeFormat.parse(line.substring( + "publication-time ".length())).getTime(); + } else if (line.equals("introduction-points")) { + String encodedIntroductionPoints = appendLines(br, + "-----BEGIN MESSAGE-----", "-----END MESSAGE-----"); + introductionPoints = parseEncodedIntroductionPoints( + encodedIntroductionPoints); + } + } + br.close(); + if (permanentKeyDigest != null && publicationMillis >= 0L && + introductionPoints != null) { + if (!parsedDescriptors.containsKey(permanentKeyDigest)) { + parsedDescriptors.put(permanentKeyDigest, + new TreeMap<Long, List<Set<String>>>()); + } + if (!parsedDescriptors.get(permanentKeyDigest).containsKey( + publicationMillis)) { + parsedDescriptors.get(permanentKeyDigest).put(publicationMillis, + new ArrayList<Set<String>>()); + } + parsedDescriptors.get(permanentKeyDigest).get( + publicationMillis).add(introductionPoints); + } + } + return parsedDescriptors; + } + + private static String toDigest(String encodedString) { + byte[] bytes = DatatypeConverter.parseBase64Binary(encodedString); + String str = new Base32().encodeAsString(DigestUtils.sha(bytes)); + return str.substring(0, 16).toLowerCase(); + } + + private static String appendLines(BufferedReader br, String skipLine, + String untilLine) throws IOException { + StringBuilder appendedLinesBuilder = new StringBuilder(); + String line = br.readLine(); + while ((line = br.readLine()) != null) { + if (line.equals(skipLine)) { + continue; + } + if (line.equals(untilLine)) { + break; + } + appendedLinesBuilder.append(line); + } + return appendedLinesBuilder.toString(); + } + + private static Set<String> parseEncodedIntroductionPoints( + String encodedIntroductionPoints) throws IOException { + Set<String> introductionPoints = new HashSet<String>(); + String decodedIntroductionPoints = new String( + DatatypeConverter.parseBase64Binary( + encodedIntroductionPoints)); + BufferedReader br = new BufferedReader(new StringReader( + decodedIntroductionPoints)); + String line = null, lastIntroductionPoint = null; + while ((line = br.readLine()) != null) { + if (line.startsWith("introduction-point ")) { + 
lastIntroductionPoint = line.substring( + "introduction-point ".length()); + } else if (line.equals("service-key")) { + String encodedServiceKey = appendLines(br, + "-----BEGIN RSA PUBLIC KEY-----", + "-----END RSA PUBLIC KEY-----"); + String serviceKeyDigest = toDigest(encodedServiceKey); + introductionPoints.add(lastIntroductionPoint + "-" + + serviceKeyDigest); + } + } + br.close(); + return introductionPoints; + } + + private static void printVerboseLog( + SortedMap<String, SortedMap<Long, List<Set<String>>>> + parsedDescriptors, File verboseLogFile) throws IOException { + BufferedWriter bw = new BufferedWriter(new FileWriter( + verboseLogFile)); + DateFormat dateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + dateTimeFormat.setLenient(false); + dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + for (Map.Entry<String, SortedMap<Long, List<Set<String>>>> e0 : + parsedDescriptors.entrySet()) { + bw.write("\nService " + e0.getKey() + "\n"); + for (Map.Entry<Long, List<Set<String>>> e1 : + e0.getValue().entrySet()) { + bw.write(" publication-time " + + dateTimeFormat.format(e1.getKey()) + "\n"); + int descriptor = 0; + for (Set<String> intro : e1.getValue()) { + bw.write(" descriptor " + descriptor++ + "\n"); + for (String i : intro) { + bw.write(" " + i + "\n"); + } + } + } + } + bw.close(); + } + + private static void writeLifetimes( + SortedMap<String, SortedMap<Long, List<Set<String>>>> + parsedDescriptors, File lifetimesCsvFile, String header) + throws IOException { + SortedMap<String, /* <- service name */ + List<Long>> /* <- lifetimes in milliseconds */ + calculatedLifetimes = new TreeMap<String, List<Long>>(); + for (Map.Entry<String, SortedMap<Long, List<Set<String>>>> e0 : + parsedDescriptors.entrySet()) { + String permanentKey = e0.getKey(); + List<Long> lifetimes = new ArrayList<Long>(); + SortedMap<Long, List<Set<String>>> + publicationMillisIntroductionPoints = e0.getValue(); + long firstPublicationMillis = + publicationMillisIntroductionPoints.firstKey(); + Set<String> introductionPointsInFirstDescriptor = + new HashSet<String>(); + for (Set<String> introductionPoints : + publicationMillisIntroductionPoints.get( + firstPublicationMillis)) { + introductionPointsInFirstDescriptor.addAll(introductionPoints); + } + Map<String, long[]> liveIntroductionPoints = + new HashMap<String, long[]>(); + for (Map.Entry<Long, List<Set<String>>> e1 : + publicationMillisIntroductionPoints.tailMap( + firstPublicationMillis + 1L).entrySet()) { + long publicationMillis = e1.getKey(); + Set<String> introductionPoints = new HashSet<String>(); + for (Set<String> points : e1.getValue()) { + introductionPoints.addAll(points); + } + introductionPointsInFirstDescriptor.retainAll(introductionPoints); + for (String introductionPoint : introductionPoints) { + if (introductionPointsInFirstDescriptor.contains( + introductionPoint)) { + /* Skip introduction point, because it was contained in the + * first known descriptor of this service, and it could have + * been running for a while. */ + continue; + } else if (!liveIntroductionPoints.containsKey( + introductionPoint)) { + /* This introduction point is new, let's remember it. */ + liveIntroductionPoints.put(introductionPoint, + new long[] { publicationMillis, publicationMillis } ); + } else { + /* This introduction point is not new, and it's still there, + * update its last-seen timestamp. 
*/ + liveIntroductionPoints.get(introductionPoint)[1] = + publicationMillis; + } + } + Set<String> deadIntroductionPoints = new HashSet<String>(); + for (Map.Entry<String, long[]> e2 : + liveIntroductionPoints.entrySet()) { + String introductionPoint = e2.getKey(); + + if (!introductionPoints.contains(introductionPoint)) { + long lifetime = e2.getValue()[1] - e2.getValue()[0] + + 3600000L; + lifetimes.add(lifetime); + deadIntroductionPoints.add(introductionPoint); + } + } + for (String introductionPoint : deadIntroductionPoints) { + liveIntroductionPoints.remove(introductionPoint); + } + } + calculatedLifetimes.put(permanentKey, lifetimes); + } + BufferedWriter bw = new BufferedWriter(new FileWriter( + lifetimesCsvFile)); + bw.write(header + "\n"); + for (Map.Entry<String, List<Long>> e : + calculatedLifetimes.entrySet()) { + for (long lifetime : e.getValue()) { + bw.write(e.getKey() + "," + lifetime + "\n"); + } + } + bw.close(); + } + + private static void writePublishedDescriptors(SortedMap<String, + SortedMap<Long, List<Set<String>>>> parsedDescriptors, File csvFile, + String header) throws IOException { + SortedMap<String, /* <- service name */ + SortedMap<Long, /* <- publication time */ + Integer>> /* <- number of published descriptors*/ + publishedDescriptors = + new TreeMap<String, SortedMap<Long, Integer>>(); + for (Map.Entry<String, SortedMap<Long, List<Set<String>>>> e0 : + parsedDescriptors.entrySet()) { + String serviceName = e0.getKey(); + publishedDescriptors.put(serviceName, new TreeMap<Long, Integer>()); + for (Map.Entry<Long, List<Set<String>>> e1 : + e0.getValue().entrySet()) { + long publicationMillis = e1.getKey(); + int descriptors = e1.getValue().size(); + publishedDescriptors.get(serviceName).put(publicationMillis, + descriptors); + } + } + BufferedWriter bw = new BufferedWriter(new FileWriter(csvFile)); + bw.write(header + "\n"); + for (Map.Entry<String, SortedMap<Long, Integer>> e0 : + publishedDescriptors.entrySet()) { + for (Map.Entry<Long, Integer> e1 : e0.getValue().entrySet()) { + bw.write(e0.getKey() + "," + e1.getKey() + "," + e1.getValue() + + "\n"); + } + } + bw.close(); + } + + private static void writeEstablishedIntroductionPoints(SortedMap<String, + SortedMap<Long, List<Set<String>>>> parsedDescriptors, File csvFile, + String header) throws IOException { + SortedMap<String, /* <- service name */ + SortedMap<Long, /* <- publication time */ + Integer>> /* <- number of published descriptors*/ + establishedIntros = + new TreeMap<String, SortedMap<Long, Integer>>(); + for (Map.Entry<String, SortedMap<Long, List<Set<String>>>> e0 : + parsedDescriptors.entrySet()) { + String serviceName = e0.getKey(); + establishedIntros.put(serviceName, new TreeMap<Long, Integer>()); + for (Map.Entry<Long, List<Set<String>>> e1 : + e0.getValue().entrySet()) { + long publicationMillis = e1.getKey(); + Set<String> introductionPoints = new HashSet<String>(); + for (Set<String> points : e1.getValue()) { + introductionPoints.addAll(points); + } + establishedIntros.get(serviceName).put(publicationMillis, + introductionPoints.size()); + } + } + BufferedWriter bw = new BufferedWriter(new FileWriter(csvFile)); + bw.write(header + "\n"); + for (Map.Entry<String, SortedMap<Long, Integer>> e0 : + establishedIntros.entrySet()) { + for (Map.Entry<Long, Integer> e1 : e0.getValue().entrySet()) { + bw.write(e0.getKey() + "," + e1.getKey() + "," + e1.getValue() + + "\n"); + } + } + bw.close(); + } + + private static void writeIntroPointsPerDescriptor(SortedMap<String, + SortedMap<Long, 
List<Set<String>>>> parsedDescriptors, + File csvFile, String header) throws IOException { + SortedMap<String, SortedMap<Long, List<Integer>>> + introPointsPerDescriptor = + new TreeMap<String, SortedMap<Long, List<Integer>>>(); + for (Map.Entry<String, SortedMap<Long, List<Set<String>>>> e0 : + parsedDescriptors.entrySet()) { + String serviceName = e0.getKey(); + introPointsPerDescriptor.put(serviceName, + new TreeMap<Long, List<Integer>>()); + for (Map.Entry<Long, List<Set<String>>> e1 : + e0.getValue().entrySet()) { + long publicationMillis = e1.getKey(); + introPointsPerDescriptor.get(serviceName).put(publicationMillis, + new ArrayList<Integer>()); + for (Set<String> intros : e1.getValue()) { + introPointsPerDescriptor.get(serviceName).get( + publicationMillis).add(intros.size()); + } + } + } + BufferedWriter bw = new BufferedWriter(new FileWriter(csvFile)); + bw.write(header + "\n"); + for (Map.Entry<String, SortedMap<Long, List<Integer>>> e0 : + introPointsPerDescriptor.entrySet()) { + for (Map.Entry<Long, List<Integer>> e1 : e0.getValue().entrySet()) { + for (int i : e1.getValue()) { + bw.write(e0.getKey() + "," + e1.getKey() + "," + i + "\n"); + } + } + } + bw.close(); + + } + + private static void writeIntroductionPointsPerRelay(SortedMap<String, + SortedMap<Long, List<Set<String>>>> parsedDescriptors, + File csvFile, String header) throws IOException { + SortedMap<String, /* <- service name */ + List<Integer>> /* <- established introduction point per relay */ + introductionPointsPerRelay = + new TreeMap<String, List<Integer>>(); + for (Map.Entry<String, SortedMap<Long, List<Set<String>>>> e0 : + parsedDescriptors.entrySet()) { + String serviceName = e0.getKey(); + SortedMap<String, Set<String>> clientKeysPerFingerprint = + new TreeMap<String, Set<String>>(); + for (List<Set<String>> descriptors : e0.getValue().values()) { + for (Set<String> descriptor : descriptors) { + for (String introductionPoint : descriptor) { + String[] parts = introductionPoint.split("-"); + String fingerprint = parts[0]; + String clientKey = parts[1]; + if (!clientKeysPerFingerprint.containsKey(fingerprint)) { + clientKeysPerFingerprint.put(fingerprint, + new HashSet<String>()); + } + clientKeysPerFingerprint.get(fingerprint).add(clientKey); + } + } + } + List<Integer> counted = new ArrayList<Integer>(); + for (Set<String> e1 : clientKeysPerFingerprint.values()) { + counted.add(e1.size()); + } + introductionPointsPerRelay.put(serviceName, counted); + } + BufferedWriter bw = new BufferedWriter(new FileWriter(csvFile)); + bw.write(header + "\n"); + for (Map.Entry<String, List<Integer>> e0 : + introductionPointsPerRelay.entrySet()) { + for (int e1 : e0.getValue()) { + bw.write(e0.getKey() + "," + e1 + "\n"); + } + } + bw.close(); + } +}
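The lifetime rule in writeLifetimes() is the subtle part: an
introduction point only contributes a data point once it disappears
again, its lifetime is last-seen minus first-seen plus one hour, and
anything already present in a service's first known descriptor is
ignored because it may predate the measurement. Below is a
self-contained sketch of just that bookkeeping, run on three
hypothetical hourly snapshots (the intro-point names A, B, and C are
made up, not real data):

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

public class LifetimeSketch {
  public static void main(String[] args) {
    long hour = 3600000L;

    /* Hypothetical hourly snapshots of one service's introduction-point
     * sets: A runs throughout, B appears at t1 and is gone at t2, C
     * appears at t2 and is still live when the data ends. */
    SortedMap<Long, Set<String>> snapshots =
        new TreeMap<Long, Set<String>>();
    snapshots.put(0L, new HashSet<String>(Arrays.asList("A")));
    snapshots.put(hour, new HashSet<String>(Arrays.asList("A", "B")));
    snapshots.put(2L * hour, new HashSet<String>(Arrays.asList("A", "C")));

    /* Points seen in the first snapshot may predate the measurement, so
     * they never get a lifetime (unless they vanish and return). */
    Set<String> inFirst =
        new HashSet<String>(snapshots.get(snapshots.firstKey()));
    Map<String, long[]> live = new HashMap<String, long[]>();
    for (Map.Entry<Long, Set<String>> e :
        snapshots.tailMap(snapshots.firstKey() + 1L).entrySet()) {
      long now = e.getKey();
      Set<String> intros = e.getValue();
      inFirst.retainAll(intros);
      for (String intro : intros) {
        if (inFirst.contains(intro)) {
          continue;                       /* carried over from snapshot 0 */
        } else if (!live.containsKey(intro)) {
          live.put(intro, new long[] { now, now });   /* first seen */
        } else {
          live.get(intro)[1] = now;                   /* last seen */
        }
      }
      /* A point that vanished is counted as last seen minus first seen
       * plus one hour, matching writeLifetimes(). */
      for (Iterator<Map.Entry<String, long[]>> it =
          live.entrySet().iterator(); it.hasNext(); ) {
        Map.Entry<String, long[]> l = it.next();
        if (!intros.contains(l.getKey())) {
          System.out.println(l.getKey() + ","
              + (l.getValue()[1] - l.getValue()[0] + hour));
          it.remove();
        }
      }
    }
  }
}

Only B produces a lifetime here ("B,3600000"): A is excluded as part of
the first snapshot, and C is still live at the end of the data, which is
why the comment in main() speaks of excluding introduction points from
both the first and the last known descriptor.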