Author: kloesing Date: 2011-05-18 20:15:43 +0000 (Wed, 18 May 2011) New Revision: 24768
Added: projects/archives/trunk/README Removed: projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java projects/archives/trunk/bridge-desc-sanitizer/HOWTO projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh projects/archives/trunk/exonerator/ExoneraTor.java projects/archives/trunk/exonerator/HOWTO projects/archives/trunk/exonerator/LICENSE projects/archives/trunk/exonerator/exonerator.py Log: Only leave a note that the code moved to Git.
Added: projects/archives/trunk/README =================================================================== --- projects/archives/trunk/README (rev 0) +++ projects/archives/trunk/README 2011-05-18 20:15:43 UTC (rev 24768) @@ -0,0 +1,7 @@ +--------------------------------------------------------------------------- + + THIS REPOSITORY HAS MOVED TO GIT! + + git clone git://git.torproject.org/metrics-utils/ + +---------------------------------------------------------------------------
Deleted: projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java =================================================================== --- projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java 2011-05-17 22:47:52 UTC (rev 24767) +++ projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java 2011-05-18 20:15:43 UTC (rev 24768) @@ -1,504 +0,0 @@ -import java.io.*; -import java.util.*; -import com.maxmind.geoip.*; -import org.apache.commons.codec.digest.*; -import org.apache.commons.codec.binary.*; - -public class ConvertBridgeDescs { - - public static void main(String[] args) throws Exception { - - /* If the following flag is set to true, don't write 127.0.0.1 for - * bridge IP addresses, but put replace IP addresses with - * H(IP address + bridge identity + secret)[:4] formatted as IP - * address. An example for the hash input would be: - * "12.34.56.78ABCDABCDABCDABCDABCDABCDABCDABCDABCDABCDpassword" - * (without quotes) */ - boolean hashIpAddresses = false; - String secret = "password"; - - long started = System.currentTimeMillis(); - - if (args.length < 5) { - System.err.println("Usage: java " - + ConvertBridgeDescs.class.getSimpleName() - + " <input directory> <geoip.txt file> <YYYY> <MM> " - + "<output directory>"); - System.exit(1); - } - File inDir = new File(args[0]); - File geoipFile = new File(args[1]); - LookupService cl = new LookupService(geoipFile, - LookupService.GEOIP_MEMORY_CACHE); - Set<String> unresolved = new HashSet<String>(); - unresolved.add("--"); - unresolved.add("a1"); - unresolved.add("a2"); - unresolved.add("eu"); - unresolved.add("ap"); - String year = args[2]; - String month = args[3]; - int yearInt = Integer.parseInt(year); - int monthInt = Integer.parseInt(month); - File outDir = new File(args[4] + File.separator - + "bridge-descriptors-" + year + "-" + month); - outDir.mkdirs(); - - SortedSet<File> statuses = new TreeSet<File>(); - Set<File> descriptors = new HashSet<File>(); - Set<File> 
extrainfos = new HashSet<File>(); - - System.out.println("Checking files in " + inDir.getAbsolutePath() - + "..."); - Stack<File> directoriesLeftToParse = new Stack<File>(); - directoriesLeftToParse.push(inDir); - String currentYearAndMonth = "from-tonga-" + year + "-" + month; - String previousYearAndMonth = "from-tonga-" + (monthInt == 1 ? - "" + (yearInt - 1) + "-12" : - year + "-" + (monthInt < 11 ? "0" : "") + (monthInt - 1)); - String nextYearAndMonth = "from-tonga-" + (monthInt == 12 ? - "" + (yearInt + 1) + "-01" : - year + "-" + (monthInt < 9 ? "0" : "") + (monthInt + 1)); - while (!directoriesLeftToParse.isEmpty()) { - File directoryOrFile = directoriesLeftToParse.pop(); - String filename = directoryOrFile.getName(); - if (directoryOrFile.isDirectory()) { - if (/* base directory */ - filename.equals(inDir.getName()) || - /* current month */ - filename.startsWith(currentYearAndMonth) || - /* last days of previous month */ - (filename.startsWith(previousYearAndMonth) - && Integer.parseInt(filename.substring(19, 21)) > 24) || - /* first days of next month */ - (filename.startsWith(nextYearAndMonth) - && Integer.parseInt(filename.substring(19, 21)) < 6)) { - for (File fileInDir : directoryOrFile.listFiles()) { - directoriesLeftToParse.push(fileInDir); - } - } - continue; - } - if (filename.startsWith("cached-extrainfo")) { - extrainfos.add(directoryOrFile); - } else if (filename.equals("bridge-descriptors")) { - descriptors.add(directoryOrFile); - } else if (filename.equals("networkstatus-bridges")) { - statuses.add(directoryOrFile); - } - } - - int days = ((extrainfos.size() / 2 + descriptors.size() - + statuses.size()) + 3 * 24) / (3 * 48); - System.out.println("Found " + extrainfos.size() - + " cached-extrainfo[.new] files, " + descriptors.size() - + " bridge-descriptors files, and " + statuses.size() - + " networkstatus-bridges files, covering approximately " + days - + " days."); - - System.out.print("Parsing server descriptors to find out country " - + 
"codes of bridges in extra-info descriptors"); - Map<String, String> bridgeCountries = new HashMap<String, String>(); - int parsed = 0; - for (File file : descriptors) { - if (parsed++ > descriptors.size() / days) { - System.out.print("."); - parsed = 0; - } - BufferedReader br = new BufferedReader(new FileReader(file)); - String line = null, routerLine = null; - while ((line = br.readLine()) != null) { - if (line.startsWith("router ")) { - routerLine = line; - } else if (line.startsWith("opt extra-info-digest ")) { - String extraInfoDigest = line.split(" ")[2]; - String countryCode = cl.getCountry(routerLine.split(" ")[2]). - getCode(); - if (bridgeCountries.containsKey(extraInfoDigest) && - !bridgeCountries.get(extraInfoDigest). - equals(countryCode)) { - System.out.println("Mapping already contains extra-info " - + "digest " + extraInfoDigest + " with different " - + "country. Exiting."); - System.exit(1); - } - bridgeCountries.put(extraInfoDigest, countryCode); - } - } - } - System.out.println("Mapping contains " + bridgeCountries.size() - + " entries."); - - System.out.print("Parsing extra-info descriptors"); - String[] hex = new String[] { "0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", "a", "b", "c", "d", "e", "f" }; - for (String x : hex) - for (String y : hex) - new File(outDir + File.separator + "extra-infos" + File.separator - + x + File.separator + y).mkdirs(); - int writtenExtrainfos = 0; - Map<String, String> extrainfoMapping = new HashMap<String, String>(); - parsed = 0; - for (File file : extrainfos) { - if (parsed++ > extrainfos.size() / days) { - System.out.print("."); - parsed = 0; - } - FileInputStream fis = new FileInputStream(file); - BufferedInputStream bis = new BufferedInputStream(fis); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - bis.close(); - byte[] allData = baos.toByteArray(); - int 
startDescriptorIndex = -1, endDescriptorIndex = -1; - String asciiString = new String(allData, "US-ASCII"); - BufferedReader br = new BufferedReader(new StringReader( - asciiString)); - String line = null; - StringBuilder scrubbed = null; - boolean skipSignature = false; - boolean skipDescriptor = false; - while ((line = br.readLine()) != null) { - if (skipSignature && !line.equals("-----END SIGNATURE-----")) { - continue; - } else if (line.startsWith("extra-info ")) { - endDescriptorIndex = startDescriptorIndex = - asciiString.indexOf(line, startDescriptorIndex + 1); - scrubbed = new StringBuilder(DigestUtils.shaHex(Hex.decodeHex( - line.split(" ")[2].toCharArray())).toUpperCase() + "\n"); - } else if (line.startsWith("published ") - || line.startsWith("write-history ") - || line.startsWith("read-history ") - || line.startsWith("geoip-start-time ") - || line.startsWith("geoip-client-origins ") - || line.startsWith("bridge-stats-end ") - || line.startsWith("bridge-ips ")) { - scrubbed.append(line + "\n"); - } else if (line.startsWith("router-signature")) { - if (skipDescriptor) { - System.out.println("Skipping!"); - skipDescriptor = false; - } else { - endDescriptorIndex = asciiString.indexOf(line, - endDescriptorIndex + 1) + line.length() + 1; - byte[] forDigest = new byte[endDescriptorIndex - - startDescriptorIndex]; - System.arraycopy(allData, startDescriptorIndex, forDigest, 0, - endDescriptorIndex - startDescriptorIndex); - String originalHash = DigestUtils.shaHex(forDigest); - String countryCode = "ZZ"; - if (bridgeCountries.containsKey(originalHash.toUpperCase())) { - countryCode = bridgeCountries.get(originalHash.toUpperCase()); - } - String scrubbedDesc = "extra-info Unnamed" + countryCode + " " - + scrubbed.toString(); - String scrubbedHash = DigestUtils.shaHex(scrubbedDesc); - if (extrainfoMapping.containsKey(originalHash) && - !extrainfoMapping.get(originalHash).equals(scrubbedHash)) { - System.out.println("We already have an extra-info mapping " - + 
"from " + originalHash + " to " - + extrainfoMapping.get(originalHash) + ", but we now " - + "want to add a mapping to " + scrubbedHash - + ". Exiting"); - System.exit(1); - } - extrainfoMapping.put(originalHash, scrubbedHash); - File out = new File(outDir + File.separator + "extra-infos" - + File.separator + scrubbedHash.charAt(0) + File.separator - + scrubbedHash.charAt(1) + File.separator + scrubbedHash); - if (!out.exists()) { - BufferedWriter bw = new BufferedWriter(new FileWriter(out)); - bw.write(scrubbedDesc); - bw.close(); - writtenExtrainfos++; - } - } - } else if (line.equals("-----BEGIN SIGNATURE-----")) { - skipSignature = true; - } else if (line.equals("-----END SIGNATURE-----")) { - skipSignature = false; - } else if (line.startsWith("dirreq-") || line.startsWith("cell-") - || line.startsWith("exit-")) { - continue; - } else { - System.out.println("Unrecognized line '" + line + "'. Skipping"); - skipDescriptor = true; - } - } - br.close(); - } - System.out.println("\nWrote " + writtenExtrainfos - + " extra-info descriptors."); - - System.out.print("Parsing server descriptors"); - for (String x : hex) - for (String y : hex) - new File(outDir + File.separator + "server-descriptors" - + File.separator + x + File.separator + y).mkdirs(); - int writtenDescriptors = 0; - Map<String, String> descriptorMapping = new HashMap<String, String>(); - int found = 0, notfound = 0; - parsed = 0; - String haveExtraInfo = null; - for (File file : descriptors) { - if (parsed++ > descriptors.size() / days) { - System.out.print("."); - parsed = 0; - } - FileInputStream fis = new FileInputStream(file); - BufferedInputStream bis = new BufferedInputStream(fis); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - baos.write(data, 0, len); - } - bis.close(); - byte[] allData = baos.toByteArray(); - int startDescriptorIndex = -1, endDescriptorIndex = -1; - String asciiString = 
new String(allData, "US-ASCII"); - BufferedReader br = new BufferedReader(new StringReader( - asciiString)); - String line = null, country = null, originalAddress = null, - ipAddress = "127.0.0.1", routerLinePartOne = null, - routerLinePartTwo = null; - StringBuilder scrubbed = null; - boolean skipCrypto = false, contactWritten = false; - while ((line = br.readLine()) != null) { - if (skipCrypto && !line.startsWith("-----END ")) { - continue; - } else if (line.startsWith("router ")) { - endDescriptorIndex = startDescriptorIndex = - asciiString.indexOf(line, startDescriptorIndex + 1); - country = cl.getCountry(line.split(" ")[2]).getCode(). - toLowerCase(); - if (unresolved.contains(country)) { - country = "zz"; - } - originalAddress = line.split(" ")[2]; - scrubbed = new StringBuilder(); - routerLinePartOne = "router Unnamed" + country.toUpperCase(); - routerLinePartTwo = line.split(" ")[3] + " " - + line.split(" ")[4] + " " + line.split(" ")[5] + "\n"; - contactWritten = false; - haveExtraInfo = null; - } else if (line.startsWith("opt fingerprint ")) { - scrubbed.append("opt fingerprint"); - String fingerprint = DigestUtils.shaHex(Hex.decodeHex( - line.substring(16).replaceAll(" ", "").toCharArray())). - toUpperCase(); - for (int i = 0; i < fingerprint.length() / 4; i++) - scrubbed.append(" " + fingerprint.substring(4 * i, - 4 * (i + 1))); - scrubbed.append("\n"); - if (hashIpAddresses) { - byte[] hashedOctets = DigestUtils.sha(originalAddress - + line.substring(16).replaceAll(" ", "") + secret); - String hashedIp = ""; - for (int i = 0; i < 4; i++) { - hashedIp += "." 
+ ((int) hashedOctets[i] + 256) % 256; - } - ipAddress = hashedIp.substring(1); - } - } else if (line.startsWith("contact ")) { - scrubbed.append("contact somebody at example dot " + country - + "\n"); - contactWritten = true; - } else if (line.startsWith("router-signature")) { - endDescriptorIndex = asciiString.indexOf(line, - endDescriptorIndex + 1) + line.length() + 1; - byte[] forDigest = new byte[endDescriptorIndex - - startDescriptorIndex]; - System.arraycopy(allData, startDescriptorIndex, forDigest, 0, - endDescriptorIndex - startDescriptorIndex); - String originalHash = DigestUtils.shaHex(forDigest); - String scrubbedDesc = routerLinePartOne + " " + ipAddress - + " " + routerLinePartTwo + scrubbed.toString(); - String scrubbedHash = DigestUtils.shaHex(scrubbedDesc); - if (descriptorMapping.containsKey(originalHash) && - !descriptorMapping.get(originalHash).equals(scrubbedHash)) { - System.out.println("We already have a descriptor mapping " - + "from " + originalHash + " to " - + descriptorMapping.get(originalHash) + ", but we now " - + "want to add a mapping to " + scrubbedHash - + ". Exiting"); - System.exit(1); - } - descriptorMapping.put(originalHash, scrubbedHash); - if (haveExtraInfo != null) { - File out = new File(outDir + File.separator - + "server-descriptors" + File.separator - + scrubbedHash.charAt(0) + File.separator - + scrubbedHash.charAt(1) + File.separator + scrubbedHash); - if (!out.exists()) { - BufferedWriter bw2 = new BufferedWriter(new FileWriter(out)); - bw2.write(scrubbedDesc); - bw2.close(); - writtenDescriptors++; - } - } - } else if (line.startsWith("opt extra-info-digest ")) { - String originalExtraInfo = line.split(" ")[2].toLowerCase(); - if (!extrainfoMapping.containsKey(originalExtraInfo)) { - notfound++; - haveExtraInfo = "0000000000000000000000000000000000000000"; - } else { - found++; - haveExtraInfo = extrainfoMapping.get(originalExtraInfo). 
- toUpperCase(); - } - scrubbed.append("opt extra-info-digest " + haveExtraInfo - + "\n"); - } else if (line.startsWith("reject ") - || line.startsWith("accept ")) { - if (!contactWritten) { - scrubbed.append("contact nobody at example dot " + country - + "\n"); - contactWritten = true; - } - scrubbed.append(line + "\n"); - } else if (line.startsWith("platform ") - || line.startsWith("opt protocols ") - || line.startsWith("published ") - || line.startsWith("uptime ") - || line.startsWith("bandwidth ") - || line.startsWith("opt hibernating ") - || line.equals("opt hidden-service-dir") - || line.equals("opt caches-extra-info") - || line.equals("opt allow-single-hop-exits")) { - scrubbed.append(line + "\n"); - } else if (line.startsWith("family ")) { - StringBuilder familyLine = new StringBuilder("family"); - for (String s : line.substring(7).split(" ")) { - if (s.startsWith("$")) { - familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex( - s.substring(1).toCharArray())).toUpperCase()); - } else { - familyLine.append(" Unnamed"); - } - } - scrubbed.append(familyLine.toString() + "\n"); - } else if (line.startsWith("@purpose ")) { - continue; - } else if (line.startsWith("-----BEGIN ") - || line.equals("onion-key") || line.equals("signing-key")) { - skipCrypto = true; - } else if (line.startsWith("-----END ")) { - skipCrypto = false; - } else { - System.out.println("Unrecognized line '" + line + "'. Exiting"); - System.exit(1); - } - } - br.close(); - } - System.out.println("\nWrote " + writtenDescriptors - + " bridge descriptors. While parsing, we found that we parsed " - + found + " extra-info identifiers before, but are missing " - + notfound + ". 
(The number of missing identifiers should be " - + "significantly smaller.)"); - - System.out.print("Parsing network statuses"); - parsed = notfound = found = 0; - for (File file : statuses) { - if (parsed++ > statuses.size() / days) { - System.out.print("."); - parsed = 0; - } - if (!file.getParent().substring(file.getParent(). - indexOf("from-tonga-")).startsWith(currentYearAndMonth)) { - continue; - } - BufferedReader br = new BufferedReader(new FileReader(file)); - String line = null; - StringBuilder scrubbed = new StringBuilder(); - while ((line = br.readLine()) != null) { - if (line.startsWith("r ")) { - String[] parts = line.split(" "); - String bridgeIdentity = parts[2] + "=="; - String hashedBridgeIdentity = Base64.encodeBase64String( - DigestUtils.sha(Base64.decodeBase64(bridgeIdentity))). - substring(0, 27); - String descIdentifier = parts[3] + "=="; - String hexDescIdentifier = Hex.encodeHexString( - Base64.decodeBase64(descIdentifier)); - String replacementDescIdentifier = null; - if (!descriptorMapping.containsKey(hexDescIdentifier)) { - notfound++; - replacementDescIdentifier = "AAAAAAAAAAAAAAAAAAAAAAAAAAA"; - } else { - found++; - String refDesc = descriptorMapping.get(hexDescIdentifier). - toLowerCase(); - File descriptorFile = new File(outDir + File.separator - + "server-descriptors" + File.separator - + refDesc.charAt(0) + File.separator + refDesc.charAt(1) - + File.separator + refDesc); - if (!descriptorFile.exists()) { - System.out.println("Descriptor file '" - + descriptorFile.getAbsolutePath() + "' does not exist."); - System.exit(1); - } - replacementDescIdentifier = Base64.encodeBase64String( - Hex.decodeHex(descriptorMapping.get(hexDescIdentifier). - toCharArray())).substring(0, 27); - } - String country = cl.getCountry(parts[6]).getCode(). 
- toLowerCase(); - if (unresolved.contains(country)) { - country = "zz"; - } - String ipAddress = "127.0.0.1"; - if (hashIpAddresses) { - byte[] hashedOctets = DigestUtils.sha(parts[6] - + Hex.encodeHexString(Base64.decodeBase64( - bridgeIdentity)).toUpperCase() + secret); - String hashedIp = ""; - for (int i = 0; i < 4; i++) { - hashedIp += "." + ((int) hashedOctets[i] + 256) % 256; - } - ipAddress = hashedIp.substring(1); - } - scrubbed.append("r Unnamed" + country.toUpperCase() + " " - + hashedBridgeIdentity - + " " + replacementDescIdentifier + " " + parts[4] + " " - + parts[5] + " " + ipAddress + " " + parts[7] + " " - + parts[8] + "\n"); - } else if (line.startsWith("s ")) { - scrubbed.append(line + "\n"); - } else { - System.out.println("Unknown line: " + line); - System.exit(1); - } - } - String timeString = file.getParent().substring(file.getParent(). - indexOf("from-tonga-") + 11); - String[] date = timeString.substring(0, 10).split("-"); - String time = timeString.substring(11, 17); - File dir = new File(outDir + File.separator + "statuses" - + File.separator + date[2] + File.separator); - dir.mkdirs(); - File out = new File(dir.getAbsolutePath() + File.separator + date[0] - + date[1] + date[2] + "-" + time + "-" - + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D"); - if (!out.exists()) { - BufferedWriter bw3 = new BufferedWriter(new FileWriter(out)); - bw3.write(scrubbed.toString()); - bw3.close(); - } - } - System.out.println("\nWhile parsing, we found that we parsed " - + found + " bridge descriptors before, but are missing " - + notfound + ". (The number of missing identifiers should be " - + "significantly smaller.)"); - - long finished = System.currentTimeMillis(); - System.out.println("Processing took " + ((finished - started) / 1000) - + " seconds."); - } -} -
Deleted: projects/archives/trunk/bridge-desc-sanitizer/HOWTO =================================================================== --- projects/archives/trunk/bridge-desc-sanitizer/HOWTO 2011-05-17 22:47:52 UTC (rev 24767) +++ projects/archives/trunk/bridge-desc-sanitizer/HOWTO 2011-05-18 20:15:43 UTC (rev 24768) @@ -1,138 +0,0 @@ -Bridge descriptor sanitizer - ---------------------------------------------------------------------------- - - THIS REPOSITORY HAS MOVED TO GIT! - - git clone git://git.torproject.org/metrics-utils/ - ---------------------------------------------------------------------------- - -Introduction: - -The bridge authority Tonga maintains a list of bridges in order to serve -bridge addresses and descriptors to its clients. Every half hour, Tonga -takes a snapshot of the known bridge descriptors and copies them to -byblos for later statistical analysis. As a guiding principle, the Tor -project makes all data that it uses for statistical analysis available to -the interested public, in order to maximize transparency towards the -community. However, the bridge descriptors contain the IP addresses and -other contact information of bridges that must not be made public, or the -purpose of bridges as non-public entry points into the Tor network would -be defeated. This script takes the half-hourly snapshots as input, removes -all possibly sensitive information from the descriptors, and puts out the -sanitized bridge descriptors that are safe to be published. - ---------------------------------------------------------------------------- - -Processing steps: - -The following steps are taken to remove all potentially sensitive -information from the bridge descriptors while keeping them useful for -statistical analysis. - -1. Replace the bridge identity with its SHA1 value - - Clients can request a bridge's current descriptor by sending its - identity string to the bridge authority. This is a feature to make - bridges on dynamic IP addresses useful. 
Therefore, the original - identities (and anything that could be used to derive them) need to be - removed from the descriptors. The bridge identity is replaced with its - SHA1 hash value. The idea is to have a consistent replacement that - remains stable over months or even years (without keeping a secret for a - keyed hash function). - -2. Remove all cryptographic keys and signatures - - It would be straightforward to learn about the bridge identity from the - bridge's public key. Replacing keys with newly generated ones seemed - unnecessary (and would involve keeping state over months/years), so - all cryptographic objects have simply been removed. - -3. Replace IP address with 127.0.0.1 - - Of course, the IP address needs to be removed, too. However, the IP - address is resolved to a country code first and the result written to - the contact line as "somebody at example dot de" for Germany, etc. The - ports are kept unchanged though. - -4. Replace contact information - - If there is contact information in a descriptor, the contact line is - changed to "somebody at ...". If there is none, a contact line is added - saying "nobody at ..." in order to put in the country code. If the - bridge's IP address cannot be resolved to a country, the unassigned - country code "zz" is written to the contact line. - -5. Replace nickname with UnnamedCC - - The bridge nicknames might give hints on the location of the bridge if - chosen without care; e.g. a bridge nickname might be very similar to the - nicknames of the operator's relays, which might be located on adjacent IP - addresses. All bridge nicknames are therefore replaced with the string - UnnamedCC with CC being the upper-case country code. - -6. Replace references to descriptors - - Changing anything in the server descriptors or extra-info descriptors - invalidates the references from network statuses or server descriptors, - respectively. 
All references are replaced with the new hashes of - referenced descriptors, if available. In case of missing descriptors, - references are replaced with all zeros (or 'A's in base 64 encoding). - -Note that these processing steps only prevent people from learning about -new bridge locations. People who already know a bridge identity or location -can easily learn more about this bridge from the sanitized descriptors. -This is useful for statistical analysis, e.g. to filter out bridges that -have been running as relays before. - ---------------------------------------------------------------------------- - -Quick Start: - -The following steps are necessary to process the half-hourly snapshots as -collected by moria: - -- Install Java 5 or higher. - -- Download Apache Commons Codec 1.4 or higher for Base 64 and hex encoding - from http://commons.apache.org/codec/ and place the .jar (in the - following assumed to be commons-codec-1.4.jar) in the same directory as - this HOWTO file. - -- Download the MaxMind GeoIP Java library from http://geolite.maxmind.com/ - download/geoip/api/java/ and generate a JAR file as described in the - README file. Place the resulting maxmindgeoip.jar in the same directory - as this HOWTO file. - -- Copy the half-hourly snapshots named from-tonga-YYYY-MM-DDThhmmssZ.tar.gz - into a directory called data/ in the same directory as this HOWTO file. - -- Run ./extract-bridges.sh to extract the half-hourly snapshots in data/ - into separate directories in the newly created subdirectory in/ . - -- Put the binary MaxMind GeoIP database file that shall be used for - resolving IP addresses to country codes in the same directory as this - HOWTO file. Either the free or the commercial version of the database - can be used. For the archives provided by The Tor Project, the first - available commercial version of the subsequent month is used. 
- -- Compile the Java class using - - $ javac -cp commons-codec-1.4.jar:maxmindgeoip.jar - ConvertBridgeDescs.java - -- Run the script, providing it with the parameters it needs: - - java -cp .:commons-codec-1.4.jar:maxmindgeoip.jar ConvertBridgeDescs - <input directory> <geoip database file> <YYYY> <MM> - <output directory> - - Note that YYYY and MM specify the month that shall be processed. The other - descriptors in the input directory are ignored. - - A sample invocation might be: - - $ java -cp .:commons-codec-1.4.jar:maxmindgeoip.jar ConvertBridgeDescs - in/ GeoIP-106_20081101.dat 2008 10 out/ -
Deleted: projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh =================================================================== --- projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh 2011-05-17 22:47:52 UTC (rev 24767) +++ projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh 2011-05-18 20:15:43 UTC (rev 24768) @@ -1,8 +0,0 @@ -#!/bin/bash -mkdir "in/" -for i in `ls data/ | cut -c 1-29` -do -mkdir "in/"$i -tar -C "in/"$i -xf "data/"$i".tar.gz" -done -
Deleted: projects/archives/trunk/exonerator/ExoneraTor.java =================================================================== --- projects/archives/trunk/exonerator/ExoneraTor.java 2011-05-17 22:47:52 UTC (rev 24767) +++ projects/archives/trunk/exonerator/ExoneraTor.java 2011-05-18 20:15:43 UTC (rev 24768) @@ -1,404 +0,0 @@ -/* Copyright 2009 The Tor Project - * See LICENSE for licensing information */ - -import java.io.*; -import java.math.*; -import java.text.*; -import java.util.*; -import org.bouncycastle.util.encoders.Base64; - -public final class ExoneraTor { - - public static void main(final String[] args) throws Exception { - - // check parameters - if (args.length < 4 || args.length > 5) { - System.err.println("\nUsage: java " - + ExoneraTor.class.getSimpleName() - + " <descriptor archive directory> <IP address in question> " - + "<timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> " - + "[<target address>[:<target port>]]\n"); - return; - } - File archiveDirectory = new File(args[0]); - if (!archiveDirectory.exists() || !archiveDirectory.isDirectory()) { - System.err.println("\nDescriptor archive directory + " - + archiveDirectory.getAbsolutePath() - + " does not exist or is not a directory.\n"); - return; - } - String relayIP = args[1]; - String timestampStr = args[2] + " " + args[3]; - SimpleDateFormat timeFormat = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - timeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - long timestamp = timeFormat.parse(timestampStr).getTime(); - String target = null, targetIP = null, targetPort = null; - String[] targetIPParts = null; - if (args.length > 4) { - target = args[4]; - if (target.contains(":")) { - targetIP = target.split(":")[0]; - targetPort = target.split(":")[1]; - } else { - targetIP = target; - } - targetIPParts = targetIP.replace(".", " ").split(" "); - } - String DELIMITER = "--------------------------------------------------" - + "-------------------------"; - System.out.println("\nTrying to find 
out whether " + relayIP + " was " - + "running as a Tor relay at " + timestampStr - + (target != null ? " permitting exiting to " + target : "") - + "...\n\n" + DELIMITER); - - // check that we have the required archives - long timestampTooOld = timestamp - 300 * 60 * 1000; - long timestampFrom = timestamp - 180 * 60 * 1000; - long timestampTooNew = timestamp + 120 * 60 * 1000; - Calendar calTooOld = Calendar.getInstance(TimeZone.getTimeZone("UTC")); - Calendar calFrom = Calendar.getInstance(TimeZone.getTimeZone("UTC")); - Calendar calTooNew = Calendar.getInstance(TimeZone.getTimeZone("UTC")); - calTooOld.setTimeInMillis(timestampTooOld); - calFrom.setTimeInMillis(timestampFrom); - calTooNew.setTimeInMillis(timestampTooNew); - System.out.printf("%nChecking that relevant archives between " - + "%tF %<tT and %tF %<tT are available...%n", calTooOld, - calTooNew); - SortedSet<String> requiredDirs = new TreeSet<String>(); - requiredDirs.add(String.format("consensuses-%tY-%<tm", calTooOld)); - requiredDirs.add(String.format("consensuses-%tY-%<tm", calTooNew)); - if (target != null) { - requiredDirs.add(String.format("server-descriptors-%tY-%<tm", - calTooOld)); - requiredDirs.add(String.format("server-descriptors-%tY-%<tm", - calTooNew)); - } - SortedSet<File> consensusDirs = new TreeSet<File>(); - SortedSet<File> descriptorsDirs = new TreeSet<File>(); - Stack<File> directoriesLeftToParse = new Stack<File>(); - directoriesLeftToParse.push(archiveDirectory); - while (!directoriesLeftToParse.isEmpty()) { - File directoryOrFile = directoriesLeftToParse.pop(); - if (directoryOrFile.getName().startsWith("consensuses-")) { - if (requiredDirs.contains(directoryOrFile.getName())) { - requiredDirs.remove(directoryOrFile.getName()); - consensusDirs.add(directoryOrFile); - } - } else if (directoryOrFile.getName().startsWith( - "server-descriptors-")) { - if (requiredDirs.contains(directoryOrFile.getName())) { - requiredDirs.remove(directoryOrFile.getName()); - 
descriptorsDirs.add(directoryOrFile); - } - } else { - for (File fileInDir : directoryOrFile.listFiles()) - if (fileInDir.isDirectory()) - directoriesLeftToParse.push(fileInDir); - } - } - for (File dir : consensusDirs) - System.out.println(" " + dir.getAbsolutePath()); - for (File dir : descriptorsDirs) - System.out.println(" " + dir.getAbsolutePath()); - if (!requiredDirs.isEmpty()) { - System.out.println("\nWe are missing consensuses and/or server " - + "descriptors. Please download these archives and extract them " - + "to your data directory. Be sure NOT to rename the extracted " - + "directories or the contained files."); - for (String dir : requiredDirs) - System.out.println(" " + dir + ".tar.bz2"); - return; - } - - // look for consensus files - System.out.printf("%nLooking for relevant consensuses between " - + "%tF %<tT and %s...%n", calFrom, timestampStr); - SortedSet<File> tooOldConsensuses = new TreeSet<File>(); - SortedSet<File> relevantConsensuses = new TreeSet<File>(); - SortedSet<File> tooNewConsensuses = new TreeSet<File>(); - directoriesLeftToParse.clear(); - for (File consensusDir : consensusDirs) - directoriesLeftToParse.push(consensusDir); - SimpleDateFormat consensusTimeFormat = new SimpleDateFormat( - "yyyy-MM-dd-HH-mm-ss"); - consensusTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - while (!directoriesLeftToParse.isEmpty()) { - File directoryOrFile = directoriesLeftToParse.pop(); - if (directoryOrFile.isDirectory()) { - for (File fileInDir : directoryOrFile.listFiles()) { - directoriesLeftToParse.push(fileInDir); - } - continue; - } else { - String filename = directoryOrFile.getName(); - if (filename.endsWith("consensus")) { - long consensusTime = consensusTimeFormat.parse( - filename.substring(0, 19)).getTime(); - if (consensusTime >= timestampTooOld && - consensusTime < timestampFrom) - tooOldConsensuses.add(directoryOrFile); - else if (consensusTime >= timestampFrom && - consensusTime <= timestamp) - 
relevantConsensuses.add(directoryOrFile); - else if (consensusTime > timestamp && - consensusTime <= timestampTooNew) - tooNewConsensuses.add(directoryOrFile); - } - } - } - SortedSet<File> allConsensuses = new TreeSet<File>(); - allConsensuses.addAll(tooOldConsensuses); - allConsensuses.addAll(relevantConsensuses); - allConsensuses.addAll(tooNewConsensuses); - if (allConsensuses.isEmpty()) { - System.out.println(" None found!\n\n" + DELIMITER + "\n\nResult is " - + "INDECISIVE!\n\nWe cannot make any statement about IP address " - + relayIP + " being a relay at " + timestampStr + " or not! We " - + "did not find any relevant consensuses preceding the given " - + "time. This either means that you did not download and " - + "extract the consensus archives preceding the hours before " - + "the given time, or (in rare cases) that the directory " - + "archives are missing the hours before the timestamp. Please " - + "check that your directory archives contain consensus files " - + "of the interval 5:00 hours before and 2:00 hours after the " - + "time you are looking for.\n"); - return; - } - for (File f : relevantConsensuses) - System.out.println(" " + f.getAbsolutePath()); - - // parse consensuses to find descriptors belonging to the IP address - System.out.println("\nLooking for descriptor identifiers referenced " - + "in "r " lines in these consensuses containing IP address " - + relayIP + "..."); - SortedSet<File> positiveConsensusesNoTarget = new TreeSet<File>(); - Set<String> addressesInSameNetwork = new HashSet<String>(); - SortedMap<String, Set<File>> relevantDescriptors = - new TreeMap<String, Set<File>>(); - for (File consensus : allConsensuses) { - if (relevantConsensuses.contains(consensus)) - System.out.println(" " + consensus.getAbsolutePath()); - BufferedReader br = new BufferedReader(new FileReader(consensus)); - String line; - while ((line = br.readLine()) != null) { - if (!line.startsWith("r ")) - continue; - String[] parts = line.split(" "); - String 
address = parts[6]; - if (address.equals(relayIP)) { - byte[] result = Base64.decode(parts[3] + "=="); - String hex = String.format("%040x", new BigInteger(1, - Base64.decode(parts[3] + "=="))); - if (!relevantDescriptors.containsKey(hex)) - relevantDescriptors.put(hex, new HashSet<File>()); - relevantDescriptors.get(hex).add(consensus); - positiveConsensusesNoTarget.add(consensus); - if (relevantConsensuses.contains(consensus)) - System.out.println(" "" + line + "" references " - + "descriptor " + hex); - } else { - if (relayIP.startsWith(address.substring(0, - address.lastIndexOf(".")))) { - addressesInSameNetwork.add(address); - } - } - } - br.close(); - } - if (relevantDescriptors.isEmpty()) { - System.out.printf(" None found!\n\n" + DELIMITER + "\n\nResult is " - + "NEGATIVE with moderate certainty!\n\nWe did not find IP " - + "address " + relayIP + " in any of the consensuses that were " - + "published between %tF %<tT and %tF %<tT.\n\nA possible " - + "reason for false negatives is that the relay is using a " - + "different IP address when generating a descriptor than for " - + "exiting to the Internet. 
We hope to provide better checks " - + "for this case in the future.", calTooOld, calTooNew); - if (!addressesInSameNetwork.isEmpty()) { - System.out.println("\n\nThe following other IP addresses of Tor " - + "relays were found in the mentioned consensus files that " - + "are in the same /24 network and that could be related to " - + "IP address " + relayIP + ":"); - for (String s : addressesInSameNetwork) { - System.out.println(" " + s); - } - } - System.out.println(); - return; - } - - // parse router descriptors to check exit policies - SortedSet<File> positiveConsensuses = new TreeSet<File>(); - Set<String> missingDescriptors = new HashSet<String>(); - if (target != null) { - System.out.println("\nChecking if referenced descriptors permit " - + "exiting to " + target + "..."); - Set<String> descriptors = relevantDescriptors.keySet(); - missingDescriptors.addAll(relevantDescriptors.keySet()); - directoriesLeftToParse.clear(); - for (File descriptorsDir : descriptorsDirs) - directoriesLeftToParse.push(descriptorsDir); - while (!directoriesLeftToParse.isEmpty()) { - File directoryOrFile = directoriesLeftToParse.pop(); - if (directoryOrFile.isDirectory()) { - for (File fileInDir : directoryOrFile.listFiles()) { - directoriesLeftToParse.push(fileInDir); - } - continue; - } else { - String filename = directoryOrFile.getName(); - for (String descriptor : descriptors) { - if (filename.equals(descriptor)) { - missingDescriptors.remove(descriptor); - BufferedReader br = new BufferedReader( - new FileReader(directoryOrFile)); - String line; - while ((line = br.readLine()) != null) { - if (line.startsWith("reject ") || - line.startsWith("accept ")) { - boolean ruleAccept = line.split(" ")[0].equals("accept"); - String ruleAddress = line.split(" ")[1].split(":")[0]; - if (!ruleAddress.equals("*")) { - if (!ruleAddress.contains("/") && - !ruleAddress.equals(targetIP)) - continue; // IP address does not match - String[] ruleIPParts = ruleAddress.split("/")[0]. 
- replace(".", " ").split(" "); - int ruleNetwork = ruleAddress.contains("/") ? - Integer.parseInt(ruleAddress.split("/")[1]) : 32; - for (int i = 0; i < 4; i++) { - if (ruleNetwork == 0) { - break; - } else if (ruleNetwork >= 8) { - if (ruleIPParts[i].equals(targetIPParts[i])) - ruleNetwork -= 8; - else - break; - } else { - int mask = 255 ^ 255 >>> ruleNetwork; - if ((Integer.parseInt(ruleIPParts[i]) & mask) == - (Integer.parseInt(targetIPParts[i]) & mask)) - ruleNetwork = 0; - break; - } - } - if (ruleNetwork > 0) - continue; // IP address does not match - } - String rulePort = line.split(" ")[1].split(":")[1]; - if (targetPort == null && !ruleAccept && - !rulePort.equals("*")) - continue; // with no port given, we only consider - // reject :* rules as matching - if (targetPort != null) { - if (!rulePort.equals("*") && - !targetPort.equals(rulePort)) - continue; // ports do not match - } - boolean relevantMatch = false; - for (File f : relevantDescriptors.get(descriptor)) - if (relevantConsensuses.contains(f)) - relevantMatch = true; - if (relevantMatch) - System.out.println(" " - + directoryOrFile.getAbsolutePath() + " " - + (ruleAccept ? "permits" : "does not permit") - + " exiting to " + target + " according to rule "" - + line + """); - if (ruleAccept) - positiveConsensuses.addAll( - relevantDescriptors.get(descriptor)); - break; - } - } - br.close(); - } - } - } - } - } - - // print out result - Set<File> matches = (target != null) ? positiveConsensuses - : positiveConsensusesNoTarget; - if (matches.contains(relevantConsensuses.last())) { - System.out.println("\n" + DELIMITER + "\n\nResult is POSITIVE with " - + "high certainty!\n\nWe found one or more relays on IP address " - + relayIP - + (target != null ? 
" permitting exit to " + target : "") - + " in the most recent consensus preceding " + timestampStr - + " that clients were likely to know.\n"); - return; - } - boolean resultIndecisive = target != null - && !missingDescriptors.isEmpty(); - if (resultIndecisive) { - System.out.println("\n" + DELIMITER + "\n\nResult is INDECISIVE!\n\n" - + "At least one referenced descriptor could not be found. This " - + "is a rare case, but one that (apparently) happens. We cannot " - + "make any good statement about exit relays without these " - + "descriptors. The following descriptors are missing:"); - for (String desc : missingDescriptors) - System.out.println(" " + desc); - } - boolean inOtherRelevantConsensus = false, inTooOldConsensuses = false, - inTooNewConsensuses = false; - for (File f : matches) - if (relevantConsensuses.contains(f)) - inOtherRelevantConsensus = true; - else if (tooOldConsensuses.contains(f)) - inTooOldConsensuses = true; - else if (tooNewConsensuses.contains(f)) - inTooNewConsensuses = true; - if (inOtherRelevantConsensus) { - if (!resultIndecisive) - System.out.println("\n" + DELIMITER + "\n\nResult is POSITIVE " - + "with moderate certainty!"); - System.out.println("\nWe found one or more relays on IP address " - + relayIP - + (target != null ? " permitting exit to " + target : "") - + ", but not in the consensus immediately preceding " - + timestampStr + ". A possible reason for the relay being " - + "missing in the last consensus preceding the given time might " - + "be that some of the directory authorities had difficulties " - + "connecting to the relay. However, clients might still have " - + "used the relay."); - } else { - if (!resultIndecisive) - System.out.println("\n" + DELIMITER + "\n\nResult is NEGATIVE " - + "with high certainty!"); - System.out.println("\nWe did not find any relay on IP address " - + relayIP - + (target != null ? 
" permitting exit to " + target : "") - + " in the consensuses 3:00 hours preceding " + timestampStr - + "."); - if (inTooOldConsensuses || inTooNewConsensuses) { - if (inTooOldConsensuses && !inTooNewConsensuses) - System.out.println("\nNote that we found a matching relay in " - + "consensuses that were published between 5:00 and 3:00 " - + "hours before " + timestampStr + "."); - else if (!inTooOldConsensuses && inTooNewConsensuses) - System.out.println("\nNote that we found a matching relay in " - + "consensuses that were published up to 2:00 hours after " - + timestampStr + "."); - else - System.out.println("\nNote that we found a matching relay in " - + "consensuses that were published between 5:00 and 3:00 " - + "hours before and in consensuses that were published up " - + "to 2:00 hours after " + timestampStr + "."); - System.out.println("Make sure that the timestamp you provided is " - + "in the correct timezone: UTC (or GMT)."); - } - } - if (target != null) { - if (positiveConsensuses.isEmpty() && - !positiveConsensusesNoTarget.isEmpty()) - System.out.println("\nNote that although the found relay(s) did " - + "not permit exiting to " + target + ", there have been one " - + "or more relays running at the given time."); - } - System.out.println(); - } -} -
Deleted: projects/archives/trunk/exonerator/HOWTO =================================================================== --- projects/archives/trunk/exonerator/HOWTO 2011-05-17 22:47:52 UTC (rev 24767) +++ projects/archives/trunk/exonerator/HOWTO 2011-05-18 20:15:43 UTC (rev 24768) @@ -1,165 +0,0 @@ -ExoneraTor - or: a script that tells you whether some IP address was a Tor relay - ---------------------------------------------------------------------------- - - THIS REPOSITORY HAS MOVED TO GIT! - - git clone git://git.torproject.org/metrics-utils/ - ---------------------------------------------------------------------------- - -Introduction: - -Some people have expressed the desire to learn whether a given IP address -has been a Tor relay at a certain time. In addition to that, these people -might want to know whether the IP address permitted exit to a given address -and port. - -Answering these questions can be important for Tor relay operators to show -to the authorities that an anonymous user might have conducted bad things -with their IP address. Likewise, police investigators might be interested -in the answer to these questions, too, in order to decide whether to -proceed with their investigations or not. - -We can answer the above questions from looking at the descriptor archives -that are available since late 2007 (or even beyond, but this script only -works with the data format that was produced starting in October 2007). -This script parses the directory archives to print out the answer whether -a certain IP address was a Tor relay at a given time. The script further -prints out all intermediate steps in answering this, so that users can -confirm the correctness of the result themselves. - -This script is available in two versions written in Python and in Java with -equivalent functionality. 
- ---------------------------------------------------------------------------- - -Python Quick Start: - -In order to run the Python version of this script, you need to install and -download the following software and data (please note that all instructions -are written for Linux; commands for Windows or Mac OS X may vary): - -- Install Python 2.6.2 or higher. (Previous Python versions might work, - too, but have not been tested.) - -- Install the Python module IPy 0.62 or higher either from - http://pypi.python.org/pypi/IPy/ or using "apt-get install python-ipy" on - Debian-based systems. - -- Download the v3 consensuses and server descriptors of the relevant time - from http://metrics.torproject.org/data.html and extract them to a - directory in your working directory, e.g. /home/you/exonerator/data/ . - Don't rename the extracted directories or any of the contained files, or - the script won't find the contained descriptors. - - Note that you only need the server descriptors if you want to learn - whether a given IP address permits exiting to a given target. If you - only want to learn whether that IP address was a Tor relay, you don't - need them. - -- Run the script, providing it with the parameters it needs: - - python exonerator.py [--archive=<descriptor archive directory>] - <IP address in question> - <timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> - [<target address>[:<target port>]] - - The --archive option defaults to data/ . In the following examples, it is - assumed that this default applies. - - Make sure that the timestamp is provided in UTC, which is equivalent to - GMT, and not in your local timezone! Otherwise, results will very likely - be wrong. 
- - A sample invocation might be: - - $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 - 209.85.129.104:80 - ---------------------------------------------------------------------------- - -Java Quick Start: - -In order to run the Java version of this script, you need to install and -download the following software and data (please note that all instructions -are written for Linux; commands for Windows or Mac OS X may vary): - -- Install Java 6 or higher. - -- Download the BouncyCastle provider that includes Base 64 decoding from - http://www.bouncycastle.org/download/bcprov-jdk16-143.jar and put it in - your working directory, e.g. /home/you/exonerator/ . - -- Download the v3 consensuses and server descriptors of the relevant time - from http://metrics.torproject.org/data.html and extract them to a - directory in your working directory, e.g. /home/you/exonerator/data/ . - Don't rename the extracted directories or any of the contained files, or - the script won't find the contained descriptors. - - Note that you only need the server descriptors if you want to learn - whether a given IP address permits exiting to a given target. If you - only want to learn whether that IP address was a Tor relay, you don't - need them. - -- Compile the (single) Java class using this command: - - $ javac -cp bcprov-jdk16-143.jar ExoneraTor.java - -- Run the script, providing it with the parameters it needs: - - java -cp .:bcprov-jdk16-143.jar ExoneraTor - <descriptor archive directory> - <IP address in question> - <timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> - [<target address>[:<target port>]] - - Make sure that the timestamp is provided in UTC, which is equivalent to - GMT, and not in your local timezone! Otherwise, results will very likely - be wrong. 
- - A sample invocation might be: - - $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \ - 2009-08-15 16:05:00 209.85.129.104:80 - ---------------------------------------------------------------------------- - -Test cases: - -The following test cases work with the August 2009 archives and can be used -to check whether this script works correctly: - -- Positive result of echelon1+2 being a relay: - - $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 - $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \ - 2009-08-15 16:05:00 - -- Positive result of echelon1+2 exiting to google.com on any port - - $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 209.85.129.104 - $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \ - 2009-08-15 16:05:00 209.85.129.104 - -- Positive result of echelon1+2 exiting to google.com on port 80 - - $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 \ - 209.85.129.104:80 - $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \ - 2009-08-15 16:05:00 209.85.129.104:80 - -- Negative result of echelon1+2 exiting to google.com, but not on port 25 - - $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 \ - 209.85.129.104:25 - $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \ - 2009-08-15 16:05:00 209.85.129.104:25 - -- Negative result with IP address of echelon1+2 changed in the last octet - - $ python exonerator.py 209.17.171.50 2009-08-15 16:05:00 - $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.50 \ - 2009-08-15 16:05:00 -
Deleted: projects/archives/trunk/exonerator/LICENSE =================================================================== --- projects/archives/trunk/exonerator/LICENSE 2011-05-17 22:47:52 UTC (rev 24767) +++ projects/archives/trunk/exonerator/LICENSE 2011-05-18 20:15:43 UTC (rev 24768) @@ -1,30 +0,0 @@ -Copyright 2009 The Tor Project - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - * Neither the names of the copyright owners nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -
#!/usr/bin/env python
# Copyright 2009 The Tor Project -- see LICENSE for licensing information
#
# Source: projects/archives/trunk/exonerator/exonerator.py as removed in
# r24768 (the code moved to git://git.torproject.org/metrics-utils/).
"""ExoneraTor: report whether an IP address was a Tor relay at a given time.

Invocation (see USAGE below):

    python exonerator.py [--archive=<descriptor archive directory>]
        <IP address in question> <YYYY-MM-DD> <hh:mm:ss>
        [<target address>[:<target port>]]

The timestamp is interpreted as UTC.  The script

  1. locates the "consensuses-YYYY-MM" directories (and, when a target is
     given, the "server-descriptors-YYYY-MM" directories) below the archive
     directory for the months touched by the interval from 5:00 hours
     before to 2:00 hours after the timestamp,
  2. sorts the consensus files it finds into "too old" (5:00 to 3:00 hours
     before), "relevant" (3:00 hours before up to the timestamp) and
     "too new" (up to 2:00 hours after),
  3. collects the descriptors referenced by "r " lines whose address field
     equals the IP address in question,
  4. when a target is given, looks up the first exit-policy rule in each of
     those descriptors that matches the target, and
  5. prints every intermediate step followed by a POSITIVE, NEGATIVE or
     INDECISIVE verdict.
"""

import binascii
import os
import sys
import time
import calendar
from optparse import OptionParser
from IPy import IP

USAGE = "usage: %prog [options] <IP address in question> " \
        "<timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> " \
        "[<target address>[:<target port>]]"
DELIMITER = "-" * 75


def _iter_files(roots):
    """Yield every non-directory path found below the given paths.

    Walks with an explicit stack: the most recently pushed path is visited
    first and directory entries are pushed in os.listdir() order.
    """
    pending = list(roots)
    while pending:
        path = pending.pop()
        if os.path.isdir(path):
            for name in os.listdir(path):
                pending.append("%s/%s" % (path, name))
        else:
            yield path


def _first_matching_rule(descriptorPath, targetIP, targetPort):
    """Return the first exit-policy rule in a descriptor matching the target.

    descriptorPath -- path of the server descriptor file to read
    targetIP       -- IPy.IP of the target address
    targetPort     -- target port as a string, or None if no port was given

    Returns a tuple (ruleAccept, ruleLine) where ruleAccept is True for an
    "accept" rule and ruleLine is the rule with trailing whitespace removed,
    or None when no "accept "/"reject " line matches.
    """
    descriptorFile = open(descriptorPath, "r")
    try:
        for line in descriptorFile:
            if not (line.startswith("reject ") or
                    line.startswith("accept ")):
                continue
            fields = line.split()
            ruleAccept = fields[0] == "accept"
            ruleParts = fields[1].split(":")
            ruleAddress = ruleParts[0]
            if ruleAddress != "*" and \
                    not IP(ruleAddress).overlaps(targetIP):
                # IP address does not match
                continue
            rulePort = ruleParts[1]
            if not targetPort and not ruleAccept and rulePort != "*":
                # with no port given, we only consider
                # reject :* rules as matching
                continue
            # Ports are compared as strings, so a rule whose port field is
            # not "*" only matches a target port spelled identically.
            # NOTE(review): a port range in a rule would never match here
            # -- confirm whether ranges need to be supported.
            if targetPort and rulePort != "*" and targetPort != rulePort:
                # ports do not match
                continue
            return (ruleAccept, line.rstrip())
    finally:
        descriptorFile.close()
    return None


if __name__ == '__main__':
    # check parameters
    parser = OptionParser(usage=USAGE)
    parser.add_option("-a", "--archive", dest="archive", default="data/",
                      help="descriptor archive directory")
    (options, args) = parser.parse_args()
    if len(args) not in (3, 4):
        parser.error("incorrect number of arguments")
    if not os.path.isdir(options.archive):
        parser.error("descriptor archive directory %s does not exist or "
                     "is not a directory." %
                     os.path.abspath(options.archive))
    # normpath() only strips the trailing slash; dirname() would turn
    # "data" into "" (which os.listdir() rejects) and "a/b" into "a".
    archiveDirectory = os.path.normpath(options.archive)
    try:
        relayIP = IP(args[0])
    except ValueError:
        parser.error("invalid IP address in question: '%s'" % args[0])
    timestampStr = "%s %s" % (args[1], args[2])
    try:
        timestamp = time.strptime(timestampStr + " UTC",
                                  "%Y-%m-%d %H:%M:%S %Z")
    except ValueError:
        parser.error("incorrect time format: '%s'" % timestampStr)
    # if a target is given, parse address and possibly port part of it
    target = None
    targetIP = None
    targetPort = None
    if len(args) == 4:
        target = args[3]
        targetParts = target.split(":")
        try:
            targetIP = IP(targetParts[0])
        except ValueError:
            parser.error("invalid target IP address in: '%s'" % args[3])
        if len(targetParts) > 2:
            parser.error("invalid target format: '%s'" % args[3])
        if len(targetParts) > 1:
            try:
                targetPortTest = int(targetParts[1])
            except ValueError:
                parser.error("invalid target port number in: '%s'" %
                             args[3])
            # valid ports are 1 through 65535 inclusive
            if not 1 <= targetPortTest <= 65535:
                parser.error("invalid target port number in: '%s'" %
                             args[3])
            targetPort = targetParts[1]

    targetHelpStr = ""
    if target:
        targetHelpStr = " permitting exiting to %s" % target
    print("\nTrying to find out whether %s was running a Tor relay at "
          "%s%s...\n\n%s\n" % (relayIP, timestampStr, targetHelpStr,
                               DELIMITER))

    # check that we have the required archives
    # All interval arithmetic and comparisons are done in seconds since the
    # epoch; the struct_time values are kept for formatting only.
    timestampSecs = calendar.timegm(timestamp)
    tooOldSecs = timestampSecs - 300 * 60
    fromSecs = timestampSecs - 180 * 60
    tooNewSecs = timestampSecs + 120 * 60
    timestampTooOld = time.gmtime(tooOldSecs)
    timestampFrom = time.gmtime(fromSecs)
    timestampTooNew = time.gmtime(tooNewSecs)
    timestampTooOldStr = time.strftime("%Y-%m-%d %H:%M:%S",
                                       timestampTooOld)
    timestampFromStr = time.strftime("%Y-%m-%d %H:%M:%S", timestampFrom)
    timestampTooNewStr = time.strftime("%Y-%m-%d %H:%M:%S",
                                       timestampTooNew)
    print("\nChecking that relevant archives between %s and %s are "
          "available..." % (timestampTooOldStr, timestampTooNewStr))

    requiredDirs = set()
    requiredDirs.add(time.strftime("consensuses-%Y-%m", timestampTooOld))
    requiredDirs.add(time.strftime("consensuses-%Y-%m", timestampTooNew))
    if target:
        requiredDirs.add(time.strftime("server-descriptors-%Y-%m",
                                       timestampTooOld))
        requiredDirs.add(time.strftime("server-descriptors-%Y-%m",
                                       timestampTooNew))

    consensusDirs = list()
    descriptorsDirs = list()
    directoriesLeftToParse = [archiveDirectory]

    # Search the archive for the required directories.  Directories named
    # consensuses-* or server-descriptors-* are never descended into here.
    while directoriesLeftToParse:
        directoryOrFile = directoriesLeftToParse.pop()
        basename = os.path.basename(directoryOrFile)
        if basename.startswith("consensuses-"):
            if basename in requiredDirs:
                requiredDirs.remove(basename)
                consensusDirs.append(directoryOrFile)
        elif basename.startswith("server-descriptors-"):
            if basename in requiredDirs:
                requiredDirs.remove(basename)
                descriptorsDirs.append(directoryOrFile)
        else:
            for filename in os.listdir(directoryOrFile):
                entry = "%s/%s" % (directoryOrFile, filename)
                if os.path.isdir(entry):
                    directoriesLeftToParse.append(entry)

    consensusDirs.sort()
    for consensusDir in consensusDirs:
        print(" %s" % consensusDir)
    descriptorsDirs.sort()
    for descriptorsDir in descriptorsDirs:
        print(" %s" % descriptorsDir)

    if requiredDirs:
        print("\nWe are missing consensuses and/or server descriptors. "
              "Please download these archives and extract them to your "
              "data directory. Be sure NOT to rename the extracted "
              "directories or the contained files.")
        for requiredDir in sorted(requiredDirs):
            print(" %s.tar.bz2" % requiredDir)
        sys.exit()

    # look for consensus files
    print("\nLooking for relevant consensuses between %s and %s..." %
          (timestampFromStr, timestampStr))
    tooOldConsensuses = set()
    relevantConsensuses = set()
    tooNewConsensuses = set()
    for consensusPath in _iter_files(consensusDirs):
        basename = os.path.basename(consensusPath)
        if not basename.endswith("consensus"):
            continue
        # The first 19 characters of the file name are the publication
        # time.  It is converted to epoch seconds before comparing:
        # struct_time values from strptime() and gmtime() differ in
        # tm_isdst, which breaks ties at the interval boundaries.
        consensusSecs = calendar.timegm(
            time.strptime(basename[0:19], "%Y-%m-%d-%H-%M-%S"))
        if tooOldSecs <= consensusSecs < fromSecs:
            tooOldConsensuses.add(consensusPath)
        elif fromSecs <= consensusSecs <= timestampSecs:
            relevantConsensuses.add(consensusPath)
        elif timestampSecs < consensusSecs <= tooNewSecs:
            tooNewConsensuses.add(consensusPath)
    allConsensuses = set()
    allConsensuses.update(tooOldConsensuses)
    allConsensuses.update(relevantConsensuses)
    allConsensuses.update(tooNewConsensuses)
    if not allConsensuses:
        print(" None found!\n\n%s\n\nResult is INDECISIVE!\n\nWe "
              "cannot make any statement about IP address %s being a "
              "relay at %s or not! We did not find any relevant "
              "consensuses preceding the given time. This either means "
              "that you did not download and extract the consensus "
              "archives preceding the hours before the given time, or "
              "(in rare cases) that the directory archives are missing "
              "the hours before the timestamp. Please check that your "
              "directory archives contain consensus files of the "
              "interval 5:00 hours before and 2:00 hours after the time "
              "you are looking for.\n" % (DELIMITER, relayIP, timestampStr))
        sys.exit()
    for consensus in sorted(relevantConsensuses):
        print(" %s" % consensus)

    # parse consensuses to find descriptors belonging to the IP address
    print('\nLooking for descriptor identifiers referenced in "r " '
          "lines in these consensuses containing IP address %s..." %
          relayIP)
    positiveConsensusesNoTarget = set()
    addressesInSameNetwork = set()
    # maps hex descriptor identifier -> set of consensuses referencing it
    relevantDescriptors = dict()
    # sorted so that the per-consensus output has a stable order
    for consensus in sorted(allConsensuses):
        isRelevant = consensus in relevantConsensuses
        if isRelevant:
            print(" %s" % consensus)
        consensusFile = open(consensus, "r")
        try:
            for line in consensusFile:
                if not line.startswith("r "):
                    continue
                fields = line.split(" ")
                address = IP(fields[6])
                if address == relayIP:
                    # field 3 is base64 without padding; "==" restores it
                    hexDesc = binascii.b2a_hex(
                        binascii.a2b_base64(fields[3] + "=="))
                    if not isinstance(hexDesc, str):
                        # b2a_hex() may return bytes; file names are str
                        hexDesc = hexDesc.decode("ascii")
                    relevantDescriptors.setdefault(hexDesc,
                                                   set()).add(consensus)
                    positiveConsensusesNoTarget.add(consensus)
                    if isRelevant:
                        print(' "%s" references descriptor %s' %
                              (line.rstrip(), hexDesc))
                elif relayIP.overlaps(IP("%s/24" % address,
                                         make_net=True)):
                    addressesInSameNetwork.add(address)
        finally:
            consensusFile.close()
    if not relevantDescriptors:
        print(" None found!\n\n%s\n\nResult is NEGATIVE with moderate "
              "certainty!\n\nWe did not find IP address %s in any of "
              "the consensuses that were published between %s and "
              "%s.\n\nA possible reason for false negatives is that the "
              "relay is using a different IP address when generating a "
              "descriptor than for exiting to the Internet. We hope to "
              "provide better checks for this case in the future." %
              (DELIMITER, relayIP, timestampTooOldStr, timestampTooNewStr))
        if addressesInSameNetwork:
            print("\nThe following other IP addresses of Tor relays "
                  "were found in the mentioned consensus files that are "
                  "in the same /24 network and that could be related to "
                  "IP address %s:" % relayIP)
            for addr in addressesInSameNetwork:
                print(" %s" % addr)
        print("")
        sys.exit()

    # parse router descriptors to check exit policies
    positiveConsensuses = set()
    missingDescriptors = set()
    if target:
        print("\nChecking if referenced descriptors permit exiting to "
              "%s..." % target)
        missingDescriptors.update(relevantDescriptors.keys())
        for descriptorPath in _iter_files(descriptorsDirs):
            descriptor = os.path.basename(descriptorPath)
            if descriptor not in relevantDescriptors:
                continue
            # discard(): the same file name may be encountered twice
            missingDescriptors.discard(descriptor)
            match = _first_matching_rule(descriptorPath, targetIP,
                                         targetPort)
            if match is None:
                continue
            (ruleAccept, ruleLine) = match
            referencingConsensuses = relevantDescriptors[descriptor]
            # only report rules of descriptors that are referenced from a
            # consensus in the relevant interval
            if any(f in relevantConsensuses
                   for f in referencingConsensuses):
                if ruleAccept:
                    print(' %s permits exiting to %s according to rule '
                          '"%s"' % (descriptorPath, target, ruleLine))
                else:
                    print(' %s does not permit exiting to %s according '
                          'to rule "%s"' %
                          (descriptorPath, target, ruleLine))
            if ruleAccept:
                positiveConsensuses.update(referencingConsensuses)

    # print out result
    if target:
        matches = positiveConsensuses
    else:
        matches = positiveConsensusesNoTarget
    # relevantConsensuses may be empty when only "too old" or "too new"
    # consensuses were found; there is no last relevant consensus then.
    lastConsensus = None
    if relevantConsensuses:
        lastConsensus = max(relevantConsensuses)
    if lastConsensus is not None and lastConsensus in matches:
        print("\n%s\n\nResult is POSITIVE with high certainty!\n\nWe "
              "found one or more relays on IP address %s%s in the most "
              "recent consensus preceding %s that clients were likely "
              "to know.\n" % (DELIMITER, relayIP, targetHelpStr,
                              timestampStr))
        sys.exit()
    noRelevantConsensuses = not relevantConsensuses
    descriptorsMissing = bool(target) and len(missingDescriptors) > 0
    resultIndecisive = noRelevantConsensuses or descriptorsMissing
    if resultIndecisive:
        print("\n%s\n\nResult is INDECISIVE!\n" % DELIMITER)
    if noRelevantConsensuses:
        # Without any consensus from the relevant interval neither a
        # positive nor a negative verdict can be justified.
        print("We did not find any consensuses that were published in "
              "the 3:00 hours preceding %s. We cannot make any good "
              "statement about that time without these consensuses." %
              timestampStr)
        if descriptorsMissing:
            print("")
    if descriptorsMissing:
        print("At least one "
              "referenced descriptor could not be found. This is a rare "
              "case, but one that (apparently) happens. We cannot make "
              "any good statement about exit relays without these "
              "descriptors. The following descriptors are missing:")
        for desc in missingDescriptors:
            print(" %s" % desc)
    inOtherRelevantConsensus = False
    inTooOldConsensuses = False
    inTooNewConsensuses = False
    for f in matches:
        if f in relevantConsensuses:
            inOtherRelevantConsensus = True
        elif f in tooOldConsensuses:
            inTooOldConsensuses = True
        elif f in tooNewConsensuses:
            inTooNewConsensuses = True
    if inOtherRelevantConsensus:
        if not resultIndecisive:
            print("\n%s\n\nResult is POSITIVE with moderate certainty!" %
                  DELIMITER)
        print("\nWe found one or more relays on IP address %s%s, but "
              "not in the consensus immediately preceding %s. A "
              "possible reason for the relay being missing in the last "
              "consensus preceding the given time might be that some of "
              "the directory authorities had difficulties connecting to "
              "the relay. However, clients might still have used the "
              "relay." % (relayIP, targetHelpStr, timestampStr))
    else:
        if not resultIndecisive:
            print("\n%s\n\nResult is NEGATIVE with high certainty!" %
                  DELIMITER)
        print("\nWe did not find any relay on IP address %s%s in the "
              "consensuses 3:00 hours preceding %s." %
              (relayIP, targetHelpStr, timestampStr))
        if inTooOldConsensuses or inTooNewConsensuses:
            if inTooOldConsensuses and not inTooNewConsensuses:
                print("\nNote that we found a matching relay in "
                      "consensuses that were published between 5:00 and "
                      "3:00 hours before %s." % timestampStr)
            elif not inTooOldConsensuses and inTooNewConsensuses:
                print("\nNote that we found a matching relay in "
                      "consensuses that were published up to 2:00 hours "
                      "after %s." % timestampStr)
            else:
                print("\nNote that we found a matching relay in "
                      "consensuses that were published between 5:00 and "
                      "3:00 hours before and in consensuses that were "
                      "published up to 2:00 hours after %s." %
                      timestampStr)
            print("Make sure that the timestamp you provided is in the "
                  "correct timezone: UTC (or GMT).")
    if target:
        if not positiveConsensuses and positiveConsensusesNoTarget:
            print("\nNote that although the found relay(s) did not "
                  "permit exiting to %s there have been one or more "
                  "relays running at the given time." % target)
    print("")
tor-commits@lists.torproject.org