[or-cvs] r17774: {} Add Java application to parse archived bridge data. This is (in projects/dir-stats/trunk/bridge-stats: . java java/lib java/src)

kloesing at seul.org kloesing at seul.org
Thu Dec 25 16:11:41 UTC 2008


Author: kloesing
Date: 2008-12-25 11:11:41 -0500 (Thu, 25 Dec 2008)
New Revision: 17774

Added:
   projects/dir-stats/trunk/bridge-stats/java/
   projects/dir-stats/trunk/bridge-stats/java/lib/
   projects/dir-stats/trunk/bridge-stats/java/lib/bcprov-jdk16-137.jar
   projects/dir-stats/trunk/bridge-stats/java/src/
   projects/dir-stats/trunk/bridge-stats/java/src/BaseEncoding.java
   projects/dir-stats/trunk/bridge-stats/java/src/Crypto.java
   projects/dir-stats/trunk/bridge-stats/java/src/ParseBridgeData.java
Log:
Add Java application to parse archived bridge data. This is probably useful for parsing other directory data, too.


Property changes on: projects/dir-stats/trunk/bridge-stats/java
___________________________________________________________________
Name: svn:ignore
   + bin


Added: projects/dir-stats/trunk/bridge-stats/java/lib/bcprov-jdk16-137.jar
===================================================================
(Binary files differ)


Property changes on: projects/dir-stats/trunk/bridge-stats/java/lib/bcprov-jdk16-137.jar
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: projects/dir-stats/trunk/bridge-stats/java/src/BaseEncoding.java
===================================================================
--- projects/dir-stats/trunk/bridge-stats/java/src/BaseEncoding.java	                        (rev 0)
+++ projects/dir-stats/trunk/bridge-stats/java/src/BaseEncoding.java	2008-12-25 16:11:41 UTC (rev 17774)
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2008 The Tor Project
+ * 
+ * Parts of this code have been copied from the OnionCoffee project:
+ * http://onioncoffee.sourceforge.net/
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+/**
+ * Provides basic encoding methods between binary, hex, base32, and base64.
+ * 
+ * @author karsten
+ */
+public class BaseEncoding {
+
+	/** Alphabet used by {@link #toBase32(byte[])}; the index is the 5-bit value. */
+	private static final String BASE32_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567";
+
+	/**
+	 * Lowercase two-character hex representation of every unsigned byte
+	 * value, indexed by that value (0..255).
+	 */
+	private static final String[] HEX_CHARS = buildHexTable();
+
+	/**
+	 * Builds the lookup table behind {@link #HEX_CHARS}.
+	 */
+	private static String[] buildHexTable() {
+		String digits = "0123456789abcdef";
+		String[] table = new String[256];
+		for (int value = 0; value < table.length; value++) {
+			table[value] = "" + digits.charAt(value >>> 4)
+					+ digits.charAt(value & 0x0f);
+		}
+		return table;
+	}
+
+	/**
+	 * Encodes binary data as lowercase base32 without padding.
+	 * 
+	 * Bits are consumed most-significant first, five at a time. If the
+	 * number of input bits is not a multiple of five, the trailing bits are
+	 * not encoded and a warning is printed to standard error.
+	 * 
+	 * @param data
+	 *            the bytes to encode
+	 * @return the base32 representation of all complete 5-bit groups
+	 */
+	public static String toBase32(byte[] data) {
+		StringBuilder sb = new StringBuilder();
+		int b32 = 0;
+		int b32_filled = 0;
+		for (int pos = 0; pos < data.length; ++pos) {
+			for (int bitmask = 128; bitmask > 0; bitmask /= 2) {
+				b32 = (b32 << 1);
+				if ((data[pos] & bitmask) != 0) {
+					b32 = b32 | 1;
+				}
+				++b32_filled;
+				if (b32_filled == 5) {
+					// five bits collected: emit one base32 character
+					sb.append(BASE32_ALPHABET.charAt(b32));
+					b32 = 0;
+					b32_filled = 0;
+				}
+			}
+		}
+		// bits left over here are dropped; tell the caller on stderr so the
+		// warning does not end up in regular program output
+		if (b32_filled != 0) {
+			System.err
+					.println("BaseEncoding.toBase32: received array with unsupported number of bits.");
+		}
+		return sb.toString();
+	}
+
+	/**
+	 * Converts a byte array to a lowercase hex string, inserting a line
+	 * break after every <code>column_width / 3</code> bytes.
+	 * 
+	 * No separator is written between bytes; only the line length is
+	 * derived from a budget of three characters per byte. Widths for which
+	 * <code>column_width / 3</code> is zero are treated as one byte per
+	 * line instead of failing with a division by zero.
+	 * 
+	 * @param block
+	 *            the bytes to convert
+	 * @param column_width
+	 *            line width budget in characters
+	 * @return two hex characters per byte, with embedded line breaks
+	 */
+	public static String toHexString(byte[] block, int column_width) {
+		int bytesPerLine = column_width / 3;
+		if (bytesPerLine == 0) {
+			bytesPerLine = 1;
+		}
+		StringBuilder buf = new StringBuilder(4 * (block.length + 2));
+		for (int i = 0; i < block.length; i++) {
+			if (i > 0 && i % bytesPerLine == 0) {
+				buf.append("\n");
+			}
+			buf.append(HEX_CHARS[block[i] & 0xff]);
+		}
+		return buf.toString();
+	}
+
+	/**
+	 * Converts a byte array to an uppercase hex string on a single line.
+	 * 
+	 * @param block
+	 *            the bytes to convert
+	 * @return two uppercase hex characters per byte, without line breaks
+	 */
+	public static String toHex(byte[] block) {
+		// a width of 3 * length + 1 makes the line long enough to hold all
+		// bytes, so toHexString never inserts a line break
+		return toHexString(block, block.length * 3 + 1).toUpperCase();
+	}
+
+	/**
+	 * Encodes binary data as base64 and cuts the result at the first
+	 * <code>=</code> character, i.e. removes the trailing padding.
+	 * 
+	 * @param data
+	 *            the bytes to encode
+	 * @return the base64 representation without trailing equal signs
+	 */
+	public static String toBase64NoTrailingEqualSigns(byte[] data) {
+		String result = new String(org.bouncycastle.util.encoders.Base64.encode(data));
+		int cut = result.indexOf('=');
+		if (cut > -1) {
+			result = result.substring(0, cut);
+		}
+		return result;
+	}
+}

Added: projects/dir-stats/trunk/bridge-stats/java/src/Crypto.java
===================================================================
--- projects/dir-stats/trunk/bridge-stats/java/src/Crypto.java	                        (rev 0)
+++ projects/dir-stats/trunk/bridge-stats/java/src/Crypto.java	2008-12-25 16:11:41 UTC (rev 17774)
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2008 The Tor Project
+ * 
+ * Parts of this code have been copied from the OnionCoffee project:
+ * http://onioncoffee.sourceforge.net/
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+import org.bouncycastle.crypto.digests.SHA1Digest;
+
+/**
+ * Provides crypto operations.
+ * 
+ * @author karsten
+ */
+public class Crypto {
+
+	/**
+	 * Computes the SHA-1 digest over all bytes of the given array.
+	 * 
+	 * @param input
+	 *            the bytes to hash
+	 * @return a newly allocated array holding the digest; its length is the
+	 *         digest size reported by the SHA-1 implementation
+	 */
+	public static byte[] getHash(byte[] input) {
+		final SHA1Digest digest = new SHA1Digest();
+		final byte[] result = new byte[digest.getDigestSize()];
+		digest.reset();
+		digest.update(input, 0, input.length);
+		digest.doFinal(result, 0);
+		return result;
+	}
+}

Added: projects/dir-stats/trunk/bridge-stats/java/src/ParseBridgeData.java
===================================================================
--- projects/dir-stats/trunk/bridge-stats/java/src/ParseBridgeData.java	                        (rev 0)
+++ projects/dir-stats/trunk/bridge-stats/java/src/ParseBridgeData.java	2008-12-25 16:11:41 UTC (rev 17774)
@@ -0,0 +1,454 @@
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.text.ParsePosition;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+/**
+ * Parse archived bridge data.
+ * 
+ * @author karsten
+ */
+public class ParseBridgeData {
+
+	/** Length of one snapshot slot (30 minutes) in milliseconds. */
+	private static final long SNAPSHOT_INTERVAL_MILLIS = 1800000L;
+
+	/** Country codes that get a column of their own in geoip.csv. */
+	private static final String[] ALL_COUNTRY_CODES = { "ad", "ae", "af",
+			"ag", "ai", "al", "am", "an", "ao", "aq", "ar", "as", "at",
+			"au", "aw", "ax", "az", "ba", "bb", "bd", "be", "bf", "bg",
+			"bh", "bi", "bj", "bm", "bn", "bo", "br", "bs", "bt", "bv",
+			"bw", "by", "bz", "ca", "cd", "cf", "cg", "ch", "ci", "ck",
+			"cl", "cm", "cn", "co", "cr", "cs", "cu", "cv", "cy", "cz",
+			"de", "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "er",
+			"es", "et", "fi", "fj", "fk", "fm", "fo", "fr", "ga", "gb",
+			"gd", "ge", "gf", "gh", "gi", "gl", "gm", "gn", "gp", "gq",
+			"gr", "gs", "gt", "gu", "gw", "gy", "hk", "hn", "hr", "ht",
+			"hu", "id", "ie", "il", "im", "in", "io", "iq", "ir", "is",
+			"it", "je", "jm", "jo", "jp", "ke", "kg", "kh", "ki", "km",
+			"kn", "kr", "kw", "ky", "kz", "la", "lb", "lc", "li", "lk",
+			"lr", "ls", "lt", "lu", "lv", "ly", "ma", "mc", "md", "me",
+			"mg", "mh", "mk", "ml", "mm", "mn", "mo", "mp", "mq", "mr",
+			"ms", "mt", "mu", "mv", "mw", "mx", "my", "mz", "na", "nc",
+			"ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz",
+			"om", "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pr", "ps",
+			"pt", "pw", "py", "qa", "re", "ro", "rs", "ru", "rw", "sa",
+			"sb", "sc", "sd", "se", "sg", "si", "sk", "sl", "sm", "sn",
+			"so", "sr", "st", "sv", "sy", "sz", "tc", "td", "tf", "tg",
+			"th", "tj", "tk", "tl", "tm", "tn", "to", "tr", "tt", "tv",
+			"tw", "ua", "ug", "um", "us", "uy", "uz", "va", "vc", "ve",
+			"vg", "vi", "vn", "vu", "wf", "ws", "ye", "yt", "za", "zm",
+			"zw" };
+
+	/** All router entries found in one networkstatus-bridges file. */
+	static class Networkstatus {
+		Set<Router> entries;
+	}
+
+	/** One entry (an "r" line plus its "s" line) of a network status. */
+	static class Router {
+		/** Descriptor with a matching digest, or null if none was found. */
+		Descriptor descriptor;
+		// some of the following fields are not used by the analysis, but
+		// are left here as placeholders.
+		// String nickname;
+		// String identity;
+		String descriptorDigest;
+		// String published;
+		// String ipAddress;
+		// String orPort;
+		// boolean fast;
+		// boolean guard;
+		// boolean hsdir;
+		boolean running;
+		// boolean stable;
+		// boolean v2dir;
+		// boolean valid;
+	}
+
+	/** One router descriptor read from a bridge-descriptors file. */
+	static class Descriptor {
+		/** Extra-info with a matching digest, or null if none was found. */
+		Extrainfo extrainfo;
+		// String nickname;
+		// String ipAddress;
+		// String orPort;
+		// String platform;
+		// String published;
+		// String fingerprint;
+		// long uptime;
+		// String averageBandwidth;
+		// String maxBandwidth;
+		// long observedBandwidth;
+		String extrainfoDigest;
+	}
+
+	/** One extra-info document read from a cached-extrainfo file. */
+	static class Extrainfo {
+		// String nickname;
+		// String fingerprint;
+		// String published;
+		String writeHistory;
+		String readHistory;
+		String geoipStartTime;
+		String geoipClientOrigins;
+		long averageWrite;
+		long averageRead;
+	}
+
+	/**
+	 * Parses all snapshot directories below the given data directory and
+	 * writes three CSV files to the output directory: running.csv
+	 * (time,total,running), bandwidth.csv (time,bandwidth) and geoip.csv
+	 * (time plus one column per country code). Time slots without a
+	 * snapshot are written as rows of NA values.
+	 * 
+	 * The program exits with status 1 if the arguments are unusable, if an
+	 * output file already exists, or if a snapshot directory does not look
+	 * as expected.
+	 * 
+	 * @param args
+	 *            path to the data directory, followed by the output
+	 *            directory
+	 * @throws Exception
+	 *             if reading input or writing output fails
+	 */
+	public static void main(String[] args) throws Exception {
+		if (args.length < 2) {
+			System.err.println("Usage: java "
+					+ ParseBridgeData.class.getSimpleName()
+					+ " <path to data/ directory> <output directory>");
+			System.exit(1);
+		}
+		File testenvDirectory = new File(args[0]);
+		if (!testenvDirectory.exists() || !testenvDirectory.isDirectory()) {
+			System.err.println(testenvDirectory.getAbsolutePath()
+					+ " does not exist or is not a directory.");
+			System.exit(1);
+		}
+		File outputDirectory = new File(args[1]);
+		if (outputDirectory.exists() && !outputDirectory.isDirectory()) {
+			System.err.println(outputDirectory.getAbsolutePath()
+					+ " exists, but is not a directory.");
+			System.exit(1);
+		}
+		outputDirectory.mkdir();
+
+		// prepare output file(s); never overwrite results of an earlier run
+		File runningOutFile = new File(outputDirectory, "running.csv");
+		File bandwidthOutFile = new File(outputDirectory, "bandwidth.csv");
+		File geoipOutFile = new File(outputDirectory, "geoip.csv");
+		if (runningOutFile.exists() || bandwidthOutFile.exists()
+				|| geoipOutFile.exists()) {
+			System.err.println("Output file(s) already exist(s). Exiting.");
+			System.exit(1);
+		}
+
+		// collect the snapshot directories in sorted order
+		File[] snapshots = testenvDirectory.listFiles();
+		if (snapshots == null) {
+			// listFiles() returns null on I/O errors
+			System.err.println("Could not list contents of "
+					+ testenvDirectory.getAbsolutePath() + ".");
+			System.exit(1);
+		}
+		List<File> sortedDirectories = new ArrayList<File>(snapshots.length);
+		Collections.addAll(sortedDirectories, snapshots);
+		Collections.sort(sortedDirectories);
+
+		// the sorted set drives both the header and every row of geoip.csv,
+		// so that columns cannot get out of sync
+		SortedSet<String> allCountryCodes = new TreeSet<String>();
+		Collections.addAll(allCountryCodes, ALL_COUNTRY_CODES);
+
+		// directory names look like 2008-11-19T023703Z; only the first 15
+		// characters (up to the minutes) are parsed
+		SimpleDateFormat snapshotFormat = new SimpleDateFormat(
+				"yyyy-MM-dd HHmm");
+		snapshotFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+		// data structure to hold parsed data; it is only written to and
+		// trimmed below
+		SortedMap<Long, Networkstatus> networkstatuses = new TreeMap<Long, Networkstatus>();
+
+		BufferedWriter runningOut = new BufferedWriter(new FileWriter(
+				runningOutFile, false));
+		BufferedWriter bandwidthOut = new BufferedWriter(new FileWriter(
+				bandwidthOutFile, false));
+		BufferedWriter geoipOut = new BufferedWriter(new FileWriter(
+				geoipOutFile, false));
+		try {
+			runningOut.write("time,total,running\n");
+			bandwidthOut.write("time,bandwidth\n");
+			StringBuilder geoipHeader = new StringBuilder("time");
+			for (String cc : allCountryCodes) {
+				geoipHeader.append(',').append(cc);
+			}
+			geoipHeader.append('\n');
+			geoipOut.write(geoipHeader.toString());
+
+			System.out.print("Progress: |--------------------------------------"
+					+ "------------|\n" + "           ");
+			int printedDots = 0;
+			int numSnapshots = sortedDirectories.size();
+			int parseProgress = 0;
+			long currentTimestamp = -1L;
+			for (File subdirectory : sortedDirectories) {
+
+				// check if files are where we expect them
+				if (!subdirectory.isDirectory()) {
+					System.err.println(subdirectory.getName()
+							+ " is not a directory.");
+					System.exit(1);
+				}
+				File networkstatusFile = new File(subdirectory,
+						"networkstatus-bridges");
+				File bridgeDescriptorsFile = new File(subdirectory,
+						"bridge-descriptors");
+				File cachedExtrainfoFile = new File(subdirectory,
+						"cached-extrainfo");
+				File cachedExtrainfoFileNew = new File(subdirectory,
+						"cached-extrainfo.new");
+				if (!networkstatusFile.exists()) {
+					System.err.println("Missing networkstatus-bridges file in "
+							+ subdirectory.getAbsolutePath());
+					System.exit(1);
+				}
+
+				// extract the timestamp and round it down to its 30-minute
+				// slot
+				String directoryName = subdirectory.getName();
+				java.util.Date parsed = null;
+				if (directoryName.length() >= 15) {
+					parsed = snapshotFormat.parse(directoryName.substring(0,
+							15).replace('T', ' '), new ParsePosition(0));
+				}
+				if (parsed == null) {
+					System.err.println(directoryName
+							+ " does not start with a timestamp like "
+							+ "2008-11-19T023703Z.");
+					System.exit(1);
+				}
+				long timestamp = parsed.getTime() / SNAPSHOT_INTERVAL_MILLIS
+						* SNAPSHOT_INTERVAL_MILLIS;
+
+				// read network status, descriptors and extrainfos and link
+				// them via their digests
+				Networkstatus networkstatus = new Networkstatus();
+				networkstatus.entries = new HashSet<Router>();
+				networkstatuses.put(timestamp, networkstatus);
+				readNetworkstatus(networkstatusFile, networkstatus);
+				if (bridgeDescriptorsFile.exists()) {
+					Map<String, Descriptor> descriptors = readDescriptors(bridgeDescriptorsFile);
+					for (Router entry : networkstatus.entries) {
+						Descriptor match = descriptors
+								.get(entry.descriptorDigest);
+						if (match != null) {
+							entry.descriptor = match;
+						}
+					}
+					if (cachedExtrainfoFile.exists()) {
+						Map<String, Extrainfo> extrainfos = readExtrainfos(
+								cachedExtrainfoFile, cachedExtrainfoFileNew);
+						for (Descriptor descriptor : descriptors.values()) {
+							Extrainfo match = extrainfos
+									.get(descriptor.extrainfoDigest);
+							if (match != null) {
+								descriptor.extrainfo = match;
+							}
+						}
+					}
+				}
+
+				// if we are missing some snapshots, add NA's for every slot
+				// between the previous snapshot and this one
+				if (currentTimestamp != -1L) {
+					for (long missing = currentTimestamp
+							+ SNAPSHOT_INTERVAL_MILLIS; missing < timestamp; missing += SNAPSHOT_INTERVAL_MILLIS) {
+						runningOut.write(missing + ",NA,NA\n");
+						bandwidthOut.write(missing + ",NA\n");
+						// one NA per country column, matching the header
+						StringBuilder naRow = new StringBuilder();
+						naRow.append(missing);
+						for (int i = 0; i < allCountryCodes.size(); i++) {
+							naRow.append(",NA");
+						}
+						naRow.append('\n');
+						geoipOut.write(naRow.toString());
+					}
+				}
+				// resynchronize on the actual snapshot time, so that two
+				// snapshots in the same slot do not shift later gap detection
+				currentTimestamp = timestamp;
+
+				// aggregate data for this snapshot
+				int running = 0;
+				int total = 0;
+				long totalBandwidth = 0L;
+				Map<String, Integer> geoipClients = new HashMap<String, Integer>();
+				for (Router router : networkstatus.entries) {
+					total++;
+					if (!router.running) {
+						continue;
+					}
+					running++;
+					if (router.descriptor == null
+							|| router.descriptor.extrainfo == null) {
+						continue;
+					}
+					Extrainfo extrainfo = router.descriptor.extrainfo;
+					totalBandwidth += averageBandwidth(extrainfo);
+					if (extrainfo.geoipClientOrigins != null
+							&& extrainfo.geoipClientOrigins.length() > 3) {
+						addGeoipClients(extrainfo.geoipClientOrigins,
+								geoipClients);
+					}
+				}
+
+				// write data for this snapshot
+				runningOut.write(timestamp + "," + total + "," + running
+						+ "\n");
+				bandwidthOut.write(timestamp + "," + totalBandwidth + "\n");
+				StringBuilder ccBuilder = new StringBuilder();
+				ccBuilder.append(timestamp);
+				for (String cc : allCountryCodes) {
+					Integer clients = geoipClients.get(cc);
+					ccBuilder.append(',').append(
+							clients == null ? 0 : clients.intValue());
+				}
+				ccBuilder.append('\n');
+				geoipOut.write(ccBuilder.toString());
+
+				// periodically free old snapshots: once more than 144 are
+				// held, drop the oldest until 96 remain
+				if (networkstatuses.size() > 144) {
+					while (networkstatuses.size() > 96) {
+						networkstatuses.remove(networkstatuses.firstKey());
+					}
+					System.gc();
+				}
+
+				// display progress
+				parseProgress++;
+				while (parseProgress > printedDots * numSnapshots / 50) {
+					System.out.print(".");
+					printedDots++;
+				}
+			}
+		} finally {
+			// close (and thereby flush) all writers, even if one close fails
+			try {
+				runningOut.close();
+			} finally {
+				try {
+					bandwidthOut.close();
+				} finally {
+					geoipOut.close();
+				}
+			}
+		}
+		System.out.println();
+	}
+
+	/**
+	 * Reads a networkstatus-bridges file and adds one Router per "s" line
+	 * to the given network status, combined with the most recent "r" line.
+	 * 
+	 * Only the descriptor digest (fourth field of the "r" line) and the
+	 * Running flag are kept. "s" lines without a usable preceding "r" line
+	 * are skipped.
+	 */
+	private static void readNetworkstatus(File networkstatusFile,
+			Networkstatus networkstatus) throws java.io.IOException {
+		BufferedReader br = new BufferedReader(new FileReader(
+				networkstatusFile));
+		try {
+			String line;
+			String rLine = null;
+			while ((line = br.readLine()) != null) {
+				if (line.startsWith("r ")) {
+					rLine = line;
+				} else if (line.startsWith("s ")) {
+					if (rLine == null) {
+						continue;
+					}
+					String[] splitted = rLine.split(" ");
+					if (splitted.length < 4) {
+						continue;
+					}
+					Router entry = new Router();
+					entry.descriptorDigest = splitted[3];
+					entry.running = line.contains("Running");
+					networkstatus.entries.add(entry);
+				}
+			}
+		} finally {
+			br.close();
+		}
+	}
+
+	/**
+	 * Reads a bridge-descriptors file and returns the contained descriptors,
+	 * keyed by the base64-encoded (without trailing equal signs) SHA-1
+	 * digest over the lines from "router " to "router-signature".
+	 * 
+	 * Lines starting with "@" and lines before the first "router " line are
+	 * ignored.
+	 */
+	private static Map<String, Descriptor> readDescriptors(
+			File bridgeDescriptorsFile) throws java.io.IOException {
+		Map<String, Descriptor> descriptors = new HashMap<String, Descriptor>();
+		BufferedReader br = new BufferedReader(new FileReader(
+				bridgeDescriptorsFile));
+		try {
+			StringBuilder digestBuilder = new StringBuilder();
+			Descriptor d = null;
+			String line;
+			while ((line = br.readLine()) != null) {
+				if (line.startsWith("@")) {
+					continue;
+				}
+				if (line.startsWith("router ")) {
+					digestBuilder = new StringBuilder();
+					d = new Descriptor();
+				} else if (d == null) {
+					// not inside a descriptor yet
+					continue;
+				} else if (line.startsWith("opt extra-info-digest ")) {
+					d.extrainfoDigest = line.substring(22);
+				} else if (line.startsWith("router-signature")) {
+					// calculate hash
+					// NOTE(review): FileReader and getBytes() both use the
+					// platform default charset, so digests of descriptors
+					// with non-ASCII content may not match -- confirm
+					String desc = digestBuilder.toString() + line + "\n";
+					byte[] hash = Crypto.getHash(desc.getBytes());
+					descriptors.put(BaseEncoding
+							.toBase64NoTrailingEqualSigns(hash), d);
+				}
+				digestBuilder.append(line).append('\n');
+			}
+		} finally {
+			br.close();
+		}
+		return descriptors;
+	}
+
+	/**
+	 * Reads the given extra-info files one after the other and returns the
+	 * contained extra-info documents, keyed by the uppercase hex SHA-1
+	 * digest over the lines from "extra-info " to "router-signature".
+	 * 
+	 * Files that do not exist are skipped, as are lines starting with "@"
+	 * and lines before the first "extra-info " line of a file.
+	 */
+	private static Map<String, Extrainfo> readExtrainfos(
+			File cachedExtrainfoFile, File cachedExtrainfoFileNew)
+			throws java.io.IOException {
+		Map<String, Extrainfo> extrainfos = new HashMap<String, Extrainfo>();
+		File[] sources = { cachedExtrainfoFile, cachedExtrainfoFileNew };
+		for (File source : sources) {
+			if (!source.exists()) {
+				continue;
+			}
+			BufferedReader br = new BufferedReader(new FileReader(source));
+			try {
+				StringBuilder digestBuilder = null;
+				Extrainfo e = null;
+				String line;
+				while ((line = br.readLine()) != null) {
+					if (line.startsWith("@")) {
+						continue;
+					}
+					if (line.startsWith("extra-info ")) {
+						digestBuilder = new StringBuilder();
+						e = new Extrainfo();
+					} else if (e == null) {
+						// not inside an extra-info document yet
+						continue;
+					} else if (line.startsWith("write-history ")) {
+						e.writeHistory = line.substring(14);
+					} else if (line.startsWith("read-history ")) {
+						e.readHistory = line.substring(13);
+					} else if (line.startsWith("geoip-start-time ")) {
+						e.geoipStartTime = line.substring(17);
+					} else if (line.startsWith("geoip-client-origins ")) {
+						e.geoipClientOrigins = line.substring(21);
+					} else if (line.startsWith("router-signature")) {
+						// calculate hash
+						String desc = digestBuilder.toString() + line + "\n";
+						byte[] hash = Crypto.getHash(desc.getBytes());
+						extrainfos.put(BaseEncoding.toHex(hash), e);
+					}
+					digestBuilder.append(line).append('\n');
+				}
+			} finally {
+				br.close();
+			}
+		}
+		return extrainfos;
+	}
+
+	/**
+	 * Averages the write and read history of one extra-info document.
+	 * 
+	 * The comma-separated value list is the fifth space-separated field of
+	 * each history line. For every interval the mean of written and read
+	 * bytes is taken; the result is the mean of these values over all
+	 * intervals. Returns 0 if either history is missing, has no value list,
+	 * or if the two lists differ in length.
+	 */
+	private static long averageBandwidth(Extrainfo extrainfo) {
+		if (extrainfo.writeHistory == null || extrainfo.readHistory == null) {
+			return 0L;
+		}
+		String[] write1 = extrainfo.writeHistory.split(" ");
+		String[] read1 = extrainfo.readHistory.split(" ");
+		if (write1.length <= 4 || read1.length <= 4) {
+			return 0L;
+		}
+		String[] write2 = write1[4].split(",");
+		String[] read2 = read1[4].split(",");
+		// split() can return an empty array (e.g. for ","), which would
+		// otherwise lead to a division by zero below
+		if (write2.length == 0 || write2.length != read2.length) {
+			return 0L;
+		}
+		long sum = 0L;
+		for (int i = 0; i < write2.length; i++) {
+			sum += (Long.parseLong(write2[i]) + Long.parseLong(read2[i])) / 2;
+		}
+		return sum / write2.length;
+	}
+
+	/**
+	 * Adds the client counts of one geoip-client-origins value (entries of
+	 * the form cc=num, separated by commas) to the given per-country totals.
+	 * Entries with a count of 0 are ignored.
+	 */
+	private static void addGeoipClients(String geoipClientOrigins,
+			Map<String, Integer> geoipClients) {
+		for (String ent : geoipClientOrigins.split(",")) {
+			String countryCode = ent.substring(0, 2);
+			int clients = Integer.parseInt(ent.substring(3));
+			if (clients == 0) {
+				continue;
+			}
+			// NOTE(review): the subtraction of 4 presumably compensates for
+			// reported counts being rounded up to a multiple of 8 --
+			// confirm against the Tor directory specification
+			Integer previous = geoipClients.get(countryCode);
+			int base = previous == null ? 0 : previous.intValue();
+			geoipClients.put(countryCode, base + clients - 4);
+		}
+	}
+}



More information about the tor-commits mailing list