commit 10eb958d33de920cfbedb2cb9bfebf14f0334a0e Author: Karsten Loesing karsten.loesing@gmx.net Date: Wed Sep 28 10:08:24 2011 +0200
Make a few improvements to ExoneraTor.
- Include exit lists in results along with network status entries. Implements part of #3567.
- Relax the requirement to specify an exact timestamp by also accepting a full day to search relays for.
- Use a new database schema that allows us to remove raw descriptors from the Metrics database in the future.
- Prepare the ExoneraTor database for IPv6 addresses (not supported by Tor or the ExoneraTor database importer or web interface yet).
- Use a single database connection per HTTP request. --- build.xml | 9 + config.template | 7 + db/exonerator.sql | 328 +++++++ etc/context.xml.template | 10 + etc/web.xml | 11 + src/org/torproject/ernie/cron/Configuration.java | 14 + .../ernie/cron/ExoneraTorDatabaseImporter.java | 528 +++++++++++ .../ernie/web/ExoneraTorBetaServlet.java | 991 ++++++++++++++++++++ 8 files changed, 1898 insertions(+), 0 deletions(-)
diff --git a/build.xml b/build.xml index 4b4a7e9..9194ccd 100644 --- a/build.xml +++ b/build.xml @@ -51,6 +51,15 @@ </java> </target>
+ <!-- Import descriptors into the ExoneraTor database. --> + <target name="exonerator" depends="compile"> + <java fork="true" + maxmemory="2048m" + classname="org.torproject.ernie.cron.ExoneraTorDatabaseImporter"> + <classpath refid="classpath"/> + </java> + </target> + <!-- Prepare data for being displayed on the website. --> <target name="run" depends="compile"> <java fork="true" diff --git a/config.template b/config.template index 7936420..e0fa743 100644 --- a/config.template +++ b/config.template @@ -58,4 +58,11 @@ # ## Relative path to directory where to find GetTor stats #GetTorDirectory gettor/ +# +## JDBC string for ExoneraTor database +#ExoneraTorDatabaseJdbc jdbc:postgresql://localhost/exonerator?user=metrics&password=password +# +## Relative path to directory where to find descriptors to import into the +## ExoneraTor database +#ExoneraTorImportDirectory exonerator-import/
diff --git a/db/exonerator.sql b/db/exonerator.sql
new file mode 100755
index 0000000..b3c7c62
--- /dev/null
+++ b/db/exonerator.sql
@@ -0,0 +1,328 @@
+-- Copyright 2011 The Tor Project
+-- See LICENSE for licensing information
+
+-- The descriptor table holds server descriptors that we use for display
+-- purposes and to parse exit policies.
+CREATE TABLE descriptor (
+
+  -- The 40-character lower-case hex string identifies a descriptor
+  -- uniquely and is used to join statusentry and this table.
+  descriptor CHARACTER(40) NOT NULL PRIMARY KEY,
+
+  -- The raw descriptor string is used for display purposes and to check
+  -- whether the relay allowed exiting to a given target or not.
+  rawdescriptor BYTEA NOT NULL
+);
+
+-- The consensus table stores network status consensuses to be looked up
+-- by valid-after time and displayed upon request. A second purpose is
+-- to learn quickly whether the database contains status entries for a
+-- given day or not.
+CREATE TABLE consensus (
+
+  -- The unique valid-after time of the consensus.
+  validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL PRIMARY KEY,
+
+  -- The raw consensus string for display purposes only.
+  rawconsensus BYTEA NOT NULL
+);
+
+-- The statusentry table stores network status consensus entries listing
+-- a relay as running at a certain point in time. Only relays with the
+-- Running flag shall be inserted into this table. If a relay advertises
+-- more than one IP address, there is a distinct entry for each address in
+-- this table.
+CREATE TABLE statusentry (
+
+  -- The valid-after time of the consensus that contains this entry.
+  validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+
+  -- The 40-character lower-case hex string uniquely identifying the
+  -- relay.
+  fingerprint CHARACTER(40) NOT NULL,
+
+  -- The 40-character lower-case hex string that identifies the server
+  -- descriptor published by the relay.
+  descriptor CHARACTER(40) NOT NULL,
+
+  -- The most significant 3 bytes of the relay's onion routing address in
+  -- hex notation. This column contains the /24 network of the IPv4 or
+  -- IPv6 address. The purpose is to quickly reduce query results which
+  -- works surprisingly well.
+  oraddress24 CHARACTER(6) NOT NULL,
+
+  -- The relay's onion routing address. Can be an IPv4 or an IPv6
+  -- address. If a relay advertises more than one address, there are
+  -- multiple entries in this table for the same status entry.
+  oraddress INET NOT NULL,
+
+  -- The raw status entry string as contained in the network status
+  -- consensus for display purposes only.
+  rawstatusentry BYTEA NOT NULL,
+
+  -- A status entry is uniquely identified by its valid-after time, relay
+  -- fingerprint, and onion routing address.
+  CONSTRAINT statusentry_pkey
+      PRIMARY KEY (validafter, fingerprint, oraddress)
+);
+
+-- The index on the exact onion routing address and on the valid-after
+-- date is used to speed up ExoneraTor's query for status entries.
+CREATE INDEX statusentry_oraddress_validafterdate
+    ON statusentry (oraddress, DATE(validafter));
+
+-- The index on the most significant 3 bytes of the relay's onion routing
+-- address and on the valid-after date is used to speed up queries for
+-- other relays in the same /24 network.
+CREATE INDEX statusentry_oraddress24_validafterdate
+    ON statusentry (oraddress24, DATE(validafter));
+
+-- The exitlistentry table stores the results of the active testing,
+-- DNS-based exit list for exit nodes. An entry in this table means that
+-- a relay was scanned at a given time and found to be exiting to the
+-- Internet from a given IP address. This IP address can be different
+-- from the relay's onion routing address if the relay uses more than one
+-- IP address.
+CREATE TABLE exitlistentry (
+
+  -- The 40-character lower-case hex string identifying the relay.
+  fingerprint CHARACTER(40) NOT NULL,
+
+  -- The most significant 3 bytes of the relay's exit address in hex
+  -- notation. This column contains the /24 network of the IPv4 or IPv6
+  -- address. The purpose is to quickly reduce query results.
+  exitaddress24 CHARACTER(6) NOT NULL,
+
+  -- The IP address that the relay uses for exiting to the Internet. If
+  -- the relay uses more than one IP address, there are multiple entries
+  -- in this table.
+  exitaddress INET NOT NULL,
+
+  -- The time when the relay was scanned to find out its exit IP
+  -- address(es).
+  scanned TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+
+  -- The raw exit list entry containing all scan results for a given relay
+  -- for display purposes.
+  rawexitlistentry BYTEA NOT NULL,
+
+  -- An exit list entry is uniquely identified by its scan time, relay
+  -- fingerprint, and exit address.
+  CONSTRAINT exitlistentry_pkey
+      PRIMARY KEY (scanned, fingerprint, exitaddress)
+);
+
+-- The index on the exact exit address and on the scan date is used to
+-- speed up ExoneraTor's query for status entries referencing exit list
+-- entries.
+CREATE INDEX exitlistentry_exitaddress_scanneddate
+    ON exitlistentry (exitaddress, DATE(scanned));
+
+-- The index on the most significant 3 bytes of the relay's exit address
+-- and on the scan date is used to speed up queries for other relays in
+-- the same /24 network.
+CREATE INDEX exitlistentry_exitaddress24_scanneddate
+    ON exitlistentry (exitaddress24, DATE(scanned));
+
+-- Create the plpgsql language, so that we can use it below.
+CREATE LANGUAGE plpgsql;
+
+-- Insert a server descriptor into the descriptor table. Before doing so,
+-- check that there is no descriptor with the same descriptor identifier
+-- in the table yet. Return 1 if the descriptor was inserted, 0
+-- otherwise.
+CREATE OR REPLACE FUNCTION insert_descriptor (
+    insert_descriptor CHARACTER(40),
+    insert_rawdescriptor BYTEA)
+    RETURNS INTEGER AS $$
+  BEGIN
+    -- Look up if the descriptor is already contained in the descriptor
+    -- table. EXISTS stops at the first match instead of counting rows.
+    IF NOT EXISTS (SELECT 1
+        FROM descriptor
+        WHERE descriptor = insert_descriptor) THEN
+      -- Insert the descriptor. Rows in statusentry refer to it by the
+      -- same descriptor identifier; no foreign key is declared.
+      INSERT INTO descriptor (descriptor, rawdescriptor)
+      VALUES (insert_descriptor, insert_rawdescriptor);
+      -- Return 1 for a successfully inserted descriptor.
+      RETURN 1;
+    ELSE
+      -- Return 0 because we didn't change anything.
+      RETURN 0;
+    END IF;
+  END;
+$$ LANGUAGE plpgsql;
+
+-- Insert a status entry into the statusentry table. First check that
+-- this status entry isn't contained in the table yet. It's okay to
+-- insert the same status entry multiple times for different IP addresses
+-- though. Return 1 if it was inserted, 0 otherwise.
+CREATE OR REPLACE FUNCTION insert_statusentry (
+    insert_validafter TIMESTAMP WITHOUT TIME ZONE,
+    insert_fingerprint CHARACTER(40),
+    insert_descriptor CHARACTER(40),
+    insert_oraddress24 CHARACTER(6),
+    insert_oraddress TEXT,
+    insert_rawstatusentry BYTEA)
+    RETURNS INTEGER AS $$
+  BEGIN
+    -- Look up if the status entry is already contained in the statusentry
+    -- table. The lookup columns are exactly the primary key columns.
+    IF NOT EXISTS (SELECT 1
+        FROM statusentry
+        WHERE validafter = insert_validafter
+        AND fingerprint = insert_fingerprint
+        AND oraddress = insert_oraddress::INET) THEN
+      -- Insert the status entry.
+      INSERT INTO statusentry (validafter, fingerprint, descriptor,
+            oraddress24, oraddress, rawstatusentry)
+      VALUES (insert_validafter, insert_fingerprint,
+            insert_descriptor, insert_oraddress24, insert_oraddress::INET,
+            insert_rawstatusentry);
+      -- Return 1 for a successfully inserted status entry.
+      RETURN 1;
+    ELSE
+      -- Return 0 because we already had this status entry.
+      RETURN 0;
+    END IF;
+  END;
+$$ LANGUAGE plpgsql;
+
+-- Insert a consensus into the consensus table. Check that the same
+-- consensus has not been imported before. Return 1 if it was inserted, 0
+-- otherwise.
+CREATE OR REPLACE FUNCTION insert_consensus (
+    insert_validafter TIMESTAMP WITHOUT TIME ZONE,
+    insert_rawconsensus BYTEA)
+    RETURNS INTEGER AS $$
+  BEGIN
+    -- Look up if the consensus is already contained in the consensus
+    -- table.
+    IF NOT EXISTS (SELECT 1
+        FROM consensus
+        WHERE validafter = insert_validafter) THEN
+      -- Insert the consensus.
+      INSERT INTO consensus (validafter, rawconsensus)
+      VALUES (insert_validafter, insert_rawconsensus);
+      -- Return 1 for a successful insert operation.
+      RETURN 1;
+    ELSE
+      -- Return 0 for not inserting the consensus.
+      RETURN 0;
+    END IF;
+  END;
+$$ LANGUAGE plpgsql;
+
+-- Insert an exit list entry into the exitlistentry table. Check that
+-- this entry hasn't been inserted before. It's okay to insert the same
+-- exit list entry multiple times for different exit addresses. Return 1
+-- if the entry was inserted, 0 otherwise.
+CREATE OR REPLACE FUNCTION insert_exitlistentry (
+    insert_fingerprint CHARACTER(40),
+    insert_exitaddress24 CHARACTER(6),
+    insert_exitaddress TEXT,
+    insert_scanned TIMESTAMP WITHOUT TIME ZONE,
+    insert_rawexitlistentry BYTEA)
+    RETURNS INTEGER AS $$
+  BEGIN
+    IF NOT EXISTS (SELECT 1
+        FROM exitlistentry
+        WHERE fingerprint = insert_fingerprint
+        AND exitaddress = insert_exitaddress::INET
+        AND scanned = insert_scanned) THEN
+      -- This exit list entry is not in the database yet. Add it.
+      INSERT INTO exitlistentry (fingerprint, exitaddress24, exitaddress,
+            scanned, rawexitlistentry)
+      VALUES (insert_fingerprint, insert_exitaddress24,
+            insert_exitaddress::INET, insert_scanned,
+            insert_rawexitlistentry);
+      -- Return 1 for a successfully inserted exit list entry.
+      RETURN 1;
+    ELSE
+      -- Return 0 to show that we didn't add anything.
+      RETURN 0;
+    END IF;
+  END;
+$$ LANGUAGE plpgsql;
+
+-- Search for status entries with the given IP address as onion routing
+-- address, plus status entries of relays having an exit list entry with
+-- the given IP address as exit address.
+CREATE OR REPLACE FUNCTION search_statusentries_by_address_date (
+    select_address TEXT,
+    select_date DATE)
+    RETURNS TABLE(rawstatusentry BYTEA,
+          descriptor CHARACTER(40),
+          validafter TIMESTAMP WITHOUT TIME ZONE,
+          fingerprint CHARACTER(40),
+          oraddress TEXT,
+          exitaddress TEXT,
+          scanned TIMESTAMP WITHOUT TIME ZONE) AS $$
+  -- The first select finds all status entries of relays with the given
+  -- IP address as onion routing address.
+  SELECT rawstatusentry,
+        descriptor,
+        validafter,
+        fingerprint,
+        HOST(oraddress),
+        NULL::TEXT,
+        NULL::TIMESTAMP WITHOUT TIME ZONE
+      FROM statusentry
+      WHERE oraddress = $1::INET
+      AND DATE(validafter) >= $2 - 1
+      AND DATE(validafter) <= $2 + 1
+    UNION
+  -- The second select finds status entries of relays having an exit list
+  -- entry with the provided IP address as the exit address.
+  SELECT statusentry.rawstatusentry,
+        statusentry.descriptor,
+        statusentry.validafter,
+        statusentry.fingerprint,
+        HOST(statusentry.oraddress),
+        HOST(exitlistentry.exitaddress),
+        -- Pick only the last scan result that took place in the 24 hours
+        -- before the valid-after time.
+        MAX(exitlistentry.scanned)
+      FROM statusentry
+      JOIN exitlistentry
+      ON statusentry.fingerprint = exitlistentry.fingerprint
+      WHERE exitlistentry.exitaddress = $1::INET
+      -- Focus on a time period from 1 day before and 1 day after the
+      -- given date. Also include a second day before the given date
+      -- for exit lists, because it can take up to 24 hours to scan a
+      -- relay again. We shouldn't miss exit list entries here.
+      AND DATE(exitlistentry.scanned) >= $2 - 2
+      AND DATE(exitlistentry.scanned) <= $2 + 1
+      AND DATE(statusentry.validafter) >= $2 - 1
+      AND DATE(statusentry.validafter) <= $2 + 1
+      -- Consider only exit list scans that took place in the 24 hours
+      -- before the relay was listed in a consensus.
+      AND statusentry.validafter >= exitlistentry.scanned
+      AND statusentry.validafter - exitlistentry.scanned <=
+      '1 day'::INTERVAL
+      GROUP BY 1, 2, 3, 4, 5, 6
+  ORDER BY 3, 4, 6;
+$$ LANGUAGE SQL;
+
+-- Look up all IP addresses in the /24 network of a given address to
+-- suggest other addresses the user may be looking for.
+-- TODO Revisit this function when enabling IPv6.
+CREATE OR REPLACE FUNCTION search_addresses_in_same_24 (
+    select_address24 CHARACTER(6),
+    select_date DATE)
+    RETURNS TABLE(address TEXT) AS $$
+  SELECT HOST(oraddress)
+      FROM statusentry
+      WHERE oraddress24 = $1
+      AND DATE(validafter) >= $2 - 1
+      AND DATE(validafter) <= $2 + 1
+  UNION
+  SELECT HOST(exitaddress)
+      FROM exitlistentry
+      WHERE exitaddress24 = $1
+      AND DATE(scanned) >= $2 - 2
+      AND DATE(scanned) <= $2 + 1
+  ORDER BY 1;
+$$ LANGUAGE SQL;
+
diff --git a/etc/context.xml.template b/etc/context.xml.template
index 00f14fe..152f1de 100644
--- a/etc/context.xml.template
+++ b/etc/context.xml.template
@@ -1,4 +1,14 @@
 <Context cookies="false">
+  <Resource name="jdbc/exonerator"
+            type="javax.sql.DataSource"
+            auth="Container"
+            username="metrics"
+            password="password"
+            driverClassName="org.postgresql.Driver"
+            url="jdbc:postgresql://localhost/exonerator"
+            maxActive="8"
+            maxIdle="4"
+            maxWait="15000"/>
   <Resource name="jdbc/tordir"
             type="javax.sql.DataSource"
             auth="Container"
diff --git a/etc/web.xml b/etc/web.xml
index 5c48bc0..2c6752b 100644
--- a/etc/web.xml
+++ b/etc/web.xml
@@ -266,6 +266,17 @@
   </servlet-mapping>
<servlet> + <servlet-name>ExoneraTorBeta</servlet-name> + <servlet-class> + org.torproject.ernie.web.ExoneraTorBetaServlet + </servlet-class> + </servlet> + <servlet-mapping> + <servlet-name>ExoneraTorBeta</servlet-name> + <url-pattern>/exonerator-beta.html</url-pattern> + </servlet-mapping> + + <servlet> <servlet-name>ServerDescriptor</servlet-name> <servlet-class> org.torproject.ernie.web.ServerDescriptorServlet diff --git a/src/org/torproject/ernie/cron/Configuration.java b/src/org/torproject/ernie/cron/Configuration.java index 4192f55..1c57392 100644 --- a/src/org/torproject/ernie/cron/Configuration.java +++ b/src/org/torproject/ernie/cron/Configuration.java @@ -31,6 +31,10 @@ public class Configuration { private String torperfDirectory = "torperf/"; private boolean processGetTorStats = false; private String getTorDirectory = "gettor/"; + private String exoneraTorDatabaseJdbc = "jdbc:postgresql:" + + "//localhost/exonerator?user=metrics&password=password"; + private String exoneraTorImportDirectory = "exonerator-import/"; + public Configuration() {
/* Initialize logger. */ @@ -93,6 +97,10 @@ public class Configuration { line.split(" ")[1]) != 0; } else if (line.startsWith("GetTorDirectory")) { this.getTorDirectory = line.split(" ")[1]; + } else if (line.startsWith("ExoneraTorDatabaseJdbc")) { + this.exoneraTorDatabaseJdbc = line.split(" ")[1]; + } else if (line.startsWith("ExoneraTorImportDirectory")) { + this.exoneraTorImportDirectory = line.split(" ")[1]; } else { logger.severe("Configuration file contains unrecognized " + "configuration key in line '" + line + "'! Exiting!"); @@ -169,5 +177,11 @@ public class Configuration { public String getGetTorDirectory() { return this.getTorDirectory; } + public String getExoneraTorDatabaseJdbc() { + return this.exoneraTorDatabaseJdbc; + } + public String getExoneraTorImportDirectory() { + return this.exoneraTorImportDirectory; + } }
diff --git a/src/org/torproject/ernie/cron/ExoneraTorDatabaseImporter.java b/src/org/torproject/ernie/cron/ExoneraTorDatabaseImporter.java new file mode 100755 index 0000000..95d894b --- /dev/null +++ b/src/org/torproject/ernie/cron/ExoneraTorDatabaseImporter.java @@ -0,0 +1,528 @@ +/* Copyright 2011 The Tor Project + * See LICENSE for licensing information */ +package org.torproject.ernie.cron; + +import java.io.*; +import java.sql.*; +import java.text.*; +import java.util.*; +import org.apache.commons.codec.binary.*; +import org.apache.commons.codec.digest.*; + +/* Import Tor descriptors into the ExoneraTor database. */ +public class ExoneraTorDatabaseImporter { + + /* Main function controlling the parsing process. */ + public static void main(String[] args) { + readConfiguration(); + openDatabaseConnection(); + prepareDatabaseStatements(); + createLockFile(); + readImportHistoryToMemory(); + parseDescriptors(); + writeImportHistoryToDisk(); + closeDatabaseConnection(); + deleteLockFile(); + } + + /* JDBC string of the ExoneraTor database. */ + private static String jdbcString; + + /* Directory from which to import descriptors. */ + private static String importDirString; + + /* Learn JDBC string and directory to parse descriptors from. */ + private static void readConfiguration() { + Configuration config = new Configuration(); + jdbcString = config.getExoneraTorDatabaseJdbc(); + importDirString = config.getExoneraTorImportDirectory(); + } + + /* Database connection. */ + private static Connection connection; + + /* Open a database connection using the JDBC string in the config. */ + private static void openDatabaseConnection() { + try { + connection = DriverManager.getConnection(jdbcString); + } catch (SQLException e) { + System.out.println("Could not connect to database. Exiting."); + System.exit(1); + } + } + + /* Callable statements to import data into the database. 
*/ + private static CallableStatement insertDescriptorStatement; + private static CallableStatement insertStatusentryStatement; + private static CallableStatement insertConsensusStatement; + private static CallableStatement insertExitlistentryStatement; + + /* Prepare statements for importing data into the database. */ + private static void prepareDatabaseStatements() { + try { + insertDescriptorStatement = connection.prepareCall( + "{call insert_descriptor(?, ?)}"); + insertStatusentryStatement = connection.prepareCall( + "{call insert_statusentry(?, ?, ?, ?, ?, ?)}"); + insertConsensusStatement = connection.prepareCall( + "{call insert_consensus(?, ?)}"); + insertExitlistentryStatement = connection.prepareCall( + "{call insert_exitlistentry(?, ?, ?, ?, ?)}"); + } catch (SQLException e) { + System.out.println("Could not prepare callable statements to " + + "import data into the database. Exiting."); + System.exit(1); + } + } + + /* Create a local lock file to prevent other instances of this import + * tool to run concurrently. */ + private static void createLockFile() { + File lockFile = new File("exonerator-lock"); + try { + if (lockFile.exists()) { + BufferedReader br = new BufferedReader(new FileReader(lockFile)); + long runStarted = Long.parseLong(br.readLine()); + br.close(); + if (System.currentTimeMillis() - runStarted + < 6L * 60L * 60L * 1000L) { + System.out.println("File 'exonerator-lock' is less than 6 " + + "hours old. Exiting."); + System.exit(1); + } else { + System.out.println("File 'exonerator-lock' is at least 6 hours " + + "old. Overwriting and executing anyway."); + } + } + BufferedWriter bw = new BufferedWriter(new FileWriter( + "exonerator-lock")); + bw.append(String.valueOf(System.currentTimeMillis()) + "\n"); + bw.close(); + } catch (IOException e) { + System.out.println("Could not create 'exonerator-lock' file. 
" + + "Exiting."); + System.exit(1); + } + } + + /* Last and next parse histories containing paths of parsed files and + * last modified times. */ + private static Map<String, Long> + lastImportHistory = new HashMap<String, Long>(), + nextImportHistory = new HashMap<String, Long>(); + + /* Read stats/exonerator-import-history file from disk and remember + * locally when files were last parsed. */ + private static void readImportHistoryToMemory() { + File parseHistoryFile = new File("stats", + "exonerator-import-history"); + if (parseHistoryFile.exists()) { + try { + BufferedReader br = new BufferedReader(new FileReader( + parseHistoryFile)); + String line = null; + int lineNumber = 0; + while ((line = br.readLine()) != null) { + lineNumber++; + String[] parts = line.split(","); + if (parts.length != 2) { + System.out.println("File 'stats/exonerator-import-history' " + + "contains a corrupt entry in line " + lineNumber + + ". Ignoring parse history file entirely."); + lastImportHistory.clear(); + return; + } + long lastModified = Long.parseLong(parts[0]); + String filename = parts[1]; + lastImportHistory.put(filename, lastModified); + } + br.close(); + } catch (IOException e) { + System.out.println("Could not read import history. Ignoring."); + lastImportHistory.clear(); + } + } + } + + /* Parse descriptors in the import directory and its subdirectories. */ + private static void parseDescriptors() { + File file = new File(importDirString); + if (!file.exists()) { + System.out.println("File or directory " + importDirString + " does " + + "not exist. Exiting."); + return; + } + Stack<File> files = new Stack<File>(); + files.add(file); + while (!files.isEmpty()) { + file = files.pop(); + if (file.isDirectory()) { + for (File f : file.listFiles()) { + files.add(f); + } + } else { + parseFile(file); + } + } + } + + /* Import a file if it wasn't imported before, and add it to the import + * history for the next execution. 
*/ + private static void parseFile(File file) { + long lastModified = file.lastModified(); + String filename = file.getName(); + nextImportHistory.put(filename, lastModified); + if (!lastImportHistory.containsKey(filename) || + lastImportHistory.get(filename) < lastModified) { + try { + FileInputStream fis = new FileInputStream(file); + BufferedInputStream bis = new BufferedInputStream(fis); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int len; + byte[] bytes = new byte[1024]; + while ((len = bis.read(bytes, 0, 1024)) >= 0) { + baos.write(bytes, 0, len); + } + bis.close(); + byte[] allBytes = baos.toByteArray(); + splitFile(file, allBytes); + } catch (IOException e) { + System.out.println("Could not read '" + file + "' to memory. " + + "Skipping."); + nextImportHistory.remove(filename); + } + } + } + + /* Detect what descriptor type is contained in a file and split it to + * parse the single descriptors. */ + private static void splitFile(File file, byte[] bytes) { + try { + String asciiString = new String(bytes, "US-ASCII"); + BufferedReader br = new BufferedReader(new StringReader( + asciiString)); + String line = br.readLine(); + while (line != null && line.startsWith("@")) { + line = br.readLine(); + } + if (line == null) { + return; + } + br.close(); + String startToken = null; + if (line.startsWith("router ")) { + startToken = "router "; + } else if (line.equals("network-status-version 3")) { + startToken = "network-status-version 3"; + } else if (line.startsWith("ExitNode ")) { + startToken = "ExitNode "; + } else { + System.out.println("Unknown descriptor type in file '" + file + + "'. 
Ignoring."); + return; + } + String splitToken = "\n" + startToken; + int length = bytes.length, start = asciiString.indexOf(startToken); + while (start < length) { + int end = asciiString.indexOf(splitToken, start); + if (end < 0) { + end = length; + } else { + end += 1; + } + byte[] descBytes = new byte[end - start]; + System.arraycopy(bytes, start, descBytes, 0, end - start); + if (startToken.equals("router ")) { + parseServerDescriptor(file, descBytes); + } else if (startToken.equals("network-status-version 3")) { + parseConsensus(file, descBytes); + } else if (startToken.equals("ExitNode ")) { + parseExitList(file, descBytes); + } + start = end; + } + } catch (IOException e) { + System.out.println("Could not parse descriptor '" + file + "'. " + + "Skipping."); + } + } + + /* Date format to parse UTC timestamps. */ + private static SimpleDateFormat parseFormat; + static { + parseFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + parseFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + /* Parse a single server descriptor. */ + private static void parseServerDescriptor(File file, byte[] bytes) { + String ascii = ""; + try { + ascii = new String(bytes, "US-ASCII"); + } catch (UnsupportedEncodingException e) { + /* We know that US-ASCII is a supported encoding. */ + } + String startToken = "router "; + String sigToken = "\nrouter-signature\n"; + int start = ascii.indexOf(startToken); + int sig = ascii.indexOf(sigToken) + sigToken.length(); + String descriptor = null; + if (start >= 0 || sig >= 0 || sig > start) { + byte[] forDigest = new byte[sig - start]; + System.arraycopy(bytes, start, forDigest, 0, sig - start); + descriptor = DigestUtils.shaHex(forDigest); + } + if (descriptor == null) { + System.out.println("Could not calculate descriptor digest. " + + "Skipping."); + return; + } + importDescriptor(descriptor, bytes); + } + + /* Import a single server descriptor into the database. 
*/ + private static void importDescriptor(String descriptor, + byte[] rawDescriptor) { + try { + insertDescriptorStatement.clearParameters(); + insertDescriptorStatement.setString(1, descriptor); + insertDescriptorStatement.setBytes(2, rawDescriptor); + insertDescriptorStatement.execute(); + } catch (SQLException e) { + System.out.println("Could not import descriptor into the " + + "database. Exiting."); + System.exit(1); + } + } + + /* Parse a consensus. */ + private static void parseConsensus(File file, byte[] bytes) { + try { + BufferedReader br = new BufferedReader(new StringReader(new String( + bytes, "US-ASCII"))); + String line, fingerprint = null, descriptor = null, + orAddress24 = null, orAddress = null; + long validAfterMillis = -1L; + StringBuilder rawStatusentryBuilder = null; + boolean isRunning = false; + while ((line = br.readLine()) != null) { + if (line.startsWith("vote-status ") && + !line.equals("vote-status consensus")) { + System.out.println("File '" + file + "' contains network status " + + "*votes*, not network status *consensuses*. Skipping."); + return; + } else if (line.startsWith("valid-after ")) { + String validAfterTime = line.substring("valid-after ".length()); + try { + validAfterMillis = parseFormat.parse(validAfterTime). + getTime(); + } catch (ParseException e) { + System.out.println("Could not parse valid-after timestamp in " + + "'" + file + "'. Skipping."); + return; + } + importConsensus(validAfterMillis, bytes); + } else if (line.startsWith("r ") || + line.equals("directory-footer")) { + if (isRunning) { + byte[] rawStatusentry = rawStatusentryBuilder.toString(). + getBytes(); + importStatusentry(validAfterMillis, fingerprint, descriptor, + orAddress24, orAddress, rawStatusentry); + } + if (line.equals("directory-footer")) { + return; + } + rawStatusentryBuilder = new StringBuilder(line + "\n"); + String[] parts = line.split(" "); + if (parts.length < 9) { + System.out.println("Could not parse r line '" + line + + "'. 
Skipping."); + return; + } + fingerprint = Hex.encodeHexString(Base64.decodeBase64(parts[2] + + "=")).toLowerCase(); + descriptor = Hex.encodeHexString(Base64.decodeBase64(parts[3] + + "=")).toLowerCase(); + orAddress = parts[6]; + /* TODO Extend the following code for IPv6 once Tor supports + * it. */ + String[] orAddressParts = orAddress.split("\."); + byte[] orAddress24Bytes = new byte[3]; + orAddress24Bytes[0] = (byte) Integer.parseInt( + orAddressParts[0]); + orAddress24Bytes[1] = (byte) Integer.parseInt( + orAddressParts[1]); + orAddress24Bytes[2] = (byte) Integer.parseInt( + orAddressParts[2]); + orAddress24 = Hex.encodeHexString(orAddress24Bytes); + } else if (line.startsWith("s ") || line.equals("s")) { + rawStatusentryBuilder.append(line + "\n"); + isRunning = line.contains(" Running"); + } else if (rawStatusentryBuilder != null) { + rawStatusentryBuilder.append(line + "\n"); + } + /* TODO Extend this code to parse additional addresses once that's + * implemented in Tor. */ + } + } catch (IOException e) { + System.out.println("Could not parse consensus. Skipping."); + return; + } + } + + /* UTC calendar for importing timestamps into the database. */ + private static Calendar calendarUTC = Calendar.getInstance( + TimeZone.getTimeZone("UTC")); + + /* Import a single status entry into the database. 
*/ + private static void importStatusentry(long validAfterMillis, + String fingerprint, String descriptor, String orAddress24, + String orAddress, byte[] rawStatusentry) { + try { + insertStatusentryStatement.clearParameters(); + insertStatusentryStatement.setTimestamp(1, + new Timestamp(validAfterMillis), calendarUTC); + insertStatusentryStatement.setString(2, fingerprint); + insertStatusentryStatement.setString(3, descriptor); + insertStatusentryStatement.setString(4, orAddress24); + insertStatusentryStatement.setString(5, orAddress); + insertStatusentryStatement.setBytes(6, rawStatusentry); + insertStatusentryStatement.execute(); + } catch (SQLException e) { + System.out.println("Could not import status entry. Exiting."); + System.exit(1); + } + } + + /* Import a consensus into the database. */ + private static void importConsensus(long validAfterMillis, + byte[] rawConsensus) { + try { + insertConsensusStatement.clearParameters(); + insertConsensusStatement.setTimestamp(1, + new Timestamp(validAfterMillis), calendarUTC); + insertConsensusStatement.setBytes(2, rawConsensus); + insertConsensusStatement.execute(); + } catch (SQLException e) { + System.out.println("Could not import consensus. Exiting."); + System.exit(1); + } + } + + /* Parse an exit list. */ + private static void parseExitList(File file, byte[] bytes) { + try { + BufferedReader br = new BufferedReader(new StringReader(new String( + bytes, "US-ASCII"))); + String fingerprint = null; + Set<String> exitAddressLines = new HashSet<String>(); + StringBuilder rawExitlistentryBuilder = new StringBuilder(); + while (true) { + String line = br.readLine(); + if ((line == null || line.startsWith("ExitNode ")) && + fingerprint != null) { + for (String exitAddressLine : exitAddressLines) { + String[] parts = exitAddressLine.split(" "); + String exitAddress = parts[1]; + /* TODO Extend the following code for IPv6 once the exit list + * format supports it. 
*/ + String[] exitAddressParts = exitAddress.split("\."); + byte[] exitAddress24Bytes = new byte[3]; + exitAddress24Bytes[0] = (byte) Integer.parseInt( + exitAddressParts[0]); + exitAddress24Bytes[1] = (byte) Integer.parseInt( + exitAddressParts[1]); + exitAddress24Bytes[2] = (byte) Integer.parseInt( + exitAddressParts[2]); + String exitAddress24 = Hex.encodeHexString( + exitAddress24Bytes); + String scannedTime = parts[2] + " " + parts[3]; + long scannedMillis = -1L, publishedMillis = -1L; + try { + scannedMillis = parseFormat.parse(scannedTime).getTime(); + } catch (ParseException e) { + System.out.println("Could not parse timestamp in " + + "'" + file + "'. Skipping."); + return; + } + byte[] rawExitlistentry = rawExitlistentryBuilder.toString(). + getBytes(); + importExitlistentry(fingerprint, exitAddress24, exitAddress, + scannedMillis, rawExitlistentry); + } + exitAddressLines.clear(); + rawExitlistentryBuilder = new StringBuilder(); + } + if (line == null) { + break; + } + rawExitlistentryBuilder.append(line + "\n"); + if (line.startsWith("ExitNode ")) { + fingerprint = line.substring("ExitNode ".length()). + toLowerCase(); + } else if (line.startsWith("ExitAddress ")) { + exitAddressLines.add(line); + } + } + br.close(); + } catch (IOException e) { + System.out.println("Could not parse exit list. Skipping."); + return; + } + } + + /* Import an exit list entry into the database. 
*/ + private static void importExitlistentry(String fingerprint, + String exitAddress24, String exitAddress, long scannedMillis, + byte[] rawExitlistentry) { + try { + insertExitlistentryStatement.clearParameters(); + insertExitlistentryStatement.setString(1, fingerprint); + insertExitlistentryStatement.setString(2, exitAddress24); + insertExitlistentryStatement.setString(3, exitAddress); + insertExitlistentryStatement.setTimestamp(4, + new Timestamp(scannedMillis), calendarUTC); + insertExitlistentryStatement.setBytes(5, rawExitlistentry); + insertExitlistentryStatement.execute(); + } catch (SQLException e) { + System.out.println("Could not import exit list entry. Exiting."); + System.exit(1); + } + } + + /* Write parse history from memory to disk for the next execution. */ + private static void writeImportHistoryToDisk() { + File parseHistoryFile = new File("stats/exonerator-import-history"); + try { + BufferedWriter bw = new BufferedWriter(new FileWriter( + parseHistoryFile)); + for (Map.Entry<String, Long> historyEntry : + nextImportHistory.entrySet()) { + bw.write(String.valueOf(historyEntry.getValue()) + "," + + historyEntry.getKey() + "\n"); + } + bw.close(); + } catch (IOException e) { + System.out.println("File 'stats/exonerator-import-history' could " + + "not be written. Ignoring."); + } + } + + /* Close the database connection. */ + private static void closeDatabaseConnection() { + try { + connection.close(); + } catch (SQLException e) { + System.out.println("Could not close database connection. " + + "Ignoring."); + } + } + + /* Delete the exonerator-lock file to allow the next executing of this + * tool. 
*/
+  private static void deleteLockFile() {
+    new File("exonerator-lock").delete(); /* The boolean result of delete() is discarded, so a lock file that could not be removed goes unreported. */
+  }
+}
+
+
diff --git a/src/org/torproject/ernie/web/ExoneraTorBetaServlet.java b/src/org/torproject/ernie/web/ExoneraTorBetaServlet.java
new file mode 100644
index 0000000..c2e7b96
--- /dev/null
+++ b/src/org/torproject/ernie/web/ExoneraTorBetaServlet.java
@@ -0,0 +1,991 @@
+package org.torproject.ernie.web;
+
+import java.io.*;
+import java.math.*;
+import java.sql.*;
+import java.text.*;
+import java.util.*;
+import java.util.logging.*;
+import java.util.regex.*;
+
+import javax.naming.*;
+import javax.servlet.*;
+import javax.servlet.http.*;
+import javax.sql.*;
+
+import org.apache.commons.codec.binary.*;
+import org.apache.commons.lang.*;
+/* Servlet behind the BETA version of the ExoneraTor page.  doGet()
+ * writes the static page header, the forms and lookup results obtained
+ * from the ExoneraTor database, and the static page footer. */
+public class ExoneraTorBetaServlet extends HttpServlet {
+  /* Data source for the ExoneraTor database.  Assigned in init() from
+   * a JNDI lookup; stays null if that lookup fails. */
+  private DataSource ds;
+  /* Logger of this servlet, created in init(). */
+  private Logger logger;
+  /* Creates the logger and looks up the data source registered as
+   * java:comp/env/jdbc/exonerator.  A failed lookup is only logged at
+   * WARNING level; no exception is propagated, so ds is left unset.
+   * NOTE(review): Class.toString() returns "class " followed by the
+   * class name, so the logger name carries that prefix; getName() was
+   * presumably intended -- confirm before changing. */
+  public void init() {
+
+    /* Initialize logger. */
+    this.logger = Logger.getLogger(
+        ExoneraTorBetaServlet.class.toString());
+
+    /* Look up data source. */
+    try {
+      Context cxt = new InitialContext();
+      this.ds = (DataSource) cxt.lookup("java:comp/env/jdbc/exonerator");
+      this.logger.info("Successfully looked up data source.");
+    } catch (NamingException e) {
+      this.logger.log(Level.WARNING, "Could not look up data source", e);
+    }
+  }
+  /* Prints the static top of the page to out: document type, head with
+   * stylesheet and favicon links, banner with navigation links, the
+   * introduction to ExoneraTor, the BETA notice with the list of visible
+   * changes, and the notice about information leaking to the web server.
+   * The main-column div opened here is left open for the caller's
+   * content.  out is not closed.
+   * NOTE(review): IOException is declared, but PrintWriter.println()
+   * does not throw it. */
+  private void writeHeader(PrintWriter out) throws IOException {
+    out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 " +
+        "Transitional//EN\">\n" +
+        "<html>\n" +
+        " <head>\n" +
+        " <title>Tor Metrics Portal: ExoneraTor</title>\n" +
+        " <meta http-equiv="content-type" content="text/html; " +
+        "charset=ISO-8859-1">\n" +
+        " <link href="/css/stylesheet-ltr.css" type="text/css" " +
+        "rel="stylesheet">\n" +
+        " <link href="/images/favicon.ico" " +
+        "type="image/x-icon" rel="shortcut icon">\n" +
+        " </head>\n" +
+        " <body>\n" +
+        " <div class="center">\n" +
+        " <table class="banner" border="0" cellpadding="0" " +
+        "cellspacing="0" summary="">\n" +
+        " <tr>\n" +
+        " <td class="banner-left"><a " +
+        "href="/index.html"><img src="/images/top-left.png" " +
+        "alt="Click to go to home page" width="193" " +
+        "height="79"></a></td>\n" +
+        " <td class="banner-middle">\n" +
+        " <a href="/">Home</a>\n" +
+        " <a href="graphs.html">Graphs</a>\n" +
+        " <a href="research.html">Research</a>\n" +
+        " <a href="status.html">Status</a>\n" +
+        " <br>\n" +
+        " <font size="2">\n" +
+        " <a href="networkstatus.html">Network Status</a>\n" +
+        " <a class="current">ExoneraTor</a>\n" +
+        " <a href="relay-search.html">Relay Search</a>\n" +
+        " <a href="consensus-health.html">Consensus " +
+        "Health</a>\n" +
+        " </font>\n" +
+        " </td>\n" +
+        " <td class="banner-right"></td>\n" +
+        " </tr>\n" +
+        " </table>\n" +
+        " <div class="main-column" style="margin:5; " +
+        "Padding:0;">\n" +
+        " <h2>ExoneraTor</h2>\n" +
+        " <h3>or: a website that tells you whether some IP " +
+        "address was a Tor relay</h3>\n" +
+        " <p>ExoneraTor tells you whether there was a Tor relay " +
+        "running on a given IP address at a given time. ExoneraTor " +
+        "can further find out whether this relay permitted exiting " +
+        "to a given server and/or TCP port. ExoneraTor learns about " +
+        "these facts from parsing the public relay lists and relay " +
+        "descriptors that are collected from the Tor directory " +
+        "authorities and the exit lists collected by TorDNSEL.</p>\n" +
+        " <br>\n" +
+        " <p>This is a <b>BETA</b> version of ExoneraTor. " +
+        "Beware of bugs. The stable version of ExoneraTor is still " +
+        "available <a href="exonerator.html">here</a>. The " +
+        "visible changes in this BETA version are:</p>\n" +
+        " <ul>\n" +
+        " <li>Results now include IP addresses from exit " +
+        "lists, too.</li>\n" +
+        " <li>It's not required anymore to specify an exact " +
+        "timestamp, but now a date is enough.</li>\n" +
+        " </ul><br>\n" +
+        " <p><font color="red"><b>Notice:</b> Note that the " +
+        "information you are providing below may be leaked to anyone " +
+        "who can read the network traffic between you and this web " +
+        "server or who has access to this web server. If you need to " +
+        "keep the IP addresses and incident times confidential, you " +
+        "should download the <a href="tools.html#exonerator">Java " +
+        "or Python version of ExoneraTor</a> and run it on your " +
+        "local machine.</font></p>\n" +
+        " <br>\n");
+  }
+  /* Prints the static bottom of the page to out: closes the content
+   * divs, prints the funding acknowledgement, the trademark notice and
+   * the licence notices, closes body and html, and finally calls
+   * out.close().  Because the writer is closed here, this must be the
+   * last thing written to the response. */
+  private void writeFooter(PrintWriter out) throws IOException {
+    out.println(" <br>\n" +
+        " </div>\n" +
+        " </div>\n" +
+        " <div class="bottom" id="bottom">\n" +
+        " <p>This material is supported in part by the National " +
+        "Science Foundation under Grant No. CNS-0959138. 
Any " + + "opinions, finding, and conclusions or recommendations " + + "expressed in this material are those of the author(s) and " + + "do not necessarily reflect the views of the National " + + "Science Foundation.</p>\n" + + " <p>"Tor" and the "Onion Logo" are <a " + + "href="https://www.torproject.org/docs/trademark-faq.html.en" + + "">registered trademarks</a> of The Tor Project, Inc.</p>\n" + + " <p>Data on this site is freely available under a <a " + + "href="http://creativecommons.org/publicdomain/zero/1.0/%5C%22%3E" + + "CC0 no copyright declaration</a>: To the extent possible " + + "under law, the Tor Project has waived all copyright and " + + "related or neighboring rights in the data. Graphs are " + + "licensed under a <a " + + "href="http://creativecommons.org/licenses/by/3.0/us/%5C%22%3E" + + "Creative Commons Attribution 3.0 United States " + + "License</a>.</p>\n" + + " </div>\n" + + " </body>\n" + + "</html>"); + out.close(); + } + + public void doGet(HttpServletRequest request, + HttpServletResponse response) throws IOException, + ServletException { + + /* Start writing response. */ + PrintWriter out = response.getWriter(); + writeHeader(out); + + /* Open a database connection that we'll use to handle the whole + * request. */ + Connection conn = null; + try { + conn = this.ds.getConnection(); + } catch (SQLException e) { + out.println("<p><font color="red"><b>Warning: </b></font>Unable " + + "to connect to the database. If this problem persists, " + + "please <a href="mailto:tor-assistants@torproject.org">let " + + "us know</a>!</p>\n"); + writeFooter(out); + return; + } + + /* Look up first and last consensus in the database. 
*/ + long firstValidAfter = -1L, lastValidAfter = -1L; + try { + Statement statement = conn.createStatement(); + String query = "SELECT MIN(validafter) AS first, " + + "MAX(validafter) AS last FROM consensus"; + ResultSet rs = statement.executeQuery(query); + if (rs.next()) { + firstValidAfter = rs.getTimestamp(1).getTime(); + lastValidAfter = rs.getTimestamp(2).getTime(); + } + rs.close(); + statement.close(); + } catch (SQLException e) { + /* Looks like we don't have any consensuses. */ + } + if (firstValidAfter < 0L || lastValidAfter < 0L) { + out.println("<p><font color="red"><b>Warning: </b></font>This " + + "server doesn't have any relay lists available. If this " + + "problem persists, please " + + "<a href="mailto:tor-assistants@torproject.org">let us " + + "know</a>!</p>\n"); + writeFooter(out); + try { + conn.close(); + } catch (SQLException e) { + } + return; + } + + out.println("<a name="relay"></a><h3>Was there a Tor relay running " + + "on this IP address?</h3>"); + + /* Parse IP parameter. */ + /* TODO Extend the parsing code to accept IPv6 addresses, too. */ + Pattern ipAddressPattern = Pattern.compile( + "^([01]?\d\d?|2[0-4]\d|25[0-5])\." + + "([01]?\d\d?|2[0-4]\d|25[0-5])\." + + "([01]?\d\d?|2[0-4]\d|25[0-5])\." + + "([01]?\d\d?|2[0-4]\d|25[0-5])$"); + String ipParameter = request.getParameter("ip"); + String relayIP = "", ipWarning = ""; + if (ipParameter != null && ipParameter.length() > 0) { + Matcher ipParameterMatcher = ipAddressPattern.matcher(ipParameter); + if (ipParameterMatcher.matches()) { + String[] ipParts = ipParameter.split("\."); + relayIP = Integer.parseInt(ipParts[0]) + "." + + Integer.parseInt(ipParts[1]) + "." + + Integer.parseInt(ipParts[2]) + "." + + Integer.parseInt(ipParts[3]); + } else { + ipWarning = """ + (ipParameter.length() > 20 ? 
+ StringEscapeUtils.escapeHtml(ipParameter.substring(0, 20)) + + "[...]" : StringEscapeUtils.escapeHtml(ipParameter)) + + "" is not a valid IP address."; + } + } + + /* Parse timestamp parameter. */ + String timestampParameter = request.getParameter("timestamp"); + long timestamp = 0L; + boolean timestampIsDate = false; + String timestampStr = "", timestampWarning = ""; + SimpleDateFormat shortDateTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm"); + shortDateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); + dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + if (timestampParameter != null && timestampParameter.length() > 0) { + try { + if (timestampParameter.split(" ").length == 1) { + timestamp = dateFormat.parse(timestampParameter).getTime(); + timestampStr = dateFormat.format(timestamp); + timestampIsDate = true; + } else { + timestamp = shortDateTimeFormat.parse(timestampParameter). + getTime(); + timestampStr = shortDateTimeFormat.format(timestamp); + } + if (timestamp < firstValidAfter || timestamp > lastValidAfter) { + timestampWarning = "Please pick a date or timestamp between "" + + shortDateTimeFormat.format(firstValidAfter) + "" and "" + + shortDateTimeFormat.format(lastValidAfter) + ""."; + timestamp = 0L; + } + } catch (ParseException e) { + /* We have no way to handle this exception, other than leaving + timestampStr at "". */ + timestampWarning = """ + (timestampParameter.length() > 20 ? + StringEscapeUtils.escapeHtml(timestampParameter. + substring(0, 20)) + "[...]" : + StringEscapeUtils.escapeHtml(timestampParameter)) + + "" is not a valid date or timestamp."; + } + } + + /* If either IP address or timestamp is provided, the other one must + * be provided, too. 
*/ + if (relayIP.length() < 1 && timestampStr.length() > 0 && + ipWarning.length() < 1) { + ipWarning = "Please provide an IP address."; + } + if (relayIP.length() > 0 && timestamp < 1 && + timestampWarning.length() < 1) { + timestampWarning = "Please provide a date or timestamp."; + } + + /* Parse target IP parameter. */ + String targetIP = "", targetPort = "", target = ""; + String[] targetIPParts = null; + String targetAddrParameter = request.getParameter("targetaddr"); + String targetAddrWarning = ""; + if (targetAddrParameter != null && targetAddrParameter.length() > 0) { + Matcher targetAddrParameterMatcher = + ipAddressPattern.matcher(targetAddrParameter); + if (targetAddrParameterMatcher.matches()) { + String[] targetAddrParts = targetAddrParameter.split("\."); + targetIP = Integer.parseInt(targetAddrParts[0]) + "." + + Integer.parseInt(targetAddrParts[1]) + "." + + Integer.parseInt(targetAddrParts[2]) + "." + + Integer.parseInt(targetAddrParts[3]); + target = targetIP; + targetIPParts = targetIP.split("\."); + } else { + targetAddrWarning = """ + (targetAddrParameter.length() > 20 ? + StringEscapeUtils.escapeHtml(targetAddrParameter.substring( + 0, 20)) + "[...]" : StringEscapeUtils.escapeHtml( + targetAddrParameter)) + "" is not a valid IP address."; + } + } + + /* Parse target port parameter. */ + String targetPortParameter = request.getParameter("targetport"); + String targetPortWarning = ""; + if (targetPortParameter != null && targetPortParameter.length() > 0) { + Pattern targetPortPattern = Pattern.compile("\d+"); + if (targetPortParameter.length() < 5 && + targetPortPattern.matcher(targetPortParameter).matches() && + !targetPortParameter.equals("0") && + Integer.parseInt(targetPortParameter) < 65536) { + targetPort = targetPortParameter; + if (target != null) { + target += ":" + targetPort; + } else { + target = targetPort; + } + } else { + targetPortWarning = """ + (targetPortParameter.length() > 8 ? 
+ StringEscapeUtils.escapeHtml(targetPortParameter. + substring(0, 8)) + "[...]" : + StringEscapeUtils.escapeHtml(targetPortParameter)) + + "" is not a valid TCP port."; + } + } + + /* If target port is provided, a target address must be provided, + * too. */ + /* TODO Relax this requirement. */ + if (targetPort.length() > 0 && targetIP.length() < 1 && + targetAddrWarning.length() < 1) { + targetAddrWarning = "Please provide an IP address."; + } + + /* Write form with IP address and timestamp. */ + out.println(" <form action="#relay">\n" + + " <input type="hidden" name="targetaddr" " + + (targetIP.length() > 0 ? " value="" + targetIP + """ : "") + + ">\n" + + " <input type="hidden" name="targetPort"" + + (targetPort.length() > 0 ? " value="" + targetPort + """ : "") + + ">\n" + + " <table>\n" + + " <tr>\n" + + " <td align="right">IP address in question:" + + "</td>\n" + + " <td><input type="text" name="ip"" + + (relayIP.length() > 0 ? " value="" + relayIP + """ + : "") + + ">" + + (ipWarning.length() > 0 ? "<br><font color="red">" + + ipWarning + "</font>" : "") + + "</td>\n" + + " <td><i>(Ex.: 1.2.3.4)</i></td>\n" + + " </tr>\n" + + " <tr>\n" + + " <td align="right">Date or timestamp, in " + + "UTC:</td>\n" + + " <td><input type="text" name="timestamp"" + + (timestampStr.length() > 0 ? " value="" + timestampStr + """ + : "") + + ">" + + (timestampWarning.length() > 0 ? 
"<br><font color="red">" + + timestampWarning + "</font>" : "") + + "</td>\n" + + " <td><i>(Ex.: 2010-01-01 or 2010-01-01 12:00)" + + "</i></td>\n" + + " </tr>\n" + + " <tr>\n" + + " <td></td>\n" + + " <td>\n" + + " <input type="submit">\n" + + " <input type="reset">\n" + + " </td>\n" + + " <td></td>\n" + + " </tr>\n" + + " </table>\n" + + " </form>\n"); + + if (relayIP.length() < 1 || timestamp < 1) { + writeFooter(out); + try { + conn.close(); + } catch (SQLException e) { + } + return; + } + + out.printf("<p>Looking up IP address %s in the relay lists " + + "published ", relayIP); + long timestampFrom, timestampTo; + if (timestampIsDate) { + /* If we only have a date, consider all consensuses published on the + * given date, plus the ones published 3 hours before the given date + * and until 23:59:59. */ + timestampFrom = timestamp - 3L * 60L * 60L * 1000L; + timestampTo = timestamp + (24L * 60L * 60L - 1L) * 1000L; + out.printf("on %s", timestampStr); + } else { + /* If we have an exact timestamp, consider the consensuses published + * in the 3 hours preceding the UTC timestamp. */ + timestampFrom = timestamp - 3L * 60L * 60L * 1000L; + timestampTo = timestamp; + out.printf("between %s and %s UTC", + shortDateTimeFormat.format(timestampFrom), + shortDateTimeFormat.format(timestampTo)); + } + /* If we don't find any relays in the given time interval, also look + * at consensuses published 12 hours before and 12 hours after the + * interval, in case the user got the "UTC" bit wrong. */ + long timestampTooOld = timestampFrom - 12L * 60L * 60L * 1000L; + long timestampTooNew = timestampTo + 12L * 60L * 60L * 1000L; + out.print(" as well as in the relevant exit lists. Clients could " + + "have selected any of these relays to build circuits. 
" + + "You may follow the links to relay lists and relay descriptors " + + "to grep for the lines printed below and confirm that results " + + "are correct.<br>"); + SimpleDateFormat validAfterTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + validAfterTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + String fromValidAfter = validAfterTimeFormat.format(timestampTooOld); + String toValidAfter = validAfterTimeFormat.format(timestampTooNew); + SortedSet<Long> tooOldConsensuses = new TreeSet<Long>(); + SortedSet<Long> relevantConsensuses = new TreeSet<Long>(); + SortedSet<Long> tooNewConsensuses = new TreeSet<Long>(); + try { + Statement statement = conn.createStatement(); + String query = "SELECT validafter FROM consensus " + + "WHERE validafter >= '" + fromValidAfter + + "' AND validafter <= '" + toValidAfter + "'"; + ResultSet rs = statement.executeQuery(query); + while (rs.next()) { + long consensusTime = rs.getTimestamp(1).getTime(); + if (consensusTime < timestampFrom) { + tooOldConsensuses.add(consensusTime); + } else if (consensusTime > timestampTo) { + tooNewConsensuses.add(consensusTime); + } else { + relevantConsensuses.add(consensusTime); + } + } + rs.close(); + statement.close(); + } catch (SQLException e) { + /* Looks like we don't have any consensuses in the requested + * interval. */ + } + SortedSet<Long> allConsensuses = new TreeSet<Long>(); + allConsensuses.addAll(tooOldConsensuses); + allConsensuses.addAll(relevantConsensuses); + allConsensuses.addAll(tooNewConsensuses); + if (allConsensuses.isEmpty()) { + out.println(" <p>No relay lists found!</p>\n" + + " <p>Result is INDECISIVE!</p>\n" + + " <p>We cannot make any statement whether there was " + + "a Tor relay running on IP address " + relayIP + + (timestampIsDate ? " on " : " at ") + timestampStr + "! We " + + "did not find any relevant relay lists at the given time. 
If " + + "you think this is an error on our side, please " + + "<a href="mailto:tor-assistants@torproject.org">contact " + + "us</a>!</p>\n"); + writeFooter(out); + try { + conn.close(); + } catch (SQLException e) { + } + return; + } + + /* Search for status entries with the given IP address as onion + * routing address, plus status entries of relays having an exit list + * entry with the given IP address as exit address. */ + SortedMap<Long, SortedMap<String, String>> statusEntries = + new TreeMap<Long, SortedMap<String, String>>(); + SortedSet<Long> positiveConsensusesNoTarget = new TreeSet<Long>(); + SortedMap<String, Set<Long>> relevantDescriptors = + new TreeMap<String, Set<Long>>(); + try { + CallableStatement cs = conn.prepareCall( + "{call search_statusentries_by_address_date(?, ?)}"); + cs.setString(1, relayIP); + cs.setDate(2, new java.sql.Date(timestamp)); + ResultSet rs = cs.executeQuery(); + while (rs.next()) { + byte[] rawstatusentry = rs.getBytes(1); + String descriptor = rs.getString(2); + long validafter = rs.getTimestamp(3).getTime(); + positiveConsensusesNoTarget.add(validafter); + if (!relevantDescriptors.containsKey(descriptor)) { + relevantDescriptors.put(descriptor, new HashSet<Long>()); + } + relevantDescriptors.get(descriptor).add(validafter); + String fingerprint = rs.getString(4); + boolean orAddressMatches = rs.getString(5).equals(relayIP); + String exitaddress = rs.getString(6); + String rLine = new String(rawstatusentry); + rLine = rLine.substring(0, rLine.indexOf("\n")); + String[] parts = rLine.split(" "); + String htmlString = "r " + parts[1] + " " + parts[2] + " " + + "<a href="serverdesc?desc-id=" + descriptor + "" " + + "target="_blank">" + parts[3] + "</a> " + parts[4] + + " " + parts[5] + " " + (orAddressMatches ? "<b>" : "") + + parts[6] + (orAddressMatches ? 
"</b>" : "") + " " + parts[7] + + " " + parts[8] + "\n"; + if (exitaddress != null && exitaddress.length() > 0) { + long scanned = rs.getTimestamp(7).getTime(); + htmlString += " [ExitAddress <b>" + exitaddress + + "</b> " + validAfterTimeFormat.format(scanned) + + "]\n"; + } + if (!statusEntries.containsKey(validafter)) { + statusEntries.put(validafter, new TreeMap<String, String>()); + } + statusEntries.get(validafter).put(fingerprint, htmlString); + } + rs.close(); + cs.close(); + } catch (SQLException e) { + /* Nothing found. */ + } + + /* Print out what we found. */ + SimpleDateFormat validAfterUrlFormat = new SimpleDateFormat( + "yyyy-MM-dd-HH-mm-ss"); + validAfterUrlFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + out.print("<pre><code>"); + for (long consensus : allConsensuses) { + if (relevantConsensuses.contains(consensus)) { + String validAfterDatetime = validAfterTimeFormat.format( + consensus); + String validAfterString = validAfterUrlFormat.format(consensus); + out.print("valid-after <b>" + + "<a href="consensus?valid-after=" + + validAfterString + "" target="_blank">" + + validAfterDatetime + "</b></a>\n"); + if (statusEntries.containsKey(consensus)) { + for (String htmlString : + statusEntries.get(consensus).values()) { + out.print(htmlString); + } + } + out.print("\n"); + } + } + out.print("</code></pre>"); + if (relevantDescriptors.isEmpty()) { + out.printf(" <p>None found!</p>\n" + + " <p>Result is NEGATIVE with high certainty!</p>\n" + + " <p>We did not find IP " + + "address " + relayIP + " in any of the relay or exit lists " + + "that were published between %s and %s.</p>\n", + dateFormat.format(timestampTooOld), + dateFormat.format(timestampTooNew)); + /* Run another query to find out if there are relays running on + * other IP addresses in the same /24 network and tell the user + * about it. 
*/ + SortedSet<String> addressesInSameNetwork = new TreeSet<String>(); + String[] relayIPParts = relayIP.split("\."); + byte[] address24Bytes = new byte[3]; + address24Bytes[0] = (byte) Integer.parseInt(relayIPParts[0]); + address24Bytes[1] = (byte) Integer.parseInt(relayIPParts[1]); + address24Bytes[2] = (byte) Integer.parseInt(relayIPParts[2]); + String address24 = Hex.encodeHexString(address24Bytes); + try { + CallableStatement cs = conn.prepareCall( + "{call search_addresses_in_same_24 (?, ?)}"); + cs.setString(1, address24); + cs.setDate(2, new java.sql.Date(timestamp)); + ResultSet rs = cs.executeQuery(); + while (rs.next()) { + Map<String, String> resultEntry = new HashMap<String, String>(); + String address = rs.getString(1); + addressesInSameNetwork.add(address); + } + rs.close(); + cs.close(); + } catch (SQLException e) { + /* No other addresses in the same /24 found. */ + } + if (!addressesInSameNetwork.isEmpty()) { + out.print(" <p>The following other IP addresses of Tor " + + "relays in the same /24 network were found in relay and/or " + + "exit lists around the time that could be related to IP " + + "address " + relayIP + ":</p>\n"); + out.print(" <ul>\n"); + for (String s : addressesInSameNetwork) { + out.print(" <li>" + s + "</li>\n"); + } + out.print(" </ul>\n"); + } + writeFooter(out); + try { + conn.close(); + } catch (SQLException e) { + } + return; + } + + /* Print out result. 
*/ + boolean inMostRelevantConsensuses = false, + inOtherRelevantConsensus = false, + inTooOldConsensuses = false, + inTooNewConsensuses = false; + for (long match : positiveConsensusesNoTarget) { + if (timestampIsDate && + dateFormat.format(match).equals(timestampStr)) { + inMostRelevantConsensuses = true; + } else if (!timestampIsDate && + match == relevantConsensuses.last()) { + inMostRelevantConsensuses = true; + } else if (relevantConsensuses.contains(match)) { + inOtherRelevantConsensus = true; + } else if (tooOldConsensuses.contains(match)) { + inTooOldConsensuses = true; + } else if (tooNewConsensuses.contains(match)) { + inTooNewConsensuses = true; + } + } + if (inMostRelevantConsensuses) { + out.print(" <p>Result is POSITIVE with high certainty!" + + "</p>\n" + + " <p>We found one or more relays on IP address " + + relayIP + " in "); + if (timestampIsDate) { + out.print("relay list published on " + timestampStr); + } else { + out.print("the most recent relay list preceding " + timestampStr); + } + out.print(" that clients were likely to know.</p>\n"); + } else { + if (inOtherRelevantConsensus) { + out.println(" <p>Result is POSITIVE " + + "with moderate certainty!</p>\n"); + out.println("<p>We found one or more relays on IP address " + + relayIP + ", but not in "); + if (timestampIsDate) { + out.print("a relay list published on " + timestampStr); + } else { + out.print("the most recent relay list preceding " + timestampStr); + } + out.print(". A possible reason for the relay being missing in a " + + "relay list might be that some of the directory " + + "authorities had difficulties connecting to the relay. 
" + + "However, clients might still have used the relay.</p>\n"); + } else { + out.println(" <p>Result is NEGATIVE " + + "with high certainty!</p>\n"); + out.println(" <p>We did not find any relay on IP address " + + relayIP + + " in the relay lists 3 hours preceding " + timestampStr + + ".</p>\n"); + if (inTooOldConsensuses || inTooNewConsensuses) { + if (inTooOldConsensuses && !inTooNewConsensuses) { + out.println(" <p>Note that we found a matching relay " + + "in relay lists that were published between 15 and 3 " + + "hours before " + timestampStr + ".</p>\n"); + } else if (!inTooOldConsensuses && inTooNewConsensuses) { + out.println(" <p>Note that we found a matching relay " + + "in relay lists that were published up to 12 hours " + + "after " + timestampStr + ".</p>\n"); + } else { + out.println(" <p>Note that we found a matching relay " + + "in relay lists that were published between 15 and 3 " + + "hours before and in relay lists that were published " + + "up to 12 hours after " + timestampStr + ".</p>\n"); + } + if (timestampIsDate) { + out.println("<p>Be sure to try out the previous/next day or " + + "provide an exact timestamp in UTC.</p>"); + } else { + out.println("<p>Make sure that the timestamp you " + + "provided is correctly converted to the UTC " + + "timezone.</p>"); + } + } + /* We didn't find any descriptor. No need to look up targets. */ + writeFooter(out); + try { + conn.close(); + } catch (SQLException e) { + } + return; + } + } + + /* Second part: target */ + out.println("<br><a name="exit"></a><h3>Was this relay configured " + + "to permit exiting to a given target?</h3>"); + + out.println(" <form action="#exit">\n" + + " <input type="hidden" name="timestamp"\n" + + " value="" + timestampStr + "">\n" + + " <input type="hidden" name="ip" " + + "value="" + relayIP + "">\n" + + " <table>\n" + + " <tr>\n" + + " <td align="right">Target address:</td>\n" + + " <td><input type="text" name="targetaddr"" + + (targetIP.length() > 0 ? 
" value="" + targetIP + """ : "") + + "">" + + (targetAddrWarning.length() > 0 ? "<br><font color="red">" + + targetAddrWarning + "</font>" : "") + + "</td>\n" + + " <td><i>(Ex.: 4.3.2.1)</i></td>\n" + + " </tr>\n" + + " <tr>\n" + + " <td align="right">Target port:</td>\n" + + " <td><input type="text" name="targetport"" + + (targetPort.length() > 0 ? " value="" + targetPort + """ + : "") + + ">" + + (targetPortWarning.length() > 0 ? "<br><font color="red">" + + targetPortWarning + "</font>" : "") + + "</td>\n" + + " <td><i>(Ex.: 80)</i></td>\n" + + " </tr>\n" + + " <tr>\n" + + " <td></td>\n" + + " <td>\n" + + " <input type="submit">\n" + + " <input type="reset">\n" + + " </td>\n" + + " <td></td>\n" + + " </tr>\n" + + " </table>\n" + + " </form>\n"); + + if (targetIP.length() < 1) { + writeFooter(out); + try { + conn.close(); + } catch (SQLException e) { + } + return; + } + + /* Parse router descriptors to check exit policies. */ + out.println("<p>Searching the relay descriptors published by the " + + "relay on IP address " + relayIP + " to find out whether this " + + "relay permitted exiting to " + target + ". You may follow the " + + "links above to the relay descriptors and grep them for the " + + "lines printed below to confirm that results are correct.</p>"); + SortedSet<Long> positiveConsensuses = new TreeSet<Long>(); + Set<String> missingDescriptors = new HashSet<String>(); + Set<String> descriptors = relevantDescriptors.keySet(); + for (String descriptor : descriptors) { + byte[] rawDescriptor = null; + try { + String query = "SELECT rawdescriptor FROM descriptor " + + "WHERE descriptor = '" + descriptor + "'"; + Statement statement = conn.createStatement(); + ResultSet rs = statement.executeQuery(query); + if (rs.next()) { + rawDescriptor = rs.getBytes(1); + } + rs.close(); + statement.close(); + } catch (SQLException e) { + /* Consider this descriptors as 'missing'. 
*/ + continue; + } + if (rawDescriptor != null && rawDescriptor.length > 0) { + missingDescriptors.remove(descriptor); + String rawDescriptorString = new String(rawDescriptor, + "US-ASCII"); + try { + BufferedReader br = new BufferedReader( + new StringReader(rawDescriptorString)); + String line = null, routerLine = null, publishedLine = null; + StringBuilder acceptRejectLines = new StringBuilder(); + boolean foundMatch = false; + while ((line = br.readLine()) != null) { + if (line.startsWith("router ")) { + routerLine = line; + } else if (line.startsWith("published ")) { + publishedLine = line; + } else if (line.startsWith("reject ") || + line.startsWith("accept ")) { + if (foundMatch) { + out.println(line); + continue; + } + boolean ruleAccept = line.split(" ")[0].equals("accept"); + String ruleAddress = line.split(" ")[1].split(":")[0]; + if (!ruleAddress.equals("*")) { + if (!ruleAddress.contains("/") && + !ruleAddress.equals(targetIP)) { + /* IP address does not match. */ + acceptRejectLines.append(line + "\n"); + continue; + } + String[] ruleIPParts = ruleAddress.split("/")[0]. + split("\."); + int ruleNetwork = ruleAddress.contains("/") ? + Integer.parseInt(ruleAddress.split("/")[1]) : 32; + for (int i = 0; i < 4; i++) { + if (ruleNetwork == 0) { + break; + } else if (ruleNetwork >= 8) { + if (ruleIPParts[i].equals(targetIPParts[i])) { + ruleNetwork -= 8; + } else { + break; + } + } else { + int mask = 255 ^ 255 >>> ruleNetwork; + if ((Integer.parseInt(ruleIPParts[i]) & mask) == + (Integer.parseInt(targetIPParts[i]) & mask)) { + ruleNetwork = 0; + } + break; + } + } + if (ruleNetwork > 0) { + /* IP address does not match. */ + acceptRejectLines.append(line + "\n"); + continue; + } + } + String rulePort = line.split(" ")[1].split(":")[1]; + if (targetPort.length() < 1 && !ruleAccept && + !rulePort.equals("*")) { + /* With no port given, we only consider reject :* rules as + matching. 
*/ + acceptRejectLines.append(line + "\n"); + continue; + } + if (targetPort.length() > 0 && !rulePort.equals("*") && + rulePort.contains("-")) { + int fromPort = Integer.parseInt(rulePort.split("-")[0]); + int toPort = Integer.parseInt(rulePort.split("-")[1]); + int targetPortInt = Integer.parseInt(targetPort); + if (targetPortInt < fromPort || + targetPortInt > toPort) { + /* Port not contained in interval. */ + continue; + } + } + if (targetPort.length() > 0) { + if (!rulePort.equals("*") && + !rulePort.contains("-") && + !targetPort.equals(rulePort)) { + /* Ports do not match. */ + acceptRejectLines.append(line + "\n"); + continue; + } + } + boolean relevantMatch = false; + for (long match : relevantDescriptors.get(descriptor)) { + if (relevantConsensuses.contains(match)) { + relevantMatch = true; + } + } + if (relevantMatch) { + String[] routerParts = routerLine.split(" "); + out.println("<pre><code>" + routerParts[0] + " " + + routerParts[1] + " <b>" + routerParts[2] + "</b> " + + routerParts[3] + " " + routerParts[4] + " " + + routerParts[5]); + String[] publishedParts = publishedLine.split(" "); + out.println(publishedParts[0] + " <b>" + + publishedParts[1] + " " + publishedParts[2] + + "</b>"); + out.print(acceptRejectLines.toString()); + out.println("<b>" + line + "</b>"); + foundMatch = true; + } + if (ruleAccept) { + positiveConsensuses.addAll( + relevantDescriptors.get(descriptor)); + } + } + } + br.close(); + if (foundMatch) { + out.println("</code></pre>"); + } + } catch (IOException e) { + /* Could not read descriptor string. */ + continue; + } + } + } + + /* Print out result. 
*/ + inMostRelevantConsensuses = false; + inOtherRelevantConsensus = false; + inTooOldConsensuses = false; + inTooNewConsensuses = false; + for (long match : positiveConsensuses) { + if (timestampIsDate && + dateFormat.format(match).equals(timestampStr)) { + inMostRelevantConsensuses = true; + } else if (!timestampIsDate && match == relevantConsensuses.last()) { + inMostRelevantConsensuses = true; + } else if (relevantConsensuses.contains(match)) { + inOtherRelevantConsensus = true; + } else if (tooOldConsensuses.contains(match)) { + inTooOldConsensuses = true; + } else if (tooNewConsensuses.contains(match)) { + inTooNewConsensuses = true; + } + } + if (inMostRelevantConsensuses) { + out.print(" <p>Result is POSITIVE with high certainty!" + + "</p>\n" + + " <p>We found one or more relays on IP address " + + relayIP + " permitting exit to " + target + " in "); + if (timestampIsDate) { + out.print("relay list published on " + timestampStr); + } else { + out.print("the most recent relay list preceding " + timestampStr); + } + out.print(" that clients were likely to know.</p>\n"); + writeFooter(out); + try { + conn.close(); + } catch (SQLException e) { + } + return; + } + boolean resultIndecisive = target.length() > 0 + && !missingDescriptors.isEmpty(); + if (resultIndecisive) { + out.println(" <p>Result is INDECISIVE!</p>\n" + + " <p>At least one referenced descriptor could not be " + + "found. This is a rare case, but one that (apparently) " + + "happens. We cannot make any good statement about exit " + + "relays without these descriptors. 
The following descriptors " + + "are missing:</p>"); + for (String desc : missingDescriptors) + out.println(" <p>" + desc + "</p>\n"); + } + if (inOtherRelevantConsensus) { + if (!resultIndecisive) { + out.println(" <p>Result is POSITIVE " + + "with moderate certainty!</p>\n"); + } + out.println("<p>We found one or more relays on IP address " + + relayIP + " permitting exit to " + target + ", but not in "); + if (timestampIsDate) { + out.print("a relay list published on " + timestampStr); + } else { + out.print("the most recent relay list preceding " + timestampStr); + } + out.print(". A possible reason for the relay being missing in a " + + "relay list might be that some of the directory authorities " + + "had difficulties connecting to the relay. However, clients " + + "might still have used the relay.</p>\n"); + } else { + if (!resultIndecisive) { + out.println(" <p>Result is NEGATIVE " + + "with high certainty!</p>\n"); + } + out.println(" <p>We did not find any relay on IP address " + + relayIP + " permitting exit to " + target + + " in the relay list 3 hours preceding " + timestampStr + + ".</p>\n"); + if (inTooOldConsensuses || inTooNewConsensuses) { + if (inTooOldConsensuses && !inTooNewConsensuses) { + out.println(" <p>Note that we found a matching relay in " + + "relay lists that were published between 15 and 3 " + + "hours before " + timestampStr + ".</p>\n"); + } else if (!inTooOldConsensuses && inTooNewConsensuses) { + out.println(" <p>Note that we found a matching relay in " + + "relay lists that were published up to 12 hours after " + + timestampStr + ".</p>\n"); + } else { + out.println(" <p>Note that we found a matching relay in " + + "relay lists that were published between 15 and 3 " + + "hours before and in relay lists that were published up " + + "to 12 hours after " + timestampStr + ".</p>\n"); + } + if (timestampIsDate) { + out.println("<p>Be sure to try out the previous/next day or " + + "provide an exact timestamp in UTC.</p>"); + } else { 
+ out.println("<p>Make sure that the timestamp you provided is " + + "correctly converted to the UTC timezone.</p>"); + } + } + } + if (target != null) { + if (positiveConsensuses.isEmpty() && + !positiveConsensusesNoTarget.isEmpty()) { + out.println(" <p>Note that although the found relay(s) did " + + "not permit exiting to " + target + ", there have been one " + + "or more relays running at the given time.</p>"); + } + } + try { + conn.close(); + } catch (SQLException e) { + } + writeFooter(out); + } +} +
tor-commits@lists.torproject.org