commit 8f63e74709cd05cd812e33f95ffe51b05d6d537c Author: Karsten Loesing karsten.loesing@gmx.net Date: Sun Apr 20 00:49:36 2014 +0200
Add a new fingerprint parameter.
By default, we are limiting all responses to relays or bridges that have been running in the past 7 days. The main reason for this is performance.
We're now making an exception for specific lookups by fingerprint or hashed fingerprint. These lookups don't use the node index, but attempt to read the relay's or bridge's details document and extract all information needed to compile a summary document or locate one of the other documents.
There may be more elegant solutions, but we'll probably have to switch to a database design for them.
Implements #11350. --- src/org/torproject/onionoo/DocumentStore.java | 89 +++++++++++++++++++- src/org/torproject/onionoo/RequestHandler.java | 29 ++++++- src/org/torproject/onionoo/ResourceServlet.java | 20 +++-- .../torproject/onionoo/ResourceServletTest.java | 44 +++++++++- web/index.html | 43 +++++++--- 5 files changed, 204 insertions(+), 21 deletions(-)
diff --git a/src/org/torproject/onionoo/DocumentStore.java b/src/org/torproject/onionoo/DocumentStore.java index e8b35b5..0bc2970 100644 --- a/src/org/torproject/onionoo/DocumentStore.java +++ b/src/org/torproject/onionoo/DocumentStore.java @@ -13,6 +13,7 @@ import java.io.FileWriter; import java.io.IOException; import java.util.Arrays; import java.util.Map; +import java.util.Scanner; import java.util.SortedMap; import java.util.SortedSet; import java.util.Stack; @@ -239,7 +240,93 @@ public class DocumentStore { }
private NodeStatus retrieveNodeStatus(String fingerprint) { - return this.cachedNodeStatuses.get(fingerprint); + if (this.cachedNodeStatuses.containsKey(fingerprint)) { + return this.cachedNodeStatuses.get(fingerprint); + } else if (this.listedArchivedNodeStatuses) { + return null; + } + /* TODO This is an evil hack to support looking up relays or bridges + * that haven't been running for a week without having to load + * 500,000 NodeStatus instances into memory. Maybe there's a better + * way? Or do we need to switch to a real database for this? */ + DetailsDocument detailsDocument = this.retrieveDocumentFile( + DetailsDocument.class, false, fingerprint); + if (detailsDocument == null) { + return null; + } + try { + boolean isRelay = true, running = false; + String nickname = null, address = null, countryCode = null, + hostName = null, defaultPolicy = null, portList = null, + aSNumber = null, contact = null; + SortedSet<String> orAddressesAndPorts = new TreeSet<String>(), + exitAddresses = new TreeSet<String>(), + relayFlags = new TreeSet<String>(); + long lastSeenMillis = -1L, consensusWeight = -1L, + lastRdnsLookup = -1L, firstSeenMillis = -1L, + lastChangedAddresses = -1L; + int orPort = 0, dirPort = 0; + Boolean recommendedVersion = null; + Scanner s = new Scanner(detailsDocument.getDocumentString()); + while (s.hasNextLine()) { + String line = s.nextLine(); + if (!line.contains(":")) { + continue; + } + String[] parts = line.split(":", 2); + String key = parts[0], value = parts[1]; + if (key.equals("\"nickname\"")) { + if (!value.startsWith("\"") || !value.endsWith("\",")) { + return null; + } + nickname = value.substring(1, value.length() - 2); + } else if (key.equals("\"hashed_fingerprint\"")) { + isRelay = false; + } else if (key.equals("\"or_addresses\"")) { + if (!value.startsWith("[") || !value.endsWith("],")) { + return null; + } + for (String addressAndPort : + value.substring(1, value.length() - 2).split(",")) { + if (addressAndPort.length() < 2 || + 
!addressAndPort.contains(":")) { + return null; + } + if (address == null) { + address = addressAndPort.substring(1, + addressAndPort.lastIndexOf(":")); + } else { + orAddressesAndPorts.add(addressAndPort); + } + } + } else if (key.equals("\"exit_addresses\"")) { + if (!value.startsWith("[") || !value.endsWith("],")) { + return null; + } + for (String addressPart : + value.substring(1, value.length() - 2).split(",")) { + exitAddresses.add(addressPart); + } + } else if (key.equals("\"running\"")) { + if (value.equals("true,")) { + running = true; + } else if (!value.equals("false,")) { + return null; + } + } + } + NodeStatus nodeStatus = new NodeStatus(isRelay, nickname, + fingerprint, address, orAddressesAndPorts, exitAddresses, + lastSeenMillis, orPort, dirPort, relayFlags, consensusWeight, + countryCode, hostName, lastRdnsLookup, defaultPolicy, portList, + firstSeenMillis, lastChangedAddresses, aSNumber, contact, + recommendedVersion); + nodeStatus.setRunning(running); + return nodeStatus; + } catch (Exception e) { + /* Play it safe and fall back to returning nothing. */ + return null; + } }
private <T extends Document> T retrieveDocumentFile( diff --git a/src/org/torproject/onionoo/RequestHandler.java b/src/org/torproject/onionoo/RequestHandler.java index 1efa5a0..67be625 100644 --- a/src/org/torproject/onionoo/RequestHandler.java +++ b/src/org/torproject/onionoo/RequestHandler.java @@ -15,8 +15,11 @@ public class RequestHandler {
private NodeIndex nodeIndex;
+ private DocumentStore documentStore; + public RequestHandler(NodeIndex nodeIndex) { this.nodeIndex = nodeIndex; + this.documentStore = ApplicationFactory.getDocumentStore(); }
private String resourceType; @@ -45,6 +48,11 @@ public class RequestHandler { this.lookup = lookup; }
+ private String fingerprint; + public void setFingerprint(String fingerprint) { + this.fingerprint = fingerprint; + } + private String country; public void setCountry(String country) { this.country = country; @@ -111,6 +119,7 @@ public class RequestHandler { this.filterByType(); this.filterByRunning(); this.filterBySearchTerms(); + this.filterByLookup(); this.filterByFingerprint(); this.filterByCountryCode(); this.filterByASNumber(); @@ -252,7 +261,7 @@ public class RequestHandler { } }
- private void filterByFingerprint() { + private void filterByLookup() { if (this.lookup == null) { return; } @@ -269,6 +278,24 @@ public class RequestHandler { } }
+ private void filterByFingerprint() { + if (this.fingerprint == null) { + return; + } + this.filteredRelays.clear(); + this.filteredBridges.clear(); + String fingerprint = this.fingerprint; + NodeStatus entry = this.documentStore.retrieve(NodeStatus.class, true, + fingerprint); + if (entry != null) { + if (entry.isRelay()) { + this.filteredRelays.put(fingerprint, entry); + } else { + this.filteredBridges.put(fingerprint, entry); + } + } + } + private void filterByCountryCode() { if (this.country == null) { return; diff --git a/src/org/torproject/onionoo/ResourceServlet.java b/src/org/torproject/onionoo/ResourceServlet.java index f2f3005..1817504 100644 --- a/src/org/torproject/onionoo/ResourceServlet.java +++ b/src/org/torproject/onionoo/ResourceServlet.java @@ -146,9 +146,9 @@ public class ResourceServlet extends HttpServlet { /* Make sure that the request doesn't contain any unknown * parameters. */ Set<String> knownParameters = new HashSet<String>(Arrays.asList(( - "type,running,search,lookup,country,as,flag,first_seen_days," - + "last_seen_days,contact,order,limit,offset,fields"). 
- split(","))); + "type,running,search,lookup,fingerprint,country,as,flag," + + "first_seen_days,last_seen_days,contact,order,limit,offset," + + "fields").split(","))); for (String parameterKey : parameterMap.keySet()) { if (!knownParameters.contains(parameterKey)) { response.sendError(HttpServletResponse.SC_BAD_REQUEST); @@ -190,14 +190,24 @@ public class ResourceServlet extends HttpServlet { rh.setSearch(searchTerms); } if (parameterMap.containsKey("lookup")) { - String fingerprintParameter = this.parseFingerprintParameter( + String lookupParameter = this.parseFingerprintParameter( parameterMap.get("lookup")); + if (lookupParameter == null) { + response.sendError(HttpServletResponse.SC_BAD_REQUEST); + return; + } + String fingerprint = lookupParameter.toUpperCase(); + rh.setLookup(fingerprint); + } + if (parameterMap.containsKey("fingerprint")) { + String fingerprintParameter = this.parseFingerprintParameter( + parameterMap.get("fingerprint")); if (fingerprintParameter == null) { response.sendError(HttpServletResponse.SC_BAD_REQUEST); return; } String fingerprint = fingerprintParameter.toUpperCase(); - rh.setLookup(fingerprint); + rh.setFingerprint(fingerprint); } if (parameterMap.containsKey("country")) { String countryCodeParameter = this.parseCountryCodeParameter( diff --git a/test/org/torproject/onionoo/ResourceServletTest.java b/test/org/torproject/onionoo/ResourceServletTest.java index f5395ed..3b047c6 100644 --- a/test/org/torproject/onionoo/ResourceServletTest.java +++ b/test/org/torproject/onionoo/ResourceServletTest.java @@ -725,7 +725,7 @@ public class ResourceServletTest { }
@Test() - public void testSearchBridgeHashedHashedFingerprint() { + public void testLookupBridgeHashedHashedFingerprint() { this.assertSummaryDocument( "/summary?lookup=CE52F898DB3678BCE33FAC28C92774DE90D618B5", 0, null, 1, new String[] { "gummy" }); @@ -739,6 +739,48 @@ public class ResourceServletTest { }
@Test() + public void testLookupNonExistantFingerprint() { + this.assertSummaryDocument( + "/summary?lookup=0000000000000000000000000000000000000000", 0, + null, 0, null); + } + + @Test() + public void testFingerprintRelayFingerprint() { + this.assertSummaryDocument( + "/summary?fingerprint=000C5F55BD4814B917CC474BD537F1A3B33CCE2A", + 1, new String[] { "TorkaZ" }, 0, null); + } + + @Test() + public void testFingerprintRelayHashedFingerprint() { + this.assertSummaryDocument( + "/summary?fingerprint=5aa14c08d62913e0057a9ad5863b458c0ce94cee", + 0, null, 0, null); + } + + @Test() + public void testFingerprintBridgeHashedFingerprint() { + this.assertSummaryDocument( + "/summary?fingerprint=1FEDE50ED8DBA1DD9F9165F78C8131E4A44AB756", + 0, null, 1, new String[] { "gummy" }); + } + + @Test() + public void testFingerprintBridgeHashedHashedFingerprint() { + this.assertSummaryDocument( + "/summary?fingerprint=CE52F898DB3678BCE33FAC28C92774DE90D618B5", + 0, null, 0, null); + } + + @Test() + public void testFingerprintBridgeOriginalFingerprint() { + this.assertSummaryDocument( + "/summary?fingerprint=0010D49C6DA1E46A316563099F41BFE40B6C7183", + 0, null, 0, null); + } + + @Test() public void testCountryDe() { this.assertSummaryDocument( "/summary?country=de", 1, new String[] { "TorkaZ" }, 0, null); diff --git a/web/index.html b/web/index.html index 29673e9..afe4f88 100644 --- a/web/index.html +++ b/web/index.html @@ -174,8 +174,9 @@ below.</p>
<p> The following methods each return a single document containing zero or -more objects of relays and/or bridges that are currently running or that -have been running in the past week. +more objects of relays and/or bridges. +By default, all relays and bridges are included that have been running in +the past week. </p>
<ul class="api-urls"> @@ -226,8 +227,12 @@ document</a></span>
<h4>Parameters</h4> <p> -Each of the methods above can be parameterized to select only a subset -of relay and/or bridge documents to be included in the response. +Each of the methods can be parameterized to select only a subset of relay +and/or bridge documents that are currently running or that have been +running in the past week. +(The <strong>fingerprint</strong> parameter is special here, because it +allows selecting a specific relay or bridge, regardless of whether it has +been running in the past week.) If multiple parameters are specified, they are combined using a logical AND operation, meaning that only the intersection of relays and bridges matching all parameters is returned. @@ -293,6 +298,22 @@ Lookups are case-insensitive. </li>
<li> +<font color="blue"><b>fingerprint</b></font> +<p> +Return only the relay with the parameter value matching the fingerprint +or the bridge with the parameter value matching the hashed fingerprint. +Fingerprints must consist of 40 hex characters, case does not matter. +This parameter is quite similar to the <strong>lookup</strong> parameter +with two exceptions: +(1) the provided relay fingerprint or hashed bridge fingerprint <i>must +not</i> be hashed (again) using SHA-1; +(2) the response will contain any matching relay or bridge regardless of +whether they have been running in the past week. +<font color="blue">Added on April 20, 2014.</font> +</p> +</li> + +<li> <b>country</b> <p> Return only relays which are located in the @@ -344,8 +365,8 @@ last been seen at least x and at most y days ago. Accepted short forms are "x", "x-", and "-y" which are interpreted as "x-x", "x-infinity", and "0-y". Note that relays and bridges that haven't been running in the past week -are never included in results, so that setting x to 8 or higher will -always lead to an empty result set. +are not included in results, so that setting x to 8 or higher will lead to +an empty result set. </p> </li>
@@ -609,8 +630,6 @@ running in the last bridge network status. </h3>
<p> -Details documents contain all known details of relays and bridges that -have been running in the past week. Details documents are based on the network statuses published by the Tor directories and the server descriptors published by relays and bridges. Details documents contain the following fields: @@ -1387,8 +1406,6 @@ graphs. <p> Bandwidth documents contain aggregate statistics of a relay's or bridge's consumed bandwidth for different time intervals. -Bandwidth documents are available for all relays and bridges that have -been running in the past week. Bandwidth documents contain the following fields: </p>
@@ -1551,7 +1568,7 @@ The specification of graph history objects is similar to those in the Weights documents contain aggregate statistics of a relay's probability to be selected by clients for building paths. Weights documents contain different time intervals and are available for -all relays that have been running in the past week. +relays only. Weights documents contain the following fields: </p>
@@ -1725,7 +1742,7 @@ Clients documents contain estimates of the average number of clients connecting to a bridge every day. There are no clients documents available for relays, just for bridges. Clients documents contain different time intervals and are available for -all bridges that have been running in the past week. +bridges only. Clients documents contain the following fields: </p>
@@ -1894,7 +1911,7 @@ It might be removed in the future without notice.</font> <font color="blue">Added on March 10, 2014.</font> Uptime documents contain fractional uptimes of relays and bridges. Uptime documents contain different time intervals and are available for -all relays and bridges that have been running in the past week. +relays and bridges. Uptime documents contain the following fields: </p>