commit 0d79dbd211e7e7d06e1435cf7d78f1abc3bf4365 Author: Karsten Loesing karsten.loesing@gmx.net Date: Wed Aug 16 09:56:21 2017 +0200
Reduce database query to single request.
Implements #16596. --- .../org/torproject/exonerator/QueryServlet.java | 263 +++++---------------- src/main/resources/db/exonerator.sql | 77 +++--- 2 files changed, 96 insertions(+), 244 deletions(-)
diff --git a/src/main/java/org/torproject/exonerator/QueryServlet.java b/src/main/java/org/torproject/exonerator/QueryServlet.java index 873a53a..903c9d2 100644 --- a/src/main/java/org/torproject/exonerator/QueryServlet.java +++ b/src/main/java/org/torproject/exonerator/QueryServlet.java @@ -11,7 +11,6 @@ import java.sql.CallableStatement; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; -import java.sql.Statement; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -231,141 +230,7 @@ public class QueryServlet extends HttpServlet {
private QueryResponse queryDatabase(String relayIp, long timestamp) {
- QueryResponse response = null; - SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); - dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - SimpleDateFormat validAfterTimeFormat = new SimpleDateFormat( - "yyyy-MM-dd HH:mm:ss"); - validAfterTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - - /* Open a database connection that we'll use to handle the whole - * request. */ - long requestedConnection = System.currentTimeMillis(); - Connection conn = this.connectToDatabase(); - if (null != conn) { - - response = new QueryResponse(); - response.queryAddress = relayIp; - response.queryDate = dateFormat.format(timestamp); - - /* Look up first and last date in the database. */ - long[] firstAndLastDates = this.queryFirstAndLastDatesFromDatabase( - conn); - if (null != firstAndLastDates) { - response.firstDateInDatabase = dateFormat.format( - firstAndLastDates[0]); - response.lastDateInDatabase = dateFormat.format(firstAndLastDates[1]); - - /* Consider all consensuses published on or within a day of the given - * date. */ - long timestampFrom = timestamp - 24L * 60L * 60L * 1000L; - long timestampTo = timestamp + 2 * 24L * 60L * 60L * 1000L - 1L; - String fromValidAfter = validAfterTimeFormat.format(timestampFrom); - String toValidAfter = validAfterTimeFormat.format(timestampTo); - SortedSet<Long> relevantConsensuses = - this.queryKnownConsensusValidAfterTimes(conn, fromValidAfter, - toValidAfter); - if (null != relevantConsensuses && !relevantConsensuses.isEmpty()) { - response.relevantStatuses = true; - - /* Search for status entries with the given IP address as onion - * routing address, plus status entries of relays having an exit - * list entry with the given IP address as exit address. */ - List<QueryResponse.Match> matches = this.queryStatusEntries(conn, - relayIp, timestamp, validAfterTimeFormat); - if (!matches.isEmpty()) { - response.matches = matches.toArray(new QueryResponse.Match[0]); - - /* If we didn't find anything, run another query to find out if - * there are relays running on other IP addresses in the same /24 or - * /48 network and tell the user about it. */ - } else { - if (!relayIp.contains(":")) { - String address24 = this.convertIpV4ToHex(relayIp) - .substring(0, 6); - if (address24 != null) { - response.nearbyAddresses = this.queryAddressesInSame24(conn, - address24, timestamp).toArray(new String[0]); - } - } else { - String address48 = this.convertIpV6ToHex(relayIp) - .substring(0, 12); - if (address48 != null) { - response.nearbyAddresses = this.queryAddressesInSame48(conn, - address48, timestamp).toArray(new String[0]); - } - } - } - } - } - - /* Close the database connection. */ - this.closeDatabaseConnection(conn, requestedConnection); - } - return response; - } - - private Connection connectToDatabase() { - Connection conn = null; - try { - conn = this.ds.getConnection(); - } catch (SQLException e) { - this.logger.log(Level.WARNING, "Couldn't connect: " + e.getMessage(), e); - } - return conn; - } - - private long[] queryFirstAndLastDatesFromDatabase(Connection conn) { - long[] firstAndLastDates = null; - try { - Statement statement = conn.createStatement(); - String query = "SELECT DATE(MIN(validafter)) AS first, " - + "DATE(MAX(validafter)) AS last FROM statusentry"; - ResultSet rs = statement.executeQuery(query); - if (rs.next()) { - Calendar utcCalendar = Calendar.getInstance( - TimeZone.getTimeZone("UTC")); - firstAndLastDates = new long[] { - rs.getTimestamp(1, utcCalendar).getTime(), - rs.getTimestamp(2, utcCalendar).getTime() - }; - } - rs.close(); - statement.close(); - } catch (SQLException e) { - /* Looks like we don't have any consensuses. */ - firstAndLastDates = null; - } - return firstAndLastDates; - } - - private SortedSet<Long> queryKnownConsensusValidAfterTimes( - Connection conn, String fromValidAfter, String toValidAfter) { - SortedSet<Long> relevantConsensuses = new TreeSet<>(); - try { - Statement statement = conn.createStatement(); - String query = "SELECT DISTINCT validafter FROM statusentry " - + "WHERE validafter >= '" + fromValidAfter - + "' AND validafter <= '" + toValidAfter + "'"; - ResultSet rs = statement.executeQuery(query); - while (rs.next()) { - long consensusTime = rs.getTimestamp(1).getTime(); - relevantConsensuses.add(consensusTime); - } - rs.close(); - statement.close(); - } catch (SQLException e) { - /* Looks like we don't have any consensuses in the requested - * interval. */ - relevantConsensuses = null; - } - return relevantConsensuses; - } - - private List<QueryResponse.Match> queryStatusEntries(Connection conn, - String relayIp, long timestamp, - SimpleDateFormat validAfterTimeFormat) { - List<QueryResponse.Match> matches = new ArrayList<>(); + /* Convert address to hex. */ String addressHex = !relayIp.contains(":") ? this.convertIpV4ToHex(relayIp) : this.convertIpV6ToHex(relayIp); if (addressHex == null) { @@ -373,20 +238,36 @@ public class QueryServlet extends HttpServlet { } String address24Or48Hex = !relayIp.contains(":") ? addressHex.substring(0, 6) : addressHex.substring(0, 12); + + /* Prepare formatting response items. */ + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); + dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + SimpleDateFormat validAfterTimeFormat = new SimpleDateFormat( + "yyyy-MM-dd HH:mm:ss"); + validAfterTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + + /* Make the database query. */ + SortedSet<Long> allValidAfters = new TreeSet<>(); + List<QueryResponse.Match> matches = new ArrayList<>(); + SortedSet<String> allAddresses = new TreeSet<>(); try { - CallableStatement cs; - if (!relayIp.contains(":")) { - cs = conn.prepareCall("{call search_by_address24_date(?, ?)}"); - } else { - cs = conn.prepareCall("{call search_by_address48_date(?, ?)}"); - } + final long requestedConnection = System.currentTimeMillis(); + Connection conn = this.ds.getConnection(); + CallableStatement cs = conn.prepareCall(String.format( + "{call search_by_address%s_date(?, ?)}", + relayIp.contains(":") ? 48 : 24)); cs.setString(1, address24Or48Hex); Calendar utcCalendar = Calendar.getInstance( TimeZone.getTimeZone("UTC")); cs.setDate(2, new java.sql.Date(timestamp), utcCalendar); ResultSet rs = cs.executeQuery(); while (rs.next()) { + long validafter = rs.getTimestamp(2, utcCalendar).getTime(); + allValidAfters.add(validafter); byte[] rawstatusentry = rs.getBytes(1); + if (null == rawstatusentry) { + continue; + } SortedSet<String> addresses = new TreeSet<>(); SortedSet<String> addressesHex = new TreeSet<>(); String nickname = null; @@ -414,10 +295,10 @@ public class QueryServlet extends HttpServlet { addresses.add(exitaddress); addressesHex.add(this.convertIpV4ToHex(exitaddress)); } + allAddresses.addAll(addresses); if (!addressesHex.contains(addressHex)) { continue; } - long validafter = rs.getTimestamp(2, utcCalendar).getTime(); String validAfterString = validAfterTimeFormat.format(validafter); String fingerprint = rs.getString(3).toUpperCase(); QueryResponse.Match match = new QueryResponse.Match(); @@ -430,73 +311,55 @@ public class QueryServlet extends HttpServlet { } rs.close(); cs.close(); + conn.close(); + this.logger.info("Returned a database connection to the pool " + + "after " + (System.currentTimeMillis() + - requestedConnection) + " millis."); } catch (SQLException e) { /* Nothing found. */ - matches.clear(); + this.logger.log(Level.WARNING, "Database error: " + e.getMessage(), e); + return null; } - return matches; - }
- private List<String> queryAddressesInSame24(Connection conn, - String address24, long timestamp) { - List<String> addressesInSameNetwork = new ArrayList<>(); - try { - CallableStatement cs = conn.prepareCall( - "{call search_addresses_in_same_24 (?, ?)}"); - cs.setString(1, address24); - cs.setDate(2, new java.sql.Date(timestamp)); - ResultSet rs = cs.executeQuery(); - while (rs.next()) { - String address = rs.getString(1); - if (!addressesInSameNetwork.contains(address)) { - addressesInSameNetwork.add(address); + /* Create a query response object. */ + QueryResponse response = new QueryResponse(); + response.queryAddress = relayIp; + response.queryDate = dateFormat.format(timestamp); + if (!allValidAfters.isEmpty()) { + response.firstDateInDatabase = dateFormat.format(allValidAfters.first()); + response.lastDateInDatabase = dateFormat.format(allValidAfters.last()); + response.relevantStatuses = false; + long timestampFrom = timestamp - 24L * 60L * 60L * 1000L; + long timestampTo = timestamp + 2 * 24L * 60L * 60L * 1000L - 1L; + for (long validAfter : allValidAfters) { + if (validAfter >= timestampFrom && validAfter <= timestampTo) { + response.relevantStatuses = true; + break; } } - rs.close(); - cs.close(); - } catch (SQLException e) { - /* No other addresses in the same /24 found. */ - addressesInSameNetwork = null; - } - return addressesInSameNetwork; - } - - private List<String> queryAddressesInSame48(Connection conn, - String address48, long timestamp) { - List<String> addressesInSameNetwork = new ArrayList<>(); - try { - CallableStatement cs = conn.prepareCall( - "{call search_addresses_in_same_48 (?, ?)}"); - cs.setString(1, address48); - cs.setDate(2, new java.sql.Date(timestamp)); - ResultSet rs = cs.executeQuery(); - while (rs.next()) { - String address = rs.getString(1); - if (!addressesInSameNetwork.contains(address)) { - addressesInSameNetwork.add(address); + if (!matches.isEmpty()) { + response.matches = matches.toArray(new QueryResponse.Match[0]); + } else { + List<String> nearbyAddresses = new ArrayList<>(); + for (String nearbyAddress : allAddresses) { + String nearbyAddressHex = !nearbyAddress.contains(":") + ? this.convertIpV4ToHex(nearbyAddress) + : this.convertIpV6ToHex(nearbyAddress); + String nearbyAddress24Or48Hex = !nearbyAddress.contains(":") + ? nearbyAddressHex.substring(0, 6) + : nearbyAddressHex.substring(0, 12); + if (address24Or48Hex.equals(nearbyAddress24Or48Hex)) { + nearbyAddresses.add(nearbyAddress); + } + } + if (!nearbyAddresses.isEmpty()) { + response.nearbyAddresses = nearbyAddresses.toArray(new String[0]); } } - rs.close(); - cs.close(); - } catch (SQLException e) { - /* No other addresses in the same /48 found. */ - addressesInSameNetwork = null; } - return addressesInSameNetwork; - }
- private void closeDatabaseConnection(Connection conn, - long requestedConnection) { - try { - conn.close(); - this.logger.info("Returned a database connection to the pool " - + "after " + (System.currentTimeMillis() - - requestedConnection) + " millis."); - } catch (SQLException e) { - this.logger.log(Level.WARNING, "Couldn't close: " + e.getMessage(), e); - } - return; + /* Return the query response. */ + return response; } - }
diff --git a/src/main/resources/db/exonerator.sql b/src/main/resources/db/exonerator.sql index 84c5af8..85d5d01 100755 --- a/src/main/resources/db/exonerator.sql +++ b/src/main/resources/db/exonerator.sql @@ -200,19 +200,36 @@ CREATE OR REPLACE FUNCTION search_by_address24_date ( exitaddress TEXT) AS $$ BEGIN RETURN QUERY EXECUTE - -- The first select finds all status entries of relays with the given + -- The first and second selects retrieve the first and last valid-after + -- time in the database. + -- + -- The third select retrieves known valid-after times from 1 day before + -- to 1 day after the given date. + -- + -- The fourth select finds all status entries of relays with the given -- IP address as onion routing address. -- - -- The second select finds status entries of relays having an exit list + -- The fifth select finds status entries of relays having an exit list -- entry with the provided IP address as the exit address. - -- In the second select, + -- In the fifth select, -- - Focus on a time period from 1 day before and 1 day after the -- given date. Also include a second day before the given date -- for exit lists, because it can take up to 24 hours to scan a -- relay again. We should not miss exit list entries here. -- - Consider only exit list scans that took place in the 24 hours -- before the relay was listed in a consensus. - 'SELECT rawstatusentry, + 'SELECT NULL::BYTEA, MIN(validafter), NULL::CHARACTER, NULL::TEXT + FROM statusentry + UNION + SELECT NULL::BYTEA, MAX(validafter), NULL::CHARACTER, NULL::TEXT + FROM statusentry + UNION + SELECT DISTINCT NULL::BYTEA, validafter, NULL::CHARACTER, NULL::TEXT + FROM statusentry + WHERE DATE(validafter) >= ''' || select_date || '''::DATE - 1 + AND DATE(validafter) <= ''' || select_date || '''::DATE + 1 + UNION + SELECT rawstatusentry, validafter, fingerprint, NULL @@ -256,7 +273,18 @@ CREATE OR REPLACE FUNCTION search_by_address48_date ( exitaddress TEXT) AS $$ BEGIN RETURN QUERY EXECUTE - 'SELECT rawstatusentry, + 'SELECT NULL::BYTEA, MIN(validafter), NULL::CHARACTER, NULL::TEXT + FROM statusentry + UNION + SELECT NULL::BYTEA, MAX(validafter), NULL::CHARACTER, NULL::TEXT + FROM statusentry + UNION + SELECT DISTINCT NULL::BYTEA, validafter, NULL::CHARACTER, NULL::TEXT + FROM statusentry + WHERE DATE(validafter) >= ''' || select_date || '''::DATE - 1 + AND DATE(validafter) <= ''' || select_date || '''::DATE + 1 + UNION + SELECT rawstatusentry, validafter, fingerprint, NULL::TEXT @@ -268,42 +296,3 @@ CREATE OR REPLACE FUNCTION search_by_address48_date ( END; $$ LANGUAGE plpgsql;
--- Look up all IPv4 OR and exit addresses in the /24 network of a given --- address to suggest other addresses the user may be looking for. -CREATE OR REPLACE FUNCTION search_addresses_in_same_24 ( - select_address24 CHARACTER(6), - select_date DATE) - RETURNS TABLE(addresstext TEXT, - addressinet INET) AS $$ - SELECT HOST(oraddress), - oraddress - FROM statusentry - WHERE oraddress24 = $1 - AND DATE(validafter) >= $2 - 1 - AND DATE(validafter) <= $2 + 1 - UNION - SELECT HOST(exitaddress), - exitaddress - FROM exitlistentry - WHERE exitaddress24 = $1 - AND DATE(scanned) >= $2 - 2 - AND DATE(scanned) <= $2 + 1 - ORDER BY 2; -$$ LANGUAGE SQL; - --- Look up all IPv6 OR addresses in the /48 network of a given address to --- suggest other addresses the user may be looking for. -CREATE OR REPLACE FUNCTION search_addresses_in_same_48 ( - select_address48 CHARACTER(12), - select_date DATE) - RETURNS TABLE(addresstext TEXT, - addressinet INET) AS $$ - SELECT HOST(oraddress), - oraddress - FROM statusentry - WHERE oraddress48 = $1 - AND DATE(validafter) >= $2 - 1 - AND DATE(validafter) <= $2 + 1 - ORDER BY 2; -$$ LANGUAGE SQL; -