[tor-commits] [exonerator/master] Fix query that took almost forever.

karsten at torproject.org karsten at torproject.org
Mon Feb 8 20:10:41 UTC 2016


commit 155cfffbb1b7f969bb1331a07da0a95a3de409cf
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Nov 19 12:20:58 2015 +0100

    Fix query that took almost forever.
    
    Queries that match one or more exit list entries took almost forever,
    because they avoided all the fine indexes we created for them.  The
    workaround is to search by /24 of the requested IP address (or /48 in
    case of IPv6) which we already have in the database.  We should want to
    do this anyway to avoid making subsequent queries if there's no hit for
    the exact IP address that was requested.
    
    Fixes #17488.  Thanks, starlight, for all the feedback!
---
 db/exonerator.sql                                  |  63 +++++++----
 .../torproject/exonerator/ExoneraTorServlet.java   | 121 ++++++++++++++-------
 2 files changed, 119 insertions(+), 65 deletions(-)

diff --git a/db/exonerator.sql b/db/exonerator.sql
index fda9071..04dc528 100755
--- a/db/exonerator.sql
+++ b/db/exonerator.sql
@@ -68,6 +68,11 @@ CREATE INDEX statusentry_oraddress24_validafterdate
 CREATE INDEX statusentry_oraddress48_validafterdate
     ON statusentry (oraddress48, DATE(validafter));
 
+-- The index on fingerprint and valid-after time speeds up joins with
+-- exitlistentry.
+CREATE INDEX statusentry_fingerprint_validafter_fingerprint
+    ON statusentry (fingerprint, validafter);
+
 -- The exitlistentry table stores the results of the active testing,
 -- DNS-based exit list for exit nodes.  An entry in this table means that
 -- a relay was scanned at a given time and found to be exiting to the
@@ -187,48 +192,40 @@ CREATE OR REPLACE FUNCTION insert_exitlistentry (
   END;
 $$ LANGUAGE 'plpgsql';
 
--- Search for status entries with the given IP address as onion routing
--- address, plus status entries of relays having an exit list entry with
--- the given IP address as exit address.
-CREATE OR REPLACE FUNCTION search_statusentries_by_address_date (
-    select_address TEXT,
+-- Search for status entries with an IPv4 onion routing address in the
+-- same /24 network as the given hex-encoded IP address prefix and with a
+-- valid-after date within a day of the given date, plus status entries of
+-- relays having an exit list entry with an exit address in the same /24
+-- network as the given hex-encoded IP address prefix and with a scan time
+-- within a day of the given date.
+CREATE OR REPLACE FUNCTION search_by_address24_date (
+    select_address24 TEXT,
     select_date DATE)
     RETURNS TABLE(rawstatusentry BYTEA,
-          descriptor CHARACTER(40),
           validafter TIMESTAMP WITHOUT TIME ZONE,
           fingerprint CHARACTER(40),
-          oraddress TEXT,
-          exitaddress TEXT,
-          scanned TIMESTAMP WITHOUT TIME ZONE) AS $$
+          exitaddress TEXT) AS $$
   -- The first select finds all status entries of relays with the given
   -- IP address as onion routing address.
   SELECT rawstatusentry,
-        descriptor,
         validafter,
         fingerprint,
-        HOST(oraddress),
-        NULL,
         NULL
       FROM statusentry
-      WHERE oraddress = $1::INET
+      WHERE oraddress24 = $1
       AND DATE(validafter) >= $2 - 1
       AND DATE(validafter) <= $2 + 1
   UNION
   -- The second select finds status entries of relays having an exit list
   -- entry with the provided IP address as the exit address.
-  SELECT statusentry.rawstatusentry,
-        statusentry.descriptor,
+  SELECT DISTINCT statusentry.rawstatusentry,
         statusentry.validafter,
         statusentry.fingerprint,
-        HOST(statusentry.oraddress),
-        HOST(exitlistentry.exitaddress),
-        -- Pick only the last scan result that took place in the 24 hours
-        -- before the valid-after time.
-        MAX(exitlistentry.scanned)
+        HOST(exitlistentry.exitaddress)
       FROM statusentry
       JOIN exitlistentry
       ON statusentry.fingerprint = exitlistentry.fingerprint
-      WHERE exitlistentry.exitaddress = $1::INET
+      WHERE exitlistentry.exitaddress24 = $1
       -- Focus on a time period from 1 day before and 1 day after the
       -- given date.  Also include a second day before the given date
       -- for exit lists, because it can take up to 24 hours to scan a
@@ -242,8 +239,28 @@ CREATE OR REPLACE FUNCTION search_statusentries_by_address_date (
       AND statusentry.validafter >= exitlistentry.scanned
       AND statusentry.validafter - exitlistentry.scanned <=
           '1 day'::INTERVAL
-      GROUP BY 1, 2, 3, 4, 5, 6
-  ORDER BY 3, 4, 6;
+  ORDER BY 2, 3, 4;
+$$ LANGUAGE SQL;
+
+-- Search for status entries with an IPv6 onion routing address in the
+-- same /48 network as the given hex-encoded IP address prefix and with a
+-- valid-after date within a day of the given date.
+CREATE OR REPLACE FUNCTION search_by_address48_date (
+    select_address48 TEXT,
+    select_date DATE)
+    RETURNS TABLE(rawstatusentry BYTEA,
+          validafter TIMESTAMP WITHOUT TIME ZONE,
+          fingerprint CHARACTER(40),
+          exitaddress TEXT) AS $$
+  SELECT rawstatusentry,
+        validafter,
+        fingerprint,
+        NULL::TEXT
+      FROM statusentry
+      WHERE oraddress48 = $1
+      AND DATE(validafter) >= $2 - 1
+      AND DATE(validafter) <= $2 + 1
+  ORDER BY 2, 3;
 $$ LANGUAGE SQL;
 
 -- Look up all IPv4 OR and exit addresses in the /24 network of a given
diff --git a/src/org/torproject/exonerator/ExoneraTorServlet.java b/src/org/torproject/exonerator/ExoneraTorServlet.java
index d2b1c97..66793dd 100644
--- a/src/org/torproject/exonerator/ExoneraTorServlet.java
+++ b/src/org/torproject/exonerator/ExoneraTorServlet.java
@@ -196,42 +196,14 @@ public class ExoneraTorServlet extends HttpServlet {
     if (statusEntries.isEmpty()) {
       addressesInSameNetwork = new ArrayList<String>();
       if (!relayIP.contains(":")) {
-        String[] relayIPParts = relayIP.split("\\.");
-        byte[] address24Bytes = new byte[3];
-        address24Bytes[0] = (byte) Integer.parseInt(relayIPParts[0]);
-        address24Bytes[1] = (byte) Integer.parseInt(relayIPParts[1]);
-        address24Bytes[2] = (byte) Integer.parseInt(relayIPParts[2]);
-        String address24 = Hex.encodeHexString(address24Bytes);
-        addressesInSameNetwork = this.queryAddressesInSame24(conn,
-            address24, timestamp);
-      } else {
-        StringBuilder addressHex = new StringBuilder();
-        int start = relayIP.startsWith("::") ? 1 : 0;
-        int end = relayIP.length() - (relayIP.endsWith("::") ? 1 : 0);
-        String[] parts = relayIP.substring(start, end).split(":", -1);
-        for (int i = 0; i < parts.length; i++) {
-          String part = parts[i];
-          if (part.length() == 0) {
-            addressHex.append("x");
-          } else if (part.length() <= 4) {
-            addressHex.append(String.format("%4s", part));
-          } else {
-            addressHex = null;
-            break;
-          }
-        }
-        String address48 = null;
-        if (addressHex != null) {
-          String addressHexString = addressHex.toString();
-          addressHexString = addressHexString.replaceFirst("x",
-              String.format("%" + (33 - addressHexString.length())
-              + "s", "0"));
-          if (!addressHexString.contains("x") &&
-              addressHexString.length() == 32) {
-            address48 = addressHexString.replaceAll(" ", "0").
-                toLowerCase().substring(0, 12);
-          }
+        String address24 = this.convertIPv4ToHex(relayIP).substring(0, 6);
+        if (address24 != null) {
+          addressesInSameNetwork = this.queryAddressesInSame24(conn,
+              address24, timestamp);
         }
+      } else {
+        String address48 = this.convertIPv6ToHex(relayIP).substring(
+            0, 12);
         if (address48 != null) {
           addressesInSameNetwork = this.queryAddressesInSame48(conn,
               address48, timestamp);
@@ -315,6 +287,50 @@ public class ExoneraTorServlet extends HttpServlet {
     return relayIP;
   }
 
+  private String convertIPv4ToHex(String relayIP) {
+    String[] relayIPParts = relayIP.split("\\.");
+    byte[] address24Bytes = new byte[4];
+    for (int i = 0; i < address24Bytes.length; i++) {
+      address24Bytes[i] = (byte) Integer.parseInt(relayIPParts[i]);
+    }
+    String address24 = Hex.encodeHexString(address24Bytes);
+    return address24;
+  }
+
+  private String convertIPv6ToHex(String relayIP) {
+    if (relayIP.startsWith("[") && relayIP.endsWith("]")) {
+      relayIP = relayIP.substring(1, relayIP.length() - 1);
+    }
+    StringBuilder addressHex = new StringBuilder();
+    int start = relayIP.startsWith("::") ? 1 : 0;
+    int end = relayIP.length() - (relayIP.endsWith("::") ? 1 : 0);
+    String[] parts = relayIP.substring(start, end).split(":", -1);
+    for (int i = 0; i < parts.length; i++) {
+      String part = parts[i];
+      if (part.length() == 0) {
+        addressHex.append("x");
+      } else if (part.length() <= 4) {
+        addressHex.append(String.format("%4s", part));
+      } else {
+        addressHex = null;
+        break;
+      }
+    }
+    String address48 = null;
+    if (addressHex != null) {
+      String addressHexString = addressHex.toString();
+      addressHexString = addressHexString.replaceFirst("x",
+          String.format("%" + (33 - addressHexString.length())
+          + "s", "0"));
+      if (!addressHexString.contains("x") &&
+          addressHexString.length() == 32) {
+        address48 = addressHexString.replaceAll(" ", "0").
+            toLowerCase();
+      }
+    }
+    return address48;
+  }
+
   private String parseTimestampParameter(
       String passedTimestampParameter) {
     String timestampStr = "";
@@ -396,20 +412,32 @@ public class ExoneraTorServlet extends HttpServlet {
       String relayIP, long timestamp,
       SimpleDateFormat validAfterTimeFormat) {
     List<String[]> statusEntries = new ArrayList<String[]>();
+    String addressHex = !relayIP.contains(":")
+        ? this.convertIPv4ToHex(relayIP) : this.convertIPv6ToHex(relayIP);
+    if (addressHex == null) {
+      return null;
+    }
+    String address24Or48Hex = !relayIP.contains(":")
+        ? addressHex.substring(0, 6) : addressHex.substring(0, 12);
     try {
-      CallableStatement cs = conn.prepareCall(
-          "{call search_statusentries_by_address_date(?, ?)}");
-      cs.setString(1, relayIP);
+      CallableStatement cs;
+      if (!relayIP.contains(":")) {
+        cs = conn.prepareCall("{call search_by_address24_date(?, ?)}");
+      } else {
+        cs = conn.prepareCall("{call search_by_address48_date(?, ?)}");
+      }
+      cs.setString(1, address24Or48Hex);
       Calendar utcCalendar = Calendar.getInstance(
           TimeZone.getTimeZone("UTC"));
       cs.setDate(2, new java.sql.Date(timestamp), utcCalendar);
       ResultSet rs = cs.executeQuery();
       while (rs.next()) {
         byte[] rawstatusentry = rs.getBytes(1);
-        SortedSet<String> addresses = new TreeSet<String>();
-        long validafter = rs.getTimestamp(3, utcCalendar).getTime();
+        SortedSet<String> addresses = new TreeSet<String>(),
+            addressesHex = new TreeSet<String>();
+        long validafter = rs.getTimestamp(2, utcCalendar).getTime();
         String validAfterString = validAfterTimeFormat.format(validafter);
-        String fingerprint = rs.getString(4).toUpperCase();
+        String fingerprint = rs.getString(3).toUpperCase();
         String nickname = null;
         String exit = "U";
         for (String line : new String(rawstatusentry).split("\n")) {
@@ -417,17 +445,26 @@ public class ExoneraTorServlet extends HttpServlet {
             String[] parts = line.split(" ");
             nickname = parts[1];
             addresses.add(parts[6]);
+            addressesHex.add(this.convertIPv4ToHex(parts[6]));
           } else if (line.startsWith("a ")) {
             String address = line.substring("a ".length(),
                 line.lastIndexOf(":"));
             addresses.add(address);
+            String orAddressHex = !address.contains(":")
+                ? this.convertIPv4ToHex(address)
+                : this.convertIPv6ToHex(address);
+            addressesHex.add(orAddressHex);
           } else if (line.startsWith("p ")) {
             exit = line.equals("p reject 1-65535") ? "N" : "Y";
           }
         }
-        String exitaddress = rs.getString(6);
+        String exitaddress = rs.getString(4);
         if (exitaddress != null && exitaddress.length() > 0) {
           addresses.add(exitaddress);
+          addressesHex.add(this.convertIPv4ToHex(exitaddress));
+        }
+        if (!addressesHex.contains(addressHex)) {
+          continue;
         }
         StringBuilder sb = new StringBuilder();
         int writtenAddresses = 0;





More information about the tor-commits mailing list