[tor-commits] [metrics-web/master] Rewrite insert_bwhist in SQL.

karsten at torproject.org karsten at torproject.org
Wed Mar 4 19:55:45 UTC 2020


commit 714b2ee0cc9c3d96afbd87b3d12595d549ae58a1
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Sun Feb 23 10:53:46 2020 +0100

    Rewrite insert_bwhist in SQL.
    
    The old PL/pgSQL version of this function made three lookups in the
    bwhist table to 1) check whether a row already exists, 2) insert or
    update the row, and 3) update the row once again with array sums. The
    new SQL version uses the INSERT ON CONFLICT statement introduced in
    PostgreSQL 9.5 (Debian stretch has 9.6, buster has 11). The
    performance gain measured using metrics-test is impressive, computed
    aggregates are equivalent.
---
 CHANGELOG.md                   |  2 ++
 src/main/sql/bwhist/tordir.sql | 46 +++++++++++++++++-------------------------
 2 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 43b7e75..b1571c6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,8 @@
      extrapolated statistics even if computed network fractions are
      zero, to avoid re-processing these statistics over and over.
    - Extract directory authority bytes per day in the bwhist module.
+   - Rewrite insert_bwhist in SQL to improve performance of the bwhist
+     module.
 
  * Minor changes
    - Make Jetty host configurable.
diff --git a/src/main/sql/bwhist/tordir.sql b/src/main/sql/bwhist/tordir.sql
index fad5d2f..9e7bea9 100644
--- a/src/main/sql/bwhist/tordir.sql
+++ b/src/main/sql/bwhist/tordir.sql
@@ -114,34 +114,24 @@ CREATE OR REPLACE FUNCTION insert_bwhist(
     insert_fingerprint CHARACTER(40), insert_date DATE,
     insert_read BIGINT[], insert_written BIGINT[],
     insert_dirread BIGINT[], insert_dirwritten BIGINT[])
-    RETURNS INTEGER AS $$
-  BEGIN
-  IF (SELECT COUNT(*) FROM bwhist
-      WHERE fingerprint = insert_fingerprint AND date = insert_date) = 0
-      THEN
-    INSERT INTO bwhist (fingerprint, date, read, written, dirread,
-        dirwritten)
-    VALUES (insert_fingerprint, insert_date, insert_read, insert_written,
-        insert_dirread, insert_dirwritten);
-  ELSE
-    BEGIN
-    UPDATE bwhist
-    SET read = array_merge(read, insert_read),
-        written = array_merge(written, insert_written),
-        dirread = array_merge(dirread, insert_dirread),
-        dirwritten = array_merge(dirwritten, insert_dirwritten)
-    WHERE fingerprint = insert_fingerprint AND date = insert_date;
-    END;
-  END IF;
-  UPDATE bwhist
-  SET read_sum = array_sum(read),
-      written_sum = array_sum(written),
-      dirread_sum = array_sum(dirread),
-      dirwritten_sum = array_sum(dirwritten)
-  WHERE fingerprint = insert_fingerprint AND date = insert_date;
-  RETURN 1;
-  END;
-$$ LANGUAGE plpgsql;
+    RETURNS VOID AS $$
+  INSERT INTO bwhist (fingerprint, date, read, read_sum, written, written_sum,
+        dirread, dirread_sum, dirwritten, dirwritten_sum)
+    VALUES (insert_fingerprint, insert_date, insert_read,
+        array_sum(insert_read), insert_written, array_sum(insert_written),
+        insert_dirread, array_sum(insert_dirread), insert_dirwritten,
+        array_sum(insert_dirwritten))
+    ON CONFLICT ON CONSTRAINT bwhist_pkey DO UPDATE
+    SET read = array_merge(bwhist.read, insert_read),
+        read_sum = array_sum(array_merge(bwhist.read, insert_read)),
+        written = array_merge(bwhist.written, insert_written),
+        written_sum = array_sum(array_merge(bwhist.written, insert_written)),
+        dirread = array_merge(bwhist.dirread, insert_dirread),
+        dirread_sum = array_sum(array_merge(bwhist.dirread, insert_dirread)),
+        dirwritten = array_merge(bwhist.dirwritten, insert_dirwritten),
+        dirwritten_sum = array_sum(
+            array_merge(bwhist.dirwritten, insert_dirwritten));
+$$ LANGUAGE SQL;
 
 -- refresh_* functions
 -- The following functions keep their corresponding aggregate tables





More information about the tor-commits mailing list