commit 714b2ee0cc9c3d96afbd87b3d12595d549ae58a1 Author: Karsten Loesing karsten.loesing@gmx.net Date: Sun Feb 23 10:53:46 2020 +0100
Rewrite insert_bwhist in SQL.
The old PL/pgSQL version of this function made three lookups in the bwhist table to 1) check whether a row already exists, 2) insert or update the row, and 3) update the row once again with array sums. The new SQL version uses the INSERT ON CONFLICT statement introduced in PostgreSQL 9.5 (Debian stretch has 9.6, buster has 11). The performance gain measured using metrics-test is impressive, computed aggregates are equivalent. --- CHANGELOG.md | 2 ++ src/main/sql/bwhist/tordir.sql | 46 +++++++++++++++++------------------------- 2 files changed, 20 insertions(+), 28 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md index 43b7e75..b1571c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ extrapolated statistics even if computed network fractions are zero, to avoid re-processing these statistics over and over. - Extract directory authority bytes per day in the bwhist module. + - Rewrite insert_bwhist in SQL to improve performance of the bwhist + module.
* Minor changes - Make Jetty host configurable. diff --git a/src/main/sql/bwhist/tordir.sql b/src/main/sql/bwhist/tordir.sql index fad5d2f..9e7bea9 100644 --- a/src/main/sql/bwhist/tordir.sql +++ b/src/main/sql/bwhist/tordir.sql @@ -114,34 +114,24 @@ CREATE OR REPLACE FUNCTION insert_bwhist( insert_fingerprint CHARACTER(40), insert_date DATE, insert_read BIGINT[], insert_written BIGINT[], insert_dirread BIGINT[], insert_dirwritten BIGINT[]) - RETURNS INTEGER AS $$ - BEGIN - IF (SELECT COUNT(*) FROM bwhist - WHERE fingerprint = insert_fingerprint AND date = insert_date) = 0 - THEN - INSERT INTO bwhist (fingerprint, date, read, written, dirread, - dirwritten) - VALUES (insert_fingerprint, insert_date, insert_read, insert_written, - insert_dirread, insert_dirwritten); - ELSE - BEGIN - UPDATE bwhist - SET read = array_merge(read, insert_read), - written = array_merge(written, insert_written), - dirread = array_merge(dirread, insert_dirread), - dirwritten = array_merge(dirwritten, insert_dirwritten) - WHERE fingerprint = insert_fingerprint AND date = insert_date; - END; - END IF; - UPDATE bwhist - SET read_sum = array_sum(read), - written_sum = array_sum(written), - dirread_sum = array_sum(dirread), - dirwritten_sum = array_sum(dirwritten) - WHERE fingerprint = insert_fingerprint AND date = insert_date; - RETURN 1; - END; -$$ LANGUAGE plpgsql; + RETURNS VOID AS $$ + INSERT INTO bwhist (fingerprint, date, read, read_sum, written, written_sum, + dirread, dirread_sum, dirwritten, dirwritten_sum) + VALUES (insert_fingerprint, insert_date, insert_read, + array_sum(insert_read), insert_written, array_sum(insert_written), + insert_dirread, array_sum(insert_dirread), insert_dirwritten, + array_sum(insert_dirwritten)) + ON CONFLICT ON CONSTRAINT bwhist_pkey DO UPDATE + SET read = array_merge(bwhist.read, insert_read), + read_sum = array_sum(array_merge(bwhist.read, insert_read)), + written = array_merge(bwhist.written, insert_written), + written_sum = array_sum(array_merge(bwhist.written, insert_written)), + dirread = array_merge(bwhist.dirread, insert_dirread), + dirread_sum = array_sum(array_merge(bwhist.dirread, insert_dirread)), + dirwritten = array_merge(bwhist.dirwritten, insert_dirwritten), + dirwritten_sum = array_sum( + array_merge(bwhist.dirwritten, insert_dirwritten)); +$$ LANGUAGE SQL;
-- refresh_* functions -- The following functions keep their corresponding aggregate tables
tor-commits@lists.torproject.org