commit 13857257ac1df8c4b4d9bc00e0164b5731289762 Author: Karsten Loesing karsten.loesing@gmx.net Date: Thu Jan 12 14:54:16 2012 +0100
Tweak refresh functions to use partitioned table. --- db/tordir.sql | 162 ++++++++++++++++++++++++++++++++++++++------------------- 1 files changed, 109 insertions(+), 53 deletions(-)
diff --git a/db/tordir.sql b/db/tordir.sql index fbb1341..bc93b45 100644 --- a/db/tordir.sql +++ b/db/tordir.sql @@ -438,11 +438,18 @@ $$ LANGUAGE plpgsql;
-- FUNCTION refresh_network_size() CREATE OR REPLACE FUNCTION refresh_network_size() RETURNS INTEGER AS $$ + DECLARE + min_date TIMESTAMP WITHOUT TIME ZONE; + max_date TIMESTAMP WITHOUT TIME ZONE; BEGIN
+ min_date := (SELECT MIN(date) FROM updates); + max_date := (SELECT MAX(date) + 1 FROM updates); + DELETE FROM network_size WHERE date IN (SELECT date FROM updates);
+ EXECUTE ' INSERT INTO network_size (date, avg_running, avg_exit, avg_guard, avg_fast, avg_stable) SELECT date, @@ -460,12 +467,12 @@ CREATE OR REPLACE FUNCTION refresh_network_size() RETURNS INTEGER AS $$ COUNT(NULLIF(isstable, FALSE)) AS isstable FROM statusentry WHERE isrunning = TRUE - AND DATE(validafter) >= (SELECT MIN(date) FROM updates) - AND DATE(validafter) <= (SELECT MAX(date) FROM updates) + AND validafter >= ''' || min_date || ''' + AND validafter < ''' || max_date || ''' AND DATE(validafter) IN (SELECT date FROM updates) GROUP BY DATE(validafter) ) b - NATURAL JOIN relay_statuses_per_day; + NATURAL JOIN relay_statuses_per_day';
RETURN 1; END; @@ -473,11 +480,18 @@ $$ LANGUAGE plpgsql;
-- FUNCTION refresh_network_size_hour() CREATE OR REPLACE FUNCTION refresh_network_size_hour() RETURNS INTEGER AS $$ + DECLARE + min_date TIMESTAMP WITHOUT TIME ZONE; + max_date TIMESTAMP WITHOUT TIME ZONE; BEGIN
+ min_date := (SELECT MIN(date) FROM updates); + max_date := (SELECT MAX(date) + 1 FROM updates); + DELETE FROM network_size_hour WHERE DATE(validafter) IN (SELECT date FROM updates);
+ EXECUTE ' INSERT INTO network_size_hour (validafter, avg_running, avg_exit, avg_guard, avg_fast, avg_stable) SELECT validafter, COUNT(*) AS avg_running, @@ -487,10 +501,10 @@ CREATE OR REPLACE FUNCTION refresh_network_size_hour() RETURNS INTEGER AS $$ COUNT(NULLIF(isstable, FALSE)) AS avg_stable FROM statusentry WHERE isrunning = TRUE - AND DATE(validafter) >= (SELECT MIN(date) FROM updates) - AND DATE(validafter) <= (SELECT MAX(date) FROM updates) + AND validafter >= ''' || min_date || ''' + AND validafter < ''' || max_date || ''' AND DATE(validafter) IN (SELECT date FROM updates) - GROUP BY validafter; + GROUP BY validafter';
RETURN 1; END; @@ -498,27 +512,34 @@ $$ LANGUAGE plpgsql;
-- FUNCTION refresh_relay_countries() CREATE OR REPLACE FUNCTION refresh_relay_countries() RETURNS INTEGER AS $$ + DECLARE + min_date TIMESTAMP WITHOUT TIME ZONE; + max_date TIMESTAMP WITHOUT TIME ZONE; BEGIN
+ min_date := (SELECT MIN(date) FROM updates); + max_date := (SELECT MAX(date) + 1 FROM updates); + DELETE FROM relay_countries WHERE date IN (SELECT date FROM updates);
+ EXECUTE ' INSERT INTO relay_countries (date, country, relays) SELECT date, country, relays / count AS relays FROM ( SELECT DATE(validafter), - COALESCE(lower((geoip_lookup(address)).country), 'zz') + COALESCE(lower((geoip_lookup(address)).country), ''zz'') AS country, COUNT(*) AS relays FROM statusentry WHERE isrunning = TRUE - AND DATE(validafter) >= (SELECT MIN(date) FROM updates) - AND DATE(validafter) <= (SELECT MAX(date) FROM updates) + AND validafter >= ''' || min_date || ''' + AND validafter < ''' || max_date || ''' AND DATE(validafter) IN (SELECT date FROM updates) GROUP BY 1, 2 ) b - NATURAL JOIN relay_statuses_per_day; + NATURAL JOIN relay_statuses_per_day';
RETURN 1; END; @@ -526,11 +547,18 @@ $$ LANGUAGE plpgsql;
-- FUNCTION refresh_relay_platforms() CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$ + DECLARE + min_date TIMESTAMP WITHOUT TIME ZONE; + max_date TIMESTAMP WITHOUT TIME ZONE; BEGIN
+ min_date := (SELECT MIN(date) FROM updates); + max_date := (SELECT MAX(date) + 1 FROM updates); + DELETE FROM relay_platforms WHERE date IN (SELECT date FROM updates);
+ EXECUTE ' INSERT INTO relay_platforms (date, avg_linux, avg_darwin, avg_bsd, avg_windows, avg_other) SELECT date, @@ -541,29 +569,29 @@ CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$ other / count AS avg_other FROM ( SELECT DATE(validafter) AS date, - SUM(CASE WHEN platform LIKE '%Linux%' THEN 1 ELSE 0 END) + SUM(CASE WHEN platform LIKE ''%Linux%'' THEN 1 ELSE 0 END) AS linux, - SUM(CASE WHEN platform LIKE '%Darwin%' THEN 1 ELSE 0 END) + SUM(CASE WHEN platform LIKE ''%Darwin%'' THEN 1 ELSE 0 END) AS darwin, - SUM(CASE WHEN platform LIKE '%BSD%' THEN 1 ELSE 0 END) + SUM(CASE WHEN platform LIKE ''%BSD%'' THEN 1 ELSE 0 END) AS bsd, - SUM(CASE WHEN platform LIKE '%Windows%' THEN 1 ELSE 0 END) + SUM(CASE WHEN platform LIKE ''%Windows%'' THEN 1 ELSE 0 END) AS windows, - SUM(CASE WHEN platform NOT LIKE '%Windows%' - AND platform NOT LIKE '%Darwin%' - AND platform NOT LIKE '%BSD%' - AND platform NOT LIKE '%Linux%' THEN 1 ELSE 0 END) + SUM(CASE WHEN platform NOT LIKE ''%Windows%'' + AND platform NOT LIKE ''%Darwin%'' + AND platform NOT LIKE ''%BSD%'' + AND platform NOT LIKE ''%Linux%'' THEN 1 ELSE 0 END) AS other FROM descriptor RIGHT JOIN statusentry ON statusentry.descriptor = descriptor.descriptor WHERE isrunning = TRUE - AND DATE(validafter) >= (SELECT MIN(date) FROM updates) - AND DATE(validafter) <= (SELECT MAX(date) FROM updates) + AND validafter >= ''' || min_date || ''' + AND validafter < ''' || max_date || ''' AND DATE(validafter) IN (SELECT date FROM updates) GROUP BY DATE(validafter) ) b - NATURAL JOIN relay_statuses_per_day; + NATURAL JOIN relay_statuses_per_day';
RETURN 1; END; @@ -571,11 +599,18 @@ $$ LANGUAGE plpgsql;
-- FUNCTION refresh_relay_versions() CREATE OR REPLACE FUNCTION refresh_relay_versions() RETURNS INTEGER AS $$ + DECLARE + min_date TIMESTAMP WITHOUT TIME ZONE; + max_date TIMESTAMP WITHOUT TIME ZONE; BEGIN
+ min_date := (SELECT MIN(date) FROM updates); + max_date := (SELECT MAX(date) + 1 FROM updates); + DELETE FROM relay_versions WHERE date IN (SELECT date FROM updates);
+ EXECUTE ' INSERT INTO relay_versions (date, version, relays) SELECT date, version, relays / count AS relays @@ -586,12 +621,12 @@ CREATE OR REPLACE FUNCTION refresh_relay_versions() RETURNS INTEGER AS $$ ON descriptor.descriptor = statusentry.descriptor WHERE isrunning = TRUE AND platform IS NOT NULL - AND DATE(validafter) >= (SELECT MIN(date) FROM updates) - AND DATE(validafter) <= (SELECT MAX(date) FROM updates) + AND validafter >= ''' || min_date || ''' + AND validafter < ''' || max_date || ''' AND DATE(validafter) IN (SELECT date FROM updates) GROUP BY 1, 2 ) b - NATURAL JOIN relay_statuses_per_day; + NATURAL JOIN relay_statuses_per_day';
RETURN 1; END; @@ -600,11 +635,18 @@ $$ LANGUAGE plpgsql; -- FUNCTION refresh_total_bandwidth() -- This keeps the table total_bandwidth up-to-date when necessary. CREATE OR REPLACE FUNCTION refresh_total_bandwidth() RETURNS INTEGER AS $$ + DECLARE + min_date TIMESTAMP WITHOUT TIME ZONE; + max_date TIMESTAMP WITHOUT TIME ZONE; BEGIN
+ min_date := (SELECT MIN(date) FROM updates); + max_date := (SELECT MAX(date) + 1 FROM updates); + DELETE FROM total_bandwidth WHERE date IN (SELECT date FROM updates);
+ EXECUTE ' INSERT INTO total_bandwidth (bwavg, bwburst, bwobserved, bwadvertised, date) SELECT (SUM(bandwidthavg) @@ -621,16 +663,14 @@ CREATE OR REPLACE FUNCTION refresh_total_bandwidth() RETURNS INTEGER AS $$ JOIN relay_statuses_per_day ON DATE(validafter) = relay_statuses_per_day.date WHERE isrunning = TRUE - AND DATE(validafter) >= (SELECT MIN(date) FROM updates) - AND DATE(validafter) <= (SELECT MAX(date) FROM updates) + AND validafter >= ''' || min_date || ''' + AND validafter < ''' || max_date || ''' AND DATE(validafter) IN (SELECT date FROM updates) - AND DATE(relay_statuses_per_day.date) >= - (SELECT MIN(date) FROM updates) - AND DATE(relay_statuses_per_day.date) <= - (SELECT MAX(date) FROM updates) + AND relay_statuses_per_day.date >= ''' || min_date || ''' + AND relay_statuses_per_day.date < ''' || max_date || ''' AND DATE(relay_statuses_per_day.date) IN (SELECT date FROM updates) - GROUP BY DATE(validafter), relay_statuses_per_day.count; + GROUP BY DATE(validafter), relay_statuses_per_day.count';
RETURN 1; END; @@ -651,8 +691,16 @@ CREATE OR REPLACE FUNCTION refresh_total_bwhist() RETURNS INTEGER AS $$ $$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION refresh_bwhist_flags() RETURNS INTEGER AS $$ - BEGIN + DECLARE + min_date TIMESTAMP WITHOUT TIME ZONE; + max_date TIMESTAMP WITHOUT TIME ZONE; + BEGIN + + min_date := (SELECT MIN(date) FROM updates); + max_date := (SELECT MAX(date) + 1 FROM updates); + DELETE FROM bwhist_flags WHERE date IN (SELECT date FROM updates); + EXECUTE ' INSERT INTO bwhist_flags (date, isexit, isguard, read, written) SELECT a.date, isexit, isguard, SUM(read_sum) as read, SUM(written_sum) AS written @@ -663,14 +711,14 @@ CREATE OR REPLACE FUNCTION refresh_bwhist_flags() RETURNS INTEGER AS $$ BOOL_OR(isguard) AS isguard FROM statusentry WHERE isrunning = TRUE - AND DATE(validafter) >= (SELECT MIN(date) FROM updates) - AND DATE(validafter) <= (SELECT MAX(date) FROM updates) + AND validafter >= ''' || min_date || ''' + AND validafter < ''' || max_date || ''' AND DATE(validafter) IN (SELECT date FROM updates) GROUP BY 1, 2) a JOIN bwhist ON a.date = bwhist.date AND a.fingerprint = bwhist.fingerprint - GROUP BY 1, 2, 3; + GROUP BY 1, 2, 3'; RETURN 1; END; $$ LANGUAGE plpgsql; @@ -680,11 +728,19 @@ $$ LANGUAGE plpgsql; -- directory request statistics of directory mirrors with bandwidth -- histories. CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$ - BEGIN + DECLARE + min_date TIMESTAMP WITHOUT TIME ZONE; + max_date TIMESTAMP WITHOUT TIME ZONE; + BEGIN + + min_date := (SELECT MIN(date) FROM updates); + max_date := (SELECT MAX(date) + 1 FROM updates); + -- Start by deleting user statistics of the dates we're about to -- regenerate. DELETE FROM user_stats WHERE date IN (SELECT date FROM updates); -- Now insert new user statistics. + EXECUTE ' INSERT INTO user_stats (date, country, r, dw, dr, drw, drr, bw, br, bwd, brd, bwr, brr, bwdr, brdr, bwp, brp, bwn, brn) SELECT @@ -692,7 +748,7 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$ dirreq_stats_by_country.date AS date, dirreq_stats_by_country.country AS country, dirreq_stats_by_country.r AS r, - -- In order to weight the reported directory requests, we're + -- In order to weight the reported directory requests, we are -- counting bytes of relays (except directory authorities) -- matching certain criteria: whether or not they are reporting -- directory requests, whether or not they are reporting @@ -759,14 +815,14 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$ ) dirreq_stats_split GROUP BY 1, 2, 3 ) dirreq_stats_by_date - -- We're only interested in requests by directory mirrors, not + -- We are only interested in requests by directory mirrors, not -- directory authorities, so we exclude all relays with the Authority -- flag. RIGHT JOIN ( SELECT fingerprint, DATE(validafter) AS date FROM statusentry - WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates) - AND DATE(validafter) <= (SELECT MAX(date) FROM updates) + WHERE validafter >= ''' || min_date || ''' + AND validafter < ''' || max_date || ''' AND DATE(validafter) IN (SELECT date FROM updates) AND isauthority IS FALSE GROUP BY 1, 2 @@ -782,8 +838,8 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$ SELECT fingerprint, date, read_sum AS read, written_sum AS written, dirread_sum AS dirread, dirwritten_sum AS dirwritten FROM bwhist - WHERE date >= (SELECT MIN(date) FROM updates) - AND date <= (SELECT MAX(date) FROM updates) + WHERE date >= ''' || min_date || ''' + AND date < ''' || max_date || ''' AND date IN (SELECT date FROM updates) ) bwhist_by_relay ON dirreq_stats_by_country.date = bwhist_by_relay.date @@ -794,8 +850,8 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$ SUM(CASE WHEN dirport > 0 THEN 1 ELSE NULL END) AS opendirport, SUM(CASE WHEN isauthority IS TRUE THEN 1 ELSE NULL END) AS authority FROM statusentry - WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates) - AND DATE(validafter) <= (SELECT MAX(date) FROM updates) + WHERE validafter >= ''' || min_date || ''' + AND validafter < ''' || max_date || ''' AND DATE(validafter) IN (SELECT date FROM updates) GROUP BY 1, 2 ) statusentry_by_relay @@ -805,23 +861,23 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$ -- For each relay, tell if it has reported directory request -- statistics on a given date. Again, we have to take into account -- that statistics intervals cover more than one calendar date in most - -- cases. The exact number of requests isn't relevant here, but only + -- cases. The exact number of requests is not relevant here, but only -- whether the relay reported directory requests or not. SELECT fingerprint, date, 1 AS requests FROM ( SELECT LOWER(source) AS fingerprint, DATE(statsend) AS date FROM dirreq_stats - WHERE DATE(statsend) >= (SELECT MIN(date) FROM updates) - AND DATE(statsend) <= (SELECT MAX(date) FROM updates) + WHERE DATE(statsend) >= ''' || min_date || ''' + AND DATE(statsend) < ''' || max_date || ''' AND DATE(statsend) IN (SELECT date FROM updates) - AND country = 'zy' + AND country = ''zy'' UNION SELECT LOWER(source) AS fingerprint, DATE(statsend) - 1 AS date FROM dirreq_stats - WHERE DATE(statsend) - 1 >= (SELECT MIN(date) FROM updates) - AND DATE(statsend) - 1 <= (SELECT MAX(date) FROM updates) - AND DATE(statsend) - 1 IN (SELECT date FROM updates) - AND country = 'zy' + WHERE DATE(statsend) - 1 >= ''' || min_date || ''' + AND DATE(statsend) - 1 < ''' || max_date || ''' + AND DATE(statsend) IN (SELECT date FROM updates) + AND country = ''zy'' AND EXTRACT(EPOCH FROM DATE(statsend)) - EXTRACT(EPOCH FROM statsend) + seconds > 0 ) dirreq_stats_split @@ -832,7 +888,7 @@ CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$ WHERE dirreq_stats_by_country.country IS NOT NULL -- Group by date, country, and total reported directory requests, -- summing up the bandwidth histories. - GROUP BY 1, 2, 3; + GROUP BY 1, 2, 3'; RETURN 1; END; $$ LANGUAGE plpgsql;