[tor-commits] [metrics-db/master] Remove database schema which belongs in metrics-web.

karsten at torproject.org karsten at torproject.org
Thu Mar 3 18:46:41 UTC 2011


commit 8e12151db9b79e2d0fdbdd8692187881135b14a7
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Mar 3 19:45:50 2011 +0100

    Remove database schema which belongs in metrics-web.
---
 db/tordir.sql |  744 ---------------------------------------------------------
 1 files changed, 0 insertions(+), 744 deletions(-)

diff --git a/db/tordir.sql b/db/tordir.sql
deleted file mode 100644
index 11e49dc..0000000
--- a/db/tordir.sql
+++ /dev/null
@@ -1,744 +0,0 @@
--- Copyright 2010 The Tor Project
--- See LICENSE for licensing information
-
--- TABLE descriptor
--- Contains all of the descriptors published by routers.
-CREATE TABLE descriptor (
-    descriptor CHARACTER(40) NOT NULL,
-    nickname CHARACTER VARYING(19) NOT NULL,
-    address CHARACTER VARYING(15) NOT NULL,
-    orport INTEGER NOT NULL,
-    dirport INTEGER NOT NULL,
-    fingerprint CHARACTER(40) NOT NULL,
-    bandwidthavg BIGINT NOT NULL,
-    bandwidthburst BIGINT NOT NULL,
-    bandwidthobserved BIGINT NOT NULL,
-    platform CHARACTER VARYING(256),
-    published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
-    uptime BIGINT,
-    extrainfo CHARACTER(40),
-    rawdesc BYTEA NOT NULL,
-    CONSTRAINT descriptor_pkey PRIMARY KEY (descriptor)
-);
-
--- TABLE extrainfo
--- Contains all of the extra-info descriptors published by the routers.
-CREATE TABLE extrainfo (
-    extrainfo CHARACTER(40) NOT NULL,
-    nickname CHARACTER VARYING(19) NOT NULL,
-    fingerprint CHARACTER(40) NOT NULL,
-    published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
-    rawdesc BYTEA NOT NULL,
-    CONSTRAINT extrainfo_pkey PRIMARY KEY (extrainfo)
-);
-
--- Contains bandwidth histories reported by relays in extra-info
--- descriptors. Each row contains the reported bandwidth in 15-minute
--- intervals for each relay and date.
-CREATE TABLE bwhist (
-    fingerprint CHARACTER(40) NOT NULL,
-    date DATE NOT NULL,
-    read BIGINT[],
-    read_sum BIGINT,
-    written BIGINT[],
-    written_sum BIGINT,
-    dirread BIGINT[],
-    dirread_sum BIGINT,
-    dirwritten BIGINT[],
-    dirwritten_sum BIGINT,
-    CONSTRAINT bwhist_pkey PRIMARY KEY (fingerprint, date)
-);
-
-CREATE INDEX bwhist_date ON bwhist (date);
-
--- TABLE statusentry
--- Contains all of the consensus entries published by the directories.
--- Each statusentry references a valid descriptor.
-CREATE TABLE statusentry (
-    validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
-    nickname CHARACTER VARYING(19) NOT NULL,
-    fingerprint CHARACTER(40) NOT NULL,
-    descriptor CHARACTER(40) NOT NULL,
-    published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
-    address CHARACTER VARYING(15) NOT NULL,
-    orport INTEGER NOT NULL,
-    dirport INTEGER NOT NULL,
-    isauthority BOOLEAN DEFAULT FALSE NOT NULL,
-    isbadexit BOOLEAN DEFAULT FALSE NOT NULL,
-    isbaddirectory BOOLEAN DEFAULT FALSE NOT NULL,
-    isexit BOOLEAN DEFAULT FALSE NOT NULL,
-    isfast BOOLEAN DEFAULT FALSE NOT NULL,
-    isguard BOOLEAN DEFAULT FALSE NOT NULL,
-    ishsdir BOOLEAN DEFAULT FALSE NOT NULL,
-    isnamed BOOLEAN DEFAULT FALSE NOT NULL,
-    isstable BOOLEAN DEFAULT FALSE NOT NULL,
-    isrunning BOOLEAN DEFAULT FALSE NOT NULL,
-    isunnamed BOOLEAN DEFAULT FALSE NOT NULL,
-    isvalid BOOLEAN DEFAULT FALSE NOT NULL,
-    isv2dir BOOLEAN DEFAULT FALSE NOT NULL,
-    isv3dir BOOLEAN DEFAULT FALSE NOT NULL,
-    version CHARACTER VARYING(50),
-    bandwidth BIGINT,
-    ports TEXT,
-    rawdesc BYTEA NOT NULL,
-    CONSTRAINT statusentry_pkey PRIMARY KEY (validafter, fingerprint)
-);
-
--- TABLE consensus
--- Contains all of the consensuses published by the directories.
-CREATE TABLE consensus (
-    validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
-    rawdesc BYTEA NOT NULL,
-    CONSTRAINT consensus_pkey PRIMARY KEY (validafter)
-);
-
--- TABLE vote
--- Contains all of the votes published by the directories
-CREATE TABLE vote (
-    validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
-    dirsource CHARACTER(40) NOT NULL,
-    rawdesc BYTEA NOT NULL,
-    CONSTRAINT vote_pkey PRIMARY KEY (validafter, dirsource)
-);
-
--- TABLE connbidirect
--- Contain conn-bi-direct stats strings
-CREATE TABLE connbidirect (
-    source CHARACTER(40) NOT NULL,
-    statsend TIMESTAMP WITHOUT TIME ZONE NOT NULL,
-    seconds INTEGER NOT NULL,
-    belownum BIGINT NOT NULL,
-    readnum BIGINT NOT NULL,
-    writenum BIGINT NOT NULL,
-    bothnum BIGINT NOT NULL,
-    CONSTRAINT connbidirect_pkey PRIMARY KEY (source, statsend)
-);
-
--- Create the various indexes we need for searching relays
-CREATE INDEX statusentry_address ON statusentry (address);
-CREATE INDEX statusentry_fingerprint ON statusentry (fingerprint);
-CREATE INDEX statusentry_nickname ON statusentry (LOWER(nickname));
-CREATE INDEX statusentry_validafter ON statusentry (validafter);
-
--- And create an index that we use for precalculating statistics
-CREATE INDEX statusentry_descriptor ON statusentry (descriptor);
-CREATE INDEX statusentry_validafter_date ON statusentry (DATE(validafter));
-
--- TABLE network_size
-CREATE TABLE network_size (
-    date DATE NOT NULL,
-    avg_running INTEGER NOT NULL,
-    avg_exit INTEGER NOT NULL,
-    avg_guard INTEGER NOT NULL,
-    avg_fast INTEGER NOT NULL,
-    avg_stable INTEGER NOT NULL,
-    CONSTRAINT network_size_pkey PRIMARY KEY(date)
-);
-
--- TABLE network_size_hour
-CREATE TABLE network_size_hour (
-    validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
-    avg_running INTEGER NOT NULL,
-    avg_exit INTEGER NOT NULL,
-    avg_guard INTEGER NOT NULL,
-    avg_fast INTEGER NOT NULL,
-    avg_stable INTEGER NOT NULL,
-    CONSTRAINT network_size_hour_pkey PRIMARY KEY(validafter)
-);
-
--- TABLE relay_platforms
-CREATE TABLE relay_platforms (
-    date DATE NOT NULL,
-    avg_linux INTEGER NOT NULL,
-    avg_darwin INTEGER NOT NULL,
-    avg_bsd INTEGER NOT NULL,
-    avg_windows INTEGER NOT NULL,
-    avg_other INTEGER NOT NULL,
-    CONSTRAINT relay_platforms_pkey PRIMARY KEY(date)
-);
-
--- TABLE relay_versions
-CREATE TABLE relay_versions (
-    date DATE NOT NULL,
-    version CHARACTER(5) NOT NULL,
-    relays INTEGER NOT NULL,
-    CONSTRAINT relay_versions_pkey PRIMARY KEY(date, version)
-);
-
--- TABLE total_bandwidth
--- Contains information for the whole network's total bandwidth which is
--- used in the bandwidth graphs.
-CREATE TABLE total_bandwidth (
-    date DATE NOT NULL,
-    bwavg BIGINT NOT NULL,
-    bwburst BIGINT NOT NULL,
-    bwobserved BIGINT NOT NULL,
-    bwadvertised BIGINT NOT NULL,
-    CONSTRAINT total_bandwidth_pkey PRIMARY KEY(date)
-);
-
--- TABLE total_bwhist
--- Contains the total number of read/written and the number of dir bytes
--- read/written by all relays in the network on a given day. The dir bytes
--- are an estimate based on the subset of relays that count dir bytes.
-CREATE TABLE total_bwhist (
-    date DATE NOT NULL,
-    read BIGINT,
-    written BIGINT,
-    CONSTRAINT total_bwhist_pkey PRIMARY KEY(date)
-);
-
--- TABLE user_stats
--- Aggregate statistics on directory requests and byte histories that we
--- use to estimate user numbers.
-CREATE TABLE user_stats (
-    date DATE NOT NULL,
-    country CHARACTER(2) NOT NULL,
-    r BIGINT,
-    dw BIGINT,
-    dr BIGINT,
-    drw BIGINT,
-    drr BIGINT,
-    bw BIGINT,
-    br BIGINT,
-    bwd BIGINT,
-    brd BIGINT,
-    bwr BIGINT,
-    brr BIGINT,
-    bwdr BIGINT,
-    brdr BIGINT,
-    bwp BIGINT,
-    brp BIGINT,
-    bwn BIGINT,
-    brn BIGINT,
-    CONSTRAINT user_stats_pkey PRIMARY KEY(date, country)
-);
-
--- TABLE relay_statuses_per_day
--- A helper table which is commonly used to update the tables above in the
--- refresh_* functions.
-CREATE TABLE relay_statuses_per_day (
-    date DATE NOT NULL,
-    count INTEGER NOT NULL,
-    CONSTRAINT relay_statuses_per_day_pkey PRIMARY KEY(date)
-);
-
--- Dates to be included in the next refresh run.
-CREATE TABLE scheduled_updates (
-    id SERIAL,
-    date DATE NOT NULL
-);
-
--- Dates in the current refresh run.  When starting a refresh run, we copy
--- the rows from scheduled_updates here in order to delete just those
--- lines after the refresh run.  Otherwise we might forget scheduled dates
--- that have been added during a refresh run.  If this happens we're going
--- to update these dates in the next refresh run.
-CREATE TABLE updates (
-    id INTEGER,
-    date DATE
-);
-
-CREATE LANGUAGE plpgsql;
-
--- FUNCTION refresh_relay_statuses_per_day()
--- Updates helper table which is used to refresh the aggregate tables.
-CREATE OR REPLACE FUNCTION refresh_relay_statuses_per_day()
-RETURNS INTEGER AS $$
-    BEGIN
-    DELETE FROM relay_statuses_per_day
-    WHERE date IN (SELECT date FROM updates);
-    INSERT INTO relay_statuses_per_day (date, count)
-    SELECT DATE(validafter) AS date, COUNT(*) AS count
-    FROM consensus
-    WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
-    AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
-    AND DATE(validafter) IN (SELECT date FROM updates)
-    GROUP BY DATE(validafter);
-    RETURN 1;
-    END;
-$$ LANGUAGE plpgsql;
-
-CREATE OR REPLACE FUNCTION array_sum (BIGINT[]) RETURNS BIGINT AS $$
-  SELECT SUM($1[i])::bigint
-  FROM generate_series(array_lower($1, 1), array_upper($1, 1)) index(i);
-$$ LANGUAGE SQL;
-
-CREATE OR REPLACE FUNCTION insert_bwhist(
-    insert_fingerprint CHARACTER(40), insert_date DATE,
-    insert_read BIGINT[], insert_written BIGINT[],
-    insert_dirread BIGINT[], insert_dirwritten BIGINT[])
-    RETURNS INTEGER AS $$
-  BEGIN
-  IF (SELECT COUNT(*) FROM bwhist
-      WHERE fingerprint = insert_fingerprint AND date = insert_date) = 0
-      THEN
-    INSERT INTO bwhist (fingerprint, date, read, written, dirread,
-        dirwritten)
-    VALUES (insert_fingerprint, insert_date, insert_read, insert_written,
-        insert_dirread, insert_dirwritten);
-  ELSE
-    BEGIN
-    UPDATE bwhist
-    SET read[array_lower(insert_read, 1):
-          array_upper(insert_read, 1)] = insert_read,
-        written[array_lower(insert_written, 1):
-          array_upper(insert_written, 1)] = insert_written,
-        dirread[array_lower(insert_dirread, 1):
-          array_upper(insert_dirread, 1)] = insert_dirread,
-        dirwritten[array_lower(insert_dirwritten, 1):
-          array_upper(insert_dirwritten, 1)] = insert_dirwritten
-    WHERE fingerprint = insert_fingerprint AND date = insert_date;
-    -- Updating twice is an ugly workaround for PostgreSQL bug 5840
-    UPDATE bwhist
-    SET read[array_lower(insert_read, 1):
-          array_upper(insert_read, 1)] = insert_read,
-        written[array_lower(insert_written, 1):
-          array_upper(insert_written, 1)] = insert_written,
-        dirread[array_lower(insert_dirread, 1):
-          array_upper(insert_dirread, 1)] = insert_dirread,
-        dirwritten[array_lower(insert_dirwritten, 1):
-          array_upper(insert_dirwritten, 1)] = insert_dirwritten
-    WHERE fingerprint = insert_fingerprint AND date = insert_date;
-    END;
-  END IF;
-  UPDATE bwhist
-  SET read_sum = array_sum(read),
-      written_sum = array_sum(written),
-      dirread_sum = array_sum(dirread),
-      dirwritten_sum = array_sum(dirwritten)
-  WHERE fingerprint = insert_fingerprint AND date = insert_date;
-  RETURN 1;
-  END;
-$$ LANGUAGE plpgsql;
-
--- refresh_* functions
--- The following functions keep their corresponding aggregate tables
--- up-to-date. They should be called every time ERNIE is run, or when new
--- data is finished being added to the descriptor or statusentry tables.
--- They find what new data has been entered or updated based on the
--- updates table.
-
--- FUNCTION refresh_network_size()
-CREATE OR REPLACE FUNCTION refresh_network_size() RETURNS INTEGER AS $$
-    BEGIN
-
-    DELETE FROM network_size
-    WHERE date IN (SELECT date FROM updates);
-
-        INSERT INTO network_size
-        (date, avg_running, avg_exit, avg_guard, avg_fast, avg_stable)
-        SELECT
-              DATE(validafter) AS date,
-              COUNT(*) / relay_statuses_per_day.count AS avg_running,
-              SUM(CASE WHEN isexit IS TRUE THEN 1 ELSE 0 END)
-                  / relay_statuses_per_day.count AS avg_exit,
-              SUM(CASE WHEN isguard IS TRUE THEN 1 ELSE 0 END)
-                  / relay_statuses_per_day.count AS avg_guard,
-              SUM(CASE WHEN isfast IS TRUE THEN 1 ELSE 0 END)
-                  / relay_statuses_per_day.count AS avg_fast,
-              SUM(CASE WHEN isstable IS TRUE THEN 1 ELSE 0 END)
-                  / relay_statuses_per_day.count AS avg_stable
-          FROM statusentry
-          JOIN relay_statuses_per_day
-          ON DATE(validafter) = relay_statuses_per_day.date
-          WHERE isrunning = TRUE
-              AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
-              AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
-              AND DATE(validafter) IN (SELECT date FROM updates)
-          GROUP BY DATE(validafter), relay_statuses_per_day.count;
-
-    RETURN 1;
-    END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_network_size_hour()
-CREATE OR REPLACE FUNCTION refresh_network_size_hour() RETURNS INTEGER AS $$
-    BEGIN
-
-    DELETE FROM network_size_hour
-    WHERE DATE(validafter) IN (SELECT date FROM updates);
-
-    INSERT INTO network_size_hour
-    (validafter, avg_running, avg_exit, avg_guard, avg_fast, avg_stable)
-    SELECT validafter, COUNT(*) AS avg_running,
-    SUM(CASE WHEN isexit IS TRUE THEN 1 ELSE 0 END) AS avg_exit,
-    SUM(CASE WHEN isguard IS TRUE THEN 1 ELSE 0 END) AS avg_guard,
-    SUM(CASE WHEN isfast IS TRUE THEN 1 ELSE 0 END) AS avg_fast,
-    SUM(CASE WHEN isstable IS TRUE THEN 1 ELSE 0 END) AS avg_stable
-    FROM statusentry
-    WHERE isrunning = TRUE
-    AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
-    AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
-    AND DATE(validafter) IN (SELECT date FROM updates)
-    GROUP BY validafter;
-
-    RETURN 1;
-    END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_relay_platforms()
-CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$
-    BEGIN
-
-    DELETE FROM relay_platforms
-    WHERE date IN (SELECT date FROM updates);
-
-    INSERT INTO relay_platforms
-    (date, avg_linux, avg_darwin, avg_bsd, avg_windows, avg_other)
-    SELECT DATE(validafter),
-        SUM(CASE WHEN platform LIKE '%Linux%' THEN 1 ELSE 0 END) /
-            relay_statuses_per_day.count AS avg_linux,
-        SUM(CASE WHEN platform LIKE '%Darwin%' THEN 1 ELSE 0 END) /
-            relay_statuses_per_day.count AS avg_darwin,
-        SUM(CASE WHEN platform LIKE '%BSD%' THEN 1 ELSE 0 END) /
-            relay_statuses_per_day.count AS avg_bsd,
-        SUM(CASE WHEN platform LIKE '%Windows%' THEN 1 ELSE 0 END) /
-            relay_statuses_per_day.count AS avg_windows,
-        SUM(CASE WHEN platform NOT LIKE '%Windows%'
-            AND platform NOT LIKE '%Darwin%'
-            AND platform NOT LIKE '%BSD%'
-            AND platform NOT LIKE '%Linux%' THEN 1 ELSE 0 END) /
-            relay_statuses_per_day.count AS avg_other
-    FROM descriptor RIGHT JOIN statusentry
-    ON statusentry.descriptor = descriptor.descriptor
-    JOIN relay_statuses_per_day
-    ON DATE(validafter) = relay_statuses_per_day.date
-    WHERE isrunning = TRUE
-          AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
-          AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
-          AND DATE(validafter) IN (SELECT date FROM updates)
-          AND DATE(relay_statuses_per_day.date) >=
-              (SELECT MIN(date) FROM updates)
-          AND DATE(relay_statuses_per_day.date) <=
-              (SELECT MAX(date) FROM updates)
-          AND DATE(relay_statuses_per_day.date) IN
-              (SELECT date FROM updates)
-    GROUP BY DATE(validafter), relay_statuses_per_day.count;
-
-    RETURN 1;
-    END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_relay_versions()
-CREATE OR REPLACE FUNCTION refresh_relay_versions() RETURNS INTEGER AS $$
-    BEGIN
-
-    DELETE FROM relay_versions
-    WHERE date IN (SELECT date FROM updates);
-
-    INSERT INTO relay_versions
-    (date, version, relays)
-    SELECT DATE(validafter), SUBSTRING(platform, 5, 5) AS version,
-           COUNT(*) / relay_statuses_per_day.count AS relays
-    FROM descriptor RIGHT JOIN statusentry
-    ON descriptor.descriptor = statusentry.descriptor
-    JOIN relay_statuses_per_day
-    ON DATE(validafter) = relay_statuses_per_day.date
-    WHERE isrunning = TRUE
-          AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
-          AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
-          AND DATE(validafter) IN (SELECT date FROM updates)
-          AND DATE(relay_statuses_per_day.date) >=
-              (SELECT MIN(date) FROM updates)
-          AND DATE(relay_statuses_per_day.date) <=
-              (SELECT MAX(date) FROM updates)
-          AND DATE(relay_statuses_per_day.date) IN
-              (SELECT date FROM updates)
-          AND platform IS NOT NULL
-    GROUP BY 1, 2, relay_statuses_per_day.count;
-
-    RETURN 1;
-    END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_total_bandwidth()
--- This keeps the table total_bandwidth up-to-date when necessary.
-CREATE OR REPLACE FUNCTION refresh_total_bandwidth() RETURNS INTEGER AS $$
-    BEGIN
-
-    DELETE FROM total_bandwidth
-    WHERE date IN (SELECT date FROM updates);
-
-    INSERT INTO total_bandwidth
-    (bwavg, bwburst, bwobserved, bwadvertised, date)
-    SELECT (SUM(bandwidthavg)
-            / relay_statuses_per_day.count)::BIGINT AS bwavg,
-        (SUM(bandwidthburst)
-            / relay_statuses_per_day.count)::BIGINT AS bwburst,
-        (SUM(bandwidthobserved)
-            / relay_statuses_per_day.count)::BIGINT AS bwobserved,
-        (SUM(LEAST(bandwidthavg, bandwidthobserved))
-            / relay_statuses_per_day.count)::BIGINT AS bwadvertised,
-        DATE(validafter)
-    FROM descriptor RIGHT JOIN statusentry
-    ON descriptor.descriptor = statusentry.descriptor
-    JOIN relay_statuses_per_day
-    ON DATE(validafter) = relay_statuses_per_day.date
-    WHERE isrunning = TRUE
-          AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
-          AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
-          AND DATE(validafter) IN (SELECT date FROM updates)
-          AND DATE(relay_statuses_per_day.date) >=
-              (SELECT MIN(date) FROM updates)
-          AND DATE(relay_statuses_per_day.date) <=
-              (SELECT MAX(date) FROM updates)
-          AND DATE(relay_statuses_per_day.date) IN
-              (SELECT date FROM updates)
-    GROUP BY DATE(validafter), relay_statuses_per_day.count;
-
-    RETURN 1;
-    END;
-$$ LANGUAGE plpgsql;
-
-CREATE OR REPLACE FUNCTION refresh_total_bwhist() RETURNS INTEGER AS $$
-  BEGIN
-  DELETE FROM total_bwhist WHERE date IN (SELECT date FROM updates);
-  INSERT INTO total_bwhist (date, read, written)
-  SELECT date, SUM(read_sum) AS read, SUM(written_sum) AS written
-  FROM bwhist
-  WHERE date >= (SELECT MIN(date) FROM updates)
-  AND date <= (SELECT MAX(date) FROM updates)
-  AND date IN (SELECT date FROM updates)
-  GROUP BY date;
-  RETURN 1;
-  END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_user_stats()
--- This function refreshes our user statistics by weighting reported
--- directory request statistics of directory mirrors with bandwidth
--- histories.
-CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
-  BEGIN
-  -- Start by deleting user statistics of the dates we're about to
-  -- regenerate.
-  DELETE FROM user_stats WHERE date IN (SELECT date FROM updates);
-  -- Now insert new user statistics.
-  INSERT INTO user_stats (date, country, r, dw, dr, drw, drr, bw, br, bwd,
-      brd, bwr, brr, bwdr, brdr, bwp, brp, bwn, brn)
-  SELECT
-         -- We want to learn about total requests by date and country.
-         dirreq_stats_by_country.date AS date,
-         dirreq_stats_by_country.country AS country,
-         dirreq_stats_by_country.r AS r,
-         -- In order to weight the reported directory requests, we're
-         -- counting bytes of relays (except directory authorities)
-         -- matching certain criteria: whether or not they are reporting
-         -- directory requests, whether or not they are reporting
-         -- directory bytes, and whether their directory port is open or
-         -- closed.
-         SUM(CASE WHEN authority IS NOT NULL
-           THEN NULL ELSE dirwritten END) AS dw,
-         SUM(CASE WHEN authority IS NOT NULL
-           THEN NULL ELSE dirread END) AS dr,
-         SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
-           THEN NULL ELSE dirwritten END) AS dwr,
-         SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
-           THEN NULL ELSE dirread END) AS drr,
-         SUM(CASE WHEN authority IS NOT NULL
-           THEN NULL ELSE written END) AS bw,
-         SUM(CASE WHEN authority IS NOT NULL
-           THEN NULL ELSE read END) AS br,
-         SUM(CASE WHEN dirwritten = 0 OR authority IS NOT NULL
-           THEN NULL ELSE written END) AS bwd,
-         SUM(CASE WHEN dirwritten = 0 OR authority IS NOT NULL
-           THEN NULL ELSE read END) AS brd,
-         SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
-           THEN NULL ELSE written END) AS bwr,
-         SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
-           THEN NULL ELSE read END) AS brr,
-         SUM(CASE WHEN dirwritten = 0 OR requests IS NULL
-           OR authority IS NOT NULL THEN NULL ELSE written END) AS bwdr,
-         SUM(CASE WHEN dirwritten = 0 OR requests IS NULL
-           OR authority IS NOT NULL THEN NULL ELSE read END) AS brdr,
-         SUM(CASE WHEN opendirport IS NULL OR authority IS NOT NULL
-           THEN NULL ELSE written END) AS bwp,
-         SUM(CASE WHEN opendirport IS NULL OR authority IS NOT NULL
-           THEN NULL ELSE read END) AS brp,
-         SUM(CASE WHEN opendirport IS NOT NULL OR authority IS NOT NULL
-           THEN NULL ELSE written END) AS bwn,
-         SUM(CASE WHEN opendirport IS NOT NULL OR authority IS NOT NULL
-           THEN NULL ELSE read END) AS brn
-  FROM (
-    -- The first sub-select tells us the total number of directory
-    -- requests per country reported by all directory mirrors.
-    SELECT dirreq_stats_by_date.date AS date, country, SUM(requests) AS r
-    FROM (
-      SELECT fingerprint, date, country, SUM(requests) AS requests
-      FROM (
-        -- There are two selects here, because in most cases the directory
-        -- request statistics cover two calendar dates.
-        SELECT LOWER(source) AS fingerprint, DATE(statsend) AS date,
-          country, FLOOR(requests * (CASE
-          WHEN EXTRACT(EPOCH FROM DATE(statsend)) >
-          EXTRACT(EPOCH FROM statsend) - seconds
-          THEN EXTRACT(EPOCH FROM statsend) -
-          EXTRACT(EPOCH FROM DATE(statsend))
-          ELSE seconds END) / seconds) AS requests
-        FROM dirreq_stats
-        UNION
-        SELECT LOWER(source) AS fingerprint, DATE(statsend) - 1 AS date,
-          country, FLOOR(requests *
-          (EXTRACT(EPOCH FROM DATE(statsend)) -
-          EXTRACT(EPOCH FROM statsend) + seconds)
-          / seconds) AS requests
-        FROM dirreq_stats
-        WHERE EXTRACT(EPOCH FROM DATE(statsend)) -
-        EXTRACT(EPOCH FROM statsend) + seconds > 0
-      ) dirreq_stats_split
-      GROUP BY 1, 2, 3
-    ) dirreq_stats_by_date
-    -- We're only interested in requests by directory mirrors, not
-    -- directory authorities, so we exclude all relays with the Authority
-    -- flag.
-    RIGHT JOIN (
-      SELECT fingerprint, DATE(validafter) AS date
-      FROM statusentry
-      WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
-      AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
-      AND DATE(validafter) IN (SELECT date FROM updates)
-      AND isauthority IS FALSE
-      GROUP BY 1, 2
-    ) statusentry_dirmirrors
-    ON dirreq_stats_by_date.fingerprint =
-       statusentry_dirmirrors.fingerprint
-    AND dirreq_stats_by_date.date = statusentry_dirmirrors.date
-    GROUP BY 1, 2
-  ) dirreq_stats_by_country
-  LEFT JOIN (
-    -- In the next step, we expand the result by bandwidth histories of
-    -- all relays.
-    SELECT fingerprint, date, read_sum AS read, written_sum AS written,
-           dirread_sum AS dirread, dirwritten_sum AS dirwritten
-    FROM bwhist
-    WHERE date >= (SELECT MIN(date) FROM updates)
-    AND date <= (SELECT MAX(date) FROM updates)
-    AND date IN (SELECT date FROM updates)
-  ) bwhist_by_relay
-  ON dirreq_stats_by_country.date = bwhist_by_relay.date
-  LEFT JOIN (
-    -- For each relay, tell how often it had an open directory port and
-    -- how often it had the Authority flag assigned on a given date.
-    SELECT fingerprint, DATE(validafter) AS date,
-      SUM(CASE WHEN dirport > 0 THEN 1 ELSE NULL END) AS opendirport,
-      SUM(CASE WHEN isauthority IS TRUE THEN 1 ELSE NULL END) AS authority
-    FROM statusentry
-    WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
-    AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
-    AND DATE(validafter) IN (SELECT date FROM updates)
-    GROUP BY 1, 2
-  ) statusentry_by_relay
-  ON bwhist_by_relay.fingerprint = statusentry_by_relay.fingerprint
-  AND bwhist_by_relay.date = statusentry_by_relay.date
-  LEFT JOIN (
-    -- For each relay, tell if it has reported directory request
-    -- statistics on a given date. Again, we have to take into account
-    -- that statistics intervals cover more than one calendar date in most
-    -- cases. The exact number of requests isn't relevant here, but only
-    -- whether the relay reported directory requests or not.
-    SELECT fingerprint, date, 1 AS requests
-    FROM (
-      SELECT LOWER(source) AS fingerprint, DATE(statsend) AS date
-      FROM dirreq_stats
-      WHERE DATE(statsend) >= (SELECT MIN(date) FROM updates)
-      AND DATE(statsend) <= (SELECT MAX(date) FROM updates)
-      AND DATE(statsend) IN (SELECT date FROM updates)
-      AND country = 'zy'
-      UNION
-      SELECT LOWER(source) AS fingerprint, DATE(statsend) - 1 AS date
-      FROM dirreq_stats
-      WHERE DATE(statsend) - 1 >= (SELECT MIN(date) FROM updates)
-      AND DATE(statsend) - 1 <= (SELECT MAX(date) FROM updates)
-      AND DATE(statsend) - 1 IN (SELECT date FROM updates)
-      AND country = 'zy'
-      AND EXTRACT(EPOCH FROM DATE(statsend)) -
-      EXTRACT(EPOCH FROM statsend) + seconds > 0
-    ) dirreq_stats_split
-    GROUP BY 1, 2
-  ) dirreq_stats_by_relay
-  ON bwhist_by_relay.fingerprint = dirreq_stats_by_relay.fingerprint
-  AND bwhist_by_relay.date = dirreq_stats_by_relay.date
-  WHERE dirreq_stats_by_country.country IS NOT NULL
-  -- Group by date, country, and total reported directory requests,
-  -- summing up the bandwidth histories.
-  GROUP BY 1, 2, 3;
-  RETURN 1;
-  END;
-$$ LANGUAGE plpgsql;
-
--- non-relay statistics
--- The following tables contain pre-aggregated statistics that are not
--- based on relay descriptors or that are not yet derived from the relay
--- descriptors in the database.
-
--- TABLE bridge_network_size
--- Contains average number of running bridges.
-CREATE TABLE bridge_network_size (
-    "date" DATE NOT NULL,
-    avg_running INTEGER NOT NULL,
-    CONSTRAINT bridge_network_size_pkey PRIMARY KEY(date)
-);
-
--- TABLE dirreq_stats
--- Contains daily users by country.
-CREATE TABLE dirreq_stats (
-    source CHARACTER(40) NOT NULL,
-    statsend TIMESTAMP WITHOUT TIME ZONE NOT NULL,
-    seconds INTEGER NOT NULL,
-    country CHARACTER(2) NOT NULL,
-    requests INTEGER NOT NULL,
-    CONSTRAINT dirreq_stats_pkey
-    PRIMARY KEY (source, statsend, seconds, country)
-);
-
--- TABLE bridge_stats
--- Contains daily bridge users by country.
-CREATE TABLE bridge_stats (
-    "date" DATE NOT NULL,
-    country CHARACTER(2) NOT NULL,
-    users INTEGER NOT NULL,
-    CONSTRAINT bridge_stats_pkey PRIMARY KEY ("date", country)
-);
-
--- TABLE torperf_stats
--- Quantiles and medians of daily torperf results.
-CREATE TABLE torperf_stats (
-    "date" DATE NOT NULL,
-    source CHARACTER VARYING(32) NOT NULL,
-    q1 INTEGER NOT NULL,
-    md INTEGER NOT NULL,
-    q3 INTEGER NOT NULL,
-    timeouts INTEGER NOT NULL,
-    failures INTEGER NOT NULL,
-    requests INTEGER NOT NULL,
-    CONSTRAINT torperf_stats_pkey PRIMARY KEY("date", source)
-);
-
--- TABLE gettor_stats
--- Packages requested from GetTor
-CREATE TABLE gettor_stats (
-    "date" DATE NOT NULL,
-    bundle CHARACTER VARYING(32) NOT NULL,
-    downloads INTEGER NOT NULL,
-    CONSTRAINT gettor_stats_pkey PRIMARY KEY("date", bundle)
-);
-
--- Refresh all statistics in the database.
-CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
-  BEGIN
-    DELETE FROM updates;
-    INSERT INTO updates SELECT * FROM scheduled_updates;
-    PERFORM refresh_relay_statuses_per_day();
-    PERFORM refresh_network_size();
-    PERFORM refresh_network_size_hour();
-    PERFORM refresh_relay_platforms();
-    PERFORM refresh_relay_versions();
-    PERFORM refresh_total_bandwidth();
-    PERFORM refresh_total_bwhist();
-    PERFORM refresh_user_stats();
-    DELETE FROM scheduled_updates WHERE id IN (SELECT id FROM updates);
-  RETURN 1;
-  END;
-$$ LANGUAGE plpgsql;
-



More information about the tor-commits mailing list