tor-commits mailing list archive, March 2011

[metrics-db/master] Remove database schema which belongs in metrics-web.
by karsten@torproject.org, 03 Mar '11
commit 8e12151db9b79e2d0fdbdd8692187881135b14a7
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date: Thu Mar 3 19:45:50 2011 +0100
Remove database schema which belongs in metrics-web.
---
db/tordir.sql | 744 ---------------------------------------------------------
1 files changed, 0 insertions(+), 744 deletions(-)
diff --git a/db/tordir.sql b/db/tordir.sql
deleted file mode 100644
index 11e49dc..0000000
--- a/db/tordir.sql
+++ /dev/null
@@ -1,744 +0,0 @@
--- Copyright 2010 The Tor Project
--- See LICENSE for licensing information
-
--- TABLE descriptor
--- Contains all of the descriptors published by routers.
-CREATE TABLE descriptor (
- descriptor CHARACTER(40) NOT NULL,
- nickname CHARACTER VARYING(19) NOT NULL,
- address CHARACTER VARYING(15) NOT NULL,
- orport INTEGER NOT NULL,
- dirport INTEGER NOT NULL,
- fingerprint CHARACTER(40) NOT NULL,
- bandwidthavg BIGINT NOT NULL,
- bandwidthburst BIGINT NOT NULL,
- bandwidthobserved BIGINT NOT NULL,
- platform CHARACTER VARYING(256),
- published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- uptime BIGINT,
- extrainfo CHARACTER(40),
- rawdesc BYTEA NOT NULL,
- CONSTRAINT descriptor_pkey PRIMARY KEY (descriptor)
-);
-
--- TABLE extrainfo
--- Contains all of the extra-info descriptors published by the routers.
-CREATE TABLE extrainfo (
- extrainfo CHARACTER(40) NOT NULL,
- nickname CHARACTER VARYING(19) NOT NULL,
- fingerprint CHARACTER(40) NOT NULL,
- published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- rawdesc BYTEA NOT NULL,
- CONSTRAINT extrainfo_pkey PRIMARY KEY (extrainfo)
-);
-
--- Contains bandwidth histories reported by relays in extra-info
--- descriptors. Each row contains the reported bandwidth in 15-minute
--- intervals for each relay and date.
-CREATE TABLE bwhist (
- fingerprint CHARACTER(40) NOT NULL,
- date DATE NOT NULL,
- read BIGINT[],
- read_sum BIGINT,
- written BIGINT[],
- written_sum BIGINT,
- dirread BIGINT[],
- dirread_sum BIGINT,
- dirwritten BIGINT[],
- dirwritten_sum BIGINT,
- CONSTRAINT bwhist_pkey PRIMARY KEY (fingerprint, date)
-);
-
-CREATE INDEX bwhist_date ON bwhist (date);
-
--- TABLE statusentry
--- Contains all of the consensus entries published by the directories.
--- Each statusentry references a valid descriptor.
-CREATE TABLE statusentry (
- validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- nickname CHARACTER VARYING(19) NOT NULL,
- fingerprint CHARACTER(40) NOT NULL,
- descriptor CHARACTER(40) NOT NULL,
- published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- address CHARACTER VARYING(15) NOT NULL,
- orport INTEGER NOT NULL,
- dirport INTEGER NOT NULL,
- isauthority BOOLEAN DEFAULT FALSE NOT NULL,
- isbadexit BOOLEAN DEFAULT FALSE NOT NULL,
- isbaddirectory BOOLEAN DEFAULT FALSE NOT NULL,
- isexit BOOLEAN DEFAULT FALSE NOT NULL,
- isfast BOOLEAN DEFAULT FALSE NOT NULL,
- isguard BOOLEAN DEFAULT FALSE NOT NULL,
- ishsdir BOOLEAN DEFAULT FALSE NOT NULL,
- isnamed BOOLEAN DEFAULT FALSE NOT NULL,
- isstable BOOLEAN DEFAULT FALSE NOT NULL,
- isrunning BOOLEAN DEFAULT FALSE NOT NULL,
- isunnamed BOOLEAN DEFAULT FALSE NOT NULL,
- isvalid BOOLEAN DEFAULT FALSE NOT NULL,
- isv2dir BOOLEAN DEFAULT FALSE NOT NULL,
- isv3dir BOOLEAN DEFAULT FALSE NOT NULL,
- version CHARACTER VARYING(50),
- bandwidth BIGINT,
- ports TEXT,
- rawdesc BYTEA NOT NULL,
- CONSTRAINT statusentry_pkey PRIMARY KEY (validafter, fingerprint)
-);
-
--- TABLE consensus
--- Contains all of the consensuses published by the directories.
-CREATE TABLE consensus (
- validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- rawdesc BYTEA NOT NULL,
- CONSTRAINT consensus_pkey PRIMARY KEY (validafter)
-);
-
--- TABLE vote
--- Contains all of the votes published by the directories
-CREATE TABLE vote (
- validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- dirsource CHARACTER(40) NOT NULL,
- rawdesc BYTEA NOT NULL,
- CONSTRAINT vote_pkey PRIMARY KEY (validafter, dirsource)
-);
-
--- TABLE connbidirect
--- Contain conn-bi-direct stats strings
-CREATE TABLE connbidirect (
- source CHARACTER(40) NOT NULL,
- statsend TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- seconds INTEGER NOT NULL,
- belownum BIGINT NOT NULL,
- readnum BIGINT NOT NULL,
- writenum BIGINT NOT NULL,
- bothnum BIGINT NOT NULL,
- CONSTRAINT connbidirect_pkey PRIMARY KEY (source, statsend)
-);
-
--- Create the various indexes we need for searching relays
-CREATE INDEX statusentry_address ON statusentry (address);
-CREATE INDEX statusentry_fingerprint ON statusentry (fingerprint);
-CREATE INDEX statusentry_nickname ON statusentry (LOWER(nickname));
-CREATE INDEX statusentry_validafter ON statusentry (validafter);
-
--- And create an index that we use for precalculating statistics
-CREATE INDEX statusentry_descriptor ON statusentry (descriptor);
-CREATE INDEX statusentry_validafter_date ON statusentry (DATE(validafter));
-
--- TABLE network_size
-CREATE TABLE network_size (
- date DATE NOT NULL,
- avg_running INTEGER NOT NULL,
- avg_exit INTEGER NOT NULL,
- avg_guard INTEGER NOT NULL,
- avg_fast INTEGER NOT NULL,
- avg_stable INTEGER NOT NULL,
- CONSTRAINT network_size_pkey PRIMARY KEY(date)
-);
-
--- TABLE network_size_hour
-CREATE TABLE network_size_hour (
- validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- avg_running INTEGER NOT NULL,
- avg_exit INTEGER NOT NULL,
- avg_guard INTEGER NOT NULL,
- avg_fast INTEGER NOT NULL,
- avg_stable INTEGER NOT NULL,
- CONSTRAINT network_size_hour_pkey PRIMARY KEY(validafter)
-);
-
--- TABLE relay_platforms
-CREATE TABLE relay_platforms (
- date DATE NOT NULL,
- avg_linux INTEGER NOT NULL,
- avg_darwin INTEGER NOT NULL,
- avg_bsd INTEGER NOT NULL,
- avg_windows INTEGER NOT NULL,
- avg_other INTEGER NOT NULL,
- CONSTRAINT relay_platforms_pkey PRIMARY KEY(date)
-);
-
--- TABLE relay_versions
-CREATE TABLE relay_versions (
- date DATE NOT NULL,
- version CHARACTER(5) NOT NULL,
- relays INTEGER NOT NULL,
- CONSTRAINT relay_versions_pkey PRIMARY KEY(date, version)
-);
-
--- TABLE total_bandwidth
--- Contains information for the whole network's total bandwidth which is
--- used in the bandwidth graphs.
-CREATE TABLE total_bandwidth (
- date DATE NOT NULL,
- bwavg BIGINT NOT NULL,
- bwburst BIGINT NOT NULL,
- bwobserved BIGINT NOT NULL,
- bwadvertised BIGINT NOT NULL,
- CONSTRAINT total_bandwidth_pkey PRIMARY KEY(date)
-);
-
--- TABLE total_bwhist
--- Contains the total number of read/written and the number of dir bytes
--- read/written by all relays in the network on a given day. The dir bytes
--- are an estimate based on the subset of relays that count dir bytes.
-CREATE TABLE total_bwhist (
- date DATE NOT NULL,
- read BIGINT,
- written BIGINT,
- CONSTRAINT total_bwhist_pkey PRIMARY KEY(date)
-);
-
--- TABLE user_stats
--- Aggregate statistics on directory requests and byte histories that we
--- use to estimate user numbers.
-CREATE TABLE user_stats (
- date DATE NOT NULL,
- country CHARACTER(2) NOT NULL,
- r BIGINT,
- dw BIGINT,
- dr BIGINT,
- drw BIGINT,
- drr BIGINT,
- bw BIGINT,
- br BIGINT,
- bwd BIGINT,
- brd BIGINT,
- bwr BIGINT,
- brr BIGINT,
- bwdr BIGINT,
- brdr BIGINT,
- bwp BIGINT,
- brp BIGINT,
- bwn BIGINT,
- brn BIGINT,
- CONSTRAINT user_stats_pkey PRIMARY KEY(date, country)
-);
-
--- TABLE relay_statuses_per_day
--- A helper table which is commonly used to update the tables above in the
--- refresh_* functions.
-CREATE TABLE relay_statuses_per_day (
- date DATE NOT NULL,
- count INTEGER NOT NULL,
- CONSTRAINT relay_statuses_per_day_pkey PRIMARY KEY(date)
-);
-
--- Dates to be included in the next refresh run.
-CREATE TABLE scheduled_updates (
- id SERIAL,
- date DATE NOT NULL
-);
-
--- Dates in the current refresh run. When starting a refresh run, we copy
--- the rows from scheduled_updates here in order to delete just those
--- lines after the refresh run. Otherwise we might forget scheduled dates
--- that have been added during a refresh run. If this happens we're going
--- to update these dates in the next refresh run.
-CREATE TABLE updates (
- id INTEGER,
- date DATE
-);
-
-CREATE LANGUAGE plpgsql;
-
--- FUNCTION refresh_relay_statuses_per_day()
--- Updates helper table which is used to refresh the aggregate tables.
-CREATE OR REPLACE FUNCTION refresh_relay_statuses_per_day()
-RETURNS INTEGER AS $$
- BEGIN
- DELETE FROM relay_statuses_per_day
- WHERE date IN (SELECT date FROM updates);
- INSERT INTO relay_statuses_per_day (date, count)
- SELECT DATE(validafter) AS date, COUNT(*) AS count
- FROM consensus
- WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
- AND DATE(validafter) IN (SELECT date FROM updates)
- GROUP BY DATE(validafter);
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
-CREATE OR REPLACE FUNCTION array_sum (BIGINT[]) RETURNS BIGINT AS $$
- SELECT SUM($1[i])::bigint
- FROM generate_series(array_lower($1, 1), array_upper($1, 1)) index(i);
-$$ LANGUAGE SQL;
-
-CREATE OR REPLACE FUNCTION insert_bwhist(
- insert_fingerprint CHARACTER(40), insert_date DATE,
- insert_read BIGINT[], insert_written BIGINT[],
- insert_dirread BIGINT[], insert_dirwritten BIGINT[])
- RETURNS INTEGER AS $$
- BEGIN
- IF (SELECT COUNT(*) FROM bwhist
- WHERE fingerprint = insert_fingerprint AND date = insert_date) = 0
- THEN
- INSERT INTO bwhist (fingerprint, date, read, written, dirread,
- dirwritten)
- VALUES (insert_fingerprint, insert_date, insert_read, insert_written,
- insert_dirread, insert_dirwritten);
- ELSE
- BEGIN
- UPDATE bwhist
- SET read[array_lower(insert_read, 1):
- array_upper(insert_read, 1)] = insert_read,
- written[array_lower(insert_written, 1):
- array_upper(insert_written, 1)] = insert_written,
- dirread[array_lower(insert_dirread, 1):
- array_upper(insert_dirread, 1)] = insert_dirread,
- dirwritten[array_lower(insert_dirwritten, 1):
- array_upper(insert_dirwritten, 1)] = insert_dirwritten
- WHERE fingerprint = insert_fingerprint AND date = insert_date;
- -- Updating twice is an ugly workaround for PostgreSQL bug 5840
- UPDATE bwhist
- SET read[array_lower(insert_read, 1):
- array_upper(insert_read, 1)] = insert_read,
- written[array_lower(insert_written, 1):
- array_upper(insert_written, 1)] = insert_written,
- dirread[array_lower(insert_dirread, 1):
- array_upper(insert_dirread, 1)] = insert_dirread,
- dirwritten[array_lower(insert_dirwritten, 1):
- array_upper(insert_dirwritten, 1)] = insert_dirwritten
- WHERE fingerprint = insert_fingerprint AND date = insert_date;
- END;
- END IF;
- UPDATE bwhist
- SET read_sum = array_sum(read),
- written_sum = array_sum(written),
- dirread_sum = array_sum(dirread),
- dirwritten_sum = array_sum(dirwritten)
- WHERE fingerprint = insert_fingerprint AND date = insert_date;
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
--- refresh_* functions
--- The following functions keep their corresponding aggregate tables
--- up-to-date. They should be called every time ERNIE is run, or when new
--- data is finished being added to the descriptor or statusentry tables.
--- They find what new data has been entered or updated based on the
--- updates table.
-
--- FUNCTION refresh_network_size()
-CREATE OR REPLACE FUNCTION refresh_network_size() RETURNS INTEGER AS $$
- BEGIN
-
- DELETE FROM network_size
- WHERE date IN (SELECT date FROM updates);
-
- INSERT INTO network_size
- (date, avg_running, avg_exit, avg_guard, avg_fast, avg_stable)
- SELECT
- DATE(validafter) AS date,
- COUNT(*) / relay_statuses_per_day.count AS avg_running,
- SUM(CASE WHEN isexit IS TRUE THEN 1 ELSE 0 END)
- / relay_statuses_per_day.count AS avg_exit,
- SUM(CASE WHEN isguard IS TRUE THEN 1 ELSE 0 END)
- / relay_statuses_per_day.count AS avg_guard,
- SUM(CASE WHEN isfast IS TRUE THEN 1 ELSE 0 END)
- / relay_statuses_per_day.count AS avg_fast,
- SUM(CASE WHEN isstable IS TRUE THEN 1 ELSE 0 END)
- / relay_statuses_per_day.count AS avg_stable
- FROM statusentry
- JOIN relay_statuses_per_day
- ON DATE(validafter) = relay_statuses_per_day.date
- WHERE isrunning = TRUE
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
- AND DATE(validafter) IN (SELECT date FROM updates)
- GROUP BY DATE(validafter), relay_statuses_per_day.count;
-
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_network_size_hour()
-CREATE OR REPLACE FUNCTION refresh_network_size_hour() RETURNS INTEGER AS $$
- BEGIN
-
- DELETE FROM network_size_hour
- WHERE DATE(validafter) IN (SELECT date FROM updates);
-
- INSERT INTO network_size_hour
- (validafter, avg_running, avg_exit, avg_guard, avg_fast, avg_stable)
- SELECT validafter, COUNT(*) AS avg_running,
- SUM(CASE WHEN isexit IS TRUE THEN 1 ELSE 0 END) AS avg_exit,
- SUM(CASE WHEN isguard IS TRUE THEN 1 ELSE 0 END) AS avg_guard,
- SUM(CASE WHEN isfast IS TRUE THEN 1 ELSE 0 END) AS avg_fast,
- SUM(CASE WHEN isstable IS TRUE THEN 1 ELSE 0 END) AS avg_stable
- FROM statusentry
- WHERE isrunning = TRUE
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
- AND DATE(validafter) IN (SELECT date FROM updates)
- GROUP BY validafter;
-
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_relay_platforms()
-CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$
- BEGIN
-
- DELETE FROM relay_platforms
- WHERE date IN (SELECT date FROM updates);
-
- INSERT INTO relay_platforms
- (date, avg_linux, avg_darwin, avg_bsd, avg_windows, avg_other)
- SELECT DATE(validafter),
- SUM(CASE WHEN platform LIKE '%Linux%' THEN 1 ELSE 0 END) /
- relay_statuses_per_day.count AS avg_linux,
- SUM(CASE WHEN platform LIKE '%Darwin%' THEN 1 ELSE 0 END) /
- relay_statuses_per_day.count AS avg_darwin,
- SUM(CASE WHEN platform LIKE '%BSD%' THEN 1 ELSE 0 END) /
- relay_statuses_per_day.count AS avg_bsd,
- SUM(CASE WHEN platform LIKE '%Windows%' THEN 1 ELSE 0 END) /
- relay_statuses_per_day.count AS avg_windows,
- SUM(CASE WHEN platform NOT LIKE '%Windows%'
- AND platform NOT LIKE '%Darwin%'
- AND platform NOT LIKE '%BSD%'
- AND platform NOT LIKE '%Linux%' THEN 1 ELSE 0 END) /
- relay_statuses_per_day.count AS avg_other
- FROM descriptor RIGHT JOIN statusentry
- ON statusentry.descriptor = descriptor.descriptor
- JOIN relay_statuses_per_day
- ON DATE(validafter) = relay_statuses_per_day.date
- WHERE isrunning = TRUE
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
- AND DATE(validafter) IN (SELECT date FROM updates)
- AND DATE(relay_statuses_per_day.date) >=
- (SELECT MIN(date) FROM updates)
- AND DATE(relay_statuses_per_day.date) <=
- (SELECT MAX(date) FROM updates)
- AND DATE(relay_statuses_per_day.date) IN
- (SELECT date FROM updates)
- GROUP BY DATE(validafter), relay_statuses_per_day.count;
-
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_relay_versions()
-CREATE OR REPLACE FUNCTION refresh_relay_versions() RETURNS INTEGER AS $$
- BEGIN
-
- DELETE FROM relay_versions
- WHERE date IN (SELECT date FROM updates);
-
- INSERT INTO relay_versions
- (date, version, relays)
- SELECT DATE(validafter), SUBSTRING(platform, 5, 5) AS version,
- COUNT(*) / relay_statuses_per_day.count AS relays
- FROM descriptor RIGHT JOIN statusentry
- ON descriptor.descriptor = statusentry.descriptor
- JOIN relay_statuses_per_day
- ON DATE(validafter) = relay_statuses_per_day.date
- WHERE isrunning = TRUE
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
- AND DATE(validafter) IN (SELECT date FROM updates)
- AND DATE(relay_statuses_per_day.date) >=
- (SELECT MIN(date) FROM updates)
- AND DATE(relay_statuses_per_day.date) <=
- (SELECT MAX(date) FROM updates)
- AND DATE(relay_statuses_per_day.date) IN
- (SELECT date FROM updates)
- AND platform IS NOT NULL
- GROUP BY 1, 2, relay_statuses_per_day.count;
-
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_total_bandwidth()
--- This keeps the table total_bandwidth up-to-date when necessary.
-CREATE OR REPLACE FUNCTION refresh_total_bandwidth() RETURNS INTEGER AS $$
- BEGIN
-
- DELETE FROM total_bandwidth
- WHERE date IN (SELECT date FROM updates);
-
- INSERT INTO total_bandwidth
- (bwavg, bwburst, bwobserved, bwadvertised, date)
- SELECT (SUM(bandwidthavg)
- / relay_statuses_per_day.count)::BIGINT AS bwavg,
- (SUM(bandwidthburst)
- / relay_statuses_per_day.count)::BIGINT AS bwburst,
- (SUM(bandwidthobserved)
- / relay_statuses_per_day.count)::BIGINT AS bwobserved,
- (SUM(LEAST(bandwidthavg, bandwidthobserved))
- / relay_statuses_per_day.count)::BIGINT AS bwadvertised,
- DATE(validafter)
- FROM descriptor RIGHT JOIN statusentry
- ON descriptor.descriptor = statusentry.descriptor
- JOIN relay_statuses_per_day
- ON DATE(validafter) = relay_statuses_per_day.date
- WHERE isrunning = TRUE
- AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
- AND DATE(validafter) IN (SELECT date FROM updates)
- AND DATE(relay_statuses_per_day.date) >=
- (SELECT MIN(date) FROM updates)
- AND DATE(relay_statuses_per_day.date) <=
- (SELECT MAX(date) FROM updates)
- AND DATE(relay_statuses_per_day.date) IN
- (SELECT date FROM updates)
- GROUP BY DATE(validafter), relay_statuses_per_day.count;
-
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
-CREATE OR REPLACE FUNCTION refresh_total_bwhist() RETURNS INTEGER AS $$
- BEGIN
- DELETE FROM total_bwhist WHERE date IN (SELECT date FROM updates);
- INSERT INTO total_bwhist (date, read, written)
- SELECT date, SUM(read_sum) AS read, SUM(written_sum) AS written
- FROM bwhist
- WHERE date >= (SELECT MIN(date) FROM updates)
- AND date <= (SELECT MAX(date) FROM updates)
- AND date IN (SELECT date FROM updates)
- GROUP BY date;
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
--- FUNCTION refresh_user_stats()
--- This function refreshes our user statistics by weighting reported
--- directory request statistics of directory mirrors with bandwidth
--- histories.
-CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
- BEGIN
- -- Start by deleting user statistics of the dates we're about to
- -- regenerate.
- DELETE FROM user_stats WHERE date IN (SELECT date FROM updates);
- -- Now insert new user statistics.
- INSERT INTO user_stats (date, country, r, dw, dr, drw, drr, bw, br, bwd,
- brd, bwr, brr, bwdr, brdr, bwp, brp, bwn, brn)
- SELECT
- -- We want to learn about total requests by date and country.
- dirreq_stats_by_country.date AS date,
- dirreq_stats_by_country.country AS country,
- dirreq_stats_by_country.r AS r,
- -- In order to weight the reported directory requests, we're
- -- counting bytes of relays (except directory authorities)
- -- matching certain criteria: whether or not they are reporting
- -- directory requests, whether or not they are reporting
- -- directory bytes, and whether their directory port is open or
- -- closed.
- SUM(CASE WHEN authority IS NOT NULL
- THEN NULL ELSE dirwritten END) AS dw,
- SUM(CASE WHEN authority IS NOT NULL
- THEN NULL ELSE dirread END) AS dr,
- SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
- THEN NULL ELSE dirwritten END) AS dwr,
- SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
- THEN NULL ELSE dirread END) AS drr,
- SUM(CASE WHEN authority IS NOT NULL
- THEN NULL ELSE written END) AS bw,
- SUM(CASE WHEN authority IS NOT NULL
- THEN NULL ELSE read END) AS br,
- SUM(CASE WHEN dirwritten = 0 OR authority IS NOT NULL
- THEN NULL ELSE written END) AS bwd,
- SUM(CASE WHEN dirwritten = 0 OR authority IS NOT NULL
- THEN NULL ELSE read END) AS brd,
- SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
- THEN NULL ELSE written END) AS bwr,
- SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
- THEN NULL ELSE read END) AS brr,
- SUM(CASE WHEN dirwritten = 0 OR requests IS NULL
- OR authority IS NOT NULL THEN NULL ELSE written END) AS bwdr,
- SUM(CASE WHEN dirwritten = 0 OR requests IS NULL
- OR authority IS NOT NULL THEN NULL ELSE read END) AS brdr,
- SUM(CASE WHEN opendirport IS NULL OR authority IS NOT NULL
- THEN NULL ELSE written END) AS bwp,
- SUM(CASE WHEN opendirport IS NULL OR authority IS NOT NULL
- THEN NULL ELSE read END) AS brp,
- SUM(CASE WHEN opendirport IS NOT NULL OR authority IS NOT NULL
- THEN NULL ELSE written END) AS bwn,
- SUM(CASE WHEN opendirport IS NOT NULL OR authority IS NOT NULL
- THEN NULL ELSE read END) AS brn
- FROM (
- -- The first sub-select tells us the total number of directory
- -- requests per country reported by all directory mirrors.
- SELECT dirreq_stats_by_date.date AS date, country, SUM(requests) AS r
- FROM (
- SELECT fingerprint, date, country, SUM(requests) AS requests
- FROM (
- -- There are two selects here, because in most cases the directory
- -- request statistics cover two calendar dates.
- SELECT LOWER(source) AS fingerprint, DATE(statsend) AS date,
- country, FLOOR(requests * (CASE
- WHEN EXTRACT(EPOCH FROM DATE(statsend)) >
- EXTRACT(EPOCH FROM statsend) - seconds
- THEN EXTRACT(EPOCH FROM statsend) -
- EXTRACT(EPOCH FROM DATE(statsend))
- ELSE seconds END) / seconds) AS requests
- FROM dirreq_stats
- UNION
- SELECT LOWER(source) AS fingerprint, DATE(statsend) - 1 AS date,
- country, FLOOR(requests *
- (EXTRACT(EPOCH FROM DATE(statsend)) -
- EXTRACT(EPOCH FROM statsend) + seconds)
- / seconds) AS requests
- FROM dirreq_stats
- WHERE EXTRACT(EPOCH FROM DATE(statsend)) -
- EXTRACT(EPOCH FROM statsend) + seconds > 0
- ) dirreq_stats_split
- GROUP BY 1, 2, 3
- ) dirreq_stats_by_date
- -- We're only interested in requests by directory mirrors, not
- -- directory authorities, so we exclude all relays with the Authority
- -- flag.
- RIGHT JOIN (
- SELECT fingerprint, DATE(validafter) AS date
- FROM statusentry
- WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
- AND DATE(validafter) IN (SELECT date FROM updates)
- AND isauthority IS FALSE
- GROUP BY 1, 2
- ) statusentry_dirmirrors
- ON dirreq_stats_by_date.fingerprint =
- statusentry_dirmirrors.fingerprint
- AND dirreq_stats_by_date.date = statusentry_dirmirrors.date
- GROUP BY 1, 2
- ) dirreq_stats_by_country
- LEFT JOIN (
- -- In the next step, we expand the result by bandwidth histories of
- -- all relays.
- SELECT fingerprint, date, read_sum AS read, written_sum AS written,
- dirread_sum AS dirread, dirwritten_sum AS dirwritten
- FROM bwhist
- WHERE date >= (SELECT MIN(date) FROM updates)
- AND date <= (SELECT MAX(date) FROM updates)
- AND date IN (SELECT date FROM updates)
- ) bwhist_by_relay
- ON dirreq_stats_by_country.date = bwhist_by_relay.date
- LEFT JOIN (
- -- For each relay, tell how often it had an open directory port and
- -- how often it had the Authority flag assigned on a given date.
- SELECT fingerprint, DATE(validafter) AS date,
- SUM(CASE WHEN dirport > 0 THEN 1 ELSE NULL END) AS opendirport,
- SUM(CASE WHEN isauthority IS TRUE THEN 1 ELSE NULL END) AS authority
- FROM statusentry
- WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
- AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
- AND DATE(validafter) IN (SELECT date FROM updates)
- GROUP BY 1, 2
- ) statusentry_by_relay
- ON bwhist_by_relay.fingerprint = statusentry_by_relay.fingerprint
- AND bwhist_by_relay.date = statusentry_by_relay.date
- LEFT JOIN (
- -- For each relay, tell if it has reported directory request
- -- statistics on a given date. Again, we have to take into account
- -- that statistics intervals cover more than one calendar date in most
- -- cases. The exact number of requests isn't relevant here, but only
- -- whether the relay reported directory requests or not.
- SELECT fingerprint, date, 1 AS requests
- FROM (
- SELECT LOWER(source) AS fingerprint, DATE(statsend) AS date
- FROM dirreq_stats
- WHERE DATE(statsend) >= (SELECT MIN(date) FROM updates)
- AND DATE(statsend) <= (SELECT MAX(date) FROM updates)
- AND DATE(statsend) IN (SELECT date FROM updates)
- AND country = 'zy'
- UNION
- SELECT LOWER(source) AS fingerprint, DATE(statsend) - 1 AS date
- FROM dirreq_stats
- WHERE DATE(statsend) - 1 >= (SELECT MIN(date) FROM updates)
- AND DATE(statsend) - 1 <= (SELECT MAX(date) FROM updates)
- AND DATE(statsend) - 1 IN (SELECT date FROM updates)
- AND country = 'zy'
- AND EXTRACT(EPOCH FROM DATE(statsend)) -
- EXTRACT(EPOCH FROM statsend) + seconds > 0
- ) dirreq_stats_split
- GROUP BY 1, 2
- ) dirreq_stats_by_relay
- ON bwhist_by_relay.fingerprint = dirreq_stats_by_relay.fingerprint
- AND bwhist_by_relay.date = dirreq_stats_by_relay.date
- WHERE dirreq_stats_by_country.country IS NOT NULL
- -- Group by date, country, and total reported directory requests,
- -- summing up the bandwidth histories.
- GROUP BY 1, 2, 3;
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
--- non-relay statistics
--- The following tables contain pre-aggregated statistics that are not
--- based on relay descriptors or that are not yet derived from the relay
--- descriptors in the database.
-
--- TABLE bridge_network_size
--- Contains average number of running bridges.
-CREATE TABLE bridge_network_size (
- "date" DATE NOT NULL,
- avg_running INTEGER NOT NULL,
- CONSTRAINT bridge_network_size_pkey PRIMARY KEY(date)
-);
-
--- TABLE dirreq_stats
--- Contains daily users by country.
-CREATE TABLE dirreq_stats (
- source CHARACTER(40) NOT NULL,
- statsend TIMESTAMP WITHOUT TIME ZONE NOT NULL,
- seconds INTEGER NOT NULL,
- country CHARACTER(2) NOT NULL,
- requests INTEGER NOT NULL,
- CONSTRAINT dirreq_stats_pkey
- PRIMARY KEY (source, statsend, seconds, country)
-);
-
--- TABLE bridge_stats
--- Contains daily bridge users by country.
-CREATE TABLE bridge_stats (
- "date" DATE NOT NULL,
- country CHARACTER(2) NOT NULL,
- users INTEGER NOT NULL,
- CONSTRAINT bridge_stats_pkey PRIMARY KEY ("date", country)
-);
-
--- TABLE torperf_stats
--- Quantiles and medians of daily torperf results.
-CREATE TABLE torperf_stats (
- "date" DATE NOT NULL,
- source CHARACTER VARYING(32) NOT NULL,
- q1 INTEGER NOT NULL,
- md INTEGER NOT NULL,
- q3 INTEGER NOT NULL,
- timeouts INTEGER NOT NULL,
- failures INTEGER NOT NULL,
- requests INTEGER NOT NULL,
- CONSTRAINT torperf_stats_pkey PRIMARY KEY("date", source)
-);
-
--- TABLE gettor_stats
--- Packages requested from GetTor
-CREATE TABLE gettor_stats (
- "date" DATE NOT NULL,
- bundle CHARACTER VARYING(32) NOT NULL,
- downloads INTEGER NOT NULL,
- CONSTRAINT gettor_stats_pkey PRIMARY KEY("date", bundle)
-);
-
--- Refresh all statistics in the database.
-CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
- BEGIN
- DELETE FROM updates;
- INSERT INTO updates SELECT * FROM scheduled_updates;
- PERFORM refresh_relay_statuses_per_day();
- PERFORM refresh_network_size();
- PERFORM refresh_network_size_hour();
- PERFORM refresh_relay_platforms();
- PERFORM refresh_relay_versions();
- PERFORM refresh_total_bandwidth();
- PERFORM refresh_total_bwhist();
- PERFORM refresh_user_stats();
- DELETE FROM scheduled_updates WHERE id IN (SELECT id FROM updates);
- RETURN 1;
- END;
-$$ LANGUAGE plpgsql;
-
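As a usage sketch, not part of the diff above: the schema's aggregation pipeline is driven through the scheduled_updates table and the refresh_all() function, which copies the scheduled dates into updates, runs the refresh_* functions, and then clears the handled rows from scheduled_updates. A minimal sketch, assuming the schema is loaded into a PostgreSQL database; the date is only an example.

  -- Schedule one day for (re)aggregation, then run the whole pipeline.
  INSERT INTO scheduled_updates (date) VALUES ('2011-03-03');
  SELECT refresh_all();
  -- Inspect one of the aggregate tables that refresh_all() maintains.
  SELECT date, avg_running, avg_exit FROM network_size ORDER BY date;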
commit 12ead2691cc540f5d2bbd5c6918ec50fff29b188
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date: Thu Mar 3 19:46:13 2011 +0100
Add database schema.
---
db/tordir.sql | 744 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 744 insertions(+), 0 deletions(-)
diff --git a/db/tordir.sql b/db/tordir.sql
new file mode 100644
index 0000000..11e49dc
--- /dev/null
+++ b/db/tordir.sql
@@ -0,0 +1,744 @@
+-- Copyright 2010 The Tor Project
+-- See LICENSE for licensing information
+
+-- TABLE descriptor
+-- Contains all of the descriptors published by routers.
+CREATE TABLE descriptor (
+ descriptor CHARACTER(40) NOT NULL,
+ nickname CHARACTER VARYING(19) NOT NULL,
+ address CHARACTER VARYING(15) NOT NULL,
+ orport INTEGER NOT NULL,
+ dirport INTEGER NOT NULL,
+ fingerprint CHARACTER(40) NOT NULL,
+ bandwidthavg BIGINT NOT NULL,
+ bandwidthburst BIGINT NOT NULL,
+ bandwidthobserved BIGINT NOT NULL,
+ platform CHARACTER VARYING(256),
+ published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ uptime BIGINT,
+ extrainfo CHARACTER(40),
+ rawdesc BYTEA NOT NULL,
+ CONSTRAINT descriptor_pkey PRIMARY KEY (descriptor)
+);
+
+-- TABLE extrainfo
+-- Contains all of the extra-info descriptors published by the routers.
+CREATE TABLE extrainfo (
+ extrainfo CHARACTER(40) NOT NULL,
+ nickname CHARACTER VARYING(19) NOT NULL,
+ fingerprint CHARACTER(40) NOT NULL,
+ published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ rawdesc BYTEA NOT NULL,
+ CONSTRAINT extrainfo_pkey PRIMARY KEY (extrainfo)
+);
+
+-- Contains bandwidth histories reported by relays in extra-info
+-- descriptors. Each row contains the reported bandwidth in 15-minute
+-- intervals for each relay and date.
+CREATE TABLE bwhist (
+ fingerprint CHARACTER(40) NOT NULL,
+ date DATE NOT NULL,
+ read BIGINT[],
+ read_sum BIGINT,
+ written BIGINT[],
+ written_sum BIGINT,
+ dirread BIGINT[],
+ dirread_sum BIGINT,
+ dirwritten BIGINT[],
+ dirwritten_sum BIGINT,
+ CONSTRAINT bwhist_pkey PRIMARY KEY (fingerprint, date)
+);
+
+CREATE INDEX bwhist_date ON bwhist (date);
+
+-- TABLE statusentry
+-- Contains all of the consensus entries published by the directories.
+-- Each statusentry references a valid descriptor.
+CREATE TABLE statusentry (
+ validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ nickname CHARACTER VARYING(19) NOT NULL,
+ fingerprint CHARACTER(40) NOT NULL,
+ descriptor CHARACTER(40) NOT NULL,
+ published TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ address CHARACTER VARYING(15) NOT NULL,
+ orport INTEGER NOT NULL,
+ dirport INTEGER NOT NULL,
+ isauthority BOOLEAN DEFAULT FALSE NOT NULL,
+ isbadexit BOOLEAN DEFAULT FALSE NOT NULL,
+ isbaddirectory BOOLEAN DEFAULT FALSE NOT NULL,
+ isexit BOOLEAN DEFAULT FALSE NOT NULL,
+ isfast BOOLEAN DEFAULT FALSE NOT NULL,
+ isguard BOOLEAN DEFAULT FALSE NOT NULL,
+ ishsdir BOOLEAN DEFAULT FALSE NOT NULL,
+ isnamed BOOLEAN DEFAULT FALSE NOT NULL,
+ isstable BOOLEAN DEFAULT FALSE NOT NULL,
+ isrunning BOOLEAN DEFAULT FALSE NOT NULL,
+ isunnamed BOOLEAN DEFAULT FALSE NOT NULL,
+ isvalid BOOLEAN DEFAULT FALSE NOT NULL,
+ isv2dir BOOLEAN DEFAULT FALSE NOT NULL,
+ isv3dir BOOLEAN DEFAULT FALSE NOT NULL,
+ version CHARACTER VARYING(50),
+ bandwidth BIGINT,
+ ports TEXT,
+ rawdesc BYTEA NOT NULL,
+ CONSTRAINT statusentry_pkey PRIMARY KEY (validafter, fingerprint)
+);
+
+-- TABLE consensus
+-- Contains all of the consensuses published by the directories.
+CREATE TABLE consensus (
+ validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ rawdesc BYTEA NOT NULL,
+ CONSTRAINT consensus_pkey PRIMARY KEY (validafter)
+);
+
+-- TABLE vote
+-- Contains all of the votes published by the directories
+CREATE TABLE vote (
+ validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ dirsource CHARACTER(40) NOT NULL,
+ rawdesc BYTEA NOT NULL,
+ CONSTRAINT vote_pkey PRIMARY KEY (validafter, dirsource)
+);
+
+-- TABLE connbidirect
+-- Contain conn-bi-direct stats strings
+CREATE TABLE connbidirect (
+ source CHARACTER(40) NOT NULL,
+ statsend TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ seconds INTEGER NOT NULL,
+ belownum BIGINT NOT NULL,
+ readnum BIGINT NOT NULL,
+ writenum BIGINT NOT NULL,
+ bothnum BIGINT NOT NULL,
+ CONSTRAINT connbidirect_pkey PRIMARY KEY (source, statsend)
+);
+
+-- Create the various indexes we need for searching relays
+CREATE INDEX statusentry_address ON statusentry (address);
+CREATE INDEX statusentry_fingerprint ON statusentry (fingerprint);
+CREATE INDEX statusentry_nickname ON statusentry (LOWER(nickname));
+CREATE INDEX statusentry_validafter ON statusentry (validafter);
+
+-- And create an index that we use for precalculating statistics
+CREATE INDEX statusentry_descriptor ON statusentry (descriptor);
+CREATE INDEX statusentry_validafter_date ON statusentry (DATE(validafter));
+
+-- TABLE network_size
+CREATE TABLE network_size (
+ date DATE NOT NULL,
+ avg_running INTEGER NOT NULL,
+ avg_exit INTEGER NOT NULL,
+ avg_guard INTEGER NOT NULL,
+ avg_fast INTEGER NOT NULL,
+ avg_stable INTEGER NOT NULL,
+ CONSTRAINT network_size_pkey PRIMARY KEY(date)
+);
+
+-- TABLE network_size_hour
+CREATE TABLE network_size_hour (
+ validafter TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ avg_running INTEGER NOT NULL,
+ avg_exit INTEGER NOT NULL,
+ avg_guard INTEGER NOT NULL,
+ avg_fast INTEGER NOT NULL,
+ avg_stable INTEGER NOT NULL,
+ CONSTRAINT network_size_hour_pkey PRIMARY KEY(validafter)
+);
+
+-- TABLE relay_platforms
+CREATE TABLE relay_platforms (
+ date DATE NOT NULL,
+ avg_linux INTEGER NOT NULL,
+ avg_darwin INTEGER NOT NULL,
+ avg_bsd INTEGER NOT NULL,
+ avg_windows INTEGER NOT NULL,
+ avg_other INTEGER NOT NULL,
+ CONSTRAINT relay_platforms_pkey PRIMARY KEY(date)
+);
+
+-- TABLE relay_versions
+CREATE TABLE relay_versions (
+ date DATE NOT NULL,
+ version CHARACTER(5) NOT NULL,
+ relays INTEGER NOT NULL,
+ CONSTRAINT relay_versions_pkey PRIMARY KEY(date, version)
+);
+
+-- TABLE total_bandwidth
+-- Contains information for the whole network's total bandwidth which is
+-- used in the bandwidth graphs.
+CREATE TABLE total_bandwidth (
+ date DATE NOT NULL,
+ bwavg BIGINT NOT NULL,
+ bwburst BIGINT NOT NULL,
+ bwobserved BIGINT NOT NULL,
+ bwadvertised BIGINT NOT NULL,
+ CONSTRAINT total_bandwidth_pkey PRIMARY KEY(date)
+);
+
+-- TABLE total_bwhist
+-- Contains the total number of read/written and the number of dir bytes
+-- read/written by all relays in the network on a given day. The dir bytes
+-- are an estimate based on the subset of relays that count dir bytes.
+CREATE TABLE total_bwhist (
+ date DATE NOT NULL,
+ read BIGINT,
+ written BIGINT,
+ CONSTRAINT total_bwhist_pkey PRIMARY KEY(date)
+);
+
+-- TABLE user_stats
+-- Aggregate statistics on directory requests and byte histories that we
+-- use to estimate user numbers.
+CREATE TABLE user_stats (
+ date DATE NOT NULL,
+ country CHARACTER(2) NOT NULL,
+ r BIGINT,
+ dw BIGINT,
+ dr BIGINT,
+ drw BIGINT,
+ drr BIGINT,
+ bw BIGINT,
+ br BIGINT,
+ bwd BIGINT,
+ brd BIGINT,
+ bwr BIGINT,
+ brr BIGINT,
+ bwdr BIGINT,
+ brdr BIGINT,
+ bwp BIGINT,
+ brp BIGINT,
+ bwn BIGINT,
+ brn BIGINT,
+ CONSTRAINT user_stats_pkey PRIMARY KEY(date, country)
+);
+
+-- TABLE relay_statuses_per_day
+-- A helper table which is commonly used to update the tables above in the
+-- refresh_* functions.
+CREATE TABLE relay_statuses_per_day (
+ date DATE NOT NULL,
+ count INTEGER NOT NULL,
+ CONSTRAINT relay_statuses_per_day_pkey PRIMARY KEY(date)
+);
+
+-- Dates to be included in the next refresh run.
+CREATE TABLE scheduled_updates (
+ id SERIAL,
+ date DATE NOT NULL
+);
+
+-- Dates in the current refresh run. When starting a refresh run, we copy
+-- the rows from scheduled_updates here in order to delete just those
+-- lines after the refresh run. Otherwise we might forget scheduled dates
+-- that have been added during a refresh run. If this happens we're going
+-- to update these dates in the next refresh run.
+CREATE TABLE updates (
+ id INTEGER,
+ date DATE
+);
+
+CREATE LANGUAGE plpgsql;
+
+-- FUNCTION refresh_relay_statuses_per_day()
+-- Updates helper table which is used to refresh the aggregate tables.
+CREATE OR REPLACE FUNCTION refresh_relay_statuses_per_day()
+RETURNS INTEGER AS $$
+ BEGIN
+ DELETE FROM relay_statuses_per_day
+ WHERE date IN (SELECT date FROM updates);
+ INSERT INTO relay_statuses_per_day (date, count)
+ SELECT DATE(validafter) AS date, COUNT(*) AS count
+ FROM consensus
+ WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ GROUP BY DATE(validafter);
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION array_sum (BIGINT[]) RETURNS BIGINT AS $$
+ SELECT SUM($1[i])::bigint
+ FROM generate_series(array_lower($1, 1), array_upper($1, 1)) index(i);
+$$ LANGUAGE SQL;
+
+CREATE OR REPLACE FUNCTION insert_bwhist(
+ insert_fingerprint CHARACTER(40), insert_date DATE,
+ insert_read BIGINT[], insert_written BIGINT[],
+ insert_dirread BIGINT[], insert_dirwritten BIGINT[])
+ RETURNS INTEGER AS $$
+ BEGIN
+ IF (SELECT COUNT(*) FROM bwhist
+ WHERE fingerprint = insert_fingerprint AND date = insert_date) = 0
+ THEN
+ INSERT INTO bwhist (fingerprint, date, read, written, dirread,
+ dirwritten)
+ VALUES (insert_fingerprint, insert_date, insert_read, insert_written,
+ insert_dirread, insert_dirwritten);
+ ELSE
+ BEGIN
+ UPDATE bwhist
+ SET read[array_lower(insert_read, 1):
+ array_upper(insert_read, 1)] = insert_read,
+ written[array_lower(insert_written, 1):
+ array_upper(insert_written, 1)] = insert_written,
+ dirread[array_lower(insert_dirread, 1):
+ array_upper(insert_dirread, 1)] = insert_dirread,
+ dirwritten[array_lower(insert_dirwritten, 1):
+ array_upper(insert_dirwritten, 1)] = insert_dirwritten
+ WHERE fingerprint = insert_fingerprint AND date = insert_date;
+ -- Updating twice is an ugly workaround for PostgreSQL bug 5840
+ UPDATE bwhist
+ SET read[array_lower(insert_read, 1):
+ array_upper(insert_read, 1)] = insert_read,
+ written[array_lower(insert_written, 1):
+ array_upper(insert_written, 1)] = insert_written,
+ dirread[array_lower(insert_dirread, 1):
+ array_upper(insert_dirread, 1)] = insert_dirread,
+ dirwritten[array_lower(insert_dirwritten, 1):
+ array_upper(insert_dirwritten, 1)] = insert_dirwritten
+ WHERE fingerprint = insert_fingerprint AND date = insert_date;
+ END;
+ END IF;
+ UPDATE bwhist
+ SET read_sum = array_sum(read),
+ written_sum = array_sum(written),
+ dirread_sum = array_sum(dirread),
+ dirwritten_sum = array_sum(dirwritten)
+ WHERE fingerprint = insert_fingerprint AND date = insert_date;
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- refresh_* functions
+-- The following functions keep their corresponding aggregate tables
+-- up-to-date. They should be called every time ERNIE is run, or when new
+-- data is finished being added to the descriptor or statusentry tables.
+-- They find what new data has been entered or updated based on the
+-- updates table.
+
+-- FUNCTION refresh_network_size()
+CREATE OR REPLACE FUNCTION refresh_network_size() RETURNS INTEGER AS $$
+ BEGIN
+
+ DELETE FROM network_size
+ WHERE date IN (SELECT date FROM updates);
+
+ INSERT INTO network_size
+ (date, avg_running, avg_exit, avg_guard, avg_fast, avg_stable)
+ SELECT
+ DATE(validafter) AS date,
+ COUNT(*) / relay_statuses_per_day.count AS avg_running,
+ SUM(CASE WHEN isexit IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_exit,
+ SUM(CASE WHEN isguard IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_guard,
+ SUM(CASE WHEN isfast IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_fast,
+ SUM(CASE WHEN isstable IS TRUE THEN 1 ELSE 0 END)
+ / relay_statuses_per_day.count AS avg_stable
+ FROM statusentry
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE
+ AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_network_size_hour()
+CREATE OR REPLACE FUNCTION refresh_network_size_hour() RETURNS INTEGER AS $$
+ BEGIN
+
+ DELETE FROM network_size_hour
+ WHERE DATE(validafter) IN (SELECT date FROM updates);
+
+ INSERT INTO network_size_hour
+ (validafter, avg_running, avg_exit, avg_guard, avg_fast, avg_stable)
+ SELECT validafter, COUNT(*) AS avg_running,
+ SUM(CASE WHEN isexit IS TRUE THEN 1 ELSE 0 END) AS avg_exit,
+ SUM(CASE WHEN isguard IS TRUE THEN 1 ELSE 0 END) AS avg_guard,
+ SUM(CASE WHEN isfast IS TRUE THEN 1 ELSE 0 END) AS avg_fast,
+ SUM(CASE WHEN isstable IS TRUE THEN 1 ELSE 0 END) AS avg_stable
+ FROM statusentry
+ WHERE isrunning = TRUE
+ AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ GROUP BY validafter;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_relay_platforms()
+CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$
+ BEGIN
+
+ DELETE FROM relay_platforms
+ WHERE date IN (SELECT date FROM updates);
+
+ INSERT INTO relay_platforms
+ (date, avg_linux, avg_darwin, avg_bsd, avg_windows, avg_other)
+ SELECT DATE(validafter),
+ SUM(CASE WHEN platform LIKE '%Linux%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_linux,
+ SUM(CASE WHEN platform LIKE '%Darwin%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_darwin,
+ SUM(CASE WHEN platform LIKE '%BSD%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_bsd,
+ SUM(CASE WHEN platform LIKE '%Windows%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_windows,
+ SUM(CASE WHEN platform NOT LIKE '%Windows%'
+ AND platform NOT LIKE '%Darwin%'
+ AND platform NOT LIKE '%BSD%'
+ AND platform NOT LIKE '%Linux%' THEN 1 ELSE 0 END) /
+ relay_statuses_per_day.count AS avg_other
+ FROM descriptor RIGHT JOIN statusentry
+ ON statusentry.descriptor = descriptor.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE
+ AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ AND DATE(relay_statuses_per_day.date) >=
+ (SELECT MIN(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) <=
+ (SELECT MAX(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) IN
+ (SELECT date FROM updates)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_relay_versions()
+CREATE OR REPLACE FUNCTION refresh_relay_versions() RETURNS INTEGER AS $$
+ BEGIN
+
+ DELETE FROM relay_versions
+ WHERE date IN (SELECT date FROM updates);
+
+ INSERT INTO relay_versions
+ (date, version, relays)
+ SELECT DATE(validafter), SUBSTRING(platform, 5, 5) AS version,
+ COUNT(*) / relay_statuses_per_day.count AS relays
+ FROM descriptor RIGHT JOIN statusentry
+ ON descriptor.descriptor = statusentry.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE
+ AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ AND DATE(relay_statuses_per_day.date) >=
+ (SELECT MIN(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) <=
+ (SELECT MAX(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) IN
+ (SELECT date FROM updates)
+ AND platform IS NOT NULL
+ GROUP BY 1, 2, relay_statuses_per_day.count;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_total_bandwidth()
+-- This keeps the table total_bandwidth up-to-date when necessary.
+CREATE OR REPLACE FUNCTION refresh_total_bandwidth() RETURNS INTEGER AS $$
+ BEGIN
+
+ DELETE FROM total_bandwidth
+ WHERE date IN (SELECT date FROM updates);
+
+ INSERT INTO total_bandwidth
+ (bwavg, bwburst, bwobserved, bwadvertised, date)
+ SELECT (SUM(bandwidthavg)
+ / relay_statuses_per_day.count)::BIGINT AS bwavg,
+ (SUM(bandwidthburst)
+ / relay_statuses_per_day.count)::BIGINT AS bwburst,
+ (SUM(bandwidthobserved)
+ / relay_statuses_per_day.count)::BIGINT AS bwobserved,
+ (SUM(LEAST(bandwidthavg, bandwidthobserved))
+ / relay_statuses_per_day.count)::BIGINT AS bwadvertised,
+ DATE(validafter)
+ FROM descriptor RIGHT JOIN statusentry
+ ON descriptor.descriptor = statusentry.descriptor
+ JOIN relay_statuses_per_day
+ ON DATE(validafter) = relay_statuses_per_day.date
+ WHERE isrunning = TRUE
+ AND DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ AND DATE(relay_statuses_per_day.date) >=
+ (SELECT MIN(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) <=
+ (SELECT MAX(date) FROM updates)
+ AND DATE(relay_statuses_per_day.date) IN
+ (SELECT date FROM updates)
+ GROUP BY DATE(validafter), relay_statuses_per_day.count;
+
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION refresh_total_bwhist() RETURNS INTEGER AS $$
+ BEGIN
+ DELETE FROM total_bwhist WHERE date IN (SELECT date FROM updates);
+ INSERT INTO total_bwhist (date, read, written)
+ SELECT date, SUM(read_sum) AS read, SUM(written_sum) AS written
+ FROM bwhist
+ WHERE date >= (SELECT MIN(date) FROM updates)
+ AND date <= (SELECT MAX(date) FROM updates)
+ AND date IN (SELECT date FROM updates)
+ GROUP BY date;
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- FUNCTION refresh_user_stats()
+-- This function refreshes our user statistics by weighting reported
+-- directory request statistics of directory mirrors with bandwidth
+-- histories.
+CREATE OR REPLACE FUNCTION refresh_user_stats() RETURNS INTEGER AS $$
+ BEGIN
+ -- Start by deleting user statistics of the dates we're about to
+ -- regenerate.
+ DELETE FROM user_stats WHERE date IN (SELECT date FROM updates);
+ -- Now insert new user statistics.
+ INSERT INTO user_stats (date, country, r, dw, dr, drw, drr, bw, br, bwd,
+ brd, bwr, brr, bwdr, brdr, bwp, brp, bwn, brn)
+ SELECT
+ -- We want to learn about total requests by date and country.
+ dirreq_stats_by_country.date AS date,
+ dirreq_stats_by_country.country AS country,
+ dirreq_stats_by_country.r AS r,
+ -- In order to weight the reported directory requests, we're
+ -- counting bytes of relays (except directory authorities)
+ -- matching certain criteria: whether or not they are reporting
+ -- directory requests, whether or not they are reporting
+ -- directory bytes, and whether their directory port is open or
+ -- closed.
+ SUM(CASE WHEN authority IS NOT NULL
+ THEN NULL ELSE dirwritten END) AS dw,
+ SUM(CASE WHEN authority IS NOT NULL
+ THEN NULL ELSE dirread END) AS dr,
+ SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE dirwritten END) AS dwr,
+ SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE dirread END) AS drr,
+ SUM(CASE WHEN authority IS NOT NULL
+ THEN NULL ELSE written END) AS bw,
+ SUM(CASE WHEN authority IS NOT NULL
+ THEN NULL ELSE read END) AS br,
+ SUM(CASE WHEN dirwritten = 0 OR authority IS NOT NULL
+ THEN NULL ELSE written END) AS bwd,
+ SUM(CASE WHEN dirwritten = 0 OR authority IS NOT NULL
+ THEN NULL ELSE read END) AS brd,
+ SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE written END) AS bwr,
+ SUM(CASE WHEN requests IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE read END) AS brr,
+ SUM(CASE WHEN dirwritten = 0 OR requests IS NULL
+ OR authority IS NOT NULL THEN NULL ELSE written END) AS bwdr,
+ SUM(CASE WHEN dirwritten = 0 OR requests IS NULL
+ OR authority IS NOT NULL THEN NULL ELSE read END) AS brdr,
+ SUM(CASE WHEN opendirport IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE written END) AS bwp,
+ SUM(CASE WHEN opendirport IS NULL OR authority IS NOT NULL
+ THEN NULL ELSE read END) AS brp,
+ SUM(CASE WHEN opendirport IS NOT NULL OR authority IS NOT NULL
+ THEN NULL ELSE written END) AS bwn,
+ SUM(CASE WHEN opendirport IS NOT NULL OR authority IS NOT NULL
+ THEN NULL ELSE read END) AS brn
+ FROM (
+ -- The first sub-select tells us the total number of directory
+ -- requests per country reported by all directory mirrors.
+ SELECT dirreq_stats_by_date.date AS date, country, SUM(requests) AS r
+ FROM (
+ SELECT fingerprint, date, country, SUM(requests) AS requests
+ FROM (
+ -- There are two selects here, because in most cases the directory
+ -- request statistics cover two calendar dates.
+ SELECT LOWER(source) AS fingerprint, DATE(statsend) AS date,
+ country, FLOOR(requests * (CASE
+ WHEN EXTRACT(EPOCH FROM DATE(statsend)) >
+ EXTRACT(EPOCH FROM statsend) - seconds
+ THEN EXTRACT(EPOCH FROM statsend) -
+ EXTRACT(EPOCH FROM DATE(statsend))
+ ELSE seconds END) / seconds) AS requests
+ FROM dirreq_stats
+ UNION
+ SELECT LOWER(source) AS fingerprint, DATE(statsend) - 1 AS date,
+ country, FLOOR(requests *
+ (EXTRACT(EPOCH FROM DATE(statsend)) -
+ EXTRACT(EPOCH FROM statsend) + seconds)
+ / seconds) AS requests
+ FROM dirreq_stats
+ WHERE EXTRACT(EPOCH FROM DATE(statsend)) -
+ EXTRACT(EPOCH FROM statsend) + seconds > 0
+ ) dirreq_stats_split
+ GROUP BY 1, 2, 3
+ ) dirreq_stats_by_date
+ -- We're only interested in requests by directory mirrors, not
+ -- directory authorities, so we exclude all relays with the Authority
+ -- flag.
+ RIGHT JOIN (
+ SELECT fingerprint, DATE(validafter) AS date
+ FROM statusentry
+ WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ AND isauthority IS FALSE
+ GROUP BY 1, 2
+ ) statusentry_dirmirrors
+ ON dirreq_stats_by_date.fingerprint =
+ statusentry_dirmirrors.fingerprint
+ AND dirreq_stats_by_date.date = statusentry_dirmirrors.date
+ GROUP BY 1, 2
+ ) dirreq_stats_by_country
+ LEFT JOIN (
+ -- In the next step, we expand the result by bandwidth histories of
+ -- all relays.
+ SELECT fingerprint, date, read_sum AS read, written_sum AS written,
+ dirread_sum AS dirread, dirwritten_sum AS dirwritten
+ FROM bwhist
+ WHERE date >= (SELECT MIN(date) FROM updates)
+ AND date <= (SELECT MAX(date) FROM updates)
+ AND date IN (SELECT date FROM updates)
+ ) bwhist_by_relay
+ ON dirreq_stats_by_country.date = bwhist_by_relay.date
+ LEFT JOIN (
+ -- For each relay, tell how often it had an open directory port and
+ -- how often it had the Authority flag assigned on a given date.
+ SELECT fingerprint, DATE(validafter) AS date,
+ SUM(CASE WHEN dirport > 0 THEN 1 ELSE NULL END) AS opendirport,
+ SUM(CASE WHEN isauthority IS TRUE THEN 1 ELSE NULL END) AS authority
+ FROM statusentry
+ WHERE DATE(validafter) >= (SELECT MIN(date) FROM updates)
+ AND DATE(validafter) <= (SELECT MAX(date) FROM updates)
+ AND DATE(validafter) IN (SELECT date FROM updates)
+ GROUP BY 1, 2
+ ) statusentry_by_relay
+ ON bwhist_by_relay.fingerprint = statusentry_by_relay.fingerprint
+ AND bwhist_by_relay.date = statusentry_by_relay.date
+ LEFT JOIN (
+ -- For each relay, tell if it has reported directory request
+ -- statistics on a given date. Again, we have to take into account
+ -- that statistics intervals cover more than one calendar date in most
+ -- cases. The exact number of requests isn't relevant here, but only
+ -- whether the relay reported directory requests or not.
+ SELECT fingerprint, date, 1 AS requests
+ FROM (
+ SELECT LOWER(source) AS fingerprint, DATE(statsend) AS date
+ FROM dirreq_stats
+ WHERE DATE(statsend) >= (SELECT MIN(date) FROM updates)
+ AND DATE(statsend) <= (SELECT MAX(date) FROM updates)
+ AND DATE(statsend) IN (SELECT date FROM updates)
+ AND country = 'zy'
+ UNION
+ SELECT LOWER(source) AS fingerprint, DATE(statsend) - 1 AS date
+ FROM dirreq_stats
+ WHERE DATE(statsend) - 1 >= (SELECT MIN(date) FROM updates)
+ AND DATE(statsend) - 1 <= (SELECT MAX(date) FROM updates)
+ AND DATE(statsend) - 1 IN (SELECT date FROM updates)
+ AND country = 'zy'
+ AND EXTRACT(EPOCH FROM DATE(statsend)) -
+ EXTRACT(EPOCH FROM statsend) + seconds > 0
+ ) dirreq_stats_split
+ GROUP BY 1, 2
+ ) dirreq_stats_by_relay
+ ON bwhist_by_relay.fingerprint = dirreq_stats_by_relay.fingerprint
+ AND bwhist_by_relay.date = dirreq_stats_by_relay.date
+ WHERE dirreq_stats_by_country.country IS NOT NULL
+ -- Group by date, country, and total reported directory requests,
+ -- summing up the bandwidth histories.
+ GROUP BY 1, 2, 3;
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
+-- non-relay statistics
+-- The following tables contain pre-aggregated statistics that are not
+-- based on relay descriptors or that are not yet derived from the relay
+-- descriptors in the database.
+
+-- TABLE bridge_network_size
+-- Contains average number of running bridges.
+CREATE TABLE bridge_network_size (
+ "date" DATE NOT NULL,
+ avg_running INTEGER NOT NULL,
+ CONSTRAINT bridge_network_size_pkey PRIMARY KEY(date)
+);
+
+-- TABLE dirreq_stats
+-- Contains daily users by country.
+CREATE TABLE dirreq_stats (
+ source CHARACTER(40) NOT NULL,
+ statsend TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+ seconds INTEGER NOT NULL,
+ country CHARACTER(2) NOT NULL,
+ requests INTEGER NOT NULL,
+ CONSTRAINT dirreq_stats_pkey
+ PRIMARY KEY (source, statsend, seconds, country)
+);
+
+-- TABLE bridge_stats
+-- Contains daily bridge users by country.
+CREATE TABLE bridge_stats (
+ "date" DATE NOT NULL,
+ country CHARACTER(2) NOT NULL,
+ users INTEGER NOT NULL,
+ CONSTRAINT bridge_stats_pkey PRIMARY KEY ("date", country)
+);
+
+-- TABLE torperf_stats
+-- Quantiles and medians of daily torperf results.
+CREATE TABLE torperf_stats (
+ "date" DATE NOT NULL,
+ source CHARACTER VARYING(32) NOT NULL,
+ q1 INTEGER NOT NULL,
+ md INTEGER NOT NULL,
+ q3 INTEGER NOT NULL,
+ timeouts INTEGER NOT NULL,
+ failures INTEGER NOT NULL,
+ requests INTEGER NOT NULL,
+ CONSTRAINT torperf_stats_pkey PRIMARY KEY("date", source)
+);
+
+-- TABLE gettor_stats
+-- Packages requested from GetTor
+CREATE TABLE gettor_stats (
+ "date" DATE NOT NULL,
+ bundle CHARACTER VARYING(32) NOT NULL,
+ downloads INTEGER NOT NULL,
+ CONSTRAINT gettor_stats_pkey PRIMARY KEY("date", bundle)
+);
+
+-- Refresh all statistics in the database.
+CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
+ BEGIN
+ DELETE FROM updates;
+ INSERT INTO updates SELECT * FROM scheduled_updates;
+ PERFORM refresh_relay_statuses_per_day();
+ PERFORM refresh_network_size();
+ PERFORM refresh_network_size_hour();
+ PERFORM refresh_relay_platforms();
+ PERFORM refresh_relay_versions();
+ PERFORM refresh_total_bandwidth();
+ PERFORM refresh_total_bwhist();
+ PERFORM refresh_user_stats();
+ DELETE FROM scheduled_updates WHERE id IN (SELECT id FROM updates);
+ RETURN 1;
+ END;
+$$ LANGUAGE plpgsql;
+
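Similarly, a hedged call sketch for the insert_bwhist() helper defined above, which inserts or updates one relay's per-day bandwidth arrays and then fills the *_sum columns via array_sum(). The fingerprint and byte values below are made up for illustration.

  -- Hypothetical 40-character fingerprint and 15-minute interval values.
  SELECT insert_bwhist(
    'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA',  -- placeholder fingerprint
    DATE '2011-03-03',
    ARRAY[1024, 2048]::BIGINT[],   -- read bytes
    ARRAY[512, 4096]::BIGINT[],    -- written bytes
    ARRAY[0, 128]::BIGINT[],       -- dirread bytes
    ARRAY[0, 256]::BIGINT[]);      -- dirwritten bytes
  -- array_sum() is the helper used to fill the *_sum columns:
  SELECT array_sum(ARRAY[1024, 2048]::BIGINT[]);  -- 3072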

03 Mar '11
commit e0934bf15e83c8020351c890b6efc7c78f4e3492
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date: Thu Mar 3 18:24:19 2011 +0100
Import GetTor stats into database.
---
config.template | 6 +
src/org/torproject/ernie/cron/Configuration.java | 13 ++
src/org/torproject/ernie/cron/GetTorProcessor.java | 124 ++++++++++++++++++++
src/org/torproject/ernie/cron/Main.java | 7 +
4 files changed, 150 insertions(+), 0 deletions(-)
diff --git a/config.template b/config.template
index 4a5588a..fd37f4d 100644
--- a/config.template
+++ b/config.template
@@ -48,4 +48,10 @@
#
## Relative path to directory to import torperf results from
#TorperfDirectory torperf/
+#
+## Process GetTor stats and import them into the database
+#ProcessGetTorStats 0
+#
+## Relative path to directory where to find GetTor stats
+#GetTorDirectory gettor/
diff --git a/src/org/torproject/ernie/cron/Configuration.java b/src/org/torproject/ernie/cron/Configuration.java
index dd09bae..e2c630b 100644
--- a/src/org/torproject/ernie/cron/Configuration.java
+++ b/src/org/torproject/ernie/cron/Configuration.java
@@ -28,6 +28,8 @@ public class Configuration {
private boolean writeBridgeStats = false;
private boolean importWriteTorperfStats = false;
private String torperfDirectory = "torperf/";
+ private boolean processGetTorStats = false;
+ private String getTorDirectory = "gettor/";
public Configuration() {
/* Initialize logger. */
@@ -82,6 +84,11 @@ public class Configuration {
line.split(" ")[1]) != 0;
} else if (line.startsWith("TorperfDirectory")) {
this.torperfDirectory = line.split(" ")[1];
+ } else if (line.startsWith("ProcessGetTorStats")) {
+ this.processGetTorStats = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("GetTorDirectory")) {
+ this.getTorDirectory = line.split(" ")[1];
} else {
logger.severe("Configuration file contains unrecognized "
+ "configuration key in line '" + line + "'! Exiting!");
@@ -149,5 +156,11 @@ public class Configuration {
public String getTorperfDirectory() {
return this.torperfDirectory;
}
+ public boolean getProcessGetTorStats() {
+ return this.processGetTorStats;
+ }
+ public String getGetTorDirectory() {
+ return this.getTorDirectory;
+ }
}
diff --git a/src/org/torproject/ernie/cron/GetTorProcessor.java b/src/org/torproject/ernie/cron/GetTorProcessor.java
new file mode 100644
index 0000000..a6ddf11
--- /dev/null
+++ b/src/org/torproject/ernie/cron/GetTorProcessor.java
@@ -0,0 +1,124 @@
+/* Copyright 2011 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.cron;
+
+import java.io.*;
+import java.sql.*;
+import java.util.*;
+import java.util.logging.*;
+
+public class GetTorProcessor {
+ public GetTorProcessor(File getTorDirectory, String connectionURL) {
+
+ Logger logger = Logger.getLogger(GetTorProcessor.class.getName());
+
+ /* Parse stats file. */
+ File getTorFile = new File(getTorDirectory, "gettor_stats.txt");
+ if (!getTorFile.exists() || getTorFile.isDirectory()) {
+ logger.warning("Could not read GetTor stats");
+ return;
+ }
+ SortedSet<String> columns = new TreeSet<String>();
+ SortedMap<String, Map<String, Integer>> data =
+ new TreeMap<String, Map<String, Integer>>();
+ try {
+ logger.fine("Parsing GetTor stats...");
+ BufferedReader br = new BufferedReader(new FileReader(getTorFile));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(" ");
+ String date = parts[0];
+ Map<String, Integer> obs = new HashMap<String, Integer>();
+ data.put(date, obs);
+ for (int i = 2; i < parts.length; i++) {
+ String key = parts[i].split(":")[0].toLowerCase();
+ Integer value = new Integer(parts[i].split(":")[1]);
+ columns.add(key);
+ obs.put(key, value);
+ }
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed parsing GetTor stats!", e);
+ return;
+ } catch (NumberFormatException e) {
+ logger.log(Level.WARNING, "Failed parsing GetTor stats!", e);
+ return;
+ }
+
+ /* Write results to database. */
+ if (connectionURL != null) {
+ try {
+ Map<String, Integer> updateRows = new HashMap<String, Integer>(),
+ insertRows = new HashMap<String, Integer>();
+ for (Map.Entry<String, Map<String, Integer>> e :
+ data.entrySet()) {
+ String date = e.getKey();
+ Map<String, Integer> obs = e.getValue();
+ for (String column : columns) {
+ if (obs.containsKey(column)) {
+ Integer value = obs.get(column);
+ String key = date + "," + column;
+ insertRows.put(key, value);
+ }
+ }
+ }
+ Connection conn = DriverManager.getConnection(connectionURL);
+ PreparedStatement psI = conn.prepareStatement(
+ "INSERT INTO gettor_stats (downloads, date, bundle) "
+ + "VALUES (?, ?, ?)");
+ PreparedStatement psU = conn.prepareStatement(
+ "UPDATE gettor_stats SET downloads = ? "
+ + "WHERE date = ? AND bundle = ?");
+ conn.setAutoCommit(false);
+ Statement statement = conn.createStatement();
+ ResultSet rs = statement.executeQuery(
+ "SELECT date, bundle, downloads FROM gettor_stats");
+ while (rs.next()) {
+ String date = rs.getDate(1).toString();
+ String bundle = rs.getString(2);
+ String key = date + "," + bundle;
+ if (insertRows.containsKey(key)) {
+ int insertRow = insertRows.remove(key);
+ int oldCount = rs.getInt(3);
+ if (insertRow != oldCount) {
+ updateRows.put(key, insertRow);
+ }
+ }
+ }
+ for (Map.Entry<String, Integer> e : updateRows.entrySet()) {
+ String[] keyParts = e.getKey().split(",");
+ java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
+ String bundle = keyParts[1];
+ int downloads = e.getValue();
+ psU.clearParameters();
+ psU.setLong(1, downloads);
+ psU.setDate(2, date);
+ psU.setString(3, bundle);
+ psU.executeUpdate();
+ }
+ for (Map.Entry<String, Integer> e : insertRows.entrySet()) {
+ String[] keyParts = e.getKey().split(",");
+ java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
+ String bundle = keyParts[1];
+ int downloads = e.getValue();
+ psI.clearParameters();
+ psI.setLong(1, downloads);
+ psI.setDate(2, date);
+ psI.setString(3, bundle);
+ psI.executeUpdate();
+ }
+ conn.commit();
+ conn.close();
+ } catch (SQLException e) {
+ logger.log(Level.WARNING, "Failed to add GetTor stats to "
+ + "database.", e);
+ }
+ }
+
+ logger.info("Finished processing statistics on Tor packages "
+ + "delivered by GetTor.\nLast date in statistics is "
+ + data.lastKey() + ".");
+ }
+}
+
diff --git a/src/org/torproject/ernie/cron/Main.java b/src/org/torproject/ernie/cron/Main.java
index 943d326..ec02130 100644
--- a/src/org/torproject/ernie/cron/Main.java
+++ b/src/org/torproject/ernie/cron/Main.java
@@ -111,6 +111,13 @@ public class Main {
statsDirectory, config.getRelayDescriptorDatabaseJDBC());
}
+ // Download and process GetTor stats
+ if (config.getProcessGetTorStats()) {
+ new GetTorProcessor(
+ new File(config.getGetTorDirectory()),
+ config.getRelayDescriptorDatabaseJDBC());
+ }
+
// Remove lock file
lf.releaseLock();
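The GetTorProcessor added above expects gettor_stats.txt lines that start with a date, followed by a column the parser skips, followed by bundle:count pairs. The sketch below re-runs that parsing logic on a hypothetical line; the exact file layout is only implied by the parser, so the sample values are made up.

import java.util.*;

public class GetTorLineExample {
  public static void main(String[] args) {
    // Hypothetical gettor_stats.txt line (date, skipped column,
    // bundle:count pairs); real lines come from GetTor.
    String line = "2011-03-01 42 en:10 de:3 tor-browser_en-us:7";
    String[] parts = line.split(" ");
    String date = parts[0];      // parts[1] is ignored by the parser
    Map<String, Integer> obs = new HashMap<String, Integer>();
    for (int i = 2; i < parts.length; i++) {
      String key = parts[i].split(":")[0].toLowerCase();
      int value = Integer.parseInt(parts[i].split(":")[1]);
      obs.put(key, value);
    }
    System.out.println(date + " -> " + obs);
  }
}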
03 Mar '11
commit f67c7db477740b4f13a949b32c1fec0b5a72c464
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Mar 3 18:25:31 2011 +0100
Remove GetTor stats database import.
---
config.template | 15 +--
src/org/torproject/ernie/db/Configuration.java | 39 ++----
src/org/torproject/ernie/db/GetTorDownloader.java | 101 +++++++++++++++
src/org/torproject/ernie/db/GetTorProcessor.java | 143 ---------------------
src/org/torproject/ernie/db/Main.java | 7 +-
5 files changed, 122 insertions(+), 183 deletions(-)
diff --git a/config.template b/config.template
index 72cc411..0d6743e 100644
--- a/config.template
+++ b/config.template
@@ -32,12 +32,15 @@
## Relative path to directory to import bridge descriptor snapshots from
#BridgeSnapshotsDirectory bridge-directories/
#
-## Download and process GetTor stats
-#DownloadProcessGetTorStats 0
+## Download GetTor stats
+#DownloadGetTorStats 0
#
## URL to download GetTor stats from
#GetTorStatsURL http://gettor.torproject.org:8080/~gettor/gettor_stats.txt
#
+## Relative path to directory to store GetTor stats in
+#GetTorDirectory gettor/
+#
## Download exit list and store it to disk
#DownloadExitList 0
#
@@ -49,14 +52,6 @@
## Relative path to directory to write directory archives to
#DirectoryArchivesOutputDirectory directory-archive/
#
-## Write aggregate statistics (bridges and bridge users per day, directory
-## clients per day, torperf results, packages requested from GetTor, etc.)
-## to database for later evaluation
-#WriteAggregateStatsDatabase 0
-#
-## JDBC string for relay descriptor database
-#RelayDescriptorDatabaseJDBC jdbc:postgresql://localhost/tordir?user=ernie&password=password
-#
## Write sanitized bridges to disk
#WriteSanitizedBridges 0
#
diff --git a/src/org/torproject/ernie/db/Configuration.java b/src/org/torproject/ernie/db/Configuration.java
index 4d75b59..c1cdbea 100644
--- a/src/org/torproject/ernie/db/Configuration.java
+++ b/src/org/torproject/ernie/db/Configuration.java
@@ -21,7 +21,6 @@ public class Configuration {
private boolean importDirectoryArchives = false;
private String directoryArchivesDirectory = "archives/";
private boolean keepDirectoryArchiveImportHistory = false;
- private boolean writeAggregateStatsDatabase = false;
private String relayDescriptorDatabaseJdbc =
"jdbc:postgresql://localhost/tordir?user=ernie&password=password";
private boolean writeSanitizedBridges = false;
@@ -33,9 +32,10 @@ public class Configuration {
private boolean downloadRelayDescriptors = false;
private List<String> downloadFromDirectoryAuthorities = Arrays.asList(
"86.59.21.38,194.109.206.212,80.190.246.100:8180".split(","));
- private boolean downloadProcessGetTorStats = false;
+ private boolean downloadGetTorStats = false;
private String getTorStatsUrl = "http://gettor.torproject.org:8080/"
+ "~gettor/gettor_stats.txt";
+ private String getTorDirectory = "gettor/";
private boolean downloadExitList = false;
public Configuration() {
@@ -82,11 +82,6 @@ public class Configuration {
} else if (line.startsWith("KeepDirectoryArchiveImportHistory")) {
this.keepDirectoryArchiveImportHistory = Integer.parseInt(
line.split(" ")[1]) != 0;
- } else if (line.startsWith("WriteAggregateStatsDatabase")) {
- this.writeAggregateStatsDatabase = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("RelayDescriptorDatabaseJDBC")) {
- this.relayDescriptorDatabaseJdbc = line.split(" ")[1];
} else if (line.startsWith("WriteSanitizedBridges")) {
this.writeSanitizedBridges = Integer.parseInt(
line.split(" ")[1]) != 0;
@@ -119,14 +114,16 @@ public class Configuration {
new URL("http://" + dir + "/");
this.downloadFromDirectoryAuthorities.add(dir);
}
- } else if (line.startsWith("DownloadProcessGetTorStats")) {
- this.downloadProcessGetTorStats = Integer.parseInt(
+ } else if (line.startsWith("DownloadGetTorStats")) {
+ this.downloadGetTorStats = Integer.parseInt(
line.split(" ")[1]) != 0;
} else if (line.startsWith("GetTorStatsURL")) {
String newUrl = line.split(" ")[1];
/* Test if URL has correct format. */
new URL(newUrl);
this.getTorStatsUrl = newUrl;
+ } else if (line.startsWith("GetTorDirectory")) {
+ this.getTorDirectory = line.split(" ")[1];
} else if (line.startsWith("DownloadExitList")) {
this.downloadExitList = Integer.parseInt(
line.split(" ")[1]) != 0;
@@ -158,9 +155,8 @@ public class Configuration {
/** Make some checks if configuration is valid. */
if (!this.importCachedRelayDescriptors &&
!this.importDirectoryArchives && !this.downloadRelayDescriptors &&
- !this.importBridgeSnapshots && !this.downloadProcessGetTorStats &&
+ !this.importBridgeSnapshots && !this.downloadGetTorStats &&
!this.downloadExitList && !this.writeDirectoryArchives &&
- !this.writeAggregateStatsDatabase &&
!this.writeSanitizedBridges) {
logger.warning("We have not been configured to read data from any "
+ "data source or write data to any data sink. You need to "
@@ -182,8 +178,7 @@ public class Configuration {
+ "least one data sink, but we don't have a single data source "
+ "containing relay descriptors.");
}
- if (this.importBridgeSnapshots && !(this.writeSanitizedBridges ||
- this.writeAggregateStatsDatabase)) {
+ if (this.importBridgeSnapshots && !this.writeSanitizedBridges) {
logger.warning("We are configured to import/download bridge "
+ "descriptors, but we don't have a single data sink to write "
+ "bridge descriptors to.");
@@ -193,11 +188,6 @@ public class Configuration {
+ "least one data sink, but we don't have a single data source "
+ "containing bridge descriptors.");
}
- if (this.downloadProcessGetTorStats &&
- !this.writeAggregateStatsDatabase) {
- logger.warning("We are configured to download GetTor statistics, "
- + "but not to import them into the database.");
- }
}
public boolean getWriteDirectoryArchives() {
return this.writeDirectoryArchives;
@@ -220,12 +210,6 @@ public class Configuration {
public boolean getKeepDirectoryArchiveImportHistory() {
return this.keepDirectoryArchiveImportHistory;
}
- public boolean getWriteAggregateStatsDatabase() {
- return this.writeAggregateStatsDatabase;
- }
- public String getRelayDescriptorDatabaseJDBC() {
- return this.relayDescriptorDatabaseJdbc;
- }
public boolean getWriteSanitizedBridges() {
return this.writeSanitizedBridges;
}
@@ -250,12 +234,15 @@ public class Configuration {
public List<String> getDownloadFromDirectoryAuthorities() {
return this.downloadFromDirectoryAuthorities;
}
- public boolean getDownloadProcessGetTorStats() {
- return this.downloadProcessGetTorStats;
+ public boolean getDownloadGetTorStats() {
+ return this.downloadGetTorStats;
}
public String getGetTorStatsUrl() {
return this.getTorStatsUrl;
}
+ public String getGetTorDirectory() {
+ return this.getTorDirectory;
+ }
public boolean getDownloadExitList() {
return this.downloadExitList;
}
diff --git a/src/org/torproject/ernie/db/GetTorDownloader.java b/src/org/torproject/ernie/db/GetTorDownloader.java
new file mode 100644
index 0000000..91e9f5f
--- /dev/null
+++ b/src/org/torproject/ernie/db/GetTorDownloader.java
@@ -0,0 +1,101 @@
+/* Copyright 2010 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.db;
+
+import java.io.*;
+import java.net.*;
+import java.util.*;
+import java.util.logging.*;
+
+public class GetTorDownloader {
+
+ public GetTorDownloader(String gettorStatsUrl, File getTorDirectory) {
+
+ Logger logger = Logger.getLogger(GetTorDownloader.class.getName());
+
+ File getTorFile = new File(getTorDirectory, "gettor_stats.txt");
+ SortedMap<String, String> getTorStats = new TreeMap<String, String>();
+
+ if (getTorFile.exists() && !getTorFile.isDirectory()) {
+ try {
+ logger.fine("Reading local gettor_stats.txt file...");
+ BufferedReader br = new BufferedReader(new FileReader(
+ getTorFile));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ String date = line.split(" ")[0];
+ getTorStats.put(date, line);
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed parsing local GetTor stats!",
+ e);
+ return;
+ }
+ }
+
+ String unparsed = null;
+ try {
+ logger.fine("Downloading GetTor stats...");
+ URL u = new URL(gettorStatsUrl);
+ HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+ huc.setRequestMethod("GET");
+ huc.connect();
+ int response = huc.getResponseCode();
+ if (response == 200) {
+ BufferedInputStream in = new BufferedInputStream(
+ huc.getInputStream());
+ StringBuilder sb = new StringBuilder();
+ int len;
+ byte[] data = new byte[1024];
+ while ((len = in.read(data, 0, 1024)) >= 0) {
+ sb.append(new String(data, 0, len));
+ }
+ in.close();
+ unparsed = sb.toString();
+ }
+ logger.fine("Finished downloading GetTor stats.");
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed downloading GetTor stats", e);
+ return;
+ }
+
+ try {
+ logger.fine("Parsing downloaded GetTor stats...");
+ BufferedReader br = new BufferedReader(new StringReader(unparsed));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ String date = line.split(" ")[0];
+ getTorStats.put(date, line);
+ }
+ br.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed parsing downloaded GetTor stats!",
+ e);
+ return;
+ }
+
+ try {
+ logger.fine("Writing GetTor stats to local gettor_stats.txt "
+ + "file...");
+ if (!getTorDirectory.exists()) {
+ getTorDirectory.mkdirs();
+ }
+ BufferedWriter bw = new BufferedWriter(new FileWriter(getTorFile));
+ for (String line : getTorStats.values()) {
+ bw.write(line + "\n");
+ }
+ bw.close();
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed writing GetTor stats to local "
+ + "gettor_stats.txt file", e);
+ return;
+ }
+
+ logger.info("Finished downloading and processing statistics on Tor "
+ + "packages delivered by GetTor.\nDownloaded " + unparsed.length()
+ + " bytes. Last date in statistics is " + getTorStats.lastKey()
+ + ".");
+ }
+}
+
diff --git a/src/org/torproject/ernie/db/GetTorProcessor.java b/src/org/torproject/ernie/db/GetTorProcessor.java
deleted file mode 100644
index 6d9a935..0000000
--- a/src/org/torproject/ernie/db/GetTorProcessor.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/* Copyright 2010 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.*;
-import java.net.*;
-import java.sql.*;
-import java.util.*;
-import java.util.logging.*;
-
-public class GetTorProcessor {
- public GetTorProcessor(String gettorStatsUrl, String connectionURL) {
- Logger logger = Logger.getLogger(GetTorProcessor.class.getName());
- String unparsed = null;
- try {
- logger.fine("Downloading gettor stats...");
- URL u = new URL(gettorStatsUrl);
- HttpURLConnection huc = (HttpURLConnection) u.openConnection();
- huc.setRequestMethod("GET");
- huc.connect();
- int response = huc.getResponseCode();
- if (response == 200) {
- BufferedInputStream in = new BufferedInputStream(
- huc.getInputStream());
- StringBuilder sb = new StringBuilder();
- int len;
- byte[] data = new byte[1024];
- while ((len = in.read(data, 0, 1024)) >= 0) {
- sb.append(new String(data, 0, len));
- }
- in.close();
- unparsed = sb.toString();
- }
- logger.fine("Finished downloading gettor stats.");
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed downloading gettor stats", e);
- return;
- }
-
- SortedSet<String> columns = new TreeSet<String>();
- SortedMap<String, Map<String, Integer>> data =
- new TreeMap<String, Map<String, Integer>>();
- try {
- logger.fine("Parsing downloaded gettor stats...");
- BufferedReader br = new BufferedReader(new StringReader(unparsed));
- String line = null;
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(" ");
- String date = parts[0];
- Map<String, Integer> obs = new HashMap<String, Integer>();
- data.put(date, obs);
- for (int i = 2; i < parts.length; i++) {
- String key = parts[i].split(":")[0].toLowerCase();
- Integer value = new Integer(parts[i].split(":")[1]);
- columns.add(key);
- obs.put(key, value);
- }
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed parsing gettor stats!", e);
- return;
- } catch (NumberFormatException e) {
- logger.log(Level.WARNING, "Failed parsing gettor stats!", e);
- return;
- }
-
- /* Write results to database. */
- if (connectionURL != null) {
- try {
- Map<String, Integer> updateRows = new HashMap<String, Integer>(),
- insertRows = new HashMap<String, Integer>();
- for (Map.Entry<String, Map<String, Integer>> e :
- data.entrySet()) {
- String date = e.getKey();
- Map<String, Integer> obs = e.getValue();
- for (String column : columns) {
- if (obs.containsKey(column)) {
- Integer value = obs.get(column);
- String key = date + "," + column;
- insertRows.put(key, value);
- }
- }
- }
- Connection conn = DriverManager.getConnection(connectionURL);
- PreparedStatement psI = conn.prepareStatement(
- "INSERT INTO gettor_stats (downloads, date, bundle) "
- + "VALUES (?, ?, ?)");
- PreparedStatement psU = conn.prepareStatement(
- "UPDATE gettor_stats SET downloads = ? "
- + "WHERE date = ? AND bundle = ?");
- conn.setAutoCommit(false);
- Statement statement = conn.createStatement();
- ResultSet rs = statement.executeQuery(
- "SELECT date, bundle, downloads FROM gettor_stats");
- while (rs.next()) {
- String date = rs.getDate(1).toString();
- String bundle = rs.getString(2);
- String key = date + "," + bundle;
- if (insertRows.containsKey(key)) {
- int insertRow = insertRows.remove(key);
- int oldCount = rs.getInt(3);
- if (insertRow != oldCount) {
- updateRows.put(key, insertRow);
- }
- }
- }
- for (Map.Entry<String, Integer> e : updateRows.entrySet()) {
- String[] keyParts = e.getKey().split(",");
- java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
- String bundle = keyParts[1];
- int downloads = e.getValue();
- psU.clearParameters();
- psU.setLong(1, downloads);
- psU.setDate(2, date);
- psU.setString(3, bundle);
- psU.executeUpdate();
- }
- for (Map.Entry<String, Integer> e : insertRows.entrySet()) {
- String[] keyParts = e.getKey().split(",");
- java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
- String bundle = keyParts[1];
- int downloads = e.getValue();
- psI.clearParameters();
- psI.setLong(1, downloads);
- psI.setDate(2, date);
- psI.setString(3, bundle);
- psI.executeUpdate();
- }
- conn.commit();
- conn.close();
- } catch (SQLException e) {
- logger.log(Level.WARNING, "Failed to add GetTor stats to "
- + "database.", e);
- }
- }
-
- logger.info("Finished downloading and processing statistics on Tor "
- + "packages delivered by GetTor.\nDownloaded " + unparsed.length()
- + " bytes. Last date in statistics is " + data.lastKey() + ".");
- }
-}
-
diff --git a/src/org/torproject/ernie/db/Main.java b/src/org/torproject/ernie/db/Main.java
index e10b6c7..657cdfc 100644
--- a/src/org/torproject/ernie/db/Main.java
+++ b/src/org/torproject/ernie/db/Main.java
@@ -118,10 +118,9 @@ public class Main {
}
// Download and process GetTor stats
- if (config.getDownloadProcessGetTorStats()) {
- new GetTorProcessor(config.getGetTorStatsUrl(),
- config.getWriteAggregateStatsDatabase() ?
- config.getRelayDescriptorDatabaseJDBC() : null);
+ if (config.getDownloadGetTorStats()) {
+ new GetTorDownloader(config.getGetTorStatsUrl(),
+ new File(config.getGetTorDirectory()));
}
// Download exit list and store it to disk
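The GetTorDownloader that replaces the processor keeps one line per date: local gettor_stats.txt lines are read into a map keyed on the leading date, downloaded lines are put into the same map so they overwrite older entries for the same date, and the merged result is written back to disk. A minimal sketch of that merge step, with made-up input lines:

import java.util.*;

public class GetTorMergeExample {
  public static void main(String[] args) {
    // Hypothetical local and downloaded stats lines.
    String[] local = { "2011-03-01 41 en:9", "2011-03-02 10 en:2" };
    String[] downloaded = { "2011-03-02 12 en:4", "2011-03-03 5 en:1" };
    SortedMap<String, String> getTorStats = new TreeMap<String, String>();
    for (String line : local) {
      getTorStats.put(line.split(" ")[0], line);
    }
    for (String line : downloaded) {
      // Same date key: the downloaded line replaces the local one.
      getTorStats.put(line.split(" ")[0], line);
    }
    for (String line : getTorStats.values()) {
      System.out.println(line);
    }
  }
}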
r24300: {translation} Update Vidalia's translation workflow (translation/trunk/documentation)
by Tomas Touceda 03 Mar '11
Author: chiiph
Date: 2011-03-03 16:53:47 +0000 (Thu, 03 Mar 2011)
New Revision: 24300
Modified:
translation/trunk/documentation/howto.txt
Log:
Update Vidalia's translation workflow
Modified: translation/trunk/documentation/howto.txt
===================================================================
--- translation/trunk/documentation/howto.txt 2011-03-03 16:25:48 UTC (rev 24299)
+++ translation/trunk/documentation/howto.txt 2011-03-03 16:53:47 UTC (rev 24300)
@@ -226,40 +226,21 @@
in 'po/templates' as a template. Remember to commit the file to
SVN.
-Vidalia Installer:
+Vidalia and Vidalia Installer:
The Transifex configuration file, source file and translations can
be found here:
+ https://svn.torproject.org/vidalia/vidalia/trunk/src/vidalia/i18n/po/.
https://svn.torproject.org/vidalia/vidalia/trunk/pkg/win32/po/.
- 1. Converting Translations
+ 1. Converting Translations and creating/updating Translation Template Files
- # TODO: document how to convert translations to a useful format.
- #
- # This is all handled automatically by the build process using
- # some conversion tools and CMake macros written by Matt Edman.
- # See src/tools/{po2ts,ts2po,po2nsh,po2wxl} and the
- # VIDALIA_UPDATE_PO, VIDALIA_ADD_PO, VIDALIA_ADD_NSH and
- # VIDALIA_ADD_WXL macros in src/cmake/VidaliaMacros.cmake.
+ All the conversions from .po to .ts/.nsh/.wxl and back are handled by
+ the build script. The specific part of the make process can be triggered
+ by executing the following command in Vidalia's trunk:
- 2. Creating/Updating Translation Template Files
+ $ make i18n-update
- # TODO: document how to convert from a useful format to .pot
-
-Vidalia:
-
- The Transifex configuration file, source file and translations can
- be found here:
- https://svn.torproject.org/vidalia/vidalia/trunk/src/vidalia/i18n/po/.
-
- 1. Converting Translations
-
- # TODO: document how to convert translations to a useful format.
-
- 2. Creating/Updating Translation Template Files
-
- # TODO: document how to convert from a useful format to .pot
-
Orbot:
The Transifex configuration file, source file and translations can
r24299: {website} Link to the new FAQ page, not the wiki FAQ (website/trunk/docs/en)
by Robert Ransom 03 Mar '11
Author: rransom
Date: 2011-03-03 16:25:48 +0000 (Thu, 03 Mar 2011)
New Revision: 24299
Modified:
website/trunk/docs/en/documentation.wml
Log:
Link to the new FAQ page, not the wiki FAQ
Modified: website/trunk/docs/en/documentation.wml
===================================================================
--- website/trunk/docs/en/documentation.wml 2011-03-03 14:46:42 UTC (rev 24298)
+++ website/trunk/docs/en/documentation.wml 2011-03-03 16:25:48 UTC (rev 24299)
@@ -46,7 +46,7 @@
<li>
Our <a
- href="<wikifaq>">FAQ</a>
+ href="<page docs/faq>">FAQ</a>
covers all sorts of topics, including questions about setting up a client
or relay, concerns about anonymity attacks, why we didn't build Tor in
other ways, etc.
[torspec/master] Move exit-scanning-outline: it is superseded by 159 and the torflow paper
by nickm@torproject.org 03 Mar '11
commit 4332a69c963361c8d37ec4ddebf628a4e645b589
Author: Nick Mathewson <nickm(a)torproject.org>
Date: Thu Mar 3 09:55:24 2011 -0500
Move exit-scanning-outline: it is superseded by 159 and the torflow paper
---
proposals/ideas/old/xxx-exit-scanning-outline.txt | 44 +++++++++++++++++++++
proposals/ideas/xxx-exit-scanning-outline.txt | 44 ---------------------
2 files changed, 44 insertions(+), 44 deletions(-)
diff --git a/proposals/ideas/old/xxx-exit-scanning-outline.txt b/proposals/ideas/old/xxx-exit-scanning-outline.txt
new file mode 100644
index 0000000..d840944
--- /dev/null
+++ b/proposals/ideas/old/xxx-exit-scanning-outline.txt
@@ -0,0 +1,44 @@
+1. Scanning process
+ A. Non-HTML/JS HTTP mime types compared via SHA1 hash
+ B. Dynamic HTTP content filtered at 4 levels:
+ 1. IP change+Tor cookie utilization
+ - Tor cookies replayed with new IP in case of changes
+ 2. HTML Tag+Attribute+JS comparison
+ - Comparisons made based only on "relevant" HTML tags
+ and attributes
+ 3. HTML Tag+Attribute+JS diffing
+ - Tags, attributes and JS AST nodes that change during
+ Non-Tor fetches pruned from comparison
+ 4. URLS with > N% of node failures removed
+ - results purged from filesystem at end of scan loop
+ C. SSL scanning handles some forms of dynamic certs
+ 1. Catalogs certs for all IPs resolved locally
+ by getaddrinfo over the duration of the scan.
+ - Updated each test.
+ 2. If the domain presents a new cert for each IP, this
+ is noted on the failure result for the node
+ 3. If the same IP presents two different certs locally,
+ the cert list is first refreshed, and if it happens
+ again, discarded
+ 4. A N% node failure filter also applies
+ D. Scanner can be restarted from any point in the event
+ of scanner or system crashes, or graceful shutdown.
+ - Results+scan state pickled to filesystem continuously
+2. Cron job checks results periodically for reporting
+ A. Divide failures into three types of BadExit based on type
+ and frequency over time and incident rate
+ B. write reject lines to approved-routers for those three types:
+ 1. ID Hex based (for misconfig/network problems easily fixed)
+ 2. IP based (for content modification)
+ 3. IP+mask based (for continuous/egregious content modification)
+ C. Emails results to tor-scanners(a)freehaven.net
+3. Human Review and Appeal
+ A. ID Hex-based BadExit is meant to be possible to removed easily
+ without needing to beg us.
+ - Should this behavior be encouraged?
+ B. Optionally can reserve IP based badexits for human review
+ 1. Results are encapsulated fully on the filesystem and can be
+ reviewed without network access
+ 2. Soat has --rescan to rescan failed nodes from a data directory
+ - New set of URLs used
+
diff --git a/proposals/ideas/xxx-exit-scanning-outline.txt b/proposals/ideas/xxx-exit-scanning-outline.txt
deleted file mode 100644
index d840944..0000000
--- a/proposals/ideas/xxx-exit-scanning-outline.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-1. Scanning process
- A. Non-HTML/JS HTTP mime types compared via SHA1 hash
- B. Dynamic HTTP content filtered at 4 levels:
- 1. IP change+Tor cookie utilization
- - Tor cookies replayed with new IP in case of changes
- 2. HTML Tag+Attribute+JS comparison
- - Comparisons made based only on "relevant" HTML tags
- and attributes
- 3. HTML Tag+Attribute+JS diffing
- - Tags, attributes and JS AST nodes that change during
- Non-Tor fetches pruned from comparison
- 4. URLS with > N% of node failures removed
- - results purged from filesystem at end of scan loop
- C. SSL scanning handles some forms of dynamic certs
- 1. Catalogs certs for all IPs resolved locally
- by getaddrinfo over the duration of the scan.
- - Updated each test.
- 2. If the domain presents a new cert for each IP, this
- is noted on the failure result for the node
- 3. If the same IP presents two different certs locally,
- the cert list is first refreshed, and if it happens
- again, discarded
- 4. A N% node failure filter also applies
- D. Scanner can be restarted from any point in the event
- of scanner or system crashes, or graceful shutdown.
- - Results+scan state pickled to filesystem continuously
-2. Cron job checks results periodically for reporting
- A. Divide failures into three types of BadExit based on type
- and frequency over time and incident rate
- B. write reject lines to approved-routers for those three types:
- 1. ID Hex based (for misconfig/network problems easily fixed)
- 2. IP based (for content modification)
- 3. IP+mask based (for continuous/egregious content modification)
- C. Emails results to tor-scanners(a)freehaven.net
-3. Human Review and Appeal
- A. ID Hex-based BadExit is meant to be possible to removed easily
- without needing to beg us.
- - Should this behavior be encouraged?
- B. Optionally can reserve IP based badexits for human review
- 1. Results are encapsulated fully on the filesystem and can be
- reviewed without network access
- 2. Soat has --rescan to rescan failed nodes from a data directory
- - New set of URLs used
-
03 Mar '11
commit 9a4cc7a43d93a362af6a7311c2048cc979af7968
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Mar 3 15:40:47 2011 +0100
Remove torperf statistics from metrics-db.
---
config.template | 6 -
src/org/torproject/ernie/db/Configuration.java | 18 +-
.../torproject/ernie/db/ExitListDownloader.java | 2 +-
src/org/torproject/ernie/db/GetTorProcessor.java | 2 +-
src/org/torproject/ernie/db/Main.java | 8 -
src/org/torproject/ernie/db/TorperfProcessor.java | 346 --------------------
.../ernie/test/TorperfProcessorTest.java | 37 --
7 files changed, 4 insertions(+), 415 deletions(-)
diff --git a/config.template b/config.template
index 3477801..72cc411 100644
--- a/config.template
+++ b/config.template
@@ -32,12 +32,6 @@
## Relative path to directory to import bridge descriptor snapshots from
#BridgeSnapshotsDirectory bridge-directories/
#
-## Import torperf data, if available, and write stats to disk
-#ImportWriteTorperfStats 0
-#
-## Relative path to directory to import torperf results from
-#TorperfDirectory torperf/
-#
## Download and process GetTor stats
#DownloadProcessGetTorStats 0
#
diff --git a/src/org/torproject/ernie/db/Configuration.java b/src/org/torproject/ernie/db/Configuration.java
index 3554e23..4d75b59 100644
--- a/src/org/torproject/ernie/db/Configuration.java
+++ b/src/org/torproject/ernie/db/Configuration.java
@@ -30,8 +30,6 @@ public class Configuration {
private String sanitizedBridgesWriteDirectory = "sanitized-bridges/";
private boolean importBridgeSnapshots = false;
private String bridgeSnapshotsDirectory = "bridge-directories/";
- private boolean importWriteTorperfStats = false;
- private String torperfDirectory = "torperf/";
private boolean downloadRelayDescriptors = false;
private List<String> downloadFromDirectoryAuthorities = Arrays.asList(
"86.59.21.38,194.109.206.212,80.190.246.100:8180".split(","));
@@ -105,11 +103,6 @@ public class Configuration {
line.split(" ")[1]) != 0;
} else if (line.startsWith("BridgeSnapshotsDirectory")) {
this.bridgeSnapshotsDirectory = line.split(" ")[1];
- } else if (line.startsWith("ImportWriteTorperfStats")) {
- this.importWriteTorperfStats = Integer.parseInt(
- line.split(" ")[1]) != 0;
- } else if (line.startsWith("TorperfDirectory")) {
- this.torperfDirectory = line.split(" ")[1];
} else if (line.startsWith("DownloadRelayDescriptors")) {
this.downloadRelayDescriptors = Integer.parseInt(
line.split(" ")[1]) != 0;
@@ -165,9 +158,8 @@ public class Configuration {
/** Make some checks if configuration is valid. */
if (!this.importCachedRelayDescriptors &&
!this.importDirectoryArchives && !this.downloadRelayDescriptors &&
- !this.importBridgeSnapshots && !this.importWriteTorperfStats &&
- !this.downloadProcessGetTorStats && !this.downloadExitList &&
- !this.writeDirectoryArchives &&
+ !this.importBridgeSnapshots && !this.downloadProcessGetTorStats &&
+ !this.downloadExitList && !this.writeDirectoryArchives &&
!this.writeAggregateStatsDatabase &&
!this.writeSanitizedBridges) {
logger.warning("We have not been configured to read data from any "
@@ -252,12 +244,6 @@ public class Configuration {
public String getBridgeSnapshotsDirectory() {
return this.bridgeSnapshotsDirectory;
}
- public boolean getImportWriteTorperfStats() {
- return this.importWriteTorperfStats;
- }
- public String getTorperfDirectory() {
- return this.torperfDirectory;
- }
public boolean getDownloadRelayDescriptors() {
return this.downloadRelayDescriptors;
}
diff --git a/src/org/torproject/ernie/db/ExitListDownloader.java b/src/org/torproject/ernie/db/ExitListDownloader.java
index df7d87a..07d2cc4 100644
--- a/src/org/torproject/ernie/db/ExitListDownloader.java
+++ b/src/org/torproject/ernie/db/ExitListDownloader.java
@@ -10,7 +10,7 @@ import java.util.logging.*;
public class ExitListDownloader {
public ExitListDownloader() {
- Logger logger = Logger.getLogger(TorperfProcessor.class.getName());
+ Logger logger = Logger.getLogger(ExitListDownloader.class.getName());
try {
logger.fine("Downloading exit list...");
String exitAddressesUrl =
diff --git a/src/org/torproject/ernie/db/GetTorProcessor.java b/src/org/torproject/ernie/db/GetTorProcessor.java
index 8c5687c..6d9a935 100644
--- a/src/org/torproject/ernie/db/GetTorProcessor.java
+++ b/src/org/torproject/ernie/db/GetTorProcessor.java
@@ -10,7 +10,7 @@ import java.util.logging.*;
public class GetTorProcessor {
public GetTorProcessor(String gettorStatsUrl, String connectionURL) {
- Logger logger = Logger.getLogger(TorperfProcessor.class.getName());
+ Logger logger = Logger.getLogger(GetTorProcessor.class.getName());
String unparsed = null;
try {
logger.fine("Downloading gettor stats...");
diff --git a/src/org/torproject/ernie/db/Main.java b/src/org/torproject/ernie/db/Main.java
index 2dc0c86..e10b6c7 100644
--- a/src/org/torproject/ernie/db/Main.java
+++ b/src/org/torproject/ernie/db/Main.java
@@ -117,14 +117,6 @@ public class Main {
sbw = null;
}
- // Import and process torperf stats
- if (config.getImportWriteTorperfStats()) {
- new TorperfProcessor(new File(config.getTorperfDirectory()),
- statsDirectory,
- config.getWriteAggregateStatsDatabase() ?
- config.getRelayDescriptorDatabaseJDBC() : null);
- }
-
// Download and process GetTor stats
if (config.getDownloadProcessGetTorStats()) {
new GetTorProcessor(config.getGetTorStatsUrl(),
diff --git a/src/org/torproject/ernie/db/TorperfProcessor.java b/src/org/torproject/ernie/db/TorperfProcessor.java
deleted file mode 100644
index d6d2e53..0000000
--- a/src/org/torproject/ernie/db/TorperfProcessor.java
+++ /dev/null
@@ -1,346 +0,0 @@
-/* Copyright 2010 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db;
-
-import java.io.*;
-import java.sql.*;
-import java.text.*;
-import java.util.*;
-import java.util.logging.*;
-
-public class TorperfProcessor {
- public TorperfProcessor(File torperfDirectory, File statsDirectory,
- String connectionURL) {
-
- if (torperfDirectory == null || statsDirectory == null) {
- throw new IllegalArgumentException();
- }
-
- Logger logger = Logger.getLogger(TorperfProcessor.class.getName());
- File rawFile = new File(statsDirectory, "torperf-raw");
- File statsFile = new File(statsDirectory, "torperf-stats");
- SortedMap<String, String> rawObs = new TreeMap<String, String>();
- SortedMap<String, String> stats = new TreeMap<String, String>();
- int addedRawObs = 0;
- try {
- if (rawFile.exists()) {
- logger.fine("Reading file " + rawFile.getAbsolutePath() + "...");
- BufferedReader br = new BufferedReader(new FileReader(rawFile));
- String line = br.readLine(); // ignore header
- while ((line = br.readLine()) != null) {
- if (line.split(",").length != 4) {
- logger.warning("Corrupt line in " + rawFile.getAbsolutePath()
- + "!");
- break;
- }
- String key = line.substring(0, line.lastIndexOf(","));
- rawObs.put(key, line);
- }
- br.close();
- logger.fine("Finished reading file " + rawFile.getAbsolutePath()
- + ".");
- }
- if (statsFile.exists()) {
- logger.fine("Reading file " + statsFile.getAbsolutePath()
- + "...");
- BufferedReader br = new BufferedReader(new FileReader(statsFile));
- String line = br.readLine(); // ignore header
- while ((line = br.readLine()) != null) {
- String key = line.split(",")[0] + "," + line.split(",")[1];
- stats.put(key, line);
- }
- br.close();
- logger.fine("Finished reading file " + statsFile.getAbsolutePath()
- + ".");
- }
- if (torperfDirectory.exists()) {
- logger.fine("Importing files in " + torperfDirectory + "/...");
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(torperfDirectory);
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- filesInInputDir.add(f);
- }
- } else {
- String source = pop.getName().substring(0,
- pop.getName().indexOf("."));
- String size = pop.getName().split("-")[1];
- long receivedBytes = 1L;
- if (pop.getName().endsWith("kb.data")) {
- receivedBytes *= 1024L;
- } else if (pop.getName().endsWith("mb.data")) {
- receivedBytes *= 1024L * 1024L;
- } else {
- // not a valid .data file
- continue;
- }
- receivedBytes *= Long.parseLong(size.substring(0,
- size.length() - "xb.data".length()));
- BufferedReader br = new BufferedReader(new FileReader(pop));
- String line = null;
- SimpleDateFormat formatter =
- new SimpleDateFormat("yyyy-MM-dd,HH:mm:ss");
- formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
- while ((line = br.readLine()) != null) {
- String[] parts = line.split(" ");
- // remove defective lines as they occurred on gabelmoo
- if (parts.length == 20 && parts[0].length() == 10) {
- long startSec = Long.parseLong(parts[0]);
- String dateTime = formatter.format(startSec * 1000L);
- long completeMillis = Long.parseLong(parts[16])
- * 1000L + Long.parseLong(parts[17]) / 1000L
- - Long.parseLong(parts[0]) * 1000L
- + Long.parseLong(parts[1]) / 1000L;
- String key = source + "," + dateTime;
- String value = key;
- if (parts[16].equals("0")) {
- value += ",-2"; // -2 for timeout
- } else if (Long.parseLong(parts[19]) < receivedBytes) {
- value += ",-1"; // -1 for failure
- } else {
- value += "," + completeMillis;
- }
- if (!rawObs.containsKey(key)) {
- rawObs.put(key, value);
- addedRawObs++;
- }
- }
- }
- br.close();
- }
- }
- logger.fine("Finished importing files in " + torperfDirectory
- + "/.");
- }
- if (rawObs.size() > 0) {
- logger.fine("Writing file " + rawFile.getAbsolutePath() + "...");
- rawFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(rawFile));
- bw.append("source,date,start,completemillis\n");
- String tempSourceDate = null;
- Iterator<Map.Entry<String, String>> it =
- rawObs.entrySet().iterator();
- List<Long> dlTimes = new ArrayList<Long>();
- boolean haveWrittenFinalLine = false;
- SortedMap<String, List<Long>> dlTimesAllSources =
- new TreeMap<String, List<Long>>();
- SortedMap<String, long[]> statusesAllSources =
- new TreeMap<String, long[]>();
- long failures = 0, timeouts = 0, requests = 0;
- while (it.hasNext() || !haveWrittenFinalLine) {
- Map.Entry<String, String> next = it.hasNext() ? it.next() : null;
- if (tempSourceDate != null
- && (next == null || !(next.getValue().split(",")[0] + ","
- + next.getValue().split(",")[1]).equals(tempSourceDate))) {
- if (dlTimes.size() > 4) {
- Collections.sort(dlTimes);
- long q1 = dlTimes.get(dlTimes.size() / 4 - 1);
- long md = dlTimes.get(dlTimes.size() / 2 - 1);
- long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1);
- stats.put(tempSourceDate, tempSourceDate + "," + q1 + ","
- + md + "," + q3 + "," + timeouts + "," + failures + ","
- + requests);
- String allSourceDate = "all" + tempSourceDate.substring(
- tempSourceDate.indexOf("-"));
- if (dlTimesAllSources.containsKey(allSourceDate)) {
- dlTimesAllSources.get(allSourceDate).addAll(dlTimes);
- } else {
- dlTimesAllSources.put(allSourceDate, dlTimes);
- }
- if (statusesAllSources.containsKey(allSourceDate)) {
- long[] status = statusesAllSources.get(allSourceDate);
- status[0] += timeouts;
- status[1] += failures;
- status[2] += requests;
- } else {
- long[] status = new long[3];
- status[0] = timeouts;
- status[1] = failures;
- status[2] = requests;
- statusesAllSources.put(allSourceDate, status);
- }
- }
- dlTimes = new ArrayList<Long>();
- failures = timeouts = requests = 0;
- if (next == null) {
- haveWrittenFinalLine = true;
- }
- }
- if (next != null) {
- bw.append(next.getValue() + "\n");
- String[] parts = next.getValue().split(",");
- tempSourceDate = parts[0] + "," + parts[1];
- long completeMillis = Long.parseLong(parts[3]);
- if (completeMillis == -2L) {
- timeouts++;
- } else if (completeMillis == -1L) {
- failures++;
- } else {
- dlTimes.add(Long.parseLong(parts[3]));
- }
- requests++;
- }
- }
- bw.close();
- for (Map.Entry<String, List<Long>> e :
- dlTimesAllSources.entrySet()) {
- String allSourceDate = e.getKey();
- dlTimes = e.getValue();
- Collections.sort(dlTimes);
- long q1 = dlTimes.get(dlTimes.size() / 4 - 1);
- long md = dlTimes.get(dlTimes.size() / 2 - 1);
- long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1);
- long[] status = statusesAllSources.get(allSourceDate);
- timeouts = status[0];
- failures = status[1];
- requests = status[2];
- stats.put(allSourceDate, allSourceDate + "," + q1 + "," + md
- + "," + q3 + "," + timeouts + "," + failures + ","
- + requests);
- }
- logger.fine("Finished writing file " + rawFile.getAbsolutePath()
- + ".");
- }
- if (stats.size() > 0) {
- logger.fine("Writing file " + statsFile.getAbsolutePath()
- + "...");
- statsFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(statsFile));
- bw.append("source,date,q1,md,q3,timeouts,failures,requests\n");
- for (String s : stats.values()) {
- bw.append(s + "\n");
- }
- bw.close();
- logger.fine("Finished writing file " + statsFile.getAbsolutePath()
- + ".");
- }
- } catch (IOException e) {
- logger.log(Level.WARNING, "Failed writing "
- + rawFile.getAbsolutePath() + " or "
- + statsFile.getAbsolutePath() + "!", e);
- }
-
- /* Write stats. */
- StringBuilder dumpStats = new StringBuilder("Finished writing "
- + "statistics on torperf results.\nAdded " + addedRawObs
- + " new observations in this execution.\n"
- + "Last known obserations by source and file size are:");
- String lastSource = null;
- String lastLine = null;
- for (String s : rawObs.keySet()) {
- String[] parts = s.split(",");
- if (lastSource == null) {
- lastSource = parts[0];
- } else if (!parts[0].equals(lastSource)) {
- dumpStats.append("\n" + lastSource + " " + lastLine.split(",")[1]
- + " " + lastLine.split(",")[2]);
- lastSource = parts[0];
- }
- lastLine = s;
- }
- if (lastSource != null) {
- dumpStats.append("\n" + lastSource + " " + lastLine.split(",")[1]
- + " " + lastLine.split(",")[2]);
- }
- logger.info(dumpStats.toString());
-
- /* Write results to database. */
- if (connectionURL != null) {
- try {
- Map<String, String> insertRows = new HashMap<String, String>();
- insertRows.putAll(stats);
- Set<String> updateRows = new HashSet<String>();
- Connection conn = DriverManager.getConnection(connectionURL);
- conn.setAutoCommit(false);
- Statement statement = conn.createStatement();
- ResultSet rs = statement.executeQuery(
- "SELECT date, source, q1, md, q3, timeouts, failures, "
- + "requests FROM torperf_stats");
- while (rs.next()) {
- String date = rs.getDate(1).toString();
- String source = rs.getString(2);
- String key = source + "," + date;
- if (insertRows.containsKey(key)) {
- String insertRow = insertRows.remove(key);
- String[] newStats = insertRow.split(",");
- long newQ1 = Long.parseLong(newStats[2]);
- long newMd = Long.parseLong(newStats[3]);
- long newQ3 = Long.parseLong(newStats[4]);
- long newTimeouts = Long.parseLong(newStats[5]);
- long newFailures = Long.parseLong(newStats[6]);
- long newRequests = Long.parseLong(newStats[7]);
- long oldQ1 = rs.getLong(3);
- long oldMd = rs.getLong(4);
- long oldQ3 = rs.getLong(5);
- long oldTimeouts = rs.getLong(6);
- long oldFailures = rs.getLong(7);
- long oldRequests = rs.getLong(8);
- if (newQ1 != oldQ1 || newMd != oldMd || newQ3 != oldQ3 ||
- newTimeouts != oldTimeouts ||
- newFailures != oldFailures ||
- newRequests != oldRequests) {
- updateRows.add(insertRow);
- }
- }
- }
- PreparedStatement psU = conn.prepareStatement(
- "UPDATE torperf_stats SET q1 = ?, md = ?, q3 = ?, "
- + "timeouts = ?, failures = ?, requests = ? "
- + "WHERE date = ? AND source = ?");
- for (String row : updateRows) {
- String[] newStats = row.split(",");
- String source = newStats[0];
- java.sql.Date date = java.sql.Date.valueOf(newStats[1]);
- long q1 = Long.parseLong(newStats[2]);
- long md = Long.parseLong(newStats[3]);
- long q3 = Long.parseLong(newStats[4]);
- long timeouts = Long.parseLong(newStats[5]);
- long failures = Long.parseLong(newStats[6]);
- long requests = Long.parseLong(newStats[7]);
- psU.clearParameters();
- psU.setLong(1, q1);
- psU.setLong(2, md);
- psU.setLong(3, q3);
- psU.setLong(4, timeouts);
- psU.setLong(5, failures);
- psU.setLong(6, requests);
- psU.setDate(7, date);
- psU.setString(8, source);
- psU.executeUpdate();
- }
- PreparedStatement psI = conn.prepareStatement(
- "INSERT INTO torperf_stats (q1, md, q3, timeouts, failures, "
- + "requests, date, source) VALUES (?, ?, ?, ?, ?, ?, ?, ?)");
- for (String row : insertRows.values()) {
- String[] newStats = row.split(",");
- String source = newStats[0];
- java.sql.Date date = java.sql.Date.valueOf(newStats[1]);
- long q1 = Long.parseLong(newStats[2]);
- long md = Long.parseLong(newStats[3]);
- long q3 = Long.parseLong(newStats[4]);
- long timeouts = Long.parseLong(newStats[5]);
- long failures = Long.parseLong(newStats[6]);
- long requests = Long.parseLong(newStats[7]);
- psI.clearParameters();
- psI.setLong(1, q1);
- psI.setLong(2, md);
- psI.setLong(3, q3);
- psI.setLong(4, timeouts);
- psI.setLong(5, failures);
- psI.setLong(6, requests);
- psI.setDate(7, date);
- psI.setString(8, source);
- psI.executeUpdate();
- }
- conn.commit();
- conn.close();
- } catch (SQLException e) {
- logger.log(Level.WARNING, "Failed to add torperf stats to "
- + "database.", e);
- }
- }
- }
-}
-
diff --git a/src/org/torproject/ernie/test/TorperfProcessorTest.java b/src/org/torproject/ernie/test/TorperfProcessorTest.java
deleted file mode 100644
index 015eec3..0000000
--- a/src/org/torproject/ernie/test/TorperfProcessorTest.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright 2011 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.test;
-
-import org.torproject.ernie.db.*;
-
-import java.io.*;
-
-import org.junit.*;
-import org.junit.rules.*;
-import static org.junit.Assert.*;
-
-public class TorperfProcessorTest {
-
- private File tempTorperfDirectory;
- private File tempStatsDirectory;
-
- @Rule
- public TemporaryFolder folder = new TemporaryFolder();
-
- @Before
- public void createTempDirectories() {
- this.tempTorperfDirectory = folder.newFolder("torperf");
- this.tempStatsDirectory = folder.newFolder("stats");
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testTorperfDirectoryNull() {
- new TorperfProcessor(null, this.tempStatsDirectory, null);
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testStatsDirectoryNull() {
- new TorperfProcessor(this.tempTorperfDirectory, null, null);
- }
-}
-
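The TorperfProcessor removed here (and re-added to metrics-web in the following commit) reduces each source's sorted download times for a day to the quartiles stored in torperf_stats, and only does so when more than four measurements exist. A minimal sketch of that reduction with made-up download times:

import java.util.*;

public class TorperfQuartilesExample {
  public static void main(String[] args) {
    // Hypothetical download times in milliseconds for one source/day.
    List<Long> dlTimes = new ArrayList<Long>(Arrays.asList(
        900L, 1500L, 700L, 1200L, 2000L, 1100L, 800L, 1300L));
    if (dlTimes.size() > 4) {
      Collections.sort(dlTimes);
      // Same index arithmetic as in TorperfProcessor.
      long q1 = dlTimes.get(dlTimes.size() / 4 - 1);
      long md = dlTimes.get(dlTimes.size() / 2 - 1);
      long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1);
      System.out.println("q1=" + q1 + " md=" + md + " q3=" + q3);
    }
  }
}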
[metrics-web/master] Prepare torperf statistics as part of metrics-web.
by karsten@torproject.org 03 Mar '11
commit e643794d9b2215bb6a1c9b2100c42af04f4b2552
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Mar 3 15:40:26 2011 +0100
Prepare torperf statistics as part of metrics-web.
---
build.xml | 12 +
config.template | 6 +
src/org/torproject/ernie/cron/Configuration.java | 13 +
src/org/torproject/ernie/cron/Main.java | 6 +
.../torproject/ernie/cron/TorperfProcessor.java | 346 ++++++++++++++++++++
5 files changed, 383 insertions(+), 0 deletions(-)
diff --git a/build.xml b/build.xml
index cdae4e1..bd06daf 100644
--- a/build.xml
+++ b/build.xml
@@ -51,6 +51,18 @@
</java>
</target>
+ <!-- Run unit tests. -->
+ <target name="test" depends="compile">
+ <junit haltonfailure="true" printsummary="off">
+ <classpath refid="classpath"/>
+ <formatter type="plain" usefile="false"/>
+ <batchtest>
+ <fileset dir="${classes}"
+ includes="**/*Test.class"/>
+ </batchtest>
+ </junit>
+ </target>
+
<!-- Create a .war file for deployment. -->
<target name="make-war"
depends="compile">
diff --git a/config.template b/config.template
index f2dc9cb..4a5588a 100644
--- a/config.template
+++ b/config.template
@@ -42,4 +42,10 @@
#
## Write bridge stats to disk
#WriteBridgeStats 0
+#
+## Import torperf data, if available, and write stats to disk
+#ImportWriteTorperfStats 0
+#
+## Relative path to directory to import torperf results from
+#TorperfDirectory torperf/
diff --git a/src/org/torproject/ernie/cron/Configuration.java b/src/org/torproject/ernie/cron/Configuration.java
index 818f9e9..dd09bae 100644
--- a/src/org/torproject/ernie/cron/Configuration.java
+++ b/src/org/torproject/ernie/cron/Configuration.java
@@ -26,6 +26,8 @@ public class Configuration {
private String relayDescriptorRawFilesDirectory = "pg-import/";
private boolean writeConsensusHealth = false;
private boolean writeBridgeStats = false;
+ private boolean importWriteTorperfStats = false;
+ private String torperfDirectory = "torperf/";
public Configuration() {
/* Initialize logger. */
@@ -75,6 +77,11 @@ public class Configuration {
} else if (line.startsWith("WriteBridgeStats")) {
this.writeBridgeStats = Integer.parseInt(
line.split(" ")[1]) != 0;
+ } else if (line.startsWith("ImportWriteTorperfStats")) {
+ this.importWriteTorperfStats = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("TorperfDirectory")) {
+ this.torperfDirectory = line.split(" ")[1];
} else {
logger.severe("Configuration file contains unrecognized "
+ "configuration key in line '" + line + "'! Exiting!");
@@ -136,5 +143,11 @@ public class Configuration {
public boolean getWriteBridgeStats() {
return this.writeBridgeStats;
}
+ public boolean getImportWriteTorperfStats() {
+ return this.importWriteTorperfStats;
+ }
+ public String getTorperfDirectory() {
+ return this.torperfDirectory;
+ }
}
diff --git a/src/org/torproject/ernie/cron/Main.java b/src/org/torproject/ernie/cron/Main.java
index 0551586..943d326 100644
--- a/src/org/torproject/ernie/cron/Main.java
+++ b/src/org/torproject/ernie/cron/Main.java
@@ -105,6 +105,12 @@ public class Main {
csfh = null;
}
+ // Import and process torperf stats
+ if (config.getImportWriteTorperfStats()) {
+ new TorperfProcessor(new File(config.getTorperfDirectory()),
+ statsDirectory, config.getRelayDescriptorDatabaseJDBC());
+ }
+
// Remove lock file
lf.releaseLock();
diff --git a/src/org/torproject/ernie/cron/TorperfProcessor.java b/src/org/torproject/ernie/cron/TorperfProcessor.java
new file mode 100644
index 0000000..1883e9c
--- /dev/null
+++ b/src/org/torproject/ernie/cron/TorperfProcessor.java
@@ -0,0 +1,346 @@
+/* Copyright 2011 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.ernie.cron;
+
+import java.io.*;
+import java.sql.*;
+import java.text.*;
+import java.util.*;
+import java.util.logging.*;
+
+public class TorperfProcessor {
+ public TorperfProcessor(File torperfDirectory, File statsDirectory,
+ String connectionURL) {
+
+ if (torperfDirectory == null || statsDirectory == null) {
+ throw new IllegalArgumentException();
+ }
+
+ Logger logger = Logger.getLogger(TorperfProcessor.class.getName());
+ File rawFile = new File(statsDirectory, "torperf-raw");
+ File statsFile = new File(statsDirectory, "torperf-stats");
+ SortedMap<String, String> rawObs = new TreeMap<String, String>();
+ SortedMap<String, String> stats = new TreeMap<String, String>();
+ int addedRawObs = 0;
+ try {
+ if (rawFile.exists()) {
+ logger.fine("Reading file " + rawFile.getAbsolutePath() + "...");
+ BufferedReader br = new BufferedReader(new FileReader(rawFile));
+ String line = br.readLine(); // ignore header
+ while ((line = br.readLine()) != null) {
+ if (line.split(",").length != 4) {
+ logger.warning("Corrupt line in " + rawFile.getAbsolutePath()
+ + "!");
+ break;
+ }
+ String key = line.substring(0, line.lastIndexOf(","));
+ rawObs.put(key, line);
+ }
+ br.close();
+ logger.fine("Finished reading file " + rawFile.getAbsolutePath()
+ + ".");
+ }
+ if (statsFile.exists()) {
+ logger.fine("Reading file " + statsFile.getAbsolutePath()
+ + "...");
+ BufferedReader br = new BufferedReader(new FileReader(statsFile));
+ String line = br.readLine(); // ignore header
+ while ((line = br.readLine()) != null) {
+ String key = line.split(",")[0] + "," + line.split(",")[1];
+ stats.put(key, line);
+ }
+ br.close();
+ logger.fine("Finished reading file " + statsFile.getAbsolutePath()
+ + ".");
+ }
+ if (torperfDirectory.exists()) {
+ logger.fine("Importing files in " + torperfDirectory + "/...");
+ Stack<File> filesInInputDir = new Stack<File>();
+ filesInInputDir.add(torperfDirectory);
+ while (!filesInInputDir.isEmpty()) {
+ File pop = filesInInputDir.pop();
+ if (pop.isDirectory()) {
+ for (File f : pop.listFiles()) {
+ filesInInputDir.add(f);
+ }
+ } else {
+ String source = pop.getName().substring(0,
+ pop.getName().indexOf("."));
+ String size = pop.getName().split("-")[1];
+ long receivedBytes = 1L;
+ if (pop.getName().endsWith("kb.data")) {
+ receivedBytes *= 1024L;
+ } else if (pop.getName().endsWith("mb.data")) {
+ receivedBytes *= 1024L * 1024L;
+ } else {
+ // not a valid .data file
+ continue;
+ }
+ receivedBytes *= Long.parseLong(size.substring(0,
+ size.length() - "xb.data".length()));
+ BufferedReader br = new BufferedReader(new FileReader(pop));
+ String line = null;
+ SimpleDateFormat formatter =
+ new SimpleDateFormat("yyyy-MM-dd,HH:mm:ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(" ");
+ // remove defective lines as they occurred on gabelmoo
+ if (parts.length == 20 && parts[0].length() == 10) {
+ long startSec = Long.parseLong(parts[0]);
+ String dateTime = formatter.format(startSec * 1000L);
+ long completeMillis = Long.parseLong(parts[16])
+ * 1000L + Long.parseLong(parts[17]) / 1000L
+ - Long.parseLong(parts[0]) * 1000L
+ + Long.parseLong(parts[1]) / 1000L;
+ String key = source + "," + dateTime;
+ String value = key;
+ if (parts[16].equals("0")) {
+ value += ",-2"; // -2 for timeout
+ } else if (Long.parseLong(parts[19]) < receivedBytes) {
+ value += ",-1"; // -1 for failure
+ } else {
+ value += "," + completeMillis;
+ }
+ if (!rawObs.containsKey(key)) {
+ rawObs.put(key, value);
+ addedRawObs++;
+ }
+ }
+ }
+ br.close();
+ }
+ }
+ logger.fine("Finished importing files in " + torperfDirectory
+ + "/.");
+ }
+ if (rawObs.size() > 0) {
+ logger.fine("Writing file " + rawFile.getAbsolutePath() + "...");
+ rawFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(rawFile));
+ bw.append("source,date,start,completemillis\n");
+ String tempSourceDate = null;
+ Iterator<Map.Entry<String, String>> it =
+ rawObs.entrySet().iterator();
+ List<Long> dlTimes = new ArrayList<Long>();
+ boolean haveWrittenFinalLine = false;
+ SortedMap<String, List<Long>> dlTimesAllSources =
+ new TreeMap<String, List<Long>>();
+ SortedMap<String, long[]> statusesAllSources =
+ new TreeMap<String, long[]>();
+ long failures = 0, timeouts = 0, requests = 0;
+ while (it.hasNext() || !haveWrittenFinalLine) {
+ Map.Entry<String, String> next = it.hasNext() ? it.next() : null;
+ if (tempSourceDate != null
+ && (next == null || !(next.getValue().split(",")[0] + ","
+ + next.getValue().split(",")[1]).equals(tempSourceDate))) {
+ if (dlTimes.size() > 4) {
+ Collections.sort(dlTimes);
+ long q1 = dlTimes.get(dlTimes.size() / 4 - 1);
+ long md = dlTimes.get(dlTimes.size() / 2 - 1);
+ long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1);
+ stats.put(tempSourceDate, tempSourceDate + "," + q1 + ","
+ + md + "," + q3 + "," + timeouts + "," + failures + ","
+ + requests);
+ String allSourceDate = "all" + tempSourceDate.substring(
+ tempSourceDate.indexOf("-"));
+ if (dlTimesAllSources.containsKey(allSourceDate)) {
+ dlTimesAllSources.get(allSourceDate).addAll(dlTimes);
+ } else {
+ dlTimesAllSources.put(allSourceDate, dlTimes);
+ }
+ if (statusesAllSources.containsKey(allSourceDate)) {
+ long[] status = statusesAllSources.get(allSourceDate);
+ status[0] += timeouts;
+ status[1] += failures;
+ status[2] += requests;
+ } else {
+ long[] status = new long[3];
+ status[0] = timeouts;
+ status[1] = failures;
+ status[2] = requests;
+ statusesAllSources.put(allSourceDate, status);
+ }
+ }
+ dlTimes = new ArrayList<Long>();
+ failures = timeouts = requests = 0;
+ if (next == null) {
+ haveWrittenFinalLine = true;
+ }
+ }
+ if (next != null) {
+ bw.append(next.getValue() + "\n");
+ String[] parts = next.getValue().split(",");
+ tempSourceDate = parts[0] + "," + parts[1];
+ long completeMillis = Long.parseLong(parts[3]);
+ if (completeMillis == -2L) {
+ timeouts++;
+ } else if (completeMillis == -1L) {
+ failures++;
+ } else {
+ dlTimes.add(Long.parseLong(parts[3]));
+ }
+ requests++;
+ }
+ }
+ bw.close();
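+ // Compute the same quartile statistics for the aggregated "all"
+ // entries.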
+ for (Map.Entry<String, List<Long>> e :
+ dlTimesAllSources.entrySet()) {
+ String allSourceDate = e.getKey();
+ dlTimes = e.getValue();
+ Collections.sort(dlTimes);
+ long q1 = dlTimes.get(dlTimes.size() / 4 - 1);
+ long md = dlTimes.get(dlTimes.size() / 2 - 1);
+ long q3 = dlTimes.get(dlTimes.size() * 3 / 4 - 1);
+ long[] status = statusesAllSources.get(allSourceDate);
+ timeouts = status[0];
+ failures = status[1];
+ requests = status[2];
+ stats.put(allSourceDate, allSourceDate + "," + q1 + "," + md
+ + "," + q3 + "," + timeouts + "," + failures + ","
+ + requests);
+ }
+ logger.fine("Finished writing file " + rawFile.getAbsolutePath()
+ + ".");
+ }
+ if (stats.size() > 0) {
+ logger.fine("Writing file " + statsFile.getAbsolutePath()
+ + "...");
+ statsFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(statsFile));
+ bw.append("source,date,q1,md,q3,timeouts,failures,requests\n");
+ for (String s : stats.values()) {
+ bw.append(s + "\n");
+ }
+ bw.close();
+ logger.fine("Finished writing file " + statsFile.getAbsolutePath()
+ + ".");
+ }
+ } catch (IOException e) {
+ logger.log(Level.WARNING, "Failed writing "
+ + rawFile.getAbsolutePath() + " or "
+ + statsFile.getAbsolutePath() + "!", e);
+ }
+
+ /* Write stats. */
+ StringBuilder dumpStats = new StringBuilder("Finished writing "
+ + "statistics on torperf results.\nAdded " + addedRawObs
+ + " new observations in this execution.\n"
+ + "Last known obserations by source and file size are:");
+ String lastSource = null;
+ String lastLine = null;
+ for (String s : rawObs.keySet()) {
+ String[] parts = s.split(",");
+ if (lastSource == null) {
+ lastSource = parts[0];
+ } else if (!parts[0].equals(lastSource)) {
+ dumpStats.append("\n" + lastSource + " " + lastLine.split(",")[1]
+ + " " + lastLine.split(",")[2]);
+ lastSource = parts[0];
+ }
+ lastLine = s;
+ }
+ if (lastSource != null) {
+ dumpStats.append("\n" + lastSource + " " + lastLine.split(",")[1]
+ + " " + lastLine.split(",")[2]);
+ }
+ logger.info(dumpStats.toString());
+
+ /* Write results to database. */
+ if (connectionURL != null) {
+ try {
+ Map<String, String> insertRows = new HashMap<String, String>();
+ insertRows.putAll(stats);
+ Set<String> updateRows = new HashSet<String>();
+ Connection conn = DriverManager.getConnection(connectionURL);
+ conn.setAutoCommit(false);
+ Statement statement = conn.createStatement();
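+ // Compare newly computed statistics against rows already in
+ // torperf_stats; existing rows that changed are updated below,
+ // and all remaining rows are inserted as new.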
+ ResultSet rs = statement.executeQuery(
+ "SELECT date, source, q1, md, q3, timeouts, failures, "
+ + "requests FROM torperf_stats");
+ while (rs.next()) {
+ String date = rs.getDate(1).toString();
+ String source = rs.getString(2);
+ String key = source + "," + date;
+ if (insertRows.containsKey(key)) {
+ String insertRow = insertRows.remove(key);
+ String[] newStats = insertRow.split(",");
+ long newQ1 = Long.parseLong(newStats[2]);
+ long newMd = Long.parseLong(newStats[3]);
+ long newQ3 = Long.parseLong(newStats[4]);
+ long newTimeouts = Long.parseLong(newStats[5]);
+ long newFailures = Long.parseLong(newStats[6]);
+ long newRequests = Long.parseLong(newStats[7]);
+ long oldQ1 = rs.getLong(3);
+ long oldMd = rs.getLong(4);
+ long oldQ3 = rs.getLong(5);
+ long oldTimeouts = rs.getLong(6);
+ long oldFailures = rs.getLong(7);
+ long oldRequests = rs.getLong(8);
+ if (newQ1 != oldQ1 || newMd != oldMd || newQ3 != oldQ3 ||
+ newTimeouts != oldTimeouts ||
+ newFailures != oldFailures ||
+ newRequests != oldRequests) {
+ updateRows.add(insertRow);
+ }
+ }
+ }
+ PreparedStatement psU = conn.prepareStatement(
+ "UPDATE torperf_stats SET q1 = ?, md = ?, q3 = ?, "
+ + "timeouts = ?, failures = ?, requests = ? "
+ + "WHERE date = ? AND source = ?");
+ for (String row : updateRows) {
+ String[] newStats = row.split(",");
+ String source = newStats[0];
+ java.sql.Date date = java.sql.Date.valueOf(newStats[1]);
+ long q1 = Long.parseLong(newStats[2]);
+ long md = Long.parseLong(newStats[3]);
+ long q3 = Long.parseLong(newStats[4]);
+ long timeouts = Long.parseLong(newStats[5]);
+ long failures = Long.parseLong(newStats[6]);
+ long requests = Long.parseLong(newStats[7]);
+ psU.clearParameters();
+ psU.setLong(1, q1);
+ psU.setLong(2, md);
+ psU.setLong(3, q3);
+ psU.setLong(4, timeouts);
+ psU.setLong(5, failures);
+ psU.setLong(6, requests);
+ psU.setDate(7, date);
+ psU.setString(8, source);
+ psU.executeUpdate();
+ }
+ PreparedStatement psI = conn.prepareStatement(
+ "INSERT INTO torperf_stats (q1, md, q3, timeouts, failures, "
+ + "requests, date, source) VALUES (?, ?, ?, ?, ?, ?, ?, ?)");
+ for (String row : insertRows.values()) {
+ String[] newStats = row.split(",");
+ String source = newStats[0];
+ java.sql.Date date = java.sql.Date.valueOf(newStats[1]);
+ long q1 = Long.parseLong(newStats[2]);
+ long md = Long.parseLong(newStats[3]);
+ long q3 = Long.parseLong(newStats[4]);
+ long timeouts = Long.parseLong(newStats[5]);
+ long failures = Long.parseLong(newStats[6]);
+ long requests = Long.parseLong(newStats[7]);
+ psI.clearParameters();
+ psI.setLong(1, q1);
+ psI.setLong(2, md);
+ psI.setLong(3, q3);
+ psI.setLong(4, timeouts);
+ psI.setLong(5, failures);
+ psI.setLong(6, requests);
+ psI.setDate(7, date);
+ psI.setString(8, source);
+ psI.executeUpdate();
+ }
+ conn.commit();
+ conn.close();
+ } catch (SQLException e) {
+ logger.log(Level.WARNING, "Failed to add torperf stats to "
+ + "database.", e);
+ }
+ }
+ }
+}
+

r24298: {translation} added a comment for vidalia (translation/trunk/documentation)
by Runa Sandvik 03 Mar '11
Author: runa
Date: 2011-03-03 14:46:42 +0000 (Thu, 03 Mar 2011)
New Revision: 24298
Modified:
translation/trunk/documentation/howto.txt
Log:
added a comment for vidalia
Modified: translation/trunk/documentation/howto.txt
===================================================================
--- translation/trunk/documentation/howto.txt 2011-03-03 14:26:06 UTC (rev 24297)
+++ translation/trunk/documentation/howto.txt 2011-03-03 14:46:42 UTC (rev 24298)
@@ -235,6 +235,12 @@
1. Converting Translations
# TODO: document how to convert translations to a useful format.
+ #
+ # This is all handled automatically by the build process using
+ # some conversion tools and CMake macros written by Matt Edman.
+ # See src/tools/{po2ts,ts2po,po2nsh,po2wxl} and the
+ # VIDALIA_UPDATE_PO, VIDALIA_ADD_PO, VIDALIA_ADD_NSH and
+ # VIDALIA_ADD_WXL macros in src/cmake/VidaliaMacros.cmake.
2. Creating/Updating Translation Template Files