commit db79e4247c473b622b1b6b0e8049e0d0aa72c0e6 Author: Karsten Loesing karsten.loesing@gmx.net Date: Sun Jun 15 17:43:07 2014 +0200
Remove large parts of tordir database.
Now that we don't need to serve relay-search requests anymore, we can remove large parts of the tordir database. In particular,
- delete server descriptors older than 14 days, - delete bandwidth history entries older than 14 days, - delete status entries older than 14 days (and undo splitting up that table into monthly sub tables), and - only store dirreq-stats values for all countries together. --- modules/legacy/db/tordir.sql | 81 +++++++------------- .../cron/RelayDescriptorDatabaseImporter.java | 8 +- 2 files changed, 31 insertions(+), 58 deletions(-)
diff --git a/modules/legacy/db/tordir.sql b/modules/legacy/db/tordir.sql index 45b9862..f52e89b 100644 --- a/modules/legacy/db/tordir.sql +++ b/modules/legacy/db/tordir.sql @@ -1,4 +1,4 @@ --- Copyright 2010 The Tor Project +-- Copyright 2010, 2014 The Tor Project -- See LICENSE for licensing information
CREATE LANGUAGE plpgsql; @@ -22,6 +22,14 @@ CREATE TABLE descriptor ( CONSTRAINT descriptor_pkey PRIMARY KEY (descriptor) );
+CREATE OR REPLACE FUNCTION delete_old_descriptor() +RETURNS INTEGER AS $$ + BEGIN + DELETE FROM descriptor WHERE DATE(published) < current_date - 14; + RETURN 1; + END; +$$ LANGUAGE plpgsql; + -- Contains bandwidth histories reported by relays in extra-info -- descriptors. Each row contains the reported bandwidth in 15-minute -- intervals for each relay and date. @@ -41,6 +49,14 @@ CREATE TABLE bwhist (
CREATE INDEX bwhist_date ON bwhist (date);
+CREATE OR REPLACE FUNCTION delete_old_bwhist() +RETURNS INTEGER AS $$ + BEGIN + DELETE FROM bwhist WHERE date < current_date - 14; + RETURN 1; + END; +$$ LANGUAGE plpgsql; + -- TABLE statusentry -- Contains all of the consensus entries published by the directories. -- Each statusentry references a valid descriptor. @@ -73,59 +89,14 @@ CREATE TABLE statusentry ( rawdesc BYTEA NOT NULL );
-CREATE OR REPLACE FUNCTION statusentry_insert_trigger() -RETURNS TRIGGER AS $$ - -DECLARE - tablename TEXT; - selectresult TEXT; - nextmonth TIMESTAMP WITHOUT TIME ZONE; - v_year INTEGER; - v_month INTEGER; - n_year INTEGER; - n_month INTEGER; - -BEGIN - v_year := extract(YEAR FROM NEW.validafter); - v_month := extract(MONTH FROM NEW.validafter); - tablename := 'statusentry_y' || v_year || 'm' || - TO_CHAR(NEW.validafter, 'mm'); - EXECUTE 'SELECT relname FROM pg_class WHERE relname = '''|| tablename || - '''' INTO selectresult; - IF selectresult IS NULL THEN - nextmonth := new.validafter + interval '1 month'; - n_year := extract(YEAR FROM nextmonth); - n_month := extract(MONTH FROM nextmonth); - EXECUTE 'CREATE TABLE ' || tablename || - ' ( CHECK ( validafter >= ''' || v_year || '-' || - TO_CHAR(NEW.validafter, 'mm') || '-01 00:00:00'' ' || - 'AND validafter < ''' || n_year || '-' || - TO_CHAR(nextmonth, 'mm') || - '-01 00:00:00'') ) INHERITS (statusentry)'; - EXECUTE 'ALTER TABLE ' || tablename || ' ADD CONSTRAINT ' || - tablename || '_pkey PRIMARY KEY (validafter, fingerprint)'; - EXECUTE 'CREATE INDEX ' || tablename || '_address ON ' || - tablename || ' (address)'; - EXECUTE 'CREATE INDEX ' || tablename || '_fingerprint ON ' || - tablename || ' (fingerprint)'; - EXECUTE 'CREATE INDEX ' || tablename || '_nickname ON ' || - tablename || ' (LOWER(nickname))'; - EXECUTE 'CREATE INDEX ' || tablename || '_validafter ON ' || - tablename || ' (validafter)'; - EXECUTE 'CREATE INDEX ' || tablename || '_descriptor ON ' || - tablename || ' (descriptor)'; - EXECUTE 'CREATE INDEX ' || tablename || '_validafter_date ON ' || - tablename || ' (DATE(validafter))'; - END IF; - EXECUTE 'INSERT INTO ' || tablename || ' SELECT ($1).*' USING NEW; - RETURN NULL; -END; +CREATE OR REPLACE FUNCTION delete_old_statusentry() +RETURNS INTEGER AS $$ + BEGIN + DELETE FROM statusentry WHERE DATE(validafter) < current_date - 14; + RETURN 1; + END; $$ LANGUAGE plpgsql;
-CREATE TRIGGER insert_statusentry_trigger - BEFORE INSERT ON statusentry - FOR EACH ROW EXECUTE PROCEDURE statusentry_insert_trigger(); - -- TABLE consensus -- Contains all of the consensuses published by the directories. CREATE TABLE consensus ( @@ -878,6 +849,12 @@ CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$ PERFORM refresh_user_stats(); RAISE NOTICE '% Deleting processed dates.', timeofday(); DELETE FROM scheduled_updates WHERE id IN (SELECT id FROM updates); + RAISE NOTICE '% Deleting old descriptors.', timeofday(); + PERFORM delete_old_descriptor(); + RAISE NOTICE '% Deleting old bandwidth histories.', timeofday(); + PERFORM delete_old_bwhist(); + RAISE NOTICE '% Deleting old status entries.', timeofday(); + PERFORM delete_old_statusentry(); RAISE NOTICE '% Terminating refresh run.', timeofday(); RETURN 1; END; diff --git a/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java b/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java index a51092e..b5c55c8 100644 --- a/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java +++ b/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java @@ -968,12 +968,8 @@ public final class RelayDescriptorDatabaseImporter { if (descriptor.getDirreqV3Reqs() != null) { int allUsers = 0; Map<String, String> obs = new HashMap<String, String>(); - for (Map.Entry<String, Integer> e : - descriptor.getDirreqV3Reqs().entrySet()) { - String country = e.getKey(); - int users = e.getValue() - 4; - allUsers += users; - obs.put(country, "" + users); + for (int users : descriptor.getDirreqV3Reqs().values()) { + allUsers += users - 4; } obs.put("zy", "" + allUsers); this.addDirReqStats(descriptor.getFingerprint(),