[tor-commits] [metrics-web/master] Remove large parts of tordir database.

karsten at torproject.org karsten at torproject.org
Sun Jun 15 19:42:36 UTC 2014


commit db79e4247c473b622b1b6b0e8049e0d0aa72c0e6
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Sun Jun 15 17:43:07 2014 +0200

    Remove large parts of tordir database.
    
    Now that we don't need to serve relay-search requests anymore, we can
    remove large parts of the tordir database.  In particular,
    
     - delete server descriptors older than 14 days,
     - delete bandwidth history entries older than 14 days,
     - delete status entries older than 14 days (and undo splitting up that
       table into monthly sub tables), and
     - only store dirreq-stats values for all countries together.
---
 modules/legacy/db/tordir.sql                       |   81 +++++++-------------
 .../cron/RelayDescriptorDatabaseImporter.java      |    8 +-
 2 files changed, 31 insertions(+), 58 deletions(-)

diff --git a/modules/legacy/db/tordir.sql b/modules/legacy/db/tordir.sql
index 45b9862..f52e89b 100644
--- a/modules/legacy/db/tordir.sql
+++ b/modules/legacy/db/tordir.sql
@@ -1,4 +1,4 @@
--- Copyright 2010 The Tor Project
+-- Copyright 2010, 2014 The Tor Project
 -- See LICENSE for licensing information
 
 CREATE LANGUAGE plpgsql;
@@ -22,6 +22,14 @@ CREATE TABLE descriptor (
     CONSTRAINT descriptor_pkey PRIMARY KEY (descriptor)
 );
 
+CREATE OR REPLACE FUNCTION delete_old_descriptor()
+RETURNS INTEGER AS $$
+    BEGIN
+    DELETE FROM descriptor WHERE DATE(published) < current_date - 14;
+    RETURN 1;
+    END;
+$$ LANGUAGE plpgsql;
+
 -- Contains bandwidth histories reported by relays in extra-info
 -- descriptors. Each row contains the reported bandwidth in 15-minute
 -- intervals for each relay and date.
@@ -41,6 +49,14 @@ CREATE TABLE bwhist (
 
 CREATE INDEX bwhist_date ON bwhist (date);
 
+CREATE OR REPLACE FUNCTION delete_old_bwhist()
+RETURNS INTEGER AS $$
+    BEGIN
+    DELETE FROM bwhist WHERE date < current_date - 14;
+    RETURN 1;
+    END;
+$$ LANGUAGE plpgsql;
+
 -- TABLE statusentry
 -- Contains all of the consensus entries published by the directories.
 -- Each statusentry references a valid descriptor.
@@ -73,59 +89,14 @@ CREATE TABLE statusentry (
     rawdesc BYTEA NOT NULL
 );
 
-CREATE OR REPLACE FUNCTION statusentry_insert_trigger()
-RETURNS TRIGGER AS $$
-
-DECLARE
-  tablename TEXT;
-  selectresult TEXT;
-  nextmonth TIMESTAMP WITHOUT TIME ZONE;
-  v_year INTEGER;
-  v_month INTEGER;
-  n_year INTEGER;
-  n_month INTEGER;
-
-BEGIN
-  v_year := extract(YEAR FROM NEW.validafter);
-  v_month := extract(MONTH FROM NEW.validafter);
-  tablename := 'statusentry_y' || v_year || 'm' ||
-      TO_CHAR(NEW.validafter, 'mm');
-  EXECUTE 'SELECT relname FROM pg_class WHERE relname = '''|| tablename ||
-    '''' INTO selectresult;
-  IF selectresult IS NULL THEN
-    nextmonth := new.validafter + interval '1 month';
-    n_year := extract(YEAR FROM nextmonth);
-    n_month := extract(MONTH FROM nextmonth);
-    EXECUTE 'CREATE TABLE ' || tablename ||
-      ' ( CHECK ( validafter >= ''' || v_year || '-' ||
-      TO_CHAR(NEW.validafter, 'mm') || '-01 00:00:00'' ' ||
-      'AND validafter < ''' || n_year || '-' ||
-      TO_CHAR(nextmonth, 'mm') ||
-      '-01 00:00:00'') ) INHERITS (statusentry)';
-    EXECUTE 'ALTER TABLE ' || tablename || ' ADD CONSTRAINT ' ||
-      tablename || '_pkey PRIMARY KEY (validafter, fingerprint)';
-    EXECUTE 'CREATE INDEX ' || tablename || '_address ON ' ||
-      tablename || ' (address)';
-    EXECUTE 'CREATE INDEX ' || tablename || '_fingerprint ON ' ||
-      tablename || ' (fingerprint)';
-    EXECUTE 'CREATE INDEX ' || tablename || '_nickname ON ' ||
-      tablename || ' (LOWER(nickname))';
-    EXECUTE 'CREATE INDEX ' || tablename || '_validafter ON ' ||
-      tablename || ' (validafter)';
-    EXECUTE 'CREATE INDEX ' || tablename || '_descriptor ON ' ||
-      tablename || ' (descriptor)';
-    EXECUTE 'CREATE INDEX ' || tablename || '_validafter_date ON ' ||
-      tablename || ' (DATE(validafter))';
-  END IF;
-  EXECUTE 'INSERT INTO ' || tablename || ' SELECT ($1).*' USING NEW;
-  RETURN NULL;
-END;
+CREATE OR REPLACE FUNCTION delete_old_statusentry()
+RETURNS INTEGER AS $$
+    BEGIN
+    DELETE FROM statusentry WHERE DATE(validafter) < current_date - 14;
+    RETURN 1;
+    END;
 $$ LANGUAGE plpgsql;
 
-CREATE TRIGGER insert_statusentry_trigger
-  BEFORE INSERT ON statusentry
-  FOR EACH ROW EXECUTE PROCEDURE statusentry_insert_trigger();
-
 -- TABLE consensus
 -- Contains all of the consensuses published by the directories.
 CREATE TABLE consensus (
@@ -878,6 +849,12 @@ CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
     PERFORM refresh_user_stats();
     RAISE NOTICE '% Deleting processed dates.', timeofday();
     DELETE FROM scheduled_updates WHERE id IN (SELECT id FROM updates);
+    RAISE NOTICE '% Deleting old descriptors.', timeofday();
+    PERFORM delete_old_descriptor();
+    RAISE NOTICE '% Deleting old bandwidth histories.', timeofday();
+    PERFORM delete_old_bwhist();
+    RAISE NOTICE '% Deleting old status entries.', timeofday();
+    PERFORM delete_old_statusentry();
     RAISE NOTICE '% Terminating refresh run.', timeofday();
   RETURN 1;
   END;
diff --git a/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java b/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
index a51092e..b5c55c8 100644
--- a/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
+++ b/modules/legacy/src/org/torproject/ernie/cron/RelayDescriptorDatabaseImporter.java
@@ -968,12 +968,8 @@ public final class RelayDescriptorDatabaseImporter {
     if (descriptor.getDirreqV3Reqs() != null) {
       int allUsers = 0;
       Map<String, String> obs = new HashMap<String, String>();
-      for (Map.Entry<String, Integer> e :
-          descriptor.getDirreqV3Reqs().entrySet()) {
-        String country = e.getKey();
-        int users = e.getValue() - 4;
-        allUsers += users;
-        obs.put(country, "" + users);
+      for (int users : descriptor.getDirreqV3Reqs().values()) {
+        allUsers += users - 4;
       }
       obs.put("zy", "" + allUsers);
       this.addDirReqStats(descriptor.getFingerprint(),



More information about the tor-commits mailing list