[tor-commits] [metrics-web/release] Cut off too recent dates from several CSV files.

karsten at torproject.org karsten at torproject.org
Wed May 30 13:45:12 UTC 2018


commit dc50c9ec1e58698d7a97553203ccd6c28c043f67
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue Mar 6 17:27:15 2018 +0100

    Cut off too recent dates from several CSV files.
    
    With this patch we're cutting off the last (1) day from:
     - servers.csv (2 -> 1)
     - ipv6servers.csv (2 -> 1)
    
    Before this patch we already cut off the last (1) day from:
     - advbwdist.csv (1 -> 1)
    
    With this patch we're cutting off the last three (3) days from:
     - bandwidth.csv (4 -> 3)
    
    With this patch we're cutting off the last two (2) days from:
     - clients.csv (0 -> 2)
     - userstats-combined.csv (0 -> 2)
     - connbidirect2.csv (0 -> 2)
     - torperf-1.1.csv (0 -> 2)
    
    With the previous patch we started cutting off the last two (2) days
    from:
     - hidserv.csv (0 -> 2)
    
    In a future patch we might start cutting off days from:
     - webstats.csv (0 -> ?)
    
    Fixes more of #25196.
---
 .../metrics/stats/connbidirect/Main.java           |  5 +++
 src/main/sql/clients/init-userstats.sql            |  6 ++++
 src/main/sql/ipv6servers/init-ipv6servers.sql      |  2 +-
 src/main/sql/legacy/tordir.sql                     | 36 +++++++++++-----------
 src/main/sql/onionperf/init-onionperf.sql          |  2 ++
 5 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java b/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java
index 1911472..4474966 100644
--- a/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/connbidirect/Main.java
@@ -429,6 +429,8 @@ public class Main {
       SortedSet<RawStat> rawStats) {
     DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
     dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    String yesterday = dateFormat.format(System.currentTimeMillis()
+        - ONE_DAY_IN_MILLIS);
     SortedMap<String, List<Short>> fractionsByDateAndDirection
         = new TreeMap<>();
     final String[] directions = new String[] { "read", "write", "both" };
@@ -436,6 +438,9 @@ public class Main {
       if (rawStat.fingerprint != null) {
         String date = dateFormat.format(rawStat.dateDays
             * ONE_DAY_IN_MILLIS);
+        if (date.compareTo(yesterday) >= 0) {
+          continue;
+        }
         short[] fractions = new short[] { rawStat.fractionRead,
             rawStat.fractionWrite, rawStat.fractionBoth };
         for (int i = 0; i < directions.length; i++) {
diff --git a/src/main/sql/clients/init-userstats.sql b/src/main/sql/clients/init-userstats.sql
index 733a516..38521f2 100644
--- a/src/main/sql/clients/init-userstats.sql
+++ b/src/main/sql/clients/init-userstats.sql
@@ -681,6 +681,9 @@ CREATE OR REPLACE VIEW estimated AS SELECT
   -- request statistics.
   WHERE a.frac BETWEEN 0.1 AND 1.0
 
+  -- Skip estimates that are as recent as yesterday or newer.
+  AND a.date < current_date - 1
+
   -- Order results.
   ORDER BY date DESC, node, version, transport, country;
 
@@ -749,6 +752,9 @@ CREATE OR REPLACE VIEW combined AS SELECT
   -- request statistics.
   WHERE a.frac BETWEEN 0.1 AND 1.0
 
+  -- Skip estimates that are as recent as yesterday or newer.
+  AND a.date < current_date - 1
+
   -- Order results.
   ORDER BY date DESC;
 
diff --git a/src/main/sql/ipv6servers/init-ipv6servers.sql b/src/main/sql/ipv6servers/init-ipv6servers.sql
index a5d56f3..8ed9372 100644
--- a/src/main/sql/ipv6servers/init-ipv6servers.sql
+++ b/src/main/sql/ipv6servers/init-ipv6servers.sql
@@ -99,7 +99,7 @@ WITH included_statuses AS (
   GROUP BY DATE(valid_after), server
   HAVING COUNT(status_id) >= 12
   AND DATE(valid_after)
-    < (SELECT MAX(DATE(valid_after)) FROM included_statuses) - 1
+    < (SELECT MAX(DATE(valid_after)) FROM included_statuses)
 )
 SELECT DATE(valid_after) AS valid_after_date, server,
   CASE WHEN server = 'relay' THEN guard_relay ELSE NULL END AS guard_relay,
diff --git a/src/main/sql/legacy/tordir.sql b/src/main/sql/legacy/tordir.sql
index e0ea651..5d254f6 100644
--- a/src/main/sql/legacy/tordir.sql
+++ b/src/main/sql/legacy/tordir.sql
@@ -720,7 +720,7 @@ CREATE VIEW stats_servers AS
   (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
   NULL AS platform, TRUE AS ec2bridge, NULL AS relays,
   avg_running_ec2 AS bridges FROM bridge_network_size
-  WHERE date < current_date - 1)
+  WHERE date < current_date)
 UNION ALL
   (SELECT COALESCE(network_size.date, bridge_network_size.date) AS date,
   NULL AS flag, NULL AS country, NULL AS version, NULL AS platform,
@@ -729,57 +729,57 @@ UNION ALL
   FULL OUTER JOIN bridge_network_size
   ON network_size.date = bridge_network_size.date
   WHERE COALESCE(network_size.date, bridge_network_size.date) <
-  current_date - 1)
+  current_date)
 UNION ALL
   (SELECT date, 'Exit' AS flag, NULL AS country, NULL AS version,
   NULL AS platform, NULL AS ec2bridge, avg_exit AS relays,
-  NULL AS bridges FROM network_size WHERE date < current_date - 1)
+  NULL AS bridges FROM network_size WHERE date < current_date)
 UNION ALL
   (SELECT date, 'Guard' AS flag, NULL AS country, NULL AS version,
   NULL AS platform, NULL AS ec2bridge, avg_guard AS relays,
-  NULL AS bridges FROM network_size WHERE date < current_date - 1)
+  NULL AS bridges FROM network_size WHERE date < current_date)
 UNION ALL
   (SELECT date, 'Fast' AS flag, NULL AS country, NULL AS version,
   NULL AS platform, NULL AS ec2bridge, avg_fast AS relays,
-  NULL AS bridges FROM network_size WHERE date < current_date - 1)
+  NULL AS bridges FROM network_size WHERE date < current_date)
 UNION ALL
   (SELECT date, 'Stable' AS flag, NULL AS country, NULL AS version,
   NULL AS platform, NULL AS ec2bridge, avg_stable AS relays,
-  NULL AS bridges FROM network_size WHERE date < current_date - 1)
+  NULL AS bridges FROM network_size WHERE date < current_date)
 UNION ALL
   (SELECT date, 'HSDir' AS flag, NULL AS country, NULL AS version,
   NULL AS platform, NULL AS ec2bridge, avg_hsdir AS relays,
-  NULL AS bridges FROM network_size WHERE date < current_date - 1)
+  NULL AS bridges FROM network_size WHERE date < current_date)
 UNION ALL
   (SELECT date, NULL AS flag, CASE WHEN country != 'zz' THEN country
   ELSE '??' END AS country, NULL AS version, NULL AS platform,
   NULL AS ec2bridge, relays, NULL AS bridges FROM relay_countries
-  WHERE date < current_date - 1)
+  WHERE date < current_date)
 UNION ALL
   (SELECT date, NULL AS flag, NULL AS country, version, NULL AS platform,
   NULL AS ec2bridge, relays, NULL AS bridges FROM relay_versions
-  WHERE date < current_date - 1)
+  WHERE date < current_date)
 UNION ALL
   (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
   'Linux' AS platform, NULL AS ec2bridge, avg_linux AS relays,
-  NULL AS bridges FROM relay_platforms WHERE date < current_date - 1)
+  NULL AS bridges FROM relay_platforms WHERE date < current_date)
 UNION ALL
   (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
   'Darwin' AS platform, NULL AS ec2bridge, avg_darwin AS relays,
-  NULL AS bridges FROM relay_platforms WHERE date < current_date - 1)
+  NULL AS bridges FROM relay_platforms WHERE date < current_date)
 UNION ALL
   (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
   'BSD' AS platform, NULL AS ec2bridge, avg_bsd AS relays,
-  NULL AS bridges FROM relay_platforms WHERE date < current_date - 1)
+  NULL AS bridges FROM relay_platforms WHERE date < current_date)
 UNION ALL
   (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
   'Windows' AS platform, NULL AS ec2bridge, avg_windows AS relays,
-  NULL AS bridges FROM relay_platforms WHERE date < current_date - 1)
+  NULL AS bridges FROM relay_platforms WHERE date < current_date)
 UNION ALL
   (SELECT date, NULL AS flag, NULL AS country, NULL AS version,
   'Other' AS platform, NULL AS ec2bridge, avg_other AS relays,
-  NULL AS bridges FROM relay_platforms WHERE date < current_date - 1)
-ORDER BY 1, 2, 3, 4, 5, 6;
+  NULL AS bridges FROM relay_platforms WHERE date < current_date)
+ORDER BY date, flag, country, version, platform, ec2bridge;
 
 -- View for exporting bandwidth statistics.
 CREATE VIEW stats_bandwidth AS
@@ -797,7 +797,7 @@ CREATE VIEW stats_bandwidth AS
   AND bandwidth_flags.isexit = bwhist_flags.isexit
   AND bandwidth_flags.isguard = bwhist_flags.isguard
   WHERE COALESCE(bandwidth_flags.date, bwhist_flags.date) <
-  current_date - 3)
+  current_date - 2)
 UNION ALL
   (SELECT COALESCE(total_bandwidth.date, total_bwhist.date, u.date)
   AS date, NULL AS isexit, NULL AS isguard,
@@ -818,6 +818,6 @@ UNION ALL
   AND bwp / bwd <= 3) u
   ON COALESCE(total_bandwidth.date, total_bwhist.date) = u.date
   WHERE COALESCE(total_bandwidth.date, total_bwhist.date, u.date) <
-  current_date - 3)
-ORDER BY 1, 2, 3;
+  current_date - 2)
+ORDER BY date, isexit, isguard;
 
diff --git a/src/main/sql/onionperf/init-onionperf.sql b/src/main/sql/onionperf/init-onionperf.sql
index 9207bcf..557eabb 100644
--- a/src/main/sql/onionperf/init-onionperf.sql
+++ b/src/main/sql/onionperf/init-onionperf.sql
@@ -69,6 +69,7 @@ SELECT DATE(start) AS date,
     AND readbytes < filesize THEN 1 ELSE NULL END) AS failures,
   COUNT(*) AS requests
 FROM measurements
+WHERE DATE(start) < current_date - 1
 GROUP BY date, filesize, source, server
 UNION
 SELECT DATE(start) AS date,
@@ -83,6 +84,7 @@ SELECT DATE(start) AS date,
     AND readbytes < filesize THEN 1 ELSE NULL END) AS failures,
   COUNT(*) AS requests
 FROM measurements
+WHERE DATE(start) < current_date - 1
 GROUP BY date, filesize, 3, server) sub
 ORDER BY date, filesize, source, server;
 





More information about the tor-commits mailing list