tor-commits
Threads by month
- ----- 2025 -----
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
November 2019
- 20 participants
- 2924 discussions
09 Nov '19
commit b605298c66c8c348fe589062dc1ddd3da293c8db
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Mon Dec 10 15:17:46 2018 +0100
Rewrite advbwdist's aggregate.R in Java.
This is yet another step towards making the daily update Java-only.
Implements #28801.
---
build.xml | 17 --------
src/main/R/advbwdist/aggregate.R | 25 ------------
.../torproject/metrics/stats/advbwdist/Main.java | 46 ++++++++++++++++++++++
3 files changed, 46 insertions(+), 42 deletions(-)
diff --git a/build.xml b/build.xml
index 93eda7b..42965bf 100644
--- a/build.xml
+++ b/build.xml
@@ -347,9 +347,6 @@
<target name="advbwdist">
<property name="module.name" value="advbwdist" />
<antcall target="run-java" />
- <antcall target="run-R" >
- <param name="module.Rscript" value="aggregate.R" />
- </antcall>
</target>
<target name="hidserv" >
@@ -453,20 +450,6 @@
<echo message="Java module ${module.name} finished. " />
</target>
- <target name="run-R" >
- <echo message="Running R module ${module.name}, script ${module.Rscript} ... " />
- <property name="Rscript"
- value="${Rsources}/${module.name}/${module.Rscript}" />
- <exec executable="R"
- dir="${modulebase}/${module.name}"
- failonerror="true" >
- <arg value="--slave"/>
- <arg value="-f"/>
- <arg value="${Rscript}" />
- </exec>
- <echo message="R module ${module.name}, script ${module.Rscript} finished. " />
- </target>
-
<!-- The following line adds the common targets and properties
for Metrics' Java Projects.
-->
diff --git a/src/main/R/advbwdist/aggregate.R b/src/main/R/advbwdist/aggregate.R
deleted file mode 100644
index 1c67dff..0000000
--- a/src/main/R/advbwdist/aggregate.R
+++ /dev/null
@@ -1,25 +0,0 @@
-require(reshape)
-t <- read.csv("stats/advbwdist-validafter.csv",
- colClasses = c("character", "logical", "integer", "integer", "integer"),
- stringsAsFactors = FALSE)
-
-currSysDate <- paste(Sys.Date() - 1, "23:59:59")
-t <- t[t$valid_after < currSysDate, ]
-t$date <- as.factor(substr(t$valid_after, 1, 10))
-t$isexit <- !is.na(t$isexit)
-t$relay <- ifelse(is.na(t$relay), -1, t$relay)
-t$percentile <- ifelse(is.na(t$percentile), -1, t$percentile)
-
-t <- aggregate(list(advbw = t$advbw), by = list(date = t$date,
- isexit = t$isexit, relay = t$relay, percentile = t$percentile),
- FUN = median)
-
-t$isexit <- ifelse(t$isexit, "t", "")
-t$relay <- ifelse(t$relay < 0, NA, t$relay)
-t$percentile <- ifelse(t$percentile < 0, NA, t$percentile)
-t$advbw <- floor(t$advbw)
-
-t <- t[order(t$date, t$isexit, t$relay, t$percentile), ]
-
-write.csv(t, "stats/advbwdist.csv", quote = FALSE, row.names = FALSE, na = "")
-
diff --git a/src/main/java/org/torproject/metrics/stats/advbwdist/Main.java b/src/main/java/org/torproject/metrics/stats/advbwdist/Main.java
index 7216581..6c4f4ac 100644
--- a/src/main/java/org/torproject/metrics/stats/advbwdist/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/advbwdist/Main.java
@@ -10,15 +10,19 @@ import org.torproject.descriptor.NetworkStatusEntry;
import org.torproject.descriptor.RelayNetworkStatusConsensus;
import org.torproject.descriptor.ServerDescriptor;
+import org.apache.commons.math3.stat.descriptive.rank.Median;
import org.apache.commons.math3.stat.descriptive.rank.Percentile;
+import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
+import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -138,6 +142,39 @@ public class Main {
}
descriptorReader.saveHistoryFile(historyFile);
bw.close();
+
+ /* Aggregate statistics. */
+ SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+ dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ String today = dateFormat.format(new Date());
+ SortedMap<String, List<Long>> preAggregatedValues = new TreeMap<>();
+ try (BufferedReader br = new BufferedReader(new FileReader(resultsFile))) {
+ br.readLine(); /* Skip header. */
+ String line;
+ while (null != (line = br.readLine())) {
+ String[] parts = line.split(",");
+ String date = parts[0].substring(0, 10);
+ if (date.compareTo(today) >= 0) {
+ continue;
+ }
+ String isExit = parts[1].equals("TRUE") ? "t" : "";
+ String keyWithoutTime = String.format("%s,%s,%s,%s",
+ date, isExit, parts[2], parts[3]);
+ long value = Long.parseLong(parts[4]);
+ preAggregatedValues.putIfAbsent(keyWithoutTime, new ArrayList<>());
+ preAggregatedValues.get(keyWithoutTime).add(value);
+ }
+ }
+ File aggregateResultsFile = new File("stats/advbwdist.csv");
+ aggregateResultsFile.getParentFile().mkdirs();
+ try (BufferedWriter bw2 = new BufferedWriter(
+ new FileWriter(aggregateResultsFile))) {
+ bw2.write("date,isexit,relay,percentile,advbw\n");
+ for (Map.Entry<String, List<Long>> e : preAggregatedValues.entrySet()) {
+ bw2.write(String.format("%s,%.0f%n", e.getKey(),
+ computeMedian(e.getValue())));
+ }
+ }
}
/** Compute percentiles (between 0 and 100) for the given list of values, and
@@ -168,5 +205,14 @@ public class Main {
}
return computedPercentiles;
}
+
+ /** Return the median for the given list of values, or <code>Double.NaN</code>
+ * if the given list is empty. */
+ static double computeMedian(List<Long> valueList) {
+ Median median = new Median()
+ .withEstimationType(Percentile.EstimationType.R_7);
+ median.setData(valueList.stream().mapToDouble(Long::doubleValue).toArray());
+ return Math.floor(median.evaluate());
+ }
}
1
0
commit c8a3414347c8df7aed3e63be4c704f1dd43aded4
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Mon Jan 7 09:59:18 2019 +0100
Stop hard-coding versions.
---
src/main/R/rserver/graphs.R | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index 03b5b93..d3ea90a 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -381,18 +381,18 @@ write_networksize <- function(start_p = NULL, end_p = NULL, path_p) {
}
prepare_versions <- function(start_p, end_p) {
- read.csv(paste(stats_dir, "versions.csv", sep = ""),
- colClasses = c("date" = "Date")) %>%
+ read_csv(paste(stats_dir, "versions.csv", sep = ""),
+ col_types = cols(
+ date = col_date(format = ""),
+ version = col_character(),
+ relays = col_double())) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE)
}
plot_versions <- function(start_p, end_p, path_p) {
s <- prepare_versions(start_p, end_p)
- known_versions <- c("Other", "0.1.0", "0.1.1", "0.1.2", "0.2.0",
- "0.2.1", "0.2.2", "0.2.3", "0.2.4", "0.2.5", "0.2.6", "0.2.7",
- "0.2.8", "0.2.9", "0.3.0", "0.3.1", "0.3.2", "0.3.3", "0.3.4",
- "0.3.5")
+ known_versions <- unique(s$version)
getPalette <- colorRampPalette(brewer.pal(12, "Paired"))
colours <- data.frame(breaks = known_versions,
values = rep(brewer.pal(min(12, length(known_versions)), "Paired"),
1
0
[metrics-web/release] Update news.json to version 307 of doc/MetricsTimeline.
by karsten@torproject.org 09 Nov '19
by karsten@torproject.org 09 Nov '19
09 Nov '19
commit f6f0570819a8a2e05c3e22636b21e00629d50b8f
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Mon Jan 7 12:27:51 2019 +0100
Update news.json to version 307 of doc/MetricsTimeline.
---
src/main/resources/web/json/news.json | 202 +++++++++++++++++++++++++++++-----
1 file changed, 173 insertions(+), 29 deletions(-)
diff --git a/src/main/resources/web/json/news.json b/src/main/resources/web/json/news.json
index 9810770..26bf78a 100644
--- a/src/main/resources/web/json/news.json
+++ b/src/main/resources/web/json/news.json
@@ -2489,6 +2489,15 @@
"target" : "https://en.wikipedia.org/wiki/Hurricane_Maria#Puerto_Rico_3"
} ]
}, {
+ "start" : "2017-10-04",
+ "protocols" : [ "fte" ],
+ "short_description" : "Permanent hardware failure of default FTE bridge 128.105.214.161:8080.",
+ "description" : "Permanent hardware failure of default FTE bridge 128.105.214.161:8080.",
+ "links" : [ {
+ "label" : "comment",
+ "target" : "https://bugs.torproject.org/28521#comment:2"
+ } ]
+}, {
"start" : "2017-10-05",
"protocols" : [ "ipv4", "ipv6" ],
"short_description" : "geoip and geoip6 databases updated to \"October 4 2017 Maxmind GeoLite2 Country\"",
@@ -2534,6 +2543,9 @@
"links" : [ {
"label" : "wikipedia",
"target" : "https://en.wikipedia.org/wiki/19th_National_Congress_of_the_Communist_Party…"
+ }, {
+ "label" : "Psiphon users",
+ "target" : "https://media.ccc.de/v/35c3-9964-cat_mouse_evading_the_censors_in_2018#t=20…"
} ]
}, {
"start" : "2017-10-25",
@@ -2754,6 +2766,9 @@
}, {
"label" : "tweet",
"target" : "https://twitter.com/nusenu_/status/948914485045145601"
+ }, {
+ "label" : "Psiphon users",
+ "target" : "https://media.ccc.de/v/35c3-9964-cat_mouse_evading_the_censors_in_2018#t=17…"
} ]
}, {
"start" : "2018-01-01",
@@ -3045,6 +3060,16 @@
"target" : "https://metrics.torproject.org/userstats-bridge-country.html?start=2018-03-…"
} ]
}, {
+ "start" : "2018-04-20",
+ "end" : "2018-04-27",
+ "protocols" : [ "onion" ],
+ "short_description" : "The number of v2 onion services increases from 70k to 120k.",
+ "description" : "The number of v2 onion services increases from 70k to 120k.",
+ "links" : [ {
+ "label" : "ticket",
+ "target" : "https://bugs.torproject.org/26081"
+ } ]
+}, {
"start" : "2018-04-28",
"places" : [ "ru" ],
"short_description" : "Russia unblocks about 3 million IP addresses belonging to Amazon and OVH.",
@@ -3074,6 +3099,9 @@
}, {
"label" : "bridge graph",
"target" : "https://metrics.torproject.org/userstats-bridge-country.html?start=2018-03-…"
+ }, {
+ "label" : "Psiphon users",
+ "target" : "https://media.ccc.de/v/35c3-9964-cat_mouse_evading_the_censors_in_2018#t=19…"
} ]
}, {
"start" : "2018-05-08",
@@ -3169,6 +3197,16 @@
"target" : "https://www.accessnow.org/venezuela-blocks-tor/"
} ]
}, {
+ "start" : "2018-06-24",
+ "end" : "2018-07-06",
+ "protocols" : [ "snowflake" ],
+ "short_description" : "Metrics for Snowflake are missing, for unknown reasons",
+ "description" : "Metrics for Snowflake are missing, for unknown reasons. The gap is nearly contemporaneous with the gap the measurements for all transports/bridges caused by Bifroest–Serge bridge authority switchover, but starts about 2 weeks earlier in Snowflake's case.",
+ "links" : [ {
+ "label" : "ticket",
+ "target" : "https://bugs.torproject.org/26783"
+ } ]
+}, {
"start" : "2018-06-27",
"protocols" : [ "meek" ],
"short_description" : "Release of Tor Browser 8.0a9 with non-working meek.",
@@ -3198,6 +3236,9 @@
}, {
"label" : "AllAfrica article",
"target" : "http://allafrica.com/stories/201807040129.html"
+ }, {
+ "label" : "OONI report",
+ "target" : "https://ooni.io/post/uganda-social-media-tax/"
} ]
}, {
"start" : "2018-07-04",
@@ -3253,6 +3294,25 @@
} ]
}, {
"start" : "2018-07-14",
+ "end" : "2018-07-25",
+ "places" : [ "iq" ],
+ "short_description" : "Protests, Internet shutdowns, and social media blocks in Iraq.",
+ "description" : "Protests, Internet shutdowns, and social media blocks in Iraq.",
+ "links" : [ {
+ "label" : "relay graph",
+ "target" : "https://metrics.torproject.org/userstats-relay-country.html?start=2018-06-0…"
+ }, {
+ "label" : "NetBlocks post",
+ "target" : "https://netblocks.org/reports/study-shows-extent-of-iraq-internet-shutdown-…"
+ }, {
+ "label" : "NRT article about end",
+ "target" : "http://www.nrttv.com/EN/News.aspx?id=2810&MapID=1"
+ }, {
+ "label" : "Psiphon users",
+ "target" : "https://media.ccc.de/v/35c3-9964-cat_mouse_evading_the_censors_in_2018#t=17…"
+ } ]
+}, {
+ "start" : "2018-07-14",
"protocols" : [ "bridge" ],
"short_description" : "Release of Tor 0.2.9.16, 0.3.2.11, 0.3.3.9, and 0.3.4.5-rc",
"description" : "Release of Tor 0.2.9.16, 0.3.2.11, 0.3.3.9, and 0.3.4.5-rc. Switches bridge authority from Bifroest to <a href=\"https://metrics.torproject.org/rs.html#details/BA44A889E64B93FAA2B114E02C2A…">Serge</a>. The number of bridges begins counting up from zero as bridges are upgraded. The estimated number of bridge users remained unavailable until 2018-07-21 because of the discontinuity.",
@@ -3302,6 +3362,9 @@
}, {
"label" : "Daily Star article on throttling",
"target" : "https://www.thedailystar.net/country/bangladesh-mobile-internet-speed-broug…"
+ }, {
+ "label" : "NetBlocks report",
+ "target" : "https://netblocks.org/reports/bangladesh-internet-shutdown-student-protests…"
} ]
}, {
"start" : "2018-08-04",
@@ -3316,6 +3379,95 @@
"target" : "https://lists.torproject.org/pipermail/tor-relays/2018-August/015850.html"
} ]
}, {
+ "start" : "2018-09-26",
+ "ongoing" : true,
+ "protocols" : [ "fte" ],
+ "short_description" : "Outage of default FTE bridge 128.105.214.162:8080.",
+ "description" : "Outage of default FTE bridge 128.105.214.162:8080.",
+ "links" : [ {
+ "label" : "comment",
+ "target" : "https://bugs.torproject.org/28521#comment:3"
+ } ]
+}, {
+ "start" : "2018-09-26",
+ "ongoing" : true,
+ "protocols" : [ "fte" ],
+ "short_description" : "Outage of default FTE bridge 128.105.214.163:8080.",
+ "description" : "Outage of default FTE bridge 128.105.214.163:8080.",
+ "links" : [ {
+ "label" : "comment",
+ "target" : "https://bugs.torproject.org/28521#comment:3"
+ } ]
+}, {
+ "start" : "2018-10-25",
+ "end" : "2018-10-28",
+ "protocols" : [ "relay" ],
+ "short_description" : "Gap in observed number of relay users, caused by the estimated fraction of reporting relays exceeding 100%.",
+ "description" : "Gap in observed number of relay users, caused by the estimated fraction of reporting relays exceeding 100%.",
+ "links" : [ {
+ "label" : "metrics-team post",
+ "target" : "https://lists.torproject.org/pipermail/metrics-team/2018-November/000936.ht…"
+ }, {
+ "label" : "ticket",
+ "target" : "https://bugs.torproject.org/28305"
+ }, {
+ "label" : "graph",
+ "target" : "https://metrics.torproject.org/userstats-relay-country.html?start=2018-10-0…"
+ }, {
+ "label" : "archived graph",
+ "target" : "https://web.archive.org/web/20181104023227/https://metrics.torproject.org/u…"
+ } ]
+}, {
+ "start" : "2018-11-16",
+ "end" : "2018-11-22",
+ "protocols" : [ "snowflake" ],
+ "short_description" : "A full disk stops the Snowflake bridge and fallback proxies from working.",
+ "description" : "A full disk stops the Snowflake bridge and fallback proxies from working.",
+ "links" : [ {
+ "label" : "ticket",
+ "target" : "https://bugs.torproject.org/28390"
+ } ]
+}, {
+ "start" : "2018-11-19",
+ "end" : "2018-11-27",
+ "short_description" : "Slow-running processes on the metrics host cause an observed drop in the overall relay bandwidth graph.",
+ "description" : "Slow-running processes on the metrics host cause an observed drop in the overall relay bandwidth graph.",
+ "links" : [ {
+ "label" : "mailing list thread",
+ "target" : "https://lists.torproject.org/pipermail/metrics-team/2018-December/000971.ht…"
+ } ]
+}, {
+ "start" : "2018-11-26",
+ "end" : "2018-11-28",
+ "short_description" : "Outage of the onionperf-us instance, caused by a Greenhost east coast VPS migration.",
+ "description" : "Outage of the onionperf-us instance, caused by a Greenhost east coast VPS migration.",
+ "links" : [ {
+ "label" : "mailing list post",
+ "target" : "https://lists.torproject.org/pipermail/metrics-team/2018-November/000967.ht…"
+ } ]
+}, {
+ "start" : "2018-11-26",
+ "end" : "2018-11-28",
+ "protocols" : [ "flashproxy" ],
+ "short_description" : "Outage of the flash proxy badge hosting server, flashproxy.bamsoftware.com, caused by a Greenhost east coast VPS migration.",
+ "description" : "Outage of the flash proxy badge hosting server, flashproxy.bamsoftware.com, caused by a Greenhost east coast VPS migration."
+}, {
+ "start" : "2018-12-20",
+ "ongoing" : true,
+ "places" : [ "sd" ],
+ "short_description" : "Protests and social media blocks in Sudan.",
+ "description" : "Protests and social media blocks in Sudan.",
+ "links" : [ {
+ "label" : "relay graph",
+ "target" : "https://metrics.torproject.org/userstats-relay-country.html?start=2018-11-1…"
+ }, {
+ "label" : "Access Now post",
+ "target" : "https://www.accessnow.org/amid-countrywide-protest-sudan-shuts-down-social-…"
+ }, {
+ "label" : "Psiphon users",
+ "target" : "https://media.ccc.de/v/35c3-9964-cat_mouse_evading_the_censors_in_2018#t=16…"
+ } ]
+}, {
"start" : "2016-02-24",
"places" : [ "tm" ],
"protocols" : [ "<OR>" ],
@@ -3420,8 +3572,8 @@
"end" : "2017-03-01",
"places" : [ "ae" ],
"protocols" : [ "<OR>", "relay" ],
- "short_description" : "Huge increase in relay users",
- "description" : "Huge increase in relay users (400k+). An anonymous contributor suggests that it may be a botnet, based on the large number of hosts with an open SMB port in the UAE.",
+ "short_description" : "Huge spike in relay users",
+ "description" : "Huge spike in relay users (400k+). An anonymous contributor suggests that it may be a botnet, based on the large number of hosts with an open SMB port in the UAE.",
"links" : [ {
"label" : "graph",
"target" : "https://metrics.torproject.org/userstats-relay-country.html?start=2017-01-0…"
@@ -3480,14 +3632,17 @@
"unknown" : true
}, {
"start" : "2017-03-01",
- "end" : "2017-07-01",
+ "end" : "2018-11-12",
"places" : [ "ae" ],
"protocols" : [ "<OR>", "relay" ],
- "short_description" : "Another increase in relay users, with a slower rate of growth than the previous one.",
- "description" : "Another increase in relay users, with a slower rate of growth than the previous one.",
+ "short_description" : "Another increase in relay users, slower and more sustained than the previous one.",
+ "description" : "Another increase in relay users, slower and more sustained than the previous one.",
"links" : [ {
"label" : "graph",
- "target" : "https://metrics.torproject.org/userstats-relay-country.html?start=2017-01-0…"
+ "target" : "https://metrics.torproject.org/userstats-relay-country.html?start=2016-07-0…"
+ }, {
+ "label" : "ticket about end",
+ "target" : "https://bugs.torproject.org/28898#comment:2"
} ],
"unknown" : true
}, {
@@ -3609,18 +3764,6 @@
} ],
"unknown" : true
}, {
- "start" : "2017-07-01",
- "end" : "2017-08-30",
- "places" : [ "ae" ],
- "protocols" : [ "<OR>", "relay" ],
- "short_description" : "Slow increase in relay users.",
- "description" : "Slow increase in relay users.",
- "links" : [ {
- "label" : "graph",
- "target" : "https://metrics.torproject.org/userstats-relay-country.html?start=2017-06-0…"
- } ],
- "unknown" : true
-}, {
"start" : "2017-07-15",
"end" : "2017-07-22",
"places" : [ "sc" ],
@@ -3786,17 +3929,6 @@
} ],
"unknown" : true
}, {
- "start" : "2017-09-01",
- "places" : [ "ae" ],
- "protocols" : [ "<OR>", "relay" ],
- "short_description" : "Relay users remain volatile but flatten their rate of growth.",
- "description" : "Relay users remain volatile but flatten their rate of growth.",
- "links" : [ {
- "label" : "graph",
- "target" : "https://metrics.torproject.org/userstats-relay-country.html?start=2017-06-0…"
- } ],
- "unknown" : true
-}, {
"start" : "2017-09-02",
"end" : "2017-09-21",
"protocols" : [ "obfs4" ],
@@ -3956,6 +4088,18 @@
} ],
"unknown" : true
}, {
+ "start" : "2018-05-22",
+ "end" : "2018-06-08",
+ "places" : [ "iq" ],
+ "protocols" : [ "obfs4" ],
+ "short_description" : "Increase in obfs4 users in Iraq, followed by a slow decrease",
+ "description" : "Increase in obfs4 users in Iraq, followed by a slow decrease. No matching change in relay users or other transports.",
+ "links" : [ {
+ "label" : "bridge graph",
+ "target" : "https://metrics.torproject.org/userstats-bridge-country.html?start=2018-04-…"
+ } ],
+ "unknown" : true
+}, {
"start" : "2018-07-28",
"ongoing" : true,
"places" : [ "tr" ],
1
0
[metrics-web/release] Remove two unused R files from censorship detector.
by karsten@torproject.org 09 Nov '19
by karsten@torproject.org 09 Nov '19
09 Nov '19
commit c0a18aab9092c57f107732cb2f97f034909e94d9
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Dec 20 14:09:24 2018 +0100
Remove two unused R files from censorship detector.
Still part of #21588.
---
src/main/R/clients/merge-clients.R | 19 -------------------
src/main/R/clients/userstats-detector.R | 18 ------------------
2 files changed, 37 deletions(-)
diff --git a/src/main/R/clients/merge-clients.R b/src/main/R/clients/merge-clients.R
deleted file mode 100644
index cce7e9d..0000000
--- a/src/main/R/clients/merge-clients.R
+++ /dev/null
@@ -1,19 +0,0 @@
-require(reshape)
-r <- read.csv("userstats-ranges.csv", stringsAsFactors = FALSE)
-r <- melt(r, id.vars = c("date", "country"))
-r <- data.frame(date = r$date, node = "relay", country = r$country,
- transport = "", version = "",
- variable = ifelse(r$variable == "maxusers", "upper", "lower"),
- value = floor(r$value))
-u <- read.csv("userstats.csv", stringsAsFactors = FALSE)
-u <- melt(u, id.vars = c("date", "node", "country", "transport",
- "version"))
-u <- data.frame(date = u$date, node = u$node, country = u$country,
- transport = u$transport, version = u$version,
- variable = ifelse(u$variable == "frac", "frac", "clients"),
- value = u$value)
-c <- rbind(r, u)
-c <- cast(c, date + node + country + transport + version ~ variable)
-c <- c[order(as.Date(c$date), c$node, c$country, c$transport, c$version), ]
-write.csv(c, "clients.csv", quote = FALSE, row.names = FALSE, na = "")
-
diff --git a/src/main/R/clients/userstats-detector.R b/src/main/R/clients/userstats-detector.R
deleted file mode 100644
index c3a9041..0000000
--- a/src/main/R/clients/userstats-detector.R
+++ /dev/null
@@ -1,18 +0,0 @@
-library("reshape")
-export_userstats_detector <- function(path) {
- c <- read.csv("userstats.csv", stringsAsFactors = FALSE)
- c <- c[c$country != '' & c$transport == '' & c$version == '' &
- c$node == 'relay', ]
- u <- data.frame(country = c$country, date = c$date, users = c$users,
- stringsAsFactors = FALSE)
- u <- rbind(u, data.frame(country = "zy",
- aggregate(list(users = u$users),
- by = list(date = u$date), sum)))
- u <- data.frame(date = u$date, country = u$country,
- users = floor(u$users))
- u <- cast(u, date ~ country, value = "users")
- names(u)[names(u) == "zy"] <- "all"
- write.csv(u, path, quote = FALSE, row.names = FALSE)
-}
-export_userstats_detector("userstats-detector.csv")
-
1
0
09 Nov '19
commit 87f922d4fd555804d4e80fdefd7968acce5f4433
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Sat Dec 29 09:23:35 2018 +0100
Fix Traffic link on start page.
---
src/main/resources/web/jsps/index.jsp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/resources/web/jsps/index.jsp b/src/main/resources/web/jsps/index.jsp
index 3fa49b8..ec93792 100644
--- a/src/main/resources/web/jsps/index.jsp
+++ b/src/main/resources/web/jsps/index.jsp
@@ -42,7 +42,7 @@
</div>
<div class="col-sm-4">
- <a href="bandwidth.html"><i class="fa fa-road fa-fw fa-4x" aria-hidden="true"></i> <h3>Traffic</h3> <p>How much traffic the Tor network can handle and how much traffic there is.</p></a>
+ <a href="bandwidth-flags.html"><i class="fa fa-road fa-fw fa-4x" aria-hidden="true"></i> <h3>Traffic</h3> <p>How much traffic the Tor network can handle and how much traffic there is.</p></a>
</div>
<div class="col-sm-4">
1
0
[metrics-web/release] Properly skip previously imported webstats files.
by karsten@torproject.org 09 Nov '19
by karsten@torproject.org 09 Nov '19
09 Nov '19
commit 9bdb6d39fc7b0ac8e7327caeafabfac43a41689f
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Mon Jan 7 11:59:19 2019 +0100
Properly skip previously imported webstats files.
Turns out we never skipped previously imported webstats files due to
two bugs:
1. While building a list of previously imported webstats files we
reassembled their file names as ${server}_${site}_* rather than
${site}_${server}_* which was the file name format we chose in an
earlier version of the CollecTor module.
2. When checking whether a given webstats file already exists in the
database we compared the full file name to the reassembled file
name from the database with ${server} being truncated to 32
characters.
This commit fixes both bugs.
---
src/main/java/org/torproject/metrics/stats/webstats/Main.java | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/main/java/org/torproject/metrics/stats/webstats/Main.java b/src/main/java/org/torproject/metrics/stats/webstats/Main.java
index a154e64..fb0a903 100644
--- a/src/main/java/org/torproject/metrics/stats/webstats/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/webstats/Main.java
@@ -100,7 +100,7 @@ public class Main {
try (ResultSet rs = st.executeQuery(queryString)) {
while (rs.next()) {
importedLogFileUrls.add(String.format("%s_%s_access.log_%s.xz",
- rs.getString(1), rs.getString(2),
+ rs.getString(2), rs.getString(1),
rs.getDate(3).toLocalDate().format(dateFormat)));
}
}
@@ -111,13 +111,19 @@ public class Main {
static void importLogFiles(Connection connection, SortedSet<String> skipFiles,
File... inDirectories) {
+ DateTimeFormatter dateFormat = DateTimeFormatter.ofPattern("yyyyMMdd");
for (Descriptor descriptor : DescriptorSourceFactory
.createDescriptorReader().readDescriptors(inDirectories)) {
if (!(descriptor instanceof WebServerAccessLog)) {
continue;
}
WebServerAccessLog logFile = (WebServerAccessLog) descriptor;
- if (skipFiles.contains(logFile.getDescriptorFile().getName())) {
+ String logFileNameWithTruncatedParts = String.format(
+ "%s_%s_access.log_%s.xz",
+ truncateString(logFile.getVirtualHost(), 128),
+ truncateString(logFile.getPhysicalHost(), 32),
+ logFile.getLogDate().format(dateFormat));
+ if (skipFiles.contains(logFileNameWithTruncatedParts)) {
continue;
}
try {
1
0
09 Nov '19
commit f55e63d986ed9c1054ce19ff0d4a19b1c0bce26d
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Jan 10 09:54:39 2019 +0100
Split up huge plot_userstats function.
The mere size of this function made it hard to impossible to refactor
things to using more recent R packages dplyr and tidyr. Now there are
four plot_userstats_* functions with accompanying prepare_userstats_*
that make the corresponding write_userstats_* functions really small.
---
src/main/R/rserver/graphs.R | 269 +++++++++++++++++++-------------------------
1 file changed, 115 insertions(+), 154 deletions(-)
diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index d3ea90a..ba8862c 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -751,9 +751,9 @@ write_bandwidth_flags <- function(start_p = NULL, end_p = NULL, path_p) {
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-plot_userstats <- function(start_p, end_p, node_p, variable_p, value_p,
- events_p, path_p) {
- c <- read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
+prepare_userstats_relay_country <- function(start_p, end_p, country_p,
+ events_p) {
+ read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
node = col_character(),
@@ -763,97 +763,26 @@ plot_userstats <- function(start_p, end_p, node_p, variable_p, value_p,
lower = col_double(),
upper = col_double(),
clients = col_double(),
- frac = col_skip()),
+ frac = col_double()),
na = character()) %>%
- filter(node == node_p)
- u <- c[c$date >= start_p & c$date <= end_p, c("date", "country", "transport",
- "version", "lower", "upper", "clients")]
- u <- rbind(u, data.frame(date = start_p,
- country = ifelse(variable_p == "country" & value_p != "all", value_p, ""),
- transport = ifelse(variable_p == "transport", value_p, ""),
- version = ifelse(variable_p == "version", value_p, ""),
- lower = 0, upper = 0, clients = 0))
- if (node_p == "relay") {
- if (value_p != "all") {
- u <- u[u$country == value_p, ]
- title <- paste("Directly connecting users from", countryname(value_p))
- } else {
- u <- u[u$country == "", ]
- title <- "Directly connecting users"
- }
- u <- aggregate(list(lower = u$lower, upper = u$upper,
- users = u$clients),
- by = list(date = as.Date(u$date, "%Y-%m-%d"),
- value = u$country),
- FUN = sum)
- } else if (variable_p == "transport") {
- if ("!<OR>" %in% value_p) {
- n <- u[u$transport != "" & u$transport != "<OR>", ]
- n <- aggregate(list(lower = n$lower, upper = n$upper,
- clients = n$clients),
- by = list(date = n$date),
- FUN = sum)
- u <- rbind(u, data.frame(date = n$date,
- country = "", transport = "!<OR>",
- version = "", lower = n$lower,
- upper = n$upper, clients = n$clients))
- }
- if (length(value_p) > 1) {
- u <- u[u$transport %in% value_p, ]
- u <- aggregate(list(lower = u$lower, upper = u$upper,
- users = u$clients),
- by = list(date = as.Date(u$date, "%Y-%m-%d"),
- value = u$transport),
- FUN = sum)
- title <- paste("Bridge users by transport")
- } else {
- u <- u[u$transport == value_p, ]
- u <- aggregate(list(lower = u$lower, upper = u$upper,
- users = u$clients),
- by = list(date = as.Date(u$date, "%Y-%m-%d"),
- value = u$transport),
- FUN = sum)
- title <- paste("Bridge users using",
- ifelse(value_p == "<??>", "unknown pluggable transport(s)",
- ifelse(value_p == "<OR>", "default OR protocol",
- ifelse(value_p == "!<OR>", "any pluggable transport",
- ifelse(value_p == "fte", "FTE",
- ifelse(value_p == "websocket", "Flash proxy/websocket",
- paste("transport", value_p)))))))
- }
- } else if (variable_p == "version") {
- u <- u[u$version == value_p, ]
- title <- paste("Bridge users using IP", value_p, sep = "")
- u <- aggregate(list(lower = u$lower, upper = u$upper,
- users = u$clients),
- by = list(date = as.Date(u$date, "%Y-%m-%d"),
- value = u$version),
- FUN = sum)
- } else {
- if (value_p != "all") {
- u <- u[u$country == value_p, ]
- title <- paste("Bridge users from", countryname(value_p))
- } else {
- u <- u[u$country == "" & u$transport == "" & u$version == "", ]
- title <- "Bridge users"
- }
- u <- aggregate(list(lower = u$lower, upper = u$upper,
- users = u$clients),
- by = list(date = as.Date(u$date, "%Y-%m-%d"),
- value = u$country),
- FUN = sum)
- }
- u <- merge(x = u, all.y = TRUE, y = data.frame(expand.grid(
- date = seq(from = as.Date(start_p, "%Y-%m-%d"),
- to = as.Date(end_p, "%Y-%m-%d"), by = "1 day"),
- value = ifelse(value_p == "all", "", value_p))))
- if (length(value_p) > 1) {
- plot <- ggplot(u, aes(x = date, y = users, colour = value))
- } else {
- plot <- ggplot(u, aes(x = date, y = users))
- }
+ filter(node == "relay") %>%
+ filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
+ filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
+ filter(if (!is.null(country_p))
+ country == ifelse(country_p == "all", "", country_p) else TRUE) %>%
+ filter(transport == "") %>%
+ filter(version == "") %>%
+ select(date, country, clients, lower, upper, frac) %>%
+ rename(users = clients)
+}
+
+plot_userstats_relay_country <- function(start_p, end_p, country_p, events_p,
+ path_p) {
+ u <- prepare_userstats_relay_country(start_p, end_p, country_p, events_p) %>%
+ complete(date = full_seq(date, period = 1))
+ plot <- ggplot(u, aes(x = date, y = users))
if (length(na.omit(u$users)) > 0 & events_p != "off" &
- variable_p == "country" & length(value_p) == 1 && value_p != "all") {
+ country_p != "all") {
upturns <- u[u$users > u$upper, c("date", "users")]
downturns <- u[u$users < u$lower, c("date", "users")]
if (events_p == "on") {
@@ -875,69 +804,20 @@ plot_userstats <- function(start_p, end_p, node_p, variable_p, value_p,
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
- ggtitle(title) +
+ ggtitle(paste("Directly connecting users",
+ ifelse(country_p == "all", "",
+ paste(" from", countryname(country_p))), sep = "")) +
labs(caption = copyright_notice)
- if (length(value_p) > 1) {
- plot <- plot +
- scale_colour_hue(name = "", breaks = value_p,
- labels = ifelse(value_p == "<??>", "Unknown PT",
- ifelse(value_p == "<OR>", "Default OR protocol",
- ifelse(value_p == "!<OR>", "Any PT",
- ifelse(value_p == "fte", "FTE",
- ifelse(value_p == "websocket", "Flash proxy/websocket",
- value_p))))))
- }
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-plot_userstats_relay_country <- function(start_p, end_p, country_p, events_p,
- path_p) {
- plot_userstats(start_p, end_p, "relay", "country", country_p, events_p,
- path_p)
-}
-
-plot_userstats_bridge_country <- function(start_p, end_p, country_p, path_p) {
- plot_userstats(start_p, end_p, "bridge", "country", country_p, "off", path_p)
-}
-
-plot_userstats_bridge_transport <- function(start_p, end_p, transport_p,
- path_p) {
- plot_userstats(start_p, end_p, "bridge", "transport", transport_p, "off",
- path_p)
-}
-
-plot_userstats_bridge_version <- function(start_p, end_p, version_p, path_p) {
- plot_userstats(start_p, end_p, "bridge", "version", version_p, "off", path_p)
-}
-
write_userstats_relay_country <- function(start_p = NULL, end_p = NULL,
country_p = NULL, events_p = NULL, path_p) {
- read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
- col_types = cols(
- date = col_date(format = ""),
- node = col_character(),
- country = col_character(),
- transport = col_character(),
- version = col_character(),
- lower = col_double(),
- upper = col_double(),
- clients = col_double(),
- frac = col_double()),
- na = character()) %>%
- filter(node == "relay") %>%
- filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
- filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
- filter(if (!is.null(country_p))
- country == ifelse(country_p == "all", "", country_p) else TRUE) %>%
- filter(transport == "") %>%
- filter(version == "") %>%
- select(date, country, clients, lower, upper, frac) %>%
- rename(users = clients) %>%
+ prepare_userstats_relay_country(start_p, end_p, country_p, events_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-write_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
- country_p = NULL, path_p) {
+prepare_userstats_bridge_country <- function(start_p, end_p, country_p) {
read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -958,12 +838,32 @@ write_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
filter(transport == "") %>%
filter(version == "") %>%
select(date, country, clients, frac) %>%
- rename(users = clients) %>%
+ rename(users = clients)
+}
+
+plot_userstats_bridge_country <- function(start_p, end_p, country_p, path_p) {
+ prepare_userstats_bridge_country(start_p, end_p, country_p) %>%
+ complete(date = full_seq(date, period = 1)) %>%
+ ggplot(aes(x = date, y = users)) +
+ geom_line() +
+ scale_x_date(name = "", breaks = custom_breaks,
+ labels = custom_labels, minor_breaks = custom_minor_breaks) +
+ scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
+ ggtitle(paste("Bridge users",
+ ifelse(country_p == "all", "",
+ paste(" from", countryname(country_p))), sep = "")) +
+ labs(caption = copyright_notice)
+ ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
+}
+
+write_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
+ country_p = NULL, path_p) {
+ prepare_userstats_bridge_country(start_p, end_p, country_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-write_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
- transport_p = NULL, path_p) {
+prepare_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
+ transport_p = NULL) {
u <- read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -992,15 +892,58 @@ write_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
}
u %>%
filter(if (!is.null(transport_p)) transport %in% transport_p else TRUE) %>%
- group_by(date, transport) %>%
select(date, transport, clients, frac) %>%
rename(users = clients) %>%
- arrange(date, transport) %>%
+ arrange(date, transport)
+}
+
+plot_userstats_bridge_transport <- function(start_p, end_p, transport_p,
+ path_p) {
+ if (length(transport_p) > 1) {
+ title <- paste("Bridge users by transport")
+ } else {
+ title <- paste("Bridge users using",
+ ifelse(transport_p == "<??>", "unknown pluggable transport(s)",
+ ifelse(transport_p == "<OR>", "default OR protocol",
+ ifelse(transport_p == "!<OR>", "any pluggable transport",
+ ifelse(transport_p == "fte", "FTE",
+ ifelse(transport_p == "websocket", "Flash proxy/websocket",
+ paste("transport", transport_p)))))))
+ }
+ u <- prepare_userstats_bridge_transport(start_p, end_p, transport_p) %>%
+ complete(date = full_seq(date, period = 1), nesting(transport))
+ if (length(transport_p) > 1) {
+ plot <- ggplot(u, aes(x = date, y = users, colour = transport))
+ } else {
+ plot <- ggplot(u, aes(x = date, y = users))
+ }
+ plot <- plot +
+ geom_line() +
+ scale_x_date(name = "", breaks = custom_breaks,
+ labels = custom_labels, minor_breaks = custom_minor_breaks) +
+ scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
+ ggtitle(title) +
+ labs(caption = copyright_notice)
+ if (length(transport_p) > 1) {
+ plot <- plot +
+ scale_colour_hue(name = "", breaks = transport_p,
+ labels = ifelse(transport_p == "<??>", "Unknown PT",
+ ifelse(transport_p == "<OR>", "Default OR protocol",
+ ifelse(transport_p == "!<OR>", "Any PT",
+ ifelse(transport_p == "fte", "FTE",
+ ifelse(transport_p == "websocket", "Flash proxy/websocket",
+ transport_p))))))
+ }
+ ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
+}
+
+write_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
+ transport_p = NULL, path_p) {
+ prepare_userstats_bridge_transport(start_p, end_p, transport_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-write_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
- version_p = NULL, path_p) {
+prepare_userstats_bridge_version <- function(start_p, end_p, version_p) {
read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -1019,7 +962,25 @@ write_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
filter(is.na(transport)) %>%
filter(if (!is.null(version_p)) version == version_p else TRUE) %>%
select(date, version, clients, frac) %>%
- rename(users = clients) %>%
+ rename(users = clients)
+}
+
+plot_userstats_bridge_version <- function(start_p, end_p, version_p, path_p) {
+ prepare_userstats_bridge_version(start_p, end_p, version_p) %>%
+ complete(date = full_seq(date, period = 1)) %>%
+ ggplot(aes(x = date, y = users)) +
+ geom_line() +
+ scale_x_date(name = "", breaks = custom_breaks,
+ labels = custom_labels, minor_breaks = custom_minor_breaks) +
+ scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
+ ggtitle(paste("Bridge users using IP", version_p, sep = "")) +
+ labs(caption = copyright_notice)
+ ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
+}
+
+write_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
+ version_p = NULL, path_p) {
+ prepare_userstats_bridge_version(start_p, end_p, version_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
1
0
09 Nov '19
commit 0d2f1e2afd5f4b9e5c533d256586bb03d7466d5f
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Jan 10 15:39:04 2019 +0100
Make write_* functions obsolete.
In most cases these functions would call their prepare_* equivalents,
possibly tweak the result, and write it to a .csv file. This patch
moves all those tweaks to the prepare_* functions, possibly reverts
them in the plot_* functions, and makes the write_* functions
obsolete.
The result is not only less code. We're also going to find bugs in
written .csv files sooner, because the same code is now run for
writing graph files, and the latter happens much more often.
---
src/main/R/rserver/graphs.R | 414 +++++++--------------
.../torproject/metrics/web/RObjectGenerator.java | 2 +-
2 files changed, 140 insertions(+), 276 deletions(-)
diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index 27f399d..82a51e7 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -348,10 +348,17 @@ robust_call <- function(wrappee, filename) {
})
}
+# Write the result of the given FUN, typically a prepare_ function, as .csv file
+# to the given path_p.
+write_data <- function(FUN, ..., path_p) {
+ FUN(...) %>%
+ write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
+}
+
# Disable readr's automatic progress bar.
options(readr.show_progress = FALSE)
-prepare_networksize <- function(start_p, end_p) {
+prepare_networksize <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "networksize.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -375,12 +382,7 @@ plot_networksize <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_networksize <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_networksize(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_versions <- function(start_p, end_p) {
+prepare_versions <- function(start_p = NULL, end_p = NULL) {
read_csv(paste(stats_dir, "versions.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -413,42 +415,34 @@ plot_versions <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_versions <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_versions(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_platforms <- function(start_p, end_p) {
+prepare_platforms <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "platforms.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
- filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE)
+ filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
+ mutate(platform = tolower(platform)) %>%
+ spread(platform, relays)
}
plot_platforms <- function(start_p, end_p, path_p) {
prepare_platforms(start_p, end_p) %>%
+ gather(platform, relays, -date) %>%
ggplot(aes(x = date, y = relays, colour = platform)) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
scale_colour_manual(name = "Platform",
- breaks = c("Linux", "macOS", "BSD", "Windows", "Other"),
- values = c("Linux" = "#56B4E9", "macOS" = "#333333", "BSD" = "#E69F00",
- "Windows" = "#0072B2", "Other" = "#009E73")) +
+ breaks = c("linux", "macos", "bsd", "windows", "other"),
+ labels = c("Linux", "macOS", "BSD", "Windows", "Other"),
+ values = c("linux" = "#56B4E9", "macos" = "#333333", "bsd" = "#E69F00",
+ "windows" = "#0072B2", "other" = "#009E73")) +
ggtitle("Relay platforms") +
labs(caption = copyright_notice)
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_platforms <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_platforms(start_p, end_p) %>%
- mutate(platform = tolower(platform)) %>%
- spread(platform, relays) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_dirbytes <- function(start_p, end_p, path_p) {
+prepare_dirbytes <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "bandwidth.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -478,12 +472,7 @@ plot_dirbytes <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_dirbytes <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_dirbytes(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_relayflags <- function(start_p, end_p, flag_p) {
+prepare_relayflags <- function(start_p = NULL, end_p = NULL, flag_p = NULL) {
read.csv(paste(stats_dir, "relayflags.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -507,13 +496,8 @@ plot_relayflags <- function(start_p, end_p, flag_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_relayflags <- function(start_p = NULL, end_p = NULL, flag_p = NULL,
- path_p) {
- prepare_relayflags(start_p, end_p, flag_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_torperf <- function(start_p, end_p, server_p, filesize_p, path_p) {
+prepare_torperf <- function(start_p = NULL, end_p = NULL, server_p = NULL,
+ filesize_p = NULL) {
read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""),
colClasses = c("date" = "Date", "source" = "character")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -528,7 +512,7 @@ prepare_torperf <- function(start_p, end_p, server_p, filesize_p, path_p) {
}
plot_torperf <- function(start_p, end_p, server_p, filesize_p, path_p) {
- prepare_torperf(start_p, end_p, server_p, filesize_p, path_p) %>%
+ prepare_torperf(start_p, end_p, server_p, filesize_p) %>%
filter(source != "") %>%
complete(date = full_seq(date, period = 1), nesting(source)) %>%
ggplot(aes(x = date, y = md, ymin = q1, ymax = q3, fill = source)) +
@@ -549,13 +533,8 @@ plot_torperf <- function(start_p, end_p, server_p, filesize_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_torperf <- function(start_p = NULL, end_p = NULL, server_p = NULL,
- filesize_p = NULL, path_p) {
- prepare_torperf(start_p, end_p, server_p, filesize_p, path_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_torperf_failures <- function(start_p, end_p, server_p, filesize_p) {
+prepare_torperf_failures <- function(start_p = NULL, end_p = NULL,
+ server_p = NULL, filesize_p = NULL) {
read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -593,24 +572,13 @@ plot_torperf_failures <- function(start_p, end_p, server_p, filesize_p,
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_torperf_failures <- function(start_p = NULL, end_p = NULL,
- server_p = NULL, filesize_p = NULL, path_p) {
- prepare_torperf_failures(start_p, end_p, server_p, filesize_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_onionperf_buildtimes <- function(start_p, end_p) {
+prepare_onionperf_buildtimes <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "buildtimes.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE)
}
-write_onionperf_buildtimes <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_onionperf_buildtimes(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
plot_onionperf_buildtimes <- function(start_p, end_p, path_p) {
prepare_onionperf_buildtimes(start_p, end_p) %>%
filter(source != "") %>%
@@ -634,20 +602,15 @@ plot_onionperf_buildtimes <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-prepare_onionperf_latencies <- function(start_p, end_p, server_p) {
- read.csv(paste(stats_dir, "latencies.csv", sep = ""),
+prepare_onionperf_latencies <- function(start_p = NULL, end_p = NULL,
+ server_p = NULL) {
+ read.csv(paste(stats_dir, "latencies.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
filter(if (!is.null(server_p)) server == server_p else TRUE)
}
-write_onionperf_latencies <- function(start_p = NULL, end_p = NULL,
- server_p = NULL, path_p) {
- prepare_onionperf_latencies(start_p, end_p, server_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
plot_onionperf_latencies <- function(start_p, end_p, server_p, path_p) {
prepare_onionperf_latencies(start_p, end_p, server_p) %>%
filter(source != "") %>%
@@ -667,21 +630,22 @@ plot_onionperf_latencies <- function(start_p, end_p, server_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-prepare_connbidirect <- function(start_p, end_p) {
+prepare_connbidirect <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "connbidirect2.csv", sep = ""),
colClasses = c("date" = "Date", "direction" = "factor")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
mutate(quantile = paste("X", quantile, sep = ""),
fraction = fraction / 100) %>%
- spread(quantile, fraction)
+ spread(quantile, fraction) %>%
+ rename(q1 = X0.25, md = X0.5, q3 = X0.75)
}
plot_connbidirect <- function(start_p, end_p, path_p) {
prepare_connbidirect(start_p, end_p) %>%
- ggplot(aes(x = date, y = X0.5, colour = direction)) +
+ ggplot(aes(x = date, y = md, colour = direction)) +
geom_line(size = 0.75) +
- geom_ribbon(aes(x = date, ymin = X0.25, ymax = X0.75,
+ geom_ribbon(aes(x = date, ymin = q1, ymax = q3,
fill = direction), alpha = 0.5, show.legend = FALSE) +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
@@ -700,13 +664,7 @@ plot_connbidirect <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_connbidirect <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_connbidirect(start_p, end_p) %>%
- rename(q1 = X0.25, md = X0.5, q3 = X0.75) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_bandwidth_flags <- function(start_p, end_p) {
+prepare_bandwidth_flags <- function(start_p = NULL, end_p = NULL) {
advbw <- read.csv(paste(stats_dir, "advbw.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
transmute(date, have_guard_flag = isguard, have_exit_flag = isexit,
@@ -719,11 +677,13 @@ prepare_bandwidth_flags <- function(start_p, end_p) {
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
filter(have_exit_flag != "") %>%
- filter(have_guard_flag != "")
+ filter(have_guard_flag != "") %>%
+ spread(variable, value)
}
plot_bandwidth_flags <- function(start_p, end_p, path_p) {
prepare_bandwidth_flags(start_p, end_p) %>%
+ gather(variable, value, c(advbw, bwhist)) %>%
unite(flags, have_guard_flag, have_exit_flag) %>%
mutate(flags = factor(flags, levels = c("f_t", "t_t", "t_f", "f_f"),
labels = c("Exit only", "Guard and Exit", "Guard only",
@@ -745,14 +705,8 @@ plot_bandwidth_flags <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_bandwidth_flags <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_bandwidth_flags(start_p, end_p) %>%
- spread(variable, value) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_userstats_relay_country <- function(start_p, end_p, country_p,
- events_p) {
+prepare_userstats_relay_country <- function(start_p = NULL, end_p = NULL,
+ country_p = NULL, events_p = NULL) {
read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -811,13 +765,8 @@ plot_userstats_relay_country <- function(start_p, end_p, country_p, events_p,
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_userstats_relay_country <- function(start_p = NULL, end_p = NULL,
- country_p = NULL, events_p = NULL, path_p) {
- prepare_userstats_relay_country(start_p, end_p, country_p, events_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_userstats_bridge_country <- function(start_p, end_p, country_p) {
+prepare_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
+ country_p = NULL) {
read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -856,12 +805,6 @@ plot_userstats_bridge_country <- function(start_p, end_p, country_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
- country_p = NULL, path_p) {
- prepare_userstats_bridge_country(start_p, end_p, country_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
prepare_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
transport_p = NULL) {
u <- read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
@@ -937,13 +880,8 @@ plot_userstats_bridge_transport <- function(start_p, end_p, transport_p,
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
- transport_p = NULL, path_p) {
- prepare_userstats_bridge_transport(start_p, end_p, transport_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_userstats_bridge_version <- function(start_p, end_p, version_p) {
+prepare_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
+ version_p = NULL) {
read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -978,27 +916,28 @@ plot_userstats_bridge_version <- function(start_p, end_p, version_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
- version_p = NULL, path_p) {
- prepare_userstats_bridge_version(start_p, end_p, version_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_userstats_bridge_combined <- function(start_p, end_p, country_p) {
- read_csv(file = paste(stats_dir, "userstats-combined.csv", sep = ""),
- col_types = cols(
- date = col_date(format = ""),
- node = col_skip(),
- country = col_character(),
- transport = col_character(),
- version = col_skip(),
- frac = col_double(),
- low = col_double(),
- high = col_double()),
- na = character()) %>%
- filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
- filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
- filter(if (!is.null(country_p)) country == country_p else TRUE)
+prepare_userstats_bridge_combined <- function(start_p = NULL, end_p = NULL,
+ country_p = NULL) {
+ if (!is.null(country_p) && country_p == "all") {
+ prepare_userstats_bridge_country(start_p, end_p, country_p)
+ } else {
+ read_csv(file = paste(stats_dir, "userstats-combined.csv", sep = ""),
+ col_types = cols(
+ date = col_date(format = ""),
+ node = col_skip(),
+ country = col_character(),
+ transport = col_character(),
+ version = col_skip(),
+ frac = col_double(),
+ low = col_double(),
+ high = col_double()),
+ na = character()) %>%
+ filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
+ filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
+ filter(if (!is.null(country_p)) country == country_p else TRUE) %>%
+ select(date, country, transport, low, high, frac) %>%
+ arrange(date, country, transport)
+ }
}
plot_userstats_bridge_combined <- function(start_p, end_p, country_p, path_p) {
@@ -1028,19 +967,7 @@ plot_userstats_bridge_combined <- function(start_p, end_p, country_p, path_p) {
}
}
-write_userstats_bridge_combined <- function(start_p = NULL, end_p = NULL,
- country_p = NULL, path_p) {
- if (!is.null(country_p) && country_p == "all") {
- write_userstats_bridge_country(start_p, end_p, country_p, path_p)
- } else {
- prepare_userstats_bridge_combined(start_p, end_p, country_p) %>%
- select(date, country, transport, low, high, frac) %>%
- arrange(date, country, transport) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
- }
-}
-
-prepare_advbwdist_perc <- function(start_p, end_p, p_p) {
+prepare_advbwdist_perc <- function(start_p = NULL, end_p = NULL, p_p = NULL) {
read.csv(paste(stats_dir, "advbwdist.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -1048,15 +975,18 @@ prepare_advbwdist_perc <- function(start_p, end_p, p_p) {
filter(if (!is.null(p_p)) percentile %in% as.numeric(p_p) else
percentile != "") %>%
transmute(date, percentile = as.factor(percentile),
- variable = ifelse(is.na(isexit), "all", "exits"),
- advbw = advbw * 8 / 1e9)
+ variable = ifelse(isexit == "t", "exits", "all"),
+ advbw = advbw * 8 / 1e9) %>%
+ spread(variable, advbw) %>%
+ rename(p = percentile)
}
plot_advbwdist_perc <- function(start_p, end_p, p_p, path_p) {
prepare_advbwdist_perc(start_p, end_p, p_p) %>%
+ gather(variable, advbw, -c(date, p)) %>%
mutate(variable = ifelse(variable == "all", "All relays",
"Exits only")) %>%
- ggplot(aes(x = date, y = advbw, colour = percentile)) +
+ ggplot(aes(x = date, y = advbw, colour = p)) +
facet_grid(variable ~ .) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
@@ -1069,15 +999,7 @@ plot_advbwdist_perc <- function(start_p, end_p, p_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_advbwdist_perc <- function(start_p = NULL, end_p = NULL, p_p = NULL,
- path_p) {
- prepare_advbwdist_perc(start_p, end_p, p_p) %>%
- spread(variable, advbw) %>%
- rename(p = percentile) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_advbwdist_relay <- function(start_p, end_p, n_p) {
+prepare_advbwdist_relay <- function(start_p = NULL, end_p = NULL, n_p = NULL) {
read.csv(paste(stats_dir, "advbwdist.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -1086,14 +1008,17 @@ prepare_advbwdist_relay <- function(start_p, end_p, n_p) {
relay != "") %>%
transmute(date, relay = as.factor(relay),
variable = ifelse(isexit != "t", "all", "exits"),
- advbw = advbw * 8 / 1e9)
+ advbw = advbw * 8 / 1e9) %>%
+ spread(variable, advbw) %>%
+ rename(n = relay)
}
plot_advbwdist_relay <- function(start_p, end_p, n_p, path_p) {
prepare_advbwdist_relay(start_p, end_p, n_p) %>%
+ gather(variable, advbw, -c(date, n)) %>%
mutate(variable = ifelse(variable == "all", "All relays",
"Exits only")) %>%
- ggplot(aes(x = date, y = advbw, colour = relay)) +
+ ggplot(aes(x = date, y = advbw, colour = n)) +
facet_grid(variable ~ .) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
@@ -1106,15 +1031,7 @@ plot_advbwdist_relay <- function(start_p, end_p, n_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_advbwdist_relay <- function(start_p = NULL, end_p = NULL, n_p = NULL,
- path_p) {
- prepare_advbwdist_relay(start_p, end_p, n_p) %>%
- spread(variable, advbw) %>%
- rename(n = relay) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_hidserv_dir_onions_seen <- function(start_p, end_p) {
+prepare_hidserv_dir_onions_seen <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "hidserv.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -1135,13 +1052,7 @@ plot_hidserv_dir_onions_seen <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_hidserv_dir_onions_seen <- function(start_p = NULL, end_p = NULL,
- path_p) {
- prepare_hidserv_dir_onions_seen(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_hidserv_rend_relayed_cells <- function(start_p, end_p) {
+prepare_hidserv_rend_relayed_cells <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "hidserv.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -1164,13 +1075,7 @@ plot_hidserv_rend_relayed_cells <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_hidserv_rend_relayed_cells <- function(start_p = NULL, end_p = NULL,
- path_p) {
- prepare_hidserv_rend_relayed_cells(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_webstats_tb <- function(start_p, end_p) {
+prepare_webstats_tb <- function(start_p = NULL, end_p = NULL) {
read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
col_types = cols(
log_date = col_date(format = ""),
@@ -1184,17 +1089,22 @@ prepare_webstats_tb <- function(start_p, end_p) {
filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
filter(request_type %in% c("tbid", "tbsd", "tbup", "tbur")) %>%
group_by(log_date, request_type) %>%
- summarize(count = sum(count))
+ summarize(count = sum(count)) %>%
+ spread(request_type, count) %>%
+ rename(date = log_date, initial_downloads = tbid,
+ signature_downloads = tbsd, update_pings = tbup,
+ update_requests = tbur)
}
plot_webstats_tb <- function(start_p, end_p, path_p) {
- d <- prepare_webstats_tb(start_p, end_p)
- levels(d$request_type) <- list(
- "Initial downloads" = "tbid",
- "Signature downloads" = "tbsd",
- "Update pings" = "tbup",
- "Update requests" = "tbur")
- ggplot(d, aes(x = log_date, y = count)) +
+ prepare_webstats_tb(start_p, end_p) %>%
+ gather(request_type, count, -date) %>%
+ mutate(request_type = factor(request_type,
+ levels = c("initial_downloads", "signature_downloads", "update_pings",
+ "update_requests"),
+ labels = c("Initial downloads", "Signature downloads", "Update pings",
+ "Update requests"))) %>%
+ ggplot(aes(x = date, y = count)) +
geom_point() +
geom_line() +
facet_grid(request_type ~ ., scales = "free_y") +
@@ -1208,16 +1118,7 @@ plot_webstats_tb <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_webstats_tb <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_webstats_tb(start_p, end_p) %>%
- rename(date = log_date) %>%
- spread(request_type, count) %>%
- rename(initial_downloads = tbid, signature_downloads = tbsd,
- update_pings = tbup, update_requests = tbur) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_webstats_tb_platform <- function(start_p, end_p) {
+prepare_webstats_tb_platform <- function(start_p = NULL, end_p = NULL) {
read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
col_types = cols(
log_date = col_date(format = ""),
@@ -1231,15 +1132,18 @@ prepare_webstats_tb_platform <- function(start_p, end_p) {
filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
filter(request_type %in% c("tbid", "tbup")) %>%
group_by(log_date, platform, request_type) %>%
- summarize(count = sum(count))
+ summarize(count = sum(count)) %>%
+ spread(request_type, count, fill = 0) %>%
+ rename(date = log_date, initial_downloads = tbid, update_pings = tbup)
}
plot_webstats_tb_platform <- function(start_p, end_p, path_p) {
- d <- prepare_webstats_tb_platform(start_p, end_p)
- levels(d$request_type) <- list(
- "Initial downloads" = "tbid",
- "Update pings" = "tbup")
- ggplot(d, aes(x = log_date, y = count, colour = platform)) +
+ prepare_webstats_tb_platform(start_p, end_p) %>%
+ gather(request_type, count, -c(date, platform)) %>%
+ mutate(request_type = factor(request_type,
+ levels = c("initial_downloads", "update_pings"),
+ labels = c("Initial downloads", "Update pings"))) %>%
+ ggplot(aes(x = date, y = count, colour = platform)) +
geom_point() +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
@@ -1257,15 +1161,7 @@ plot_webstats_tb_platform <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_webstats_tb_platform <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_webstats_tb_platform(start_p, end_p) %>%
- rename(date = log_date) %>%
- spread(request_type, count, fill = 0) %>%
- rename(initial_downloads = tbid, update_pings = tbup) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_webstats_tb_locale <- function(start_p, end_p) {
+prepare_webstats_tb_locale <- function(start_p = NULL, end_p = NULL) {
read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
col_types = cols(
log_date = col_date(format = ""),
@@ -1320,12 +1216,7 @@ plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_webstats_tb_locale <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_webstats_tb_locale(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_webstats_tm <- function(start_p, end_p) {
+prepare_webstats_tm <- function(start_p = NULL, end_p = NULL) {
read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
col_types = cols(
log_date = col_date(format = ""),
@@ -1339,15 +1230,19 @@ prepare_webstats_tm <- function(start_p, end_p) {
filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
filter(request_type %in% c("tmid", "tmup")) %>%
group_by(log_date, request_type) %>%
- summarize(count = sum(count))
+ summarize(count = sum(count)) %>%
+ mutate(request_type = factor(request_type, levels = c("tmid", "tmup"))) %>%
+ spread(request_type, count, drop = FALSE) %>%
+ rename(date = log_date, initial_downloads = tmid, update_pings = tmup)
}
plot_webstats_tm <- function(start_p, end_p, path_p) {
- d <- prepare_webstats_tm(start_p, end_p)
- levels(d$request_type) <- list(
- "Initial downloads" = "tmid",
- "Update pings" = "tmup")
- ggplot(d, aes(x = log_date, y = count)) +
+ prepare_webstats_tm(start_p, end_p) %>%
+ gather(request_type, count, -date) %>%
+ mutate(request_type = factor(request_type,
+ levels = c("initial_downloads", "update_pings"),
+ labels = c("Initial downloads", "Update pings"))) %>%
+ ggplot(aes(x = date, y = count)) +
geom_point() +
geom_line() +
facet_grid(request_type ~ ., scales = "free_y") +
@@ -1361,16 +1256,7 @@ plot_webstats_tm <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_webstats_tm <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_webstats_tm(start_p, end_p) %>%
- rename(date = log_date) %>%
- mutate(request_type = factor(request_type, levels = c("tmid", "tmup"))) %>%
- spread(request_type, count, drop = FALSE) %>%
- rename(initial_downloads = tmid, update_pings = tmup) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_relays_ipv6 <- function(start_p, end_p) {
+prepare_relays_ipv6 <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""),
colClasses = c("valid_after_date" = "Date")) %>%
filter(if (!is.null(start_p))
@@ -1385,12 +1271,15 @@ prepare_relays_ipv6 <- function(start_p, end_p) {
exiting = sum(server_count_sum_avg[exiting_ipv6_relay == "t"])) %>%
complete(valid_after_date = full_seq(valid_after_date, period = 1)) %>%
gather(total, announced, reachable, exiting, key = "category",
- value = "count")
+ value = "count") %>%
+ rename(date = valid_after_date) %>%
+ spread(category, count)
}
plot_relays_ipv6 <- function(start_p, end_p, path_p) {
prepare_relays_ipv6(start_p, end_p) %>%
- ggplot(aes(x = valid_after_date, y = count, colour = category)) +
+ gather(category, count, -date) %>%
+ ggplot(aes(x = date, y = count, colour = category)) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
@@ -1405,14 +1294,7 @@ plot_relays_ipv6 <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_relays_ipv6 <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_relays_ipv6(start_p, end_p) %>%
- rename(date = valid_after_date) %>%
- spread(category, count) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_bridges_ipv6 <- function(start_p, end_p) {
+prepare_bridges_ipv6 <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""),
colClasses = c("valid_after_date" = "Date")) %>%
filter(if (!is.null(start_p))
@@ -1424,12 +1306,13 @@ prepare_bridges_ipv6 <- function(start_p, end_p) {
summarize(total = sum(server_count_sum_avg),
announced = sum(server_count_sum_avg[announced_ipv6 == "t"])) %>%
complete(valid_after_date = full_seq(valid_after_date, period = 1)) %>%
- gather(total, announced, key = "category", value = "count")
+ rename(date = valid_after_date)
}
plot_bridges_ipv6 <- function(start_p, end_p, path_p) {
prepare_bridges_ipv6(start_p, end_p) %>%
- ggplot(aes(x = valid_after_date, y = count, colour = category)) +
+ gather(category, count, -date) %>%
+ ggplot(aes(x = date, y = count, colour = category)) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
@@ -1443,14 +1326,7 @@ plot_bridges_ipv6 <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_bridges_ipv6 <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_bridges_ipv6(start_p, end_p) %>%
- rename(date = valid_after_date) %>%
- spread(category, count) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_advbw_ipv6 <- function(start_p, end_p) {
+prepare_advbw_ipv6 <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""),
colClasses = c("valid_after_date" = "Date")) %>%
filter(if (!is.null(start_p))
@@ -1458,6 +1334,8 @@ prepare_advbw_ipv6 <- function(start_p, end_p) {
filter(if (!is.null(end_p))
valid_after_date <= as.Date(end_p) else TRUE) %>%
filter(server == "relay") %>%
+ mutate(advertised_bandwidth_bytes_sum_avg =
+ advertised_bandwidth_bytes_sum_avg * 8 / 1e9) %>%
group_by(valid_after_date) %>%
summarize(total = sum(advertised_bandwidth_bytes_sum_avg),
total_guard = sum(advertised_bandwidth_bytes_sum_avg[guard_relay != "f"]),
@@ -1469,14 +1347,13 @@ prepare_advbw_ipv6 <- function(start_p, end_p) {
exiting = sum(advertised_bandwidth_bytes_sum_avg[
exiting_ipv6_relay != "f"])) %>%
complete(valid_after_date = full_seq(valid_after_date, period = 1)) %>%
- gather(total, total_guard, total_exit, reachable_guard, reachable_exit,
- exiting, key = "category", value = "advbw") %>%
- mutate(advbw = advbw * 8 / 1e9)
+ rename(date = valid_after_date)
}
plot_advbw_ipv6 <- function(start_p, end_p, path_p) {
prepare_advbw_ipv6(start_p, end_p) %>%
- ggplot(aes(x = valid_after_date, y = advbw, colour = category)) +
+ gather(category, advbw, -date) %>%
+ ggplot(aes(x = date, y = advbw, colour = category)) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
@@ -1494,14 +1371,7 @@ plot_advbw_ipv6 <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_advbw_ipv6 <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_advbw_ipv6(start_p, end_p) %>%
- rename(date = valid_after_date) %>%
- spread(category, advbw) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_totalcw <- function(start_p, end_p) {
+prepare_totalcw <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "totalcw.csv", sep = ""),
colClasses = c("valid_after_date" = "Date", "nickname" = "character")) %>%
filter(if (!is.null(start_p))
@@ -1509,7 +1379,9 @@ prepare_totalcw <- function(start_p, end_p) {
filter(if (!is.null(end_p))
valid_after_date <= as.Date(end_p) else TRUE) %>%
group_by(valid_after_date, nickname) %>%
- summarize(measured_sum_avg = sum(measured_sum_avg))
+ summarize(measured_sum_avg = sum(measured_sum_avg)) %>%
+ rename(date = valid_after_date, totalcw = measured_sum_avg) %>%
+ arrange(date, nickname)
}
plot_totalcw <- function(start_p, end_p, path_p) {
@@ -1517,10 +1389,8 @@ plot_totalcw <- function(start_p, end_p, path_p) {
mutate(nickname = ifelse(nickname == "", "consensus", nickname)) %>%
mutate(nickname = factor(nickname,
levels = c("consensus", unique(nickname[nickname != "consensus"])))) %>%
- complete(valid_after_date = full_seq(valid_after_date, period = 1),
- nesting(nickname)) %>%
- ggplot(aes(x = valid_after_date, y = measured_sum_avg,
- colour = nickname)) +
+ complete(date = full_seq(date, period = 1), nesting(nickname)) %>%
+ ggplot(aes(x = date, y = totalcw, colour = nickname)) +
geom_line(na.rm = TRUE) +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
@@ -1531,10 +1401,4 @@ plot_totalcw <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_totalcw <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_totalcw(start_p, end_p) %>%
- rename(date = valid_after_date, totalcw = measured_sum_avg) %>%
- arrange(date, nickname) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
diff --git a/src/main/java/org/torproject/metrics/web/RObjectGenerator.java b/src/main/java/org/torproject/metrics/web/RObjectGenerator.java
index a529830..6a142e8 100644
--- a/src/main/java/org/torproject/metrics/web/RObjectGenerator.java
+++ b/src/main/java/org/torproject/metrics/web/RObjectGenerator.java
@@ -122,7 +122,7 @@ public class RObjectGenerator implements ServletContextListener {
StringBuilder queryBuilder = new StringBuilder();
queryBuilder.append("robust_call(as.call(list(");
if ("csv".equalsIgnoreCase(fileType)) {
- queryBuilder.append("write_");
+ queryBuilder.append("write_data, prepare_");
/* When we checked parameters above we also put in defaults for missing
* parameters. This is okay for graphs, but we want to support CSV files
* with empty parameters. Using the parameters we got here. */
1
0
[metrics-web/release] Remove Torperf/OnionPerf plots with all sources.
by karsten@torproject.org 09 Nov '19
by karsten@torproject.org 09 Nov '19
09 Nov '19
commit c39472548511175a6eaa0d67de62d3b5fa59dbe3
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Dec 5 11:56:19 2018 +0100
Remove Torperf/OnionPerf plots with all sources.
OnionPerf results look to be comparable over time, but between vantage
points there are systematic deltas between the results. The "all"
plots show rises and falls where they actually don't exist; this happens
because a particular vantage point was offline, so the average of the
remaining sources moves noticeably.
In this commit we remove the source parameter from these graphs and
always include all sources separately in the graph, but not a
combination of all measurements together.
Implements #28603.
---
src/main/R/rserver/graphs.R | 178 +++++++++------------
.../metrics/web/GraphParameterChecker.java | 24 ---
.../org/torproject/metrics/web/GraphServlet.java | 8 -
src/main/resources/web/json/metrics.json | 7 +-
src/main/resources/web/jsps/graph.jsp | 9 --
5 files changed, 76 insertions(+), 150 deletions(-)
diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index e541c30..1f7309b 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -592,70 +592,49 @@ write_relayflags <- function(start_p = NULL, end_p = NULL, flag_p = NULL,
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-plot_torperf <- function(start_p, end_p, source_p, server_p, filesize_p,
- path_p) {
- filesize_val <- ifelse(filesize_p == "50kb", 50 * 1024,
- ifelse(filesize_p == "1mb", 1024 * 1024, 5 * 1024 * 1024))
- t <- read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""),
- colClasses = c("date" = "Date", "source" = "character"))
- known_sources <- c("all", unique(t[t$source != "", "source"]))
- colours <- data.frame(source = known_sources,
- colour = brewer.pal(length(known_sources), "Paired"),
- stringsAsFactors = FALSE)
- colour <- colours[colours$source == source_p, "colour"]
- filesizes <- data.frame(filesizes = c("5mb", "1mb", "50kb"),
- label = c("5 MiB", "1 MiB", "50 KiB"), stringsAsFactors = FALSE)
- filesize_str <- filesizes[filesizes$filesize == filesize_p, "label"]
- t[t$date >= as.Date(start_p) & t$date <= as.Date(end_p) &
- t$filesize == filesize_val &
- t$source == ifelse(source_p == "all", "", source_p) &
- t$server == server_p, ] %>%
- transmute(date, q1 = q1 / 1e3, md = md / 1e3, q3 = q3 / 1e3) %>%
- complete(date = full_seq(date, period = 1)) %>%
- ggplot(aes(x = date, y = md, fill = "line")) +
- geom_line(colour = colour, size = 0.75) +
- geom_ribbon(aes(x = date, ymin = q1, ymax = q3, fill = "ribbon")) +
+prepare_torperf <- function(start_p, end_p, server_p, filesize_p, path_p) {
+ read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""),
+ colClasses = c("date" = "Date", "source" = "character")) %>%
+ filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
+ filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
+ filter(if (!is.null(server_p)) server == server_p else TRUE) %>%
+ filter(if (!is.null(filesize_p))
+ filesize == ifelse(filesize_p == "50kb", 50 * 1024,
+ ifelse(filesize_p == "1mb", 1024 * 1024, 5 * 1024 * 1024)) else
+ TRUE) %>%
+ transmute(date, filesize, source, server, q1 = q1 / 1e3, md = md / 1e3,
+ q3 = q3 / 1e3)
+}
+
+plot_torperf <- function(start_p, end_p, server_p, filesize_p, path_p) {
+ prepare_torperf(start_p, end_p, server_p, filesize_p, path_p) %>%
+ filter(source != "") %>%
+ complete(date = full_seq(date, period = 1), nesting(source)) %>%
+ ggplot(aes(x = date, y = md, ymin = q1, ymax = q3, fill = source)) +
+ geom_ribbon(alpha = 0.5) +
+ geom_line(aes(colour = source), size = 0.75) +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
scale_y_continuous(name = "", labels = unit_format(unit = "s"),
limits = c(0, NA)) +
- scale_fill_manual(name = paste("Measured times on",
- ifelse(source_p == "all", "all sources", source_p), "per day"),
- breaks = c("line", "ribbon"),
- labels = c("Median", "1st to 3rd quartile"),
- values = paste(colour, c("", "66"), sep = "")) +
- ggtitle(paste("Time to complete", filesize_str,
+ scale_fill_hue(name = "Source") +
+ scale_colour_hue(name = "Source") +
+ ggtitle(paste("Time to complete",
+ ifelse(filesize_p == "50kb", "50 KiB",
+ ifelse(filesize_p == "1mb", "1 MiB", "5 MiB")),
"request to", server_p, "server")) +
labs(caption = copyright_notice) +
theme(legend.position = "top")
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-# Ideally, this function would share code with plot_torperf by using a
-# common prepare_torperf function. This just turned out to be a bit
-# harder than for other functions, because plot_torperf uses different
-# colours based on which sources exist, unrelated to which source is
-# plotted. Left as future work.
-write_torperf <- function(start_p = NULL, end_p = NULL, source_p = NULL,
- server_p = NULL, filesize_p = NULL, path_p) {
- read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""),
- colClasses = c("date" = "Date")) %>%
- filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
- filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
- filter(if (!is.null(source_p))
- source == ifelse(source_p == "all", "", source_p) else TRUE) %>%
- filter(if (!is.null(server_p)) server == server_p else TRUE) %>%
- filter(if (!is.null(filesize_p))
- filesize == ifelse(filesize_p == "50kb", 50 * 1024,
- ifelse(filesize_p == "1mb", 1024 * 1024, 5 * 1024 * 1024)) else
- TRUE) %>%
- transmute(date, filesize, source, server, q1 = q1 / 1e3, md = md / 1e3,
- q3 = q3 / 1e3) %>%
+write_torperf <- function(start_p = NULL, end_p = NULL, server_p = NULL,
+ filesize_p = NULL, path_p) {
+ prepare_torperf(start_p, end_p, server_p, filesize_p, path_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-prepare_torperf_failures <- function(start_p, end_p, source_p, server_p,
- filesize_p) {
+prepare_torperf_failures <- function(start_p, end_p, server_p, filesize_p) {
read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -664,31 +643,29 @@ prepare_torperf_failures <- function(start_p, end_p, source_p, server_p,
filesize == ifelse(filesize_p == "50kb", 50 * 1024,
ifelse(filesize_p == "1mb", 1024 * 1024, 5 * 1024 * 1024)) else
TRUE) %>%
- filter(if (!is.null(source_p))
- source == ifelse(source_p == "all", "", source_p) else TRUE) %>%
filter(if (!is.null(server_p)) server == server_p else TRUE) %>%
filter(requests > 0) %>%
transmute(date, filesize, source, server, timeouts = timeouts / requests,
failures = failures / requests)
}
-plot_torperf_failures <- function(start_p, end_p, source_p, server_p,
- filesize_p, path_p) {
- filesizes <- data.frame(filesizes = c("5mb", "1mb", "50kb"),
- label = c("5 MiB", "1 MiB", "50 KiB"), stringsAsFactors = FALSE)
- filesize_str <- filesizes[filesizes$filesize == filesize_p, "label"]
- prepare_torperf_failures(start_p, end_p, source_p, server_p, filesize_p) %>%
+plot_torperf_failures <- function(start_p, end_p, server_p, filesize_p,
+ path_p) {
+ prepare_torperf_failures(start_p, end_p, server_p, filesize_p) %>%
+ filter(source != "") %>%
gather(variable, value, -c(date, filesize, source, server)) %>%
- ggplot(aes(x = date, y = value, colour = variable)) +
- geom_point(size = 2) +
+ mutate(variable = factor(variable, levels = c("timeouts", "failures"),
+ labels = c("Timeouts", "Failures"))) %>%
+ ggplot(aes(x = date, y = value, colour = source)) +
+ geom_point(size = 2, alpha = 0.5) +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
scale_y_continuous(name = "", labels = percent, limits = c(0, NA)) +
- scale_colour_hue(name = paste("Problems encountered on",
- ifelse(source_p == "all", "all sources", source_p)),
- h.start = 45, breaks = c("timeouts", "failures"),
- labels = c("Timeouts", "Failures")) +
- ggtitle(paste("Timeouts and failures of", filesize_str,
+ scale_colour_hue(name = "Source") +
+ facet_grid(variable ~ .) +
+ ggtitle(paste("Timeouts and failures of",
+ ifelse(filesize_p == "50kb", "50 KiB",
+ ifelse(filesize_p == "1mb", "1 MiB", "5 MiB")),
"requests to", server_p, "server")) +
labs(caption = copyright_notice) +
theme(legend.position = "top")
@@ -696,81 +673,74 @@ plot_torperf_failures <- function(start_p, end_p, source_p, server_p,
}
write_torperf_failures <- function(start_p = NULL, end_p = NULL,
- source_p = NULL, server_p = NULL, filesize_p = NULL, path_p) {
- prepare_torperf_failures(start_p, end_p, source_p, server_p, filesize_p) %>%
+ server_p = NULL, filesize_p = NULL, path_p) {
+ prepare_torperf_failures(start_p, end_p, server_p, filesize_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-prepare_onionperf_buildtimes <- function(start_p, end_p, source_p) {
+prepare_onionperf_buildtimes <- function(start_p, end_p) {
read.csv(paste(stats_dir, "buildtimes.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
- filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
- filter(if (!is.null(source_p))
- source == ifelse(source_p == "all", "", source_p) else TRUE)
+ filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE)
}
-write_onionperf_buildtimes <- function(start_p = NULL, end_p = NULL,
- source_p = NULL, path_p) {
- prepare_onionperf_buildtimes(start_p, end_p, source_p) %>%
+write_onionperf_buildtimes <- function(start_p = NULL, end_p = NULL, path_p) {
+ prepare_onionperf_buildtimes(start_p, end_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-plot_onionperf_buildtimes <- function(start_p, end_p, source_p, path_p) {
- prepare_onionperf_buildtimes(start_p, end_p, source_p) %>%
+plot_onionperf_buildtimes <- function(start_p, end_p, path_p) {
+ prepare_onionperf_buildtimes(start_p, end_p) %>%
+ filter(source != "") %>%
mutate(date = as.Date(date),
position = factor(position, levels = seq(1, 3, 1),
labels = c("1st hop", "2nd hop", "3rd hop"))) %>%
- ggplot(aes(x = date, y = md, colour = position, fill = position)) +
- geom_line(size = 0.75) +
- geom_ribbon(aes(x = as.Date(date), ymin = q1, ymax = q3, alpha = 0.5),
- show.legend = FALSE) +
+ complete(date = full_seq(date, period = 1), nesting(source, position)) %>%
+ ggplot(aes(x = date, y = md, ymin = q1, ymax = q3, fill = source)) +
+ geom_ribbon(alpha = 0.5) +
+ geom_line(aes(colour = source), size = 0.75) +
+ facet_grid(position ~ .) +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
scale_y_continuous(name = "", labels = unit_format(unit = "ms"),
limits = c(0, NA)) +
- scale_colour_hue(name = "Medians and interquartile ranges") +
- scale_fill_hue(name = "Medians and interquartile ranges") +
- ggtitle(ifelse(source_p == "all", "Circuit build times on all sources",
- paste("Circuit build times on", source_p))) +
+ scale_fill_hue(name = "Source") +
+ scale_colour_hue(name = "Source") +
+ ggtitle("Circuit build times") +
labs(caption = copyright_notice) +
theme(legend.position = "top")
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-prepare_onionperf_latencies <- function(start_p, end_p, source_p) {
+prepare_onionperf_latencies <- function(start_p, end_p, server_p) {
read.csv(paste(stats_dir, "latencies.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
- filter(if (!is.null(source_p))
- source == ifelse(source_p == "all", "", source_p) else TRUE)
+ filter(if (!is.null(server_p)) server == server_p else TRUE)
}
write_onionperf_latencies <- function(start_p = NULL, end_p = NULL,
- source_p = NULL, path_p) {
- prepare_onionperf_latencies(start_p, end_p, source_p) %>%
+ server_p = NULL, path_p) {
+ prepare_onionperf_latencies(start_p, end_p, server_p) %>%
write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
}
-plot_onionperf_latencies <- function(start_p, end_p, source_p, path_p) {
- prepare_onionperf_latencies(start_p, end_p, source_p) %>%
- mutate(date = as.Date(date),
- server = factor(server, levels = c("public", "onion"),
- labels = c("public server", "onion server"))) %>%
- ggplot(aes(x = date, y = md, colour = server, fill = server)) +
- geom_line(size = 0.75) +
- geom_ribbon(aes(x = as.Date(date), ymin = q1, ymax = q3, alpha = 0.5),
- show.legend = FALSE) +
+plot_onionperf_latencies <- function(start_p, end_p, server_p, path_p) {
+ prepare_onionperf_latencies(start_p, end_p, server_p) %>%
+ filter(source != "") %>%
+ complete(date = full_seq(date, period = 1), nesting(source)) %>%
+ ggplot(aes(x = date, y = md, ymin = q1, ymax = q3, fill = source)) +
+ geom_ribbon(alpha = 0.5) +
+ geom_line(aes(colour = source), size = 0.75) +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
scale_y_continuous(name = "", labels = unit_format(unit = "ms"),
limits = c(0, NA)) +
- scale_colour_hue(name = "Medians and interquartile ranges") +
- scale_fill_hue(name = "Medians and interquartile ranges") +
- ggtitle(ifelse(source_p == "all",
- "Circuit round-trip latencies on all sources",
- paste("Circuit round-trip latencies on", source_p))) +
+ scale_fill_hue(name = "Source") +
+ scale_colour_hue(name = "Source") +
+ ggtitle(paste("Circuit round-trip latencies to", server_p, "server")) +
labs(caption = copyright_notice) +
theme(legend.position = "top")
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
diff --git a/src/main/java/org/torproject/metrics/web/GraphParameterChecker.java b/src/main/java/org/torproject/metrics/web/GraphParameterChecker.java
index 2168ab5..ac642e9 100644
--- a/src/main/java/org/torproject/metrics/web/GraphParameterChecker.java
+++ b/src/main/java/org/torproject/metrics/web/GraphParameterChecker.java
@@ -61,8 +61,6 @@ public class GraphParameterChecker {
}
this.knownParameterValues.put("country", sb.toString());
this.knownParameterValues.put("events", "on,off,points");
- this.knownParameterValues.put("source", "all,siv,moria,torperf,op-hk,"
- + "op-nl,op-us");
this.knownParameterValues.put("server", "public,onion");
this.knownParameterValues.put("filesize", "50kb,1mb,5mb");
this.knownParameterValues.put("transport", "obfs2,obfs3,obfs4,"
@@ -199,28 +197,6 @@ public class GraphParameterChecker {
recognizedGraphParameters.put("events", eventsParameter);
}
- /* Parse torperf data source if supported by the graph type. Only a
- * single source can be passed. If no source is passed, use "torperf"
- * as default. */
- if (supportedGraphParameters.contains("source")) {
- String[] sourceParameter = (String[]) requestParameters.get(
- "source");
- List<String> knownSources = Arrays.asList(
- this.knownParameterValues.get("source").split(","));
- if (sourceParameter != null) {
- if (sourceParameter.length != 1) {
- return null;
- }
- if (sourceParameter[0].length() == 0
- || !knownSources.contains(sourceParameter[0])) {
- return null;
- }
- } else {
- sourceParameter = new String[] { "all" };
- }
- recognizedGraphParameters.put("source", sourceParameter);
- }
-
/* Parse onionperf server if supported by the graph type. Only a single
* server can be passed. If no server is passed, use "public" as default. */
if (supportedGraphParameters.contains("server")) {
diff --git a/src/main/java/org/torproject/metrics/web/GraphServlet.java b/src/main/java/org/torproject/metrics/web/GraphServlet.java
index 2f35320..17d9309 100644
--- a/src/main/java/org/torproject/metrics/web/GraphServlet.java
+++ b/src/main/java/org/torproject/metrics/web/GraphServlet.java
@@ -103,14 +103,6 @@ public class GraphServlet extends MetricServlet {
this.defaultParameters.put("version", new String[][] {
{ "v4", " selected", "IPv4" },
{ "v6", "", "IPv6" } });
- this.defaultParameters.put("source", new String[][] {
- { "all", " checked" },
- { "torperf", "" },
- { "moria", "" },
- { "siv", "" },
- { "op-hk", "" },
- { "op-nl", "" },
- { "op-us", "" }});
this.defaultParameters.put("server", new String[][] {
{ "public", " checked" },
{ "onion", "" }});
diff --git a/src/main/resources/web/json/metrics.json b/src/main/resources/web/json/metrics.json
index 9cb50ad..b351814 100644
--- a/src/main/resources/web/json/metrics.json
+++ b/src/main/resources/web/json/metrics.json
@@ -290,7 +290,6 @@
"parameters": [
"start",
"end",
- "source",
"server",
"filesize"
]
@@ -304,7 +303,6 @@
"parameters": [
"start",
"end",
- "source",
"server",
"filesize"
]
@@ -317,8 +315,7 @@
"function": "onionperf_buildtimes",
"parameters": [
"start",
- "end",
- "source"
+ "end"
]
},
{
@@ -330,7 +327,7 @@
"parameters": [
"start",
"end",
- "source"
+ "server"
]
},
{
diff --git a/src/main/resources/web/jsps/graph.jsp b/src/main/resources/web/jsps/graph.jsp
index c30481f..e710d2c 100644
--- a/src/main/resources/web/jsps/graph.jsp
+++ b/src/main/resources/web/jsps/graph.jsp
@@ -122,15 +122,6 @@
</select>
</p>
</c:if>
- <c:if test="${fn:length(source) > 0}">
- <p><b>Source:</b>
- <c:forEach var="row" items="${source}">
- <label class="radio-label">
- <input type="radio" name="source" value="${row[0]}"${row[1]}> ${row[0]}
- </label>
- </c:forEach>
- </p>
- </c:if>
<c:if test="${fn:length(server) > 0}">
<p><b>Server:</b>
<c:forEach var="row" items="${server}">
1
0
09 Nov '19
commit ad1221cb980aa5bf3bf075338d2588d803e652c2
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Dec 20 10:03:31 2018 +0100
Document changes to OnionPerf graphs.
Still related to #28603.
---
src/main/resources/web/jsps/stats.jsp | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/src/main/resources/web/jsps/stats.jsp b/src/main/resources/web/jsps/stats.jsp
index e5f9c6a..2ae6726 100644
--- a/src/main/resources/web/jsps/stats.jsp
+++ b/src/main/resources/web/jsps/stats.jsp
@@ -48,7 +48,7 @@ https://metrics.torproject.org/identifier.csv
<li><b>August 15, 2018:</b> Made the first batch of changes to per-graph CSV files.</li>
<li><b>September 15, 2018:</b> Removed all pre-aggregated CSV files.</li>
<li><b>October 28, 2018:</b> Added and/or removed columns to <a href="#webstats-tb-platform">Tor Browser downloads and updates by platform</a> and <a href="#webstats-tb-locale">Tor Browser downloads and updates by locale</a> graphs.</li>
-<li><b>December 20, 2018 (scheduled):</b> Remove source parameters and output rows with aggregates over all sources from <a href="#torperf">Time to download files over Tor</a>, <a href="#torperf-failures">Timeouts and failures of downloading files over Tor</a>, <a href="#onionperf-buildtimes">Circuit build times</a>, <a href="#onionperf-latencies">Circuit round-trip latencies</a> graphs.</li>
+<li><b>December 20, 2018:</b> Removed source parameters and output rows with aggregates over all sources from <a href="#torperf">Time to download files over Tor</a>, <a href="#torperf-failures">Timeouts and failures of downloading files over Tor</a>, <a href="#onionperf-buildtimes">Circuit build times</a>, <a href="#onionperf-latencies">Circuit round-trip latencies</a> graphs.</li>
<li><b>December 20, 2018 (scheduled):</b> Remove two graphs <a href="#bandwidth">Total relay bandwidth</a> and <a href="#bwhist-flags">Consumed bandwidth by Exit/Guard flag combination</a>, and update the data format of the <a href="#bandwidth-flags">Advertised and consumed bandwidth by relay flag</a> graph to cover all data previously contained in the first two graphs.</li>
</ul>
@@ -536,7 +536,6 @@ Performance <a href="#performance" name="performance" class="anchor">#</a></h2>
<ul>
<li><b>start:</b> First UTC date (YYYY-MM-DD) to include in the file.</li>
<li><b>end:</b> Last UTC date (YYYY-MM-DD) to include in the file.</li>
-<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements, or <b>"all"</b> for measurements performed by any service. <span class="red">This parameter is going to be removed after December 20, 2018.</span></li>
<li><b>server:</b> Either <b>"public"</b> for requests to a server on the public internet, or <b>"onion"</b> for requests to a version 2 onion server.</li>
<li><b>filesize:</b> Size of the downloaded file in bytes, with pre-defined possible values: <b>"50kb"</b>, <b>"1mb"</b>, or <b>"5mb"</b>.</li>
</ul>
@@ -546,7 +545,7 @@ Performance <a href="#performance" name="performance" class="anchor">#</a></h2>
<ul>
<li><b>date:</b> UTC date (YYYY-MM-DD) when download performance was measured.</li>
<li><b>filesize:</b> Size of the downloaded file in bytes.</li>
-<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements. If this column contains the empty string, all measurements are included, regardless of which service performed them. <span class="red">Output rows with aggregates over all sources are going to be removed after December 20, 2018.</span></li>
+<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements.</li>
<li><b>server:</b> Either <b>"public"</b> if the request was made to a server on the public internet, or <b>"onion"</b> if the request was made to a version 2 onion server.</li>
<li><b>q1:</b> First quartile of time in milliseconds until receiving the last byte.</li>
<li><b>md:</b> Median of time in milliseconds until receiving the last byte.</li>
@@ -563,7 +562,6 @@ Performance <a href="#performance" name="performance" class="anchor">#</a></h2>
<ul>
<li><b>start:</b> First UTC date (YYYY-MM-DD) to include in the file.</li>
<li><b>end:</b> Last UTC date (YYYY-MM-DD) to include in the file.</li>
-<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements, or <b>"all"</b> for measurements performed by any service. <span class="red">This parameter is going to be removed after December 20, 2018.</span></li>
<li><b>server:</b> Either <b>"public"</b> for requests to a server on the public internet, or <b>"onion"</b> for requests to a version 2 onion server.</li>
<li><b>filesize:</b> Size of the downloaded file in bytes, with pre-defined possible values: <b>"50kb"</b>, <b>"1mb"</b>, or <b>"5mb"</b>.</li>
</ul>
@@ -573,7 +571,7 @@ Performance <a href="#performance" name="performance" class="anchor">#</a></h2>
<ul>
<li><b>date:</b> UTC date (YYYY-MM-DD) when download performance was measured.</li>
<li><b>filesize:</b> Size of the downloaded file in bytes.</li>
-<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements. If this column contains the empty string, all measurements are included, regardless of which service performed them. <span class="red">Output rows with aggregates over all sources are going to be removed after December 20, 2018.</span></li>
+<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements.</li>
<li><b>server:</b> Either <b>"public"</b> if the request was made to a server on the public internet, or <b>"onion"</b> if the request was made to a version 2 onion server.</li>
<li><b>timeouts:</b> Fraction of requests that timed out when attempting to download the static file over Tor.</li>
<li><b>failures:</b> Fraction of requests that failed when attempting to download the static file over Tor.</li>
@@ -589,14 +587,13 @@ Performance <a href="#performance" name="performance" class="anchor">#</a></h2>
<ul>
<li><b>start:</b> First UTC date (YYYY-MM-DD) to include in the file.</li>
<li><b>end:</b> Last UTC date (YYYY-MM-DD) to include in the file.</li>
-<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements, or <b>"all"</b> for measurements performed by any service. <span class="red">This parameter is going to be removed after December 20, 2018.</span></li>
</ul>
<h4>Columns</h4>
<ul>
<li><b>date:</b> UTC date (YYYY-MM-DD) when download performance was measured.</li>
-<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements. If this column contains the empty string, all measurements are included, regardless of which service performed them. <span class="red">Output rows with aggregates over all sources are going to be removed after December 20, 2018.</span></li>
+<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements.</li>
<li><b>position:</b> Position in the circuit, from first to third hop.</li>
<li><b>q1:</b> First quartile of time in milliseconds until successfully extending the circuit to the given position.</li>
<li><b>md:</b> Median of time in milliseconds until successfully extending the circuit to the given position.</li>
@@ -613,14 +610,13 @@ Performance <a href="#performance" name="performance" class="anchor">#</a></h2>
<ul>
<li><b>start:</b> First UTC date (YYYY-MM-DD) to include in the file.</li>
<li><b>end:</b> Last UTC date (YYYY-MM-DD) to include in the file.</li>
-<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements, or <b>"all"</b> for measurements performed by any service. <span class="red">This parameter is going to be removed after December 20, 2018.</span></li>
</ul>
<h4>Columns</h4>
<ul>
<li><b>date:</b> UTC date (YYYY-MM-DD) when download performance was measured.</li>
-<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements. If this column contains the empty string, all measurements are included, regardless of which service performed them. <span class="red">Output rows with aggregates over all sources are going to be removed after December 20, 2018.</span></li>
+<li><b>source:</b> Name of the OnionPerf or Torperf service performing measurements.</li>
<li><b>server:</b> Either <b>"public"</b> if the request was made to a server on the public internet, or <b>"onion"</b> if the request was made to a version 2 onion server.</li>
<li><b>q1:</b> First quartile of time in milliseconds between sending the HTTP request and receiving the HTTP response header.</li>
<li><b>md:</b> Median of time in milliseconds between sending the HTTP request and receiving the HTTP response header.</li>
1
0