commit 0d2f1e2afd5f4b9e5c533d256586bb03d7466d5f
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Jan 10 15:39:04 2019 +0100
Make write_* functions obsolete.
In most cases these functions would call their prepare_* equivalents,
possibly tweak the result, and write it to a .csv file. This patch
moves all those tweaks to the prepare_* functions, possibly reverts
them in the plot_* functions, and makes the write_* functions
obsolete.
The result is not only less code. We're also going to find bugs in
written .csv files sooner, because the same code is now run for
writing graph files, and the latter happens much more often.
---
src/main/R/rserver/graphs.R | 414 +++++++--------------
.../torproject/metrics/web/RObjectGenerator.java | 2 +-
2 files changed, 140 insertions(+), 276 deletions(-)
diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index 27f399d..82a51e7 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -348,10 +348,17 @@ robust_call <- function(wrappee, filename) {
})
}
+# Write the result of the given FUN, typically a prepare_ function, as .csv file
+# to the given path_p.
+write_data <- function(FUN, ..., path_p) {
+ FUN(...) %>%
+ write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
+}
+
# Disable readr's automatic progress bar.
options(readr.show_progress = FALSE)
-prepare_networksize <- function(start_p, end_p) {
+prepare_networksize <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "networksize.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -375,12 +382,7 @@ plot_networksize <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_networksize <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_networksize(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_versions <- function(start_p, end_p) {
+prepare_versions <- function(start_p = NULL, end_p = NULL) {
read_csv(paste(stats_dir, "versions.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -413,42 +415,34 @@ plot_versions <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_versions <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_versions(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_platforms <- function(start_p, end_p) {
+prepare_platforms <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "platforms.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
- filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE)
+ filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
+ mutate(platform = tolower(platform)) %>%
+ spread(platform, relays)
}
plot_platforms <- function(start_p, end_p, path_p) {
prepare_platforms(start_p, end_p) %>%
+ gather(platform, relays, -date) %>%
ggplot(aes(x = date, y = relays, colour = platform)) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
scale_colour_manual(name = "Platform",
- breaks = c("Linux", "macOS", "BSD", "Windows", "Other"),
- values = c("Linux" = "#56B4E9", "macOS" = "#333333", "BSD" = "#E69F00",
- "Windows" = "#0072B2", "Other" = "#009E73")) +
+ breaks = c("linux", "macos", "bsd", "windows", "other"),
+ labels = c("Linux", "macOS", "BSD", "Windows", "Other"),
+ values = c("linux" = "#56B4E9", "macos" = "#333333", "bsd" = "#E69F00",
+ "windows" = "#0072B2", "other" = "#009E73")) +
ggtitle("Relay platforms") +
labs(caption = copyright_notice)
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_platforms <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_platforms(start_p, end_p) %>%
- mutate(platform = tolower(platform)) %>%
- spread(platform, relays) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_dirbytes <- function(start_p, end_p, path_p) {
+prepare_dirbytes <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "bandwidth.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -478,12 +472,7 @@ plot_dirbytes <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_dirbytes <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_dirbytes(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_relayflags <- function(start_p, end_p, flag_p) {
+prepare_relayflags <- function(start_p = NULL, end_p = NULL, flag_p = NULL) {
read.csv(paste(stats_dir, "relayflags.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -507,13 +496,8 @@ plot_relayflags <- function(start_p, end_p, flag_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_relayflags <- function(start_p = NULL, end_p = NULL, flag_p = NULL,
- path_p) {
- prepare_relayflags(start_p, end_p, flag_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_torperf <- function(start_p, end_p, server_p, filesize_p, path_p) {
+prepare_torperf <- function(start_p = NULL, end_p = NULL, server_p = NULL,
+ filesize_p = NULL) {
read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""),
colClasses = c("date" = "Date", "source" = "character")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -528,7 +512,7 @@ prepare_torperf <- function(start_p, end_p, server_p, filesize_p, path_p) {
}
plot_torperf <- function(start_p, end_p, server_p, filesize_p, path_p) {
- prepare_torperf(start_p, end_p, server_p, filesize_p, path_p) %>%
+ prepare_torperf(start_p, end_p, server_p, filesize_p) %>%
filter(source != "") %>%
complete(date = full_seq(date, period = 1), nesting(source)) %>%
ggplot(aes(x = date, y = md, ymin = q1, ymax = q3, fill = source)) +
@@ -549,13 +533,8 @@ plot_torperf <- function(start_p, end_p, server_p, filesize_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_torperf <- function(start_p = NULL, end_p = NULL, server_p = NULL,
- filesize_p = NULL, path_p) {
- prepare_torperf(start_p, end_p, server_p, filesize_p, path_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_torperf_failures <- function(start_p, end_p, server_p, filesize_p) {
+prepare_torperf_failures <- function(start_p = NULL, end_p = NULL,
+ server_p = NULL, filesize_p = NULL) {
read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -593,24 +572,13 @@ plot_torperf_failures <- function(start_p, end_p, server_p, filesize_p,
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_torperf_failures <- function(start_p = NULL, end_p = NULL,
- server_p = NULL, filesize_p = NULL, path_p) {
- prepare_torperf_failures(start_p, end_p, server_p, filesize_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_onionperf_buildtimes <- function(start_p, end_p) {
+prepare_onionperf_buildtimes <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "buildtimes.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE)
}
-write_onionperf_buildtimes <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_onionperf_buildtimes(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
plot_onionperf_buildtimes <- function(start_p, end_p, path_p) {
prepare_onionperf_buildtimes(start_p, end_p) %>%
filter(source != "") %>%
@@ -634,20 +602,15 @@ plot_onionperf_buildtimes <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-prepare_onionperf_latencies <- function(start_p, end_p, server_p) {
- read.csv(paste(stats_dir, "latencies.csv", sep = ""),
+prepare_onionperf_latencies <- function(start_p = NULL, end_p = NULL,
+ server_p = NULL) {
+ read.csv(paste(stats_dir, "latencies.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
filter(if (!is.null(server_p)) server == server_p else TRUE)
}
-write_onionperf_latencies <- function(start_p = NULL, end_p = NULL,
- server_p = NULL, path_p) {
- prepare_onionperf_latencies(start_p, end_p, server_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
plot_onionperf_latencies <- function(start_p, end_p, server_p, path_p) {
prepare_onionperf_latencies(start_p, end_p, server_p) %>%
filter(source != "") %>%
@@ -667,21 +630,22 @@ plot_onionperf_latencies <- function(start_p, end_p, server_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-prepare_connbidirect <- function(start_p, end_p) {
+prepare_connbidirect <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "connbidirect2.csv", sep = ""),
colClasses = c("date" = "Date", "direction" = "factor")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
mutate(quantile = paste("X", quantile, sep = ""),
fraction = fraction / 100) %>%
- spread(quantile, fraction)
+ spread(quantile, fraction) %>%
+ rename(q1 = X0.25, md = X0.5, q3 = X0.75)
}
plot_connbidirect <- function(start_p, end_p, path_p) {
prepare_connbidirect(start_p, end_p) %>%
- ggplot(aes(x = date, y = X0.5, colour = direction)) +
+ ggplot(aes(x = date, y = md, colour = direction)) +
geom_line(size = 0.75) +
- geom_ribbon(aes(x = date, ymin = X0.25, ymax = X0.75,
+ geom_ribbon(aes(x = date, ymin = q1, ymax = q3,
fill = direction), alpha = 0.5, show.legend = FALSE) +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
@@ -700,13 +664,7 @@ plot_connbidirect <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_connbidirect <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_connbidirect(start_p, end_p) %>%
- rename(q1 = X0.25, md = X0.5, q3 = X0.75) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_bandwidth_flags <- function(start_p, end_p) {
+prepare_bandwidth_flags <- function(start_p = NULL, end_p = NULL) {
advbw <- read.csv(paste(stats_dir, "advbw.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
transmute(date, have_guard_flag = isguard, have_exit_flag = isexit,
@@ -719,11 +677,13 @@ prepare_bandwidth_flags <- function(start_p, end_p) {
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
filter(have_exit_flag != "") %>%
- filter(have_guard_flag != "")
+ filter(have_guard_flag != "") %>%
+ spread(variable, value)
}
plot_bandwidth_flags <- function(start_p, end_p, path_p) {
prepare_bandwidth_flags(start_p, end_p) %>%
+ gather(variable, value, c(advbw, bwhist)) %>%
unite(flags, have_guard_flag, have_exit_flag) %>%
mutate(flags = factor(flags, levels = c("f_t", "t_t", "t_f", "f_f"),
labels = c("Exit only", "Guard and Exit", "Guard only",
@@ -745,14 +705,8 @@ plot_bandwidth_flags <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_bandwidth_flags <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_bandwidth_flags(start_p, end_p) %>%
- spread(variable, value) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_userstats_relay_country <- function(start_p, end_p, country_p,
- events_p) {
+prepare_userstats_relay_country <- function(start_p = NULL, end_p = NULL,
+ country_p = NULL, events_p = NULL) {
read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -811,13 +765,8 @@ plot_userstats_relay_country <- function(start_p, end_p, country_p, events_p,
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_userstats_relay_country <- function(start_p = NULL, end_p = NULL,
- country_p = NULL, events_p = NULL, path_p) {
- prepare_userstats_relay_country(start_p, end_p, country_p, events_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_userstats_bridge_country <- function(start_p, end_p, country_p) {
+prepare_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
+ country_p = NULL) {
read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -856,12 +805,6 @@ plot_userstats_bridge_country <- function(start_p, end_p, country_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_userstats_bridge_country <- function(start_p = NULL, end_p = NULL,
- country_p = NULL, path_p) {
- prepare_userstats_bridge_country(start_p, end_p, country_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
prepare_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
transport_p = NULL) {
u <- read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
@@ -937,13 +880,8 @@ plot_userstats_bridge_transport <- function(start_p, end_p, transport_p,
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_userstats_bridge_transport <- function(start_p = NULL, end_p = NULL,
- transport_p = NULL, path_p) {
- prepare_userstats_bridge_transport(start_p, end_p, transport_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_userstats_bridge_version <- function(start_p, end_p, version_p) {
+prepare_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
+ version_p = NULL) {
read_csv(file = paste(stats_dir, "clients.csv", sep = ""),
col_types = cols(
date = col_date(format = ""),
@@ -978,27 +916,28 @@ plot_userstats_bridge_version <- function(start_p, end_p, version_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_userstats_bridge_version <- function(start_p = NULL, end_p = NULL,
- version_p = NULL, path_p) {
- prepare_userstats_bridge_version(start_p, end_p, version_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_userstats_bridge_combined <- function(start_p, end_p, country_p) {
- read_csv(file = paste(stats_dir, "userstats-combined.csv", sep = ""),
- col_types = cols(
- date = col_date(format = ""),
- node = col_skip(),
- country = col_character(),
- transport = col_character(),
- version = col_skip(),
- frac = col_double(),
- low = col_double(),
- high = col_double()),
- na = character()) %>%
- filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
- filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
- filter(if (!is.null(country_p)) country == country_p else TRUE)
+prepare_userstats_bridge_combined <- function(start_p = NULL, end_p = NULL,
+ country_p = NULL) {
+ if (!is.null(country_p) && country_p == "all") {
+ prepare_userstats_bridge_country(start_p, end_p, country_p)
+ } else {
+ read_csv(file = paste(stats_dir, "userstats-combined.csv", sep = ""),
+ col_types = cols(
+ date = col_date(format = ""),
+ node = col_skip(),
+ country = col_character(),
+ transport = col_character(),
+ version = col_skip(),
+ frac = col_double(),
+ low = col_double(),
+ high = col_double()),
+ na = character()) %>%
+ filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
+ filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>%
+ filter(if (!is.null(country_p)) country == country_p else TRUE) %>%
+ select(date, country, transport, low, high, frac) %>%
+ arrange(date, country, transport)
+ }
}
plot_userstats_bridge_combined <- function(start_p, end_p, country_p, path_p) {
@@ -1028,19 +967,7 @@ plot_userstats_bridge_combined <- function(start_p, end_p, country_p, path_p) {
}
}
-write_userstats_bridge_combined <- function(start_p = NULL, end_p = NULL,
- country_p = NULL, path_p) {
- if (!is.null(country_p) && country_p == "all") {
- write_userstats_bridge_country(start_p, end_p, country_p, path_p)
- } else {
- prepare_userstats_bridge_combined(start_p, end_p, country_p) %>%
- select(date, country, transport, low, high, frac) %>%
- arrange(date, country, transport) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
- }
-}
-
-prepare_advbwdist_perc <- function(start_p, end_p, p_p) {
+prepare_advbwdist_perc <- function(start_p = NULL, end_p = NULL, p_p = NULL) {
read.csv(paste(stats_dir, "advbwdist.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -1048,15 +975,18 @@ prepare_advbwdist_perc <- function(start_p, end_p, p_p) {
filter(if (!is.null(p_p)) percentile %in% as.numeric(p_p) else
percentile != "") %>%
transmute(date, percentile = as.factor(percentile),
- variable = ifelse(is.na(isexit), "all", "exits"),
- advbw = advbw * 8 / 1e9)
+ variable = ifelse(isexit == "t", "exits", "all"),
+ advbw = advbw * 8 / 1e9) %>%
+ spread(variable, advbw) %>%
+ rename(p = percentile)
}
plot_advbwdist_perc <- function(start_p, end_p, p_p, path_p) {
prepare_advbwdist_perc(start_p, end_p, p_p) %>%
+ gather(variable, advbw, -c(date, p)) %>%
mutate(variable = ifelse(variable == "all", "All relays",
"Exits only")) %>%
- ggplot(aes(x = date, y = advbw, colour = percentile)) +
+ ggplot(aes(x = date, y = advbw, colour = p)) +
facet_grid(variable ~ .) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
@@ -1069,15 +999,7 @@ plot_advbwdist_perc <- function(start_p, end_p, p_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_advbwdist_perc <- function(start_p = NULL, end_p = NULL, p_p = NULL,
- path_p) {
- prepare_advbwdist_perc(start_p, end_p, p_p) %>%
- spread(variable, advbw) %>%
- rename(p = percentile) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_advbwdist_relay <- function(start_p, end_p, n_p) {
+prepare_advbwdist_relay <- function(start_p = NULL, end_p = NULL, n_p = NULL) {
read.csv(paste(stats_dir, "advbwdist.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -1086,14 +1008,17 @@ prepare_advbwdist_relay <- function(start_p, end_p, n_p) {
relay != "") %>%
transmute(date, relay = as.factor(relay),
variable = ifelse(isexit != "t", "all", "exits"),
- advbw = advbw * 8 / 1e9)
+ advbw = advbw * 8 / 1e9) %>%
+ spread(variable, advbw) %>%
+ rename(n = relay)
}
plot_advbwdist_relay <- function(start_p, end_p, n_p, path_p) {
prepare_advbwdist_relay(start_p, end_p, n_p) %>%
+ gather(variable, advbw, -c(date, n)) %>%
mutate(variable = ifelse(variable == "all", "All relays",
"Exits only")) %>%
- ggplot(aes(x = date, y = advbw, colour = relay)) +
+ ggplot(aes(x = date, y = advbw, colour = n)) +
facet_grid(variable ~ .) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
@@ -1106,15 +1031,7 @@ plot_advbwdist_relay <- function(start_p, end_p, n_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_advbwdist_relay <- function(start_p = NULL, end_p = NULL, n_p = NULL,
- path_p) {
- prepare_advbwdist_relay(start_p, end_p, n_p) %>%
- spread(variable, advbw) %>%
- rename(n = relay) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_hidserv_dir_onions_seen <- function(start_p, end_p) {
+prepare_hidserv_dir_onions_seen <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "hidserv.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -1135,13 +1052,7 @@ plot_hidserv_dir_onions_seen <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_hidserv_dir_onions_seen <- function(start_p = NULL, end_p = NULL,
- path_p) {
- prepare_hidserv_dir_onions_seen(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_hidserv_rend_relayed_cells <- function(start_p, end_p) {
+prepare_hidserv_rend_relayed_cells <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "hidserv.csv", sep = ""),
colClasses = c("date" = "Date")) %>%
filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>%
@@ -1164,13 +1075,7 @@ plot_hidserv_rend_relayed_cells <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_hidserv_rend_relayed_cells <- function(start_p = NULL, end_p = NULL,
- path_p) {
- prepare_hidserv_rend_relayed_cells(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_webstats_tb <- function(start_p, end_p) {
+prepare_webstats_tb <- function(start_p = NULL, end_p = NULL) {
read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
col_types = cols(
log_date = col_date(format = ""),
@@ -1184,17 +1089,22 @@ prepare_webstats_tb <- function(start_p, end_p) {
filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
filter(request_type %in% c("tbid", "tbsd", "tbup", "tbur")) %>%
group_by(log_date, request_type) %>%
- summarize(count = sum(count))
+ summarize(count = sum(count)) %>%
+ spread(request_type, count) %>%
+ rename(date = log_date, initial_downloads = tbid,
+ signature_downloads = tbsd, update_pings = tbup,
+ update_requests = tbur)
}
plot_webstats_tb <- function(start_p, end_p, path_p) {
- d <- prepare_webstats_tb(start_p, end_p)
- levels(d$request_type) <- list(
- "Initial downloads" = "tbid",
- "Signature downloads" = "tbsd",
- "Update pings" = "tbup",
- "Update requests" = "tbur")
- ggplot(d, aes(x = log_date, y = count)) +
+ prepare_webstats_tb(start_p, end_p) %>%
+ gather(request_type, count, -date) %>%
+ mutate(request_type = factor(request_type,
+ levels = c("initial_downloads", "signature_downloads", "update_pings",
+ "update_requests"),
+ labels = c("Initial downloads", "Signature downloads", "Update pings",
+ "Update requests"))) %>%
+ ggplot(aes(x = date, y = count)) +
geom_point() +
geom_line() +
facet_grid(request_type ~ ., scales = "free_y") +
@@ -1208,16 +1118,7 @@ plot_webstats_tb <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_webstats_tb <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_webstats_tb(start_p, end_p) %>%
- rename(date = log_date) %>%
- spread(request_type, count) %>%
- rename(initial_downloads = tbid, signature_downloads = tbsd,
- update_pings = tbup, update_requests = tbur) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_webstats_tb_platform <- function(start_p, end_p) {
+prepare_webstats_tb_platform <- function(start_p = NULL, end_p = NULL) {
read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
col_types = cols(
log_date = col_date(format = ""),
@@ -1231,15 +1132,18 @@ prepare_webstats_tb_platform <- function(start_p, end_p) {
filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
filter(request_type %in% c("tbid", "tbup")) %>%
group_by(log_date, platform, request_type) %>%
- summarize(count = sum(count))
+ summarize(count = sum(count)) %>%
+ spread(request_type, count, fill = 0) %>%
+ rename(date = log_date, initial_downloads = tbid, update_pings = tbup)
}
plot_webstats_tb_platform <- function(start_p, end_p, path_p) {
- d <- prepare_webstats_tb_platform(start_p, end_p)
- levels(d$request_type) <- list(
- "Initial downloads" = "tbid",
- "Update pings" = "tbup")
- ggplot(d, aes(x = log_date, y = count, colour = platform)) +
+ prepare_webstats_tb_platform(start_p, end_p) %>%
+ gather(request_type, count, -c(date, platform)) %>%
+ mutate(request_type = factor(request_type,
+ levels = c("initial_downloads", "update_pings"),
+ labels = c("Initial downloads", "Update pings"))) %>%
+ ggplot(aes(x = date, y = count, colour = platform)) +
geom_point() +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
@@ -1257,15 +1161,7 @@ plot_webstats_tb_platform <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_webstats_tb_platform <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_webstats_tb_platform(start_p, end_p) %>%
- rename(date = log_date) %>%
- spread(request_type, count, fill = 0) %>%
- rename(initial_downloads = tbid, update_pings = tbup) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_webstats_tb_locale <- function(start_p, end_p) {
+prepare_webstats_tb_locale <- function(start_p = NULL, end_p = NULL) {
read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
col_types = cols(
log_date = col_date(format = ""),
@@ -1320,12 +1216,7 @@ plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_webstats_tb_locale <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_webstats_tb_locale(start_p, end_p) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_webstats_tm <- function(start_p, end_p) {
+prepare_webstats_tm <- function(start_p = NULL, end_p = NULL) {
read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
col_types = cols(
log_date = col_date(format = ""),
@@ -1339,15 +1230,19 @@ prepare_webstats_tm <- function(start_p, end_p) {
filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
filter(request_type %in% c("tmid", "tmup")) %>%
group_by(log_date, request_type) %>%
- summarize(count = sum(count))
+ summarize(count = sum(count)) %>%
+ mutate(request_type = factor(request_type, levels = c("tmid", "tmup"))) %>%
+ spread(request_type, count, drop = FALSE) %>%
+ rename(date = log_date, initial_downloads = tmid, update_pings = tmup)
}
plot_webstats_tm <- function(start_p, end_p, path_p) {
- d <- prepare_webstats_tm(start_p, end_p)
- levels(d$request_type) <- list(
- "Initial downloads" = "tmid",
- "Update pings" = "tmup")
- ggplot(d, aes(x = log_date, y = count)) +
+ prepare_webstats_tm(start_p, end_p) %>%
+ gather(request_type, count, -date) %>%
+ mutate(request_type = factor(request_type,
+ levels = c("initial_downloads", "update_pings"),
+ labels = c("Initial downloads", "Update pings"))) %>%
+ ggplot(aes(x = date, y = count)) +
geom_point() +
geom_line() +
facet_grid(request_type ~ ., scales = "free_y") +
@@ -1361,16 +1256,7 @@ plot_webstats_tm <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_webstats_tm <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_webstats_tm(start_p, end_p) %>%
- rename(date = log_date) %>%
- mutate(request_type = factor(request_type, levels = c("tmid", "tmup"))) %>%
- spread(request_type, count, drop = FALSE) %>%
- rename(initial_downloads = tmid, update_pings = tmup) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_relays_ipv6 <- function(start_p, end_p) {
+prepare_relays_ipv6 <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""),
colClasses = c("valid_after_date" = "Date")) %>%
filter(if (!is.null(start_p))
@@ -1385,12 +1271,15 @@ prepare_relays_ipv6 <- function(start_p, end_p) {
exiting = sum(server_count_sum_avg[exiting_ipv6_relay == "t"])) %>%
complete(valid_after_date = full_seq(valid_after_date, period = 1)) %>%
gather(total, announced, reachable, exiting, key = "category",
- value = "count")
+ value = "count") %>%
+ rename(date = valid_after_date) %>%
+ spread(category, count)
}
plot_relays_ipv6 <- function(start_p, end_p, path_p) {
prepare_relays_ipv6(start_p, end_p) %>%
- ggplot(aes(x = valid_after_date, y = count, colour = category)) +
+ gather(category, count, -date) %>%
+ ggplot(aes(x = date, y = count, colour = category)) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
@@ -1405,14 +1294,7 @@ plot_relays_ipv6 <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_relays_ipv6 <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_relays_ipv6(start_p, end_p) %>%
- rename(date = valid_after_date) %>%
- spread(category, count) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_bridges_ipv6 <- function(start_p, end_p) {
+prepare_bridges_ipv6 <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""),
colClasses = c("valid_after_date" = "Date")) %>%
filter(if (!is.null(start_p))
@@ -1424,12 +1306,13 @@ prepare_bridges_ipv6 <- function(start_p, end_p) {
summarize(total = sum(server_count_sum_avg),
announced = sum(server_count_sum_avg[announced_ipv6 == "t"])) %>%
complete(valid_after_date = full_seq(valid_after_date, period = 1)) %>%
- gather(total, announced, key = "category", value = "count")
+ rename(date = valid_after_date)
}
plot_bridges_ipv6 <- function(start_p, end_p, path_p) {
prepare_bridges_ipv6(start_p, end_p) %>%
- ggplot(aes(x = valid_after_date, y = count, colour = category)) +
+ gather(category, count, -date) %>%
+ ggplot(aes(x = date, y = count, colour = category)) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
@@ -1443,14 +1326,7 @@ plot_bridges_ipv6 <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_bridges_ipv6 <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_bridges_ipv6(start_p, end_p) %>%
- rename(date = valid_after_date) %>%
- spread(category, count) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_advbw_ipv6 <- function(start_p, end_p) {
+prepare_advbw_ipv6 <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""),
colClasses = c("valid_after_date" = "Date")) %>%
filter(if (!is.null(start_p))
@@ -1458,6 +1334,8 @@ prepare_advbw_ipv6 <- function(start_p, end_p) {
filter(if (!is.null(end_p))
valid_after_date <= as.Date(end_p) else TRUE) %>%
filter(server == "relay") %>%
+ mutate(advertised_bandwidth_bytes_sum_avg =
+ advertised_bandwidth_bytes_sum_avg * 8 / 1e9) %>%
group_by(valid_after_date) %>%
summarize(total = sum(advertised_bandwidth_bytes_sum_avg),
total_guard = sum(advertised_bandwidth_bytes_sum_avg[guard_relay != "f"]),
@@ -1469,14 +1347,13 @@ prepare_advbw_ipv6 <- function(start_p, end_p) {
exiting = sum(advertised_bandwidth_bytes_sum_avg[
exiting_ipv6_relay != "f"])) %>%
complete(valid_after_date = full_seq(valid_after_date, period = 1)) %>%
- gather(total, total_guard, total_exit, reachable_guard, reachable_exit,
- exiting, key = "category", value = "advbw") %>%
- mutate(advbw = advbw * 8 / 1e9)
+ rename(date = valid_after_date)
}
plot_advbw_ipv6 <- function(start_p, end_p, path_p) {
prepare_advbw_ipv6(start_p, end_p) %>%
- ggplot(aes(x = valid_after_date, y = advbw, colour = category)) +
+ gather(category, advbw, -date) %>%
+ ggplot(aes(x = date, y = advbw, colour = category)) +
geom_line() +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
@@ -1494,14 +1371,7 @@ plot_advbw_ipv6 <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_advbw_ipv6 <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_advbw_ipv6(start_p, end_p) %>%
- rename(date = valid_after_date) %>%
- spread(category, advbw) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
-
-prepare_totalcw <- function(start_p, end_p) {
+prepare_totalcw <- function(start_p = NULL, end_p = NULL) {
read.csv(paste(stats_dir, "totalcw.csv", sep = ""),
colClasses = c("valid_after_date" = "Date", "nickname" = "character")) %>%
filter(if (!is.null(start_p))
@@ -1509,7 +1379,9 @@ prepare_totalcw <- function(start_p, end_p) {
filter(if (!is.null(end_p))
valid_after_date <= as.Date(end_p) else TRUE) %>%
group_by(valid_after_date, nickname) %>%
- summarize(measured_sum_avg = sum(measured_sum_avg))
+ summarize(measured_sum_avg = sum(measured_sum_avg)) %>%
+ rename(date = valid_after_date, totalcw = measured_sum_avg) %>%
+ arrange(date, nickname)
}
plot_totalcw <- function(start_p, end_p, path_p) {
@@ -1517,10 +1389,8 @@ plot_totalcw <- function(start_p, end_p, path_p) {
mutate(nickname = ifelse(nickname == "", "consensus", nickname)) %>%
mutate(nickname = factor(nickname,
levels = c("consensus", unique(nickname[nickname != "consensus"])))) %>%
- complete(valid_after_date = full_seq(valid_after_date, period = 1),
- nesting(nickname)) %>%
- ggplot(aes(x = valid_after_date, y = measured_sum_avg,
- colour = nickname)) +
+ complete(date = full_seq(date, period = 1), nesting(nickname)) %>%
+ ggplot(aes(x = date, y = totalcw, colour = nickname)) +
geom_line(na.rm = TRUE) +
scale_x_date(name = "", breaks = custom_breaks,
labels = custom_labels, minor_breaks = custom_minor_breaks) +
@@ -1531,10 +1401,4 @@ plot_totalcw <- function(start_p, end_p, path_p) {
ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
}
-write_totalcw <- function(start_p = NULL, end_p = NULL, path_p) {
- prepare_totalcw(start_p, end_p) %>%
- rename(date = valid_after_date, totalcw = measured_sum_avg) %>%
- arrange(date, nickname) %>%
- write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
-}
diff --git a/src/main/java/org/torproject/metrics/web/RObjectGenerator.java b/src/main/java/org/torproject/metrics/web/RObjectGenerator.java
index a529830..6a142e8 100644
--- a/src/main/java/org/torproject/metrics/web/RObjectGenerator.java
+++ b/src/main/java/org/torproject/metrics/web/RObjectGenerator.java
@@ -122,7 +122,7 @@ public class RObjectGenerator implements ServletContextListener {
StringBuilder queryBuilder = new StringBuilder();
queryBuilder.append("robust_call(as.call(list(");
if ("csv".equalsIgnoreCase(fileType)) {
- queryBuilder.append("write_");
+ queryBuilder.append("write_data, prepare_");
/* When we checked parameters above we also put in defaults for missing
* parameters. This is okay for graphs, but we want to support CSV files
* with empty parameters. Using the parameters we got here. */