commit a94a3844644041f7c1f6e0a4451e19ce12cae9e8 Author: Karsten Loesing karsten.loesing@gmx.net Date: Thu Jan 10 22:32:28 2019 +0100
Switch to readr's read_csv() everywhere. --- src/main/R/rserver/graphs.R | 230 +++++++++++++++++++++++++++++++++----------- 1 file changed, 175 insertions(+), 55 deletions(-)
diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R index 82a51e7..205afbe 100644 --- a/src/main/R/rserver/graphs.R +++ b/src/main/R/rserver/graphs.R @@ -359,8 +359,11 @@ write_data <- function(FUN, ..., path_p) { options(readr.show_progress = FALSE)
prepare_networksize <- function(start_p = NULL, end_p = NULL) { - read.csv(paste(stats_dir, "networksize.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + read_csv(file = paste(stats_dir, "networksize.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + relays = col_double(), + bridges = col_double())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) } @@ -416,8 +419,11 @@ plot_versions <- function(start_p, end_p, path_p) { }
prepare_platforms <- function(start_p = NULL, end_p = NULL) { - read.csv(paste(stats_dir, "platforms.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + read_csv(file = paste(stats_dir, "platforms.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + platform = col_factor(levels = NULL), + relays = col_double())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% mutate(platform = tolower(platform)) %>% @@ -443,12 +449,19 @@ plot_platforms <- function(start_p, end_p, path_p) { }
prepare_dirbytes <- function(start_p = NULL, end_p = NULL) { - read.csv(paste(stats_dir, "bandwidth.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + read_csv(file = paste(stats_dir, "bandwidth.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + isexit = col_logical(), + isguard = col_logical(), + bwread = col_skip(), + bwwrite = col_skip(), + dirread = col_double(), + dirwrite = col_double())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% - filter(isexit == "") %>% - filter(isguard == "") %>% + filter(is.na(isexit)) %>% + filter(is.na(isguard)) %>% mutate(dirread = dirread * 8 / 1e9, dirwrite = dirwrite * 8 / 1e9) %>% select(date, dirread, dirwrite) @@ -473,8 +486,11 @@ plot_dirbytes <- function(start_p, end_p, path_p) { }
prepare_relayflags <- function(start_p = NULL, end_p = NULL, flag_p = NULL) { - read.csv(paste(stats_dir, "relayflags.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + read_csv(file = paste(stats_dir, "relayflags.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + flag = col_factor(levels = NULL), + relays = col_double())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% filter(if (!is.null(flag_p)) flag %in% flag_p else TRUE) @@ -483,7 +499,7 @@ prepare_relayflags <- function(start_p = NULL, end_p = NULL, flag_p = NULL) { plot_relayflags <- function(start_p, end_p, flag_p, path_p) { prepare_relayflags(start_p, end_p, flag_p) %>% complete(date = full_seq(date, period = 1), flag = unique(flag)) %>% - ggplot(aes(x = date, y = relays, colour = as.factor(flag))) + + ggplot(aes(x = date, y = relays, colour = flag)) + geom_line() + scale_x_date(name = "", breaks = custom_breaks, labels = custom_labels, minor_breaks = custom_minor_breaks) + @@ -498,8 +514,18 @@ plot_relayflags <- function(start_p, end_p, flag_p, path_p) {
prepare_torperf <- function(start_p = NULL, end_p = NULL, server_p = NULL, filesize_p = NULL) { - read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""), - colClasses = c("date" = "Date", "source" = "character")) %>% + read_csv(file = paste(stats_dir, "torperf-1.1.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + filesize = col_double(), + source = col_character(), + server = col_character(), + q1 = col_double(), + md = col_double(), + q3 = col_double(), + timeouts = col_skip(), + failures = col_skip(), + requests = col_skip())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% filter(if (!is.null(server_p)) server == server_p else TRUE) %>% @@ -535,8 +561,18 @@ plot_torperf <- function(start_p, end_p, server_p, filesize_p, path_p) {
prepare_torperf_failures <- function(start_p = NULL, end_p = NULL, server_p = NULL, filesize_p = NULL) { - read.csv(paste(stats_dir, "torperf-1.1.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + read_csv(file = paste(stats_dir, "torperf-1.1.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + filesize = col_double(), + source = col_character(), + server = col_character(), + q1 = col_skip(), + md = col_skip(), + q3 = col_skip(), + timeouts = col_double(), + failures = col_double(), + requests = col_double())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% filter(if (!is.null(filesize_p)) @@ -573,8 +609,14 @@ plot_torperf_failures <- function(start_p, end_p, server_p, filesize_p, }
prepare_onionperf_buildtimes <- function(start_p = NULL, end_p = NULL) { - read.csv(paste(stats_dir, "buildtimes.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + read_csv(file = paste(stats_dir, "buildtimes.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + source = col_character(), + position = col_double(), + q1 = col_double(), + md = col_double(), + q3 = col_double())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) } @@ -604,8 +646,14 @@ plot_onionperf_buildtimes <- function(start_p, end_p, path_p) {
prepare_onionperf_latencies <- function(start_p = NULL, end_p = NULL, server_p = NULL) { - read.csv(paste(stats_dir, "latencies.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + read_csv(file = paste(stats_dir, "latencies.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + source = col_character(), + server = col_character(), + q1 = col_double(), + md = col_double(), + q3 = col_double())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% filter(if (!is.null(server_p)) server == server_p else TRUE) @@ -631,8 +679,12 @@ plot_onionperf_latencies <- function(start_p, end_p, server_p, path_p) { }
prepare_connbidirect <- function(start_p = NULL, end_p = NULL) { - read.csv(paste(stats_dir, "connbidirect2.csv", sep = ""), - colClasses = c("date" = "Date", "direction" = "factor")) %>% + read_csv(file = paste(stats_dir, "connbidirect2.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + direction = col_factor(), + quantile = col_double(), + fraction = col_double())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% mutate(quantile = paste("X", quantile, sep = ""), @@ -665,19 +717,30 @@ plot_connbidirect <- function(start_p, end_p, path_p) { }
prepare_bandwidth_flags <- function(start_p = NULL, end_p = NULL) { - advbw <- read.csv(paste(stats_dir, "advbw.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + advbw <- read_csv(file = paste(stats_dir, "advbw.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + isexit = col_logical(), + isguard = col_logical(), + advbw = col_double())) %>% transmute(date, have_guard_flag = isguard, have_exit_flag = isexit, variable = "advbw", value = advbw * 8 / 1e9) - bwhist <- read.csv(paste(stats_dir, "bandwidth.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + bwhist <- read_csv(file = paste(stats_dir, "bandwidth.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + isexit = col_logical(), + isguard = col_logical(), + bwread = col_double(), + bwwrite = col_double(), + dirread = col_double(), + dirwrite = col_double())) %>% transmute(date, have_guard_flag = isguard, have_exit_flag = isexit, variable = "bwhist", value = (bwread + bwwrite) * 8 / 2e9) rbind(advbw, bwhist) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% - filter(have_exit_flag != "") %>% - filter(have_guard_flag != "") %>% + filter(!is.na(have_exit_flag)) %>% + filter(!is.na(have_guard_flag)) %>% spread(variable, value) }
@@ -685,7 +748,8 @@ plot_bandwidth_flags <- function(start_p, end_p, path_p) { prepare_bandwidth_flags(start_p, end_p) %>% gather(variable, value, c(advbw, bwhist)) %>% unite(flags, have_guard_flag, have_exit_flag) %>% - mutate(flags = factor(flags, levels = c("f_t", "t_t", "t_f", "f_f"), + mutate(flags = factor(flags, + levels = c("FALSE_TRUE", "TRUE_TRUE", "TRUE_FALSE", "FALSE_FALSE"), labels = c("Exit only", "Guard and Exit", "Guard only", "Neither Guard nor Exit"))) %>% mutate(variable = ifelse(variable == "advbw", @@ -968,14 +1032,19 @@ plot_userstats_bridge_combined <- function(start_p, end_p, country_p, path_p) { }
prepare_advbwdist_perc <- function(start_p = NULL, end_p = NULL, p_p = NULL) { - read.csv(paste(stats_dir, "advbwdist.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + read_csv(file = paste(stats_dir, "advbwdist.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + isexit = col_logical(), + relay = col_skip(), + percentile = col_integer(), + advbw = col_double())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% filter(if (!is.null(p_p)) percentile %in% as.numeric(p_p) else percentile != "") %>% transmute(date, percentile = as.factor(percentile), - variable = ifelse(isexit == "t", "exits", "all"), + variable = ifelse(is.na(isexit), "all", "exits"), advbw = advbw * 8 / 1e9) %>% spread(variable, advbw) %>% rename(p = percentile) @@ -1000,14 +1069,19 @@ plot_advbwdist_perc <- function(start_p, end_p, p_p, path_p) { }
prepare_advbwdist_relay <- function(start_p = NULL, end_p = NULL, n_p = NULL) { - read.csv(paste(stats_dir, "advbwdist.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + read_csv(file = paste(stats_dir, "advbwdist.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + isexit = col_logical(), + relay = col_integer(), + percentile = col_skip(), + advbw = col_double())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% filter(if (!is.null(n_p)) relay %in% as.numeric(n_p) else relay != "") %>% transmute(date, relay = as.factor(relay), - variable = ifelse(isexit != "t", "all", "exits"), + variable = ifelse(is.na(isexit), "all", "exits"), advbw = advbw * 8 / 1e9) %>% spread(variable, advbw) %>% rename(n = relay) @@ -1032,8 +1106,15 @@ plot_advbwdist_relay <- function(start_p, end_p, n_p, path_p) { }
prepare_hidserv_dir_onions_seen <- function(start_p = NULL, end_p = NULL) { - read.csv(paste(stats_dir, "hidserv.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + read_csv(file = paste(stats_dir, "hidserv.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + type = col_factor(), + wmean = col_skip(), + wmedian = col_skip(), + wiqm = col_double(), + frac = col_double(), + stats = col_skip())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% filter(type == "dir-onions-seen") %>% @@ -1053,8 +1134,15 @@ plot_hidserv_dir_onions_seen <- function(start_p, end_p, path_p) { }
prepare_hidserv_rend_relayed_cells <- function(start_p = NULL, end_p = NULL) { - read.csv(paste(stats_dir, "hidserv.csv", sep = ""), - colClasses = c("date" = "Date")) %>% + read_csv(file = paste(stats_dir, "hidserv.csv", sep = ""), + col_types = cols( + date = col_date(format = ""), + type = col_factor(), + wmean = col_skip(), + wmedian = col_skip(), + wiqm = col_double(), + frac = col_double(), + stats = col_skip())) %>% filter(if (!is.null(start_p)) date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) date <= as.Date(end_p) else TRUE) %>% filter(type == "rend-relayed-cells") %>% @@ -1257,8 +1345,17 @@ plot_webstats_tm <- function(start_p, end_p, path_p) { }
prepare_relays_ipv6 <- function(start_p = NULL, end_p = NULL) { - read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""), - colClasses = c("valid_after_date" = "Date")) %>% + read_csv(file = paste(stats_dir, "ipv6servers.csv", sep = ""), + col_types = cols( + valid_after_date = col_date(format = ""), + server = col_factor(), + guard_relay = col_skip(), + exit_relay = col_skip(), + announced_ipv6 = col_logical(), + exiting_ipv6_relay = col_logical(), + reachable_ipv6_relay = col_logical(), + server_count_sum_avg = col_double(), + advertised_bandwidth_bytes_sum_avg = col_skip())) %>% filter(if (!is.null(start_p)) valid_after_date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) @@ -1266,9 +1363,9 @@ prepare_relays_ipv6 <- function(start_p = NULL, end_p = NULL) { filter(server == "relay") %>% group_by(valid_after_date) %>% summarize(total = sum(server_count_sum_avg), - announced = sum(server_count_sum_avg[announced_ipv6 == "t"]), - reachable = sum(server_count_sum_avg[reachable_ipv6_relay == "t"]), - exiting = sum(server_count_sum_avg[exiting_ipv6_relay == "t"])) %>% + announced = sum(server_count_sum_avg[announced_ipv6]), + reachable = sum(server_count_sum_avg[reachable_ipv6_relay]), + exiting = sum(server_count_sum_avg[exiting_ipv6_relay])) %>% complete(valid_after_date = full_seq(valid_after_date, period = 1)) %>% gather(total, announced, reachable, exiting, key = "category", value = "count") %>% @@ -1295,8 +1392,17 @@ plot_relays_ipv6 <- function(start_p, end_p, path_p) { }
prepare_bridges_ipv6 <- function(start_p = NULL, end_p = NULL) { - read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""), - colClasses = c("valid_after_date" = "Date")) %>% + read_csv(file = paste(stats_dir, "ipv6servers.csv", sep = ""), + col_types = cols( + valid_after_date = col_date(format = ""), + server = col_factor(), + guard_relay = col_skip(), + exit_relay = col_skip(), + announced_ipv6 = col_logical(), + exiting_ipv6_relay = col_skip(), + reachable_ipv6_relay = col_skip(), + server_count_sum_avg = col_double(), + advertised_bandwidth_bytes_sum_avg = col_skip())) %>% filter(if (!is.null(start_p)) valid_after_date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) @@ -1304,7 +1410,7 @@ prepare_bridges_ipv6 <- function(start_p = NULL, end_p = NULL) { filter(server == "bridge") %>% group_by(valid_after_date) %>% summarize(total = sum(server_count_sum_avg), - announced = sum(server_count_sum_avg[announced_ipv6 == "t"])) %>% + announced = sum(server_count_sum_avg[announced_ipv6])) %>% complete(valid_after_date = full_seq(valid_after_date, period = 1)) %>% rename(date = valid_after_date) } @@ -1327,8 +1433,17 @@ plot_bridges_ipv6 <- function(start_p, end_p, path_p) { }
prepare_advbw_ipv6 <- function(start_p = NULL, end_p = NULL) { - read.csv(paste(stats_dir, "ipv6servers.csv", sep = ""), - colClasses = c("valid_after_date" = "Date")) %>% + read_csv(file = paste(stats_dir, "ipv6servers.csv", sep = ""), + col_types = cols( + valid_after_date = col_date(format = ""), + server = col_factor(), + guard_relay = col_logical(), + exit_relay = col_logical(), + announced_ipv6 = col_logical(), + exiting_ipv6_relay = col_logical(), + reachable_ipv6_relay = col_logical(), + server_count_sum_avg = col_skip(), + advertised_bandwidth_bytes_sum_avg = col_double())) %>% filter(if (!is.null(start_p)) valid_after_date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p)) @@ -1338,14 +1453,14 @@ prepare_advbw_ipv6 <- function(start_p = NULL, end_p = NULL) { advertised_bandwidth_bytes_sum_avg * 8 / 1e9) %>% group_by(valid_after_date) %>% summarize(total = sum(advertised_bandwidth_bytes_sum_avg), - total_guard = sum(advertised_bandwidth_bytes_sum_avg[guard_relay != "f"]), - total_exit = sum(advertised_bandwidth_bytes_sum_avg[exit_relay != "f"]), + total_guard = sum(advertised_bandwidth_bytes_sum_avg[guard_relay]), + total_exit = sum(advertised_bandwidth_bytes_sum_avg[exit_relay]), reachable_guard = sum(advertised_bandwidth_bytes_sum_avg[ - reachable_ipv6_relay != "f" & guard_relay != "f"]), + reachable_ipv6_relay & guard_relay]), reachable_exit = sum(advertised_bandwidth_bytes_sum_avg[ - reachable_ipv6_relay != "f" & exit_relay != "f"]), + reachable_ipv6_relay & exit_relay]), exiting = sum(advertised_bandwidth_bytes_sum_avg[ - exiting_ipv6_relay != "f"])) %>% + exiting_ipv6_relay])) %>% complete(valid_after_date = full_seq(valid_after_date, period = 1)) %>% rename(date = valid_after_date) } @@ -1372,8 +1487,13 @@ plot_advbw_ipv6 <- function(start_p, end_p, path_p) { }
prepare_totalcw <- function(start_p = NULL, end_p = NULL) { - read.csv(paste(stats_dir, "totalcw.csv", sep = ""), - colClasses = c("valid_after_date" = "Date", "nickname" = "character")) %>% + read_csv(file = paste(stats_dir, "totalcw.csv", sep = ""), + col_types = cols( + valid_after_date = col_date(format = ""), + nickname = col_character(), + have_guard_flag = col_logical(), + have_exit_flag = col_logical(), + measured_sum_avg = col_double())) %>% filter(if (!is.null(start_p)) valid_after_date >= as.Date(start_p) else TRUE) %>% filter(if (!is.null(end_p))
tor-commits@lists.torproject.org