[tor-commits] [metrics-web/release] Simplify plot_webstats_tb_locale function.

karsten at torproject.org karsten at torproject.org
Sat Nov 9 21:45:06 UTC 2019


commit 2b34cd2023a3e59057f4274afb0d7b8163282a18
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Jan 10 10:41:48 2019 +0100

    Simplify plot_webstats_tb_locale function.
---
 src/main/R/rserver/graphs.R | 61 ++++++++++++++++++++-------------------------
 1 file changed, 27 insertions(+), 34 deletions(-)

diff --git a/src/main/R/rserver/graphs.R b/src/main/R/rserver/graphs.R
index ba8862c..27f399d 100644
--- a/src/main/R/rserver/graphs.R
+++ b/src/main/R/rserver/graphs.R
@@ -1265,8 +1265,8 @@ write_webstats_tb_platform <- function(start_p = NULL, end_p = NULL, path_p) {
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 
-plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
-  d <- read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
+prepare_webstats_tb_locale <- function(start_p, end_p) {
+  read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
       col_types = cols(
         log_date = col_date(format = ""),
         request_type = col_factor(),
@@ -1274,20 +1274,35 @@ plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
         channel = col_skip(),
         locale = col_factor(),
         incremental = col_skip(),
-        count = col_double()))
-  d <- d[d$log_date >= start_p & d$log_date <= end_p &
-         d$request_type %in% c("tbid", "tbup"), ]
-  levels(d$request_type) <- list(
-      "Initial downloads" = "tbid",
-      "Update pings" = "tbup")
+        count = col_double())) %>%
+    filter(if (!is.null(start_p)) log_date >= as.Date(start_p) else TRUE) %>%
+    filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
+    filter(request_type %in% c("tbid", "tbup")) %>%
+    rename(date = log_date) %>%
+    group_by(date, locale, request_type) %>%
+    summarize(count = sum(count)) %>%
+    mutate(request_type = factor(request_type, levels = c("tbid", "tbup"))) %>%
+    spread(request_type, count, fill = 0) %>%
+    rename(initial_downloads = tbid, update_pings = tbup)
+}
+
+plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
+  d <- prepare_webstats_tb_locale(start_p, end_p) %>%
+    gather(request_type, count, -c(date, locale)) %>%
+    mutate(request_type = factor(request_type,
+      levels = c("initial_downloads", "update_pings"),
+      labels = c("Initial downloads", "Update pings")))
   e <- d
   e <- aggregate(list(count = e$count), by = list(locale = e$locale), FUN = sum)
   e <- e[order(e$count, decreasing = TRUE), ]
   e <- e[1:5, ]
-  d <- aggregate(list(count = d$count), by = list(log_date = d$log_date,
+  d <- aggregate(list(count = d$count), by = list(date = d$date,
     request_type = d$request_type,
     locale = ifelse(d$locale %in% e$locale, d$locale, "(other)")), FUN = sum)
-  ggplot(d, aes(x = log_date, y = count, colour = locale)) +
+  d %>%
+    complete(date = full_seq(date, period = 1),
+      nesting(locale, request_type)) %>%
+    ggplot(aes(x = date, y = count, colour = locale)) +
     geom_point() +
     geom_line() +
     scale_x_date(name = "", breaks = custom_breaks,
@@ -1295,7 +1310,7 @@ plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
     scale_y_continuous(name = "", labels = formatter, limits = c(0, NA)) +
     scale_colour_hue(name = "Locale",
         breaks = c(e$locale, "(other)"),
-        labels = c(e$locale, "Other")) +
+        labels = c(as.character(e$locale), "Other")) +
     facet_grid(request_type ~ ., scales = "free_y") +
     theme(strip.text.y = element_text(angle = 0, hjust = 0, size = rel(1.5)),
           strip.background = element_rect(fill = NA),
@@ -1305,30 +1320,8 @@ plot_webstats_tb_locale <- function(start_p, end_p, path_p) {
   ggsave(filename = path_p, width = 8, height = 5, dpi = 150)
 }
 
-# Ideally, this function would share code with plot_webstats_tb_locale
-# by using a common prepare_webstats_tb_locale function. This just
-# turned out to be a bit harder than for other functions, because
-# plot_webstats_tb_locale needs the preliminary data frame e for its
-# breaks and labels. Left as future work.
 write_webstats_tb_locale <- function(start_p = NULL, end_p = NULL, path_p) {
-  read_csv(file = paste(stats_dir, "webstats.csv", sep = ""),
-      col_types = cols(
-        log_date = col_date(format = ""),
-        request_type = col_factor(),
-        platform = col_skip(),
-        channel = col_skip(),
-        locale = col_factor(),
-        incremental = col_skip(),
-        count = col_double())) %>%
-    filter(if (!is.null(start_p)) log_date >= as.Date(start_p) else TRUE) %>%
-    filter(if (!is.null(end_p)) log_date <= as.Date(end_p) else TRUE) %>%
-    filter(request_type %in% c("tbid", "tbup")) %>%
-    rename(date = log_date) %>%
-    group_by(date, locale, request_type) %>%
-    summarize(count = sum(count)) %>%
-    mutate(request_type = factor(request_type, levels = c("tbid", "tbup"))) %>%
-    spread(request_type, count, fill = 0) %>%
-    rename(initial_downloads = tbid, update_pings = tbup) %>%
+  prepare_webstats_tb_locale(start_p, end_p) %>%
     write.csv(path_p, quote = FALSE, row.names = FALSE, na = "")
 }
 





More information about the tor-commits mailing list