[tor-commits] [metrics-tasks/master] Add graph to figure out spikes in user numbers.

karsten at torproject.org karsten at torproject.org
Thu Jun 23 15:26:18 UTC 2011


commit 04a508dbd179ccb7895720ca0ea0f8964a44a26b
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Jun 23 17:25:48 2011 +0200

    Add graph to figure out spikes in user numbers.
---
 task-3338/README        |   20 ++++++++++++++++
 task-3338/daily-users.R |   57 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 0 deletions(-)

diff --git a/task-3338/README b/task-3338/README
new file mode 100644
index 0000000..4db2c65
--- /dev/null
+++ b/task-3338/README
@@ -0,0 +1,20 @@
+Ticket: Find out why our user graphs have huge spikes in Q2/2011
+================================================================
+
+1. Plot single variables of daily user estimate
+
+- reported directory requests
+- ratio of written directory bytes of the directory mirrors reporting
+  directory requests compared to all written directory bytes
+- daily users
+
+On a machine with the metrics database from metrics-web installed, put the
+real database password into daily-users.R (see dbpassword) and run it:
+
+  $ R --slave -f daily-users.R
+
+
+2. Plot directory bytes estimate
+
+(TODO)
+
diff --git a/task-3338/daily-users.R b/task-3338/daily-users.R
new file mode 100644
index 0000000..58921cc
--- /dev/null
+++ b/task-3338/daily-users.R
@@ -0,0 +1,57 @@
+library("RPostgreSQL")
+library("DBI")
+library("ggplot2")
+library("proto")
+library("grid")
+library("reshape")
+library("plyr")
+library("digest")
+
+# Connection settings for the metrics database, read by plot_dirbytes().
+db <- "tordir"
+dbuser <- "metrics"
+# Placeholder only: put in the real password here before running!
+dbpassword <- "password"
+
+# Plot the single variables of the daily user estimate and save the graph.
+#
+# Reads the user_stats rows with country = 'zy' whose date lies between
+# start and end (both inclusive) and is earlier than the latest date in
+# user_stats minus 1, then derives per day:
+#   requests - the r column,
+#   fraction - (bwr * brn / bwn - brr) / (bwp * brn / bwn - brp),
+#   users    - requests / fraction / 10.
+# Each variable is drawn as a line in its own facet with a free y axis.
+# Four hard-coded dates are highlighted with semi-transparent red points.
+# NOTE(review): 'zy' looks like the pseudo country code for "all
+# countries" (cf. the graph title) - confirm against the database schema.
+#
+# Arguments:
+#   start, end - first and last date to plot, in YYYY-MM-DD format
+#   path       - file name of the graph, handed to ggsave() as filename
+#   dpi        - resolution of the graph, converted with as.numeric()
+#
+# Connects using the global variables db, dbuser and dbpassword.
+# Returns whatever ggsave() returns.
+plot_dirbytes <- function(start, end, path, dpi) {
+  # Parse the dates up front: malformed input is rejected early, and only
+  # canonical YYYY-MM-DD strings ever get pasted into the SQL query.
+  first_day <- as.Date(start, "%Y-%m-%d")
+  last_day <- as.Date(end, "%Y-%m-%d")
+  if (any(is.na(c(first_day, last_day))))
+    stop("start and end must be dates in YYYY-MM-DD format")
+
+  drv <- dbDriver("PostgreSQL")
+  on.exit(dbUnloadDriver(drv))
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  # Replace the exit handler so that the connection is closed before the
+  # driver is unloaded, also when the query or the plotting fails.
+  on.exit({
+    dbDisconnect(con)
+    dbUnloadDriver(drv)
+  })
+  q <- paste("SELECT date, r, bwp, brp, bwn, brn, bwr, brr ",
+      "FROM user_stats WHERE date >= '", format(first_day),
+      "' AND date <= '", format(last_day),
+      "' AND date < (SELECT MAX(date) FROM user_stats) - 1 ",
+      "AND country = 'zy' ORDER BY date", sep = "")
+  # dbGetQuery() sends the query, fetches all rows and clears the result.
+  u <- dbGetQuery(con, q)
+
+  u <- data.frame(date = u$date,
+       requests = u$r,
+       fraction = (u$bwr * u$brn / u$bwn - u$brr) /
+                (u$bwp * u$brn / u$bwn - u$brp),
+       users = u$r * (u$bwp * u$brn / u$bwn - u$brp) /
+               (u$bwr * u$brn / u$bwn - u$brr) / 10)
+  # Dates to highlight; picked before the NA rows are added below.
+  highest <- u[u$date %in% as.Date(c("2011-04-10", "2011-04-17",
+                                     "2011-04-24", "2011-05-29")), ]
+  highest <- melt(highest, id.vars = 1)
+
+  # Add an all-NA row for every day without data, so that missing days do
+  # not get bridged by the line. setdiff() may drop the Date class, hence
+  # the conversion back with an explicit origin.
+  dates <- seq(from = first_day, to = last_day, by = "1 day")
+  missing <- setdiff(dates, u$date)
+  if (length(missing) > 0)
+    u <- rbind(u,
+        data.frame(date = as.Date(missing, origin = "1970-01-01"),
+        requests = NA, fraction = NA, users = NA))
+  u <- melt(u, id.vars = 1)
+
+  p <- ggplot(u, aes(x = as.Date(date, "%Y-%m-%d"), y = value)) +
+    geom_line() +
+    facet_grid(variable ~ ., scales = "free_y") +
+    scale_x_date(name = paste("\nThe Tor Project - ",
+        "https://metrics.torproject.org/", sep = "")) +
+    scale_y_continuous(name = "") +
+    geom_point(data = highest, size = 3, colour = alpha("red", 0.5)) +
+    opts(title = paste("Estimating directly connecting users from all",
+                       "countries\n(users = requests / fraction / 10)\n"))
+  # Pass the plot explicitly instead of relying on last_plot().
+  ggsave(filename = path, plot = p, width = 8, height = 5,
+      dpi = as.numeric(dpi))
+}
+# Plot the first half of 2011, which contains the spikes in question.
+plot_dirbytes(start = "2011-01-01", end = "2011-06-23",
+    path = "daily-users.pdf", dpi = 300)
+



More information about the tor-commits mailing list