[tor-commits] [metrics-tasks/master] Add code for second #3338 graph.

karsten at torproject.org karsten at torproject.org
Fri Jun 24 18:41:21 UTC 2011


commit 4d17699bf8450a9d6d8aff2e8eb94b935b73df6b
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Fri Jun 24 20:40:10 2011 +0200

    Add code for second #3338 graph.
---
 task-3338/.gitignore           |    2 +
 task-3338/README               |    2 +-
 task-3338/dir-bytes-estimate.R |   63 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 1 deletions(-)

diff --git a/task-3338/.gitignore b/task-3338/.gitignore
new file mode 100644
index 0000000..bd98a73
--- /dev/null
+++ b/task-3338/.gitignore
@@ -0,0 +1,2 @@
+*.pdf
+
diff --git a/task-3338/README b/task-3338/README
index 4db2c65..85fda1b 100644
--- a/task-3338/README
+++ b/task-3338/README
@@ -16,5 +16,5 @@ and run daily-users.R.
 
 2. Plot directory bytes estimate
 
-(TODO)
+  $ R --slave -f dir-bytes-estimate.R
 
diff --git a/task-3338/dir-bytes-estimate.R b/task-3338/dir-bytes-estimate.R
new file mode 100644
index 0000000..de74dcf
--- /dev/null
+++ b/task-3338/dir-bytes-estimate.R
@@ -0,0 +1,63 @@
+library("RPostgreSQL")
+library("DBI")
+library("ggplot2")
+library("proto")
+library("grid")
+library("reshape")
+library("plyr")
+library("digest")
+
+# Connection settings read by plot_dirbytes() when it opens the database.
+db <- "tordir"       # passed to dbConnect() as dbname
+dbuser <- "metrics"  # passed to dbConnect() as user
+dbpassword <- "password" ###### <- put in real password here!
+
+# Plot estimated vs. extrapolated written directory bytes (MiB/s) per day.
+#
+# Reads daily rows from the user_stats table, derives three series
+# (estimated, estimatedreq, extrapolated), and writes a line graph to a file.
+#
+# Arguments:
+#   start, end  first and last day to plot, as "YYYY-MM-DD" (inclusive)
+#   path        output file handed to ggsave()
+#   dpi         output resolution; coerced with as.numeric()
+#
+# Uses the globals db, dbuser and dbpassword for the database connection.
+# Stops with an error if start or end cannot be parsed as a date.
+plot_dirbytes <- function(start, end, path, dpi) {
+  # Parse the bounds up front: only well-formed dates may be pasted into
+  # the SQL text below, and the same Date objects drive the day sequence.
+  start_date <- as.Date(start, "%Y-%m-%d")
+  end_date <- as.Date(end, "%Y-%m-%d")
+  stopifnot(!is.na(start_date), !is.na(end_date))
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  # Release the connection and the driver even when the query, the data
+  # preparation or the plotting fails part-way through.
+  on.exit({
+    dbDisconnect(con)
+    dbUnloadDriver(drv)
+  }, add = TRUE)
+  # The query skips the most recent dates in user_stats (presumably still
+  # incomplete -- confirm).
+  # NOTE(review): country = 'zy' looks like a marker for all-country
+  # totals; verify against the user_stats schema.
+  q <- paste("SELECT date, r, bwp, brp, bwn, brn, bwr, brr, dw, bwd ",
+      "FROM user_stats WHERE date >= '", format(start_date),
+      "' AND date <= '", format(end_date),
+      "' AND date < (SELECT MAX(date) FROM user_stats) - 1 ",
+      "AND country = 'zy' ORDER BY date", sep = "")
+  rs <- dbSendQuery(con, q)
+  u <- fetch(rs, n = -1)
+  dbClearResult(rs)
+  # Divide by 86400 (seconds per day) to get per-second rates; the plot
+  # divides by 2^20 again to show MiB/s. The meaning of the individual
+  # columns (bwp, brn, ...) is defined by user_stats, not by this script.
+  u <- data.frame(date = u$date,
+       estimated = floor((u$bwp * u$brn / u$bwn - u$brp) / 86400),
+       estimatedreq = floor((u$bwr * u$brn / u$bwn - u$brr) / 86400),
+       extrapolated = floor(u$dw * u$bwp / u$bwd / 86400))
+  # Append an all-NA row for every requested day the query did not return.
+  # setdiff() drops the Date class, hence the as.Date(..., origin) below.
+  dates <- seq(from = start_date, to = end_date, by = "1 day")
+  missing_dates <- setdiff(dates, u$date)
+  if (length(missing_dates) > 0) {
+    u <- rbind(u,
+        data.frame(date = as.Date(missing_dates, origin = "1970-01-01"),
+        estimated = NA, estimatedreq = NA, extrapolated = NA))
+  }
+  # Long format: one row per (date, series) so colour can map to variable.
+  u <- melt(u, id.vars = 1)
+  # Hard-coded days whose estimatedreq value is marked with a purple point.
+  highest <- u[u$date %in% as.Date(c("2011-04-10", "2011-04-17",
+    "2011-04-24", "2011-05-29")) & u$variable == "estimatedreq", ]
+  # NOTE(review): opts() and alpha() are used as provided by the ggplot2
+  # version this script was written for; newer ggplot2 releases replace
+  # opts() with labs()/theme().
+  p <- ggplot(u, aes(x = as.Date(date, "%Y-%m-%d"), y = value / 2^20,
+    colour = variable)) +
+    geom_line() +
+    scale_x_date(name = paste("\nThe Tor Project - ",
+        "https://metrics.torproject.org/", sep = "")) +
+    scale_y_continuous(name = "",
+      limits = c(0, max(u$value, na.rm = TRUE) / 2^20)) +
+    scale_colour_hue(name = "",
+      breaks = c("extrapolated", "estimated", "estimatedreq"),
+      labels = c(paste("extrapolated from reported directory bytes (all",
+               "directory mirrors)"),
+               "estimated from total bytes (all directory mirrors)",
+               paste("estimated from total bytes (only directory mirrors",
+               "reporting stats)"))) +
+    geom_point(data = highest, size = 3, colour = alpha("purple", 0.35)) +
+    opts(title = paste("Estimated vs. extrapolated written directory",
+                       "bytes in MiB/s per day"),
+      legend.position = "top")
+  # Pass the plot explicitly instead of relying on ggplot2's last_plot().
+  ggsave(filename = path, plot = p, width = 8, height = 5,
+    dpi = as.numeric(dpi))
+}
+# Render the graph for the fixed reporting period into a PDF at 300 dpi.
+plot_dirbytes(start = "2009-08-19", end = "2011-06-23",
+  path = "dir-bytes-estimate.pdf", dpi = 300)
+



More information about the tor-commits mailing list