commit 4d17699bf8450a9d6d8aff2e8eb94b935b73df6b Author: Karsten Loesing karsten.loesing@gmx.net Date: Fri Jun 24 20:40:10 2011 +0200
Add code for second #3338 graph. --- task-3338/.gitignore | 2 + task-3338/README | 2 +- task-3338/dir-bytes-estimate.R | 63 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletions(-)
diff --git a/task-3338/.gitignore b/task-3338/.gitignore new file mode 100644 index 0000000..bd98a73 --- /dev/null +++ b/task-3338/.gitignore @@ -0,0 +1,2 @@ +*.pdf + diff --git a/task-3338/README b/task-3338/README index 4db2c65..85fda1b 100644 --- a/task-3338/README +++ b/task-3338/README @@ -16,5 +16,5 @@ and run daily-users.R.
2. Plot directory bytes estimate
-(TODO) + $ R --slave -f dir-bytes-estimate.R
# File: task-3338/dir-bytes-estimate.R (added as a new file by this commit).
#
# Plots estimated vs. extrapolated written directory bytes, computed from
# the user_stats table, and saves the graph to a file.
#
# Usage: R --slave -f dir-bytes-estimate.R

library("RPostgreSQL")
library("DBI")
library("ggplot2")
library("proto")
library("grid")
library("reshape")
library("plyr")
library("digest")

# Database connection settings.
db <- "tordir"
dbuser <- "metrics"
# The password may be supplied through the TORDIR_DB_PASSWORD environment
# variable so it does not have to be written into this file.  When the
# variable is unset, the placeholder below is used.
dbpassword <- Sys.getenv("TORDIR_DB_PASSWORD",
    unset = "password")  ###### <- put in real password here!

# Parse a single "YYYY-MM-DD" value into a Date, or fail with a clear error.
# `what` is the argument name used in the error message.
parse_day <- function(x, what) {
  day <- as.Date(x, "%Y-%m-%d")
  if (length(day) != 1 || is.na(day)) {
    stop("'", what, "' must be a single date in YYYY-MM-DD format",
        call. = FALSE)
  }
  day
}

# Convert a bytes-per-day vector into whole bytes per second.  Non-finite
# results (e.g. from a zero denominator upstream) become NA so that they
# show up as gaps instead of stretching the y axis to infinity.
bytes_per_second <- function(bytes_per_day) {
  rate <- floor(bytes_per_day / 86400)
  rate[!is.finite(rate)] <- NA
  rate
}

# Fetch the user_stats rows for country 'zy' between two Date values
# (inclusive), leaving out the most recent dates in the table.  The
# connection and driver are released even when connecting or querying fails.
fetch_dirbytes_stats <- function(start_day, end_day) {
  drv <- dbDriver("PostgreSQL")
  con <- NULL
  on.exit({
    if (!is.null(con)) {
      dbDisconnect(con)
    }
    dbUnloadDriver(drv)
  }, add = TRUE)
  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
  # Only re-formatted Date values are spliced into the statement, never the
  # caller's raw strings.
  q <- paste("SELECT date, r, bwp, brp, bwn, brn, bwr, brr, dw, bwd ",
      "FROM user_stats WHERE date >= '", format(start_day),
      "' AND date <= '", format(end_day),
      "' AND date < (SELECT MAX(date) FROM user_stats) - 1 ",
      "AND country = 'zy' ORDER BY date", sep = "")
  # dbGetQuery sends the query, fetches all rows and clears the result set.
  dbGetQuery(con, q)
}

# Plot estimated vs. extrapolated written directory bytes and save the graph.
#
# Arguments:
#   start, end  First and last day to plot, as "YYYY-MM-DD".
#   path        Output file name handed to ggsave().
#   dpi         Output resolution; coerced with as.numeric().
#   highlight   Dates whose "estimatedreq" values are marked with a purple
#               point.  Pass character(0) to mark nothing.
#
# Days inside [start, end] without data are added as NA rows so the lines
# show gaps.  Stops with an error if the arguments are not valid dates, if
# start is later than end, or if there is nothing to plot.
plot_dirbytes <- function(start, end, path, dpi,
    highlight = c("2011-04-10", "2011-04-17", "2011-04-24", "2011-05-29")) {
  start_day <- parse_day(start, "start")
  end_day <- parse_day(end, "end")
  if (start_day > end_day) {
    stop("'start' must not be later than 'end'", call. = FALSE)
  }

  raw <- fetch_dirbytes_stats(start_day, end_day)
  if (nrow(raw) == 0) {
    stop("no user_stats rows found between ", format(start_day), " and ",
        format(end_day), call. = FALSE)
  }

  u <- data.frame(date = as.Date(raw$date),
      estimated = bytes_per_second(raw$bwp * raw$brn / raw$bwn - raw$brp),
      estimatedreq = bytes_per_second(raw$bwr * raw$brn / raw$bwn - raw$brr),
      extrapolated = bytes_per_second(raw$dw * raw$bwp / raw$bwd))

  # Pad days that have no row in the query result.
  days <- seq(from = start_day, to = end_day, by = "1 day")
  gaps <- days[!(days %in% u$date)]
  if (length(gaps) > 0) {
    u <- rbind(u,
        data.frame(date = gaps,
        estimated = NA, estimatedreq = NA, extrapolated = NA))
  }

  u <- melt(u, id.vars = "date")
  if (!any(is.finite(u$value))) {
    stop("no finite values to plot between ", format(start_day), " and ",
        format(end_day), call. = FALSE)
  }
  highest <- u[u$variable == "estimatedreq" &
      u$date %in% as.Date(highlight), ]

  # Values are bytes per second; dividing by 2^20 plots them in MiB/s.
  p <- ggplot(u, aes(x = date, y = value / 2^20, colour = variable)) +
    geom_line() +
    scale_x_date(name = paste("\nThe Tor Project - ",
        "https://metrics.torproject.org/", sep = "")) +
    scale_y_continuous(name = "",
        limits = c(0, max(u$value, na.rm = TRUE) / 2^20)) +
    scale_colour_hue(name = "",
        breaks = c("extrapolated", "estimated", "estimatedreq"),
        labels = c(paste("extrapolated from reported directory bytes (all",
            "directory mirrors)"),
            "estimated from total bytes (all directory mirrors)",
            paste("estimated from total bytes (only directory mirrors",
            "reporting stats)")))
  if (nrow(highest) > 0) {
    p <- p + geom_point(data = highest, size = 3,
        colour = alpha("purple", 0.35))
  }
  # NOTE(review): opts() is the older ggplot2 theming call; newer ggplot2
  # releases replace it with ggtitle()/theme() - confirm the installed
  # version before porting.
  p <- p + opts(title = paste("Estimated vs. extrapolated written directory",
          "bytes in MiB/s per day"),
      legend.position = "top")

  # Pass the plot explicitly instead of relying on the last plot displayed.
  ggsave(filename = path, plot = p, width = 8, height = 5,
      dpi = as.numeric(dpi))
}

plot_dirbytes("2009-08-19", "2011-06-23", "dir-bytes-estimate.pdf", 300)