[or-cvs] [metrics-web/master] Make directory bytes estimate more accurate.

karsten at torproject.org karsten at torproject.org
Thu Dec 16 20:59:57 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Thu, 16 Dec 2010 21:48:04 +0100
Subject: Make directory bytes estimate more accurate.
Commit: 7faa0ff04b34ff0c7a3f7bdf185e5fe4bf58da28

We estimated directory bytes by weighting observed directory bytes with
the wrong factor.  We used the ratio of total written bytes in the
network to total bytes written by directory mirrors reporting directory
bytes.  The network-wide total needs to be restricted to directory
mirrors, ignoring all non-directory-mirrors.

I'm also removing total bytes from the graph.  There's a graph on total
bytes, and adding these two metrics with rather different scales to the
same graph is not very helpful.
---
 rserve/csv.R    |   16 +++++++++-------
 rserve/graphs.R |   26 ++++++++++++++------------
 2 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/rserve/csv.R b/rserve/csv.R
index c0045c9..6b5bbcc 100644
--- a/rserve/csv.R
+++ b/rserve/csv.R
@@ -81,16 +81,18 @@ export_bandwidth <- function(path) {
 export_dirbytes <- function(path) {
   drv <- dbDriver("PostgreSQL")
   con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
-  q <- paste("SELECT date, read / 86400 AS read,",
-      "written / 86400 AS written, dirread / 86400 AS dirread,",
-      "dirwritten / 86400 AS dirwritten FROM total_bwhist",
-      "WHERE date < (SELECT MAX(date) FROM total_bwhist) - 1",
-      "ORDER BY date")
+  q <- paste("SELECT date, dr, dw, brp, bwp, brd, bwd FROM user_stats",
+      "WHERE country = 'zy' AND bwp / bwd <= 3",
+      "AND date < (SELECT MAX(date) FROM user_stats) - 1 ORDER BY date")
   rs <- dbSendQuery(con, q)
-  bw_hist <- fetch(rs, n = -1)
+  dir <- fetch(rs, n = -1)
   dbDisconnect(con)
   dbUnloadDriver(drv)
-  write.csv(bw_hist, path, quote = FALSE, row.names = FALSE)
+  dir <- data.frame(date = dir$date,
+      dirread = floor(dir$dr * dir$brp / dir$brd / 86400),
+      dirwrite = floor(dir$dw * dir$bwp / dir$bwd / 86400))
+  dir <- na.omit(dir)
+  write.csv(dir, path, quote = FALSE, row.names = FALSE)
 }
 
 export_relayflags <- function(path) {
diff --git a/rserve/graphs.R b/rserve/graphs.R
index 9da1731..043ad54 100644
--- a/rserve/graphs.R
+++ b/rserve/graphs.R
@@ -128,26 +128,28 @@ plot_bandwidth <- function(start, end, path) {
 plot_dirbytes <- function(start, end, path) {
   drv <- dbDriver("PostgreSQL")
   con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
-  q <- paste("SELECT date, read, written, dirread, dirwritten ",
-      "FROM total_bwhist WHERE date >= '", start, "' AND date <= '", end,
-      "' AND date < (SELECT MAX(date) FROM total_bwhist) - 1 ", sep = "")
+  q <- paste("SELECT date, dr, dw, brp, bwp, brd, bwd FROM user_stats",
+      "WHERE country = 'zy' AND bwp / bwd <= 3",
+      "AND date < (SELECT MAX(date) FROM user_stats) - 1 ORDER BY date")
   rs <- dbSendQuery(con, q)
-  bw_hist <- fetch(rs, n = -1)
+  dir <- fetch(rs, n = -1)
   dbDisconnect(con)
   dbUnloadDriver(drv)
-  bw_hist <- melt(bw_hist, id = "date")
-  ggplot(bw_hist, aes(x = as.Date(date, "%Y-%m-%d"), y = value /
-      (86400 * 2^20), colour = variable)) +
+  dir <- data.frame(date = dir$date,
+      dirread = floor(dir$dr * dir$brp / dir$brd / 86400),
+      dirwrite = floor(dir$dw * dir$bwp / dir$bwd / 86400))
+  dir <- na.omit(dir)
+  dir <- melt(dir, id = "date")
+  ggplot(dir, aes(x = as.Date(date, "%Y-%m-%d"), y = value / 2^20,
+      colour = variable)) +
     geom_line(size = 1) +
     scale_x_date(name = paste("\nThe Tor Project - ",
         "https://metrics.torproject.org/", sep = "")) +
     scale_y_continuous(name="Bandwidth (MiB/s)",
-        limits = c(0, max(bw_hist$value, na.rm = TRUE) /
-        (86400 * 2^20))) +
+        limits = c(0, max(dir$value, na.rm = TRUE) / 2^20)) +
     scale_colour_hue(name = "",
-        breaks = c("written", "read", "dirwritten", "dirread"),
-        labels = c("Total written bytes", "Total read bytes",
-            "Written dir bytes", "Read dir bytes")) +
+        breaks = c("dirwrite", "dirread"),
+        labels = c("Written dir bytes", "Read dir bytes")) +
     opts(title = "Number of bytes spent on answering directory requests",
         legend.position = "top")
   ggsave(filename = path, width = 8, height = 5, dpi = 72)
-- 
1.7.1



More information about the tor-commits mailing list