[or-cvs] [ernie/master] Add detailed documentation to one of the R scripts.

karsten at torproject.org karsten at torproject.org
Sat Jun 5 09:45:03 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Sat, 5 Jun 2010 11:43:29 +0200
Subject: Add detailed documentation to one of the R scripts.
Commit: d790e9b4620969e6549a1098ef8dd3500aad724d

---
 R/descriptor-stats.R |   78 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/R/descriptor-stats.R b/R/descriptor-stats.R
index 9cb25a8..3ee5d86 100644
--- a/R/descriptor-stats.R
+++ b/R/descriptor-stats.R
@@ -1,22 +1,74 @@
+# R script to plot relay versions, platforms, and advertised bandwidth.
+# Run from ERNIE's base directory as "R --slave < R/descriptor-stats.R".
+
+# Suppress all warnings, so that only errors produce any output. This
+# is useful when executing this script from cron and having it mail out a
+# notification only when there's an actual problem.
 options(warn = -1)
+
+# Import library ggplot2 that is used for plotting. Suppress package
+# startup messages for the same reason as suppressing warnings.
 suppressPackageStartupMessages(library("ggplot2"))
 
+# Define a function to plot relay versions. Right now, there are no
+# parameters for this function. In the future, a possible parameter would
+# be the time interval to be plotted on the x axis.
 plot_versions <- function() {
+
+  # Transform data frame versions into a data frame that can be processed
+  # by ggplot2. In particular, versions has one row per date and multiple
+  # columns for the number of relays running a particular Tor version at
+  # that date. What we need for plotting is a single data point per row
+  # with additional columns for classification, e.g., which version this
+  # data point belongs to. Add commands "print(versions)" and "print(v)"
+  # for an example.
   v <- melt(versions, id = "date")
-  ggplot(v, aes(x = date, y = value, colour = variable)) +
+
+  # Start plotting the data in data frame v.
+  ggplot(v,
+
+    # Tell ggplot2 how to understand the data in data frame v. The date
+    # shall be plotted on the x axis, the value on the y axis, and the
+    # column called variable shall be used to distinguish data sets by color.
+    aes(x = date, y = value, colour = variable)) +
+
+    # So far, ggplot2 only knows how to understand the data, but not how
+    # to visualize them. Draw a line from the data with line size 1.
     geom_line(size = 1) +
-    scale_x_date(name = "") + scale_y_continuous(name = "",
+
+    # Override the default x axis which would display a label "date" with
+    # an x axis that has no label. This line can be commented out.
+    scale_x_date(name = "") +
+
+    # Override the default y axis with label "value" with one that has no
+    # label and that starts at the origin. Note that the max() function is
+    # told to remove NA values. These lines can be commented out.
+    scale_y_continuous(name = "",
         limits = c(0, max(v$value, na.rm = TRUE))) +
+
+    # Override the categorization by relay version to use a different
+    # color scheme (brewer instead of hue), have a different legend title
+    # ("Tor version" instead of "variable") and display custom legend
+    # labels ("0.2.2" instead of "X0.2.2"). These lines can be commented
+    # out.
     scale_colour_brewer(name = "Tor version",
         breaks = rev(names(versions)[2:length(names(versions))]),
         labels = c("other",
             substr(rev(names(versions)[2:(length(names(versions)) - 1)]),
             2, 6))) +
+
+    # Add a graph title. This line can be commented out together with the
+    # '+' character at the end of the preceding non-comment line.
     opts(title = "Relay versions\n")
+
+  # Save the generated graph to the following path with given width,
+  # height, and resolution.
   ggsave(filename = "website/graphs/descriptors/versions.png",
-    width = 8, height = 5, dpi = 72)
+      width = 8, height = 5, dpi = 72)
 }
 
+# Define a function to plot relay platforms. See the similar function
+# plot_versions() for details.
 plot_platforms <- function() {
   p <- melt(platforms, id = "date")
   ggplot(p, aes(x = date, y = value, colour = variable)) +
@@ -28,9 +80,11 @@ plot_platforms <- function() {
         labels = rev(names(platforms)[2:length(names(platforms))])) +
     opts(title = "Relay platforms\n")
   ggsave(filename = "website/graphs/descriptors/platforms.png",
-    width = 8, height = 5, dpi = 72)
+      width = 8, height = 5, dpi = 72)
 }
 
+# Define a function to plot advertised bandwidth. See the similar function
+# plot_versions() for details.
 plot_bandwidth <- function() {
   ggplot(bandwidth, aes(x = date, y = advbw / 1024)) + geom_line() +
     scale_x_date(name = "") +
@@ -38,17 +92,29 @@ plot_bandwidth <- function() {
         limits = c(0, max(bandwidth$advbw / 1024, na.rm = TRUE))) +
     opts(title = "Total advertised bandwidth\n")
   ggsave(filename = "website/graphs/descriptors/bandwidth.png",
-    width = 8, height = 5, dpi = 72)
+      width = 8, height = 5, dpi = 72)
 }
 
+# If a CSV file with version data exists, ...
 if (file.exists("stats/version-stats")) {
+
+  # Read in the file, declare that the first line has the column names,
+  # and define the type of the first column as Date.
   versions <- read.csv("stats/version-stats", header = TRUE,
       colClasses = c(date = "Date"))
+
+  # Write the same data to disk without putting in quotes around strings
+  # and without adding row numbers. This file can be downloaded by others
+  # to run their own evaluations.
   write.csv(versions, "website/csv/versions.csv", quote = FALSE,
     row.names = FALSE)
+
+  # Call the function defined above to plot relay versions.
   plot_versions()
 }
 
+# If a CSV file with platform data exists, read it, copy it to the
+# website, and plot a platform graph.
 if (file.exists("stats/platform-stats")) {
   platforms <- read.csv("stats/platform-stats", header = TRUE,
       colClasses = c(date = "Date"))
@@ -57,6 +123,8 @@ if (file.exists("stats/platform-stats")) {
   plot_platforms()
 }
 
+# If a CSV file with bandwidth data exists, read it, copy it to the
+# website, and plot a bandwidth graph.
 if (file.exists("stats/bandwidth-stats")) {
   bandwidth <- read.csv("stats/bandwidth-stats", header = TRUE,
       colClasses = c(date = "Date"))
-- 
1.6.5



More information about the tor-commits mailing list