[tor-commits] [metrics-tasks/master] Add graphing code for old user numbers (#10041).

karsten at torproject.org karsten at torproject.org
Mon Oct 28 14:13:15 UTC 2013


commit 1fa116045b42db527729c2e254b3d2508cff4822
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Oct 28 15:10:39 2013 +0100

    Add graphing code for old user numbers (#10041).
---
 task-10041/.gitignore   |    3 +
 task-10041/LICENSE      |   37 +++++
 task-10041/README       |   20 +++
 task-10041/plot-users.R |  368 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 428 insertions(+)

diff --git a/task-10041/.gitignore b/task-10041/.gitignore
new file mode 100644
index 0000000..1f86c56
--- /dev/null
+++ b/task-10041/.gitignore
@@ -0,0 +1,3 @@
+Rplots.pdf
+*.csv.gz
+
diff --git a/task-10041/LICENSE b/task-10041/LICENSE
new file mode 100644
index 0000000..d5362a0
--- /dev/null
+++ b/task-10041/LICENSE
@@ -0,0 +1,37 @@
+Copyright 2013 The Tor Project
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+  copyright notice, this list of conditions and the following disclaimer
+  in the documentation and/or other materials provided with the
+  distribution.
+
+* Neither the names of the copyright owners nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Included data is freely available under a CC0 no copyright declaration: To
+the extent possible under law, the Tor Project has waived all copyright
+and related or neighboring rights in the data.
+
+Graphs produced with this code are licensed under a Creative Commons
+Attribution 3.0 United States License.
+
diff --git a/task-10041/README b/task-10041/README
new file mode 100644
index 0000000..007aa3d
--- /dev/null
+++ b/task-10041/README
@@ -0,0 +1,20 @@
+Usage:
+
+  Rscript plot-users.R direct <start> <end> <country> <events> <file>
+
+or
+
+  Rscript plot-users.R bridge <start> <end> <country> <file>
+
+  start:   start date YYYY-MM-DD
+  end:     end date YYYY-MM-DD
+  country: two-letter country code, or "all"
+  events:  censorship events: "on" (range and points), "points", or "off"
+  file:    output filename, typically .png
+
+Examples:
+
+  Rscript plot-users.R direct 2013-07-01 2013-09-30 us on ex1.png
+
+  Rscript plot-users.R bridge 2013-01-01 2013-03-31 all ex2.png
+
diff --git a/task-10041/plot-users.R b/task-10041/plot-users.R
new file mode 100644
index 0000000..f07985f
--- /dev/null
+++ b/task-10041/plot-users.R
@@ -0,0 +1,368 @@
+# Copyright 2013 The Tor Project
+# See LICENSE for licensing information
+
+# Parse and validate the command line before doing any work.
+# "direct" takes six arguments (type, start, end, country, events, file);
+# any other type takes five (type, start, end, country, file).
+args <- commandArgs(TRUE)
+required_args <- if (length(args) > 0 && args[1] == "direct") 6 else 5
+if (length(args) < required_args) {
+  # Report usage problems on stderr and exit with a non-zero status so
+  # that calling scripts can detect the failure.
+  writeLines(c(
+    "Not enough arguments. See README for usage instructions and examples.",
+    "  Rscript plot-users.R direct 2013-07-01 2013-09-30 us on 1.png",
+    "  Rscript plot-users.R bridge 2013-01-01 2013-03-31 all 2.png"),
+    con = stderr())
+  quit(save = "no", status = 1)
+}
+
+# Attach the plotting and reshaping packages.  library() stops with an
+# error when a package is missing, whereas require() only returns FALSE
+# and lets the script fail later with a less helpful message.
+library(ggplot2, quietly = TRUE)
+library(reshape, quietly = TRUE, warn.conflicts = FALSE)
+# Strongly prefer fixed over scientific notation when formatting numbers.
+options(scipen = 15)
+
+# Display names keyed by lower-case two-letter country code.  Names carry
+# a leading "the" where needed so they read naturally in graph titles
+# such as "Directly connecting users from <name>".
+countrylist <- list(
+  "ad" = "Andorra",
+  "ae" = "the United Arab Emirates",
+  "af" = "Afghanistan",
+  "ag" = "Antigua and Barbuda",
+  "ai" = "Anguilla",
+  "al" = "Albania",
+  "am" = "Armenia",
+  "an" = "the Netherlands Antilles",
+  "ao" = "Angola",
+  "aq" = "Antarctica",
+  "ar" = "Argentina",
+  "as" = "American Samoa",
+  "at" = "Austria",
+  "au" = "Australia",
+  "aw" = "Aruba",
+  "ax" = "the Aland Islands",
+  "az" = "Azerbaijan",
+  "ba" = "Bosnia and Herzegovina",
+  "bb" = "Barbados",
+  "bd" = "Bangladesh",
+  "be" = "Belgium",
+  "bf" = "Burkina Faso",
+  "bg" = "Bulgaria",
+  "bh" = "Bahrain",
+  "bi" = "Burundi",
+  "bj" = "Benin",
+  "bl" = "Saint Barthélemy",
+  "bm" = "Bermuda",
+  "bn" = "Brunei",
+  "bo" = "Bolivia",
+  "br" = "Brazil",
+  "bs" = "the Bahamas",
+  "bt" = "Bhutan",
+  "bv" = "the Bouvet Island",
+  "bw" = "Botswana",
+  "by" = "Belarus",
+  "bz" = "Belize",
+  "ca" = "Canada",
+  "cc" = "the Cocos (Keeling) Islands",
+  "cd" = "the Democratic Republic of the Congo",
+  "cf" = "Central African Republic",
+  "cg" = "Congo",
+  "ch" = "Switzerland",
+  "ci" = "Côte d'Ivoire",
+  "ck" = "the Cook Islands",
+  "cl" = "Chile",
+  "cm" = "Cameroon",
+  "cn" = "China",
+  "co" = "Colombia",
+  "cr" = "Costa Rica",
+  "cu" = "Cuba",
+  "cv" = "Cape Verde",
+  "cx" = "the Christmas Island",
+  "cy" = "Cyprus",
+  "cz" = "the Czech Republic",
+  "de" = "Germany",
+  "dj" = "Djibouti",
+  "dk" = "Denmark",
+  "dm" = "Dominica",
+  "do" = "the Dominican Republic",
+  "dz" = "Algeria",
+  "ec" = "Ecuador",
+  "ee" = "Estonia",
+  "eg" = "Egypt",
+  "eh" = "the Western Sahara",
+  "er" = "Eritrea",
+  "es" = "Spain",
+  "et" = "Ethiopia",
+  "fi" = "Finland",
+  "fj" = "Fiji",
+  "fk" = "the Falkland Islands (Malvinas)",
+  "fm" = "the Federated States of Micronesia",
+  "fo" = "the Faroe Islands",
+  "fr" = "France",
+  "fx" = "Metropolitan France",
+  "ga" = "Gabon",
+  "gb" = "the United Kingdom",
+  "gd" = "Grenada",
+  "ge" = "Georgia",
+  "gf" = "French Guiana",
+  "gg" = "Guernsey",
+  "gh" = "Ghana",
+  "gi" = "Gibraltar",
+  "gl" = "Greenland",
+  "gm" = "Gambia",
+  "gn" = "Guinea",
+  "gp" = "Guadeloupe",
+  "gq" = "Equatorial Guinea",
+  "gr" = "Greece",
+  "gs" = "South Georgia and the South Sandwich Islands",
+  "gt" = "Guatemala",
+  "gu" = "Guam",
+  "gw" = "Guinea-Bissau",
+  "gy" = "Guyana",
+  "hk" = "Hong Kong",
+  "hm" = "Heard Island and McDonald Islands",
+  "hn" = "Honduras",
+  "hr" = "Croatia",
+  "ht" = "Haiti",
+  "hu" = "Hungary",
+  "id" = "Indonesia",
+  "ie" = "Ireland",
+  "il" = "Israel",
+  "im" = "the Isle of Man",
+  "in" = "India",
+  "io" = "the British Indian Ocean Territory",
+  "iq" = "Iraq",
+  "ir" = "Iran",
+  "is" = "Iceland",
+  "it" = "Italy",
+  "je" = "Jersey",
+  "jm" = "Jamaica",
+  "jo" = "Jordan",
+  "jp" = "Japan",
+  "ke" = "Kenya",
+  "kg" = "Kyrgyzstan",
+  "kh" = "Cambodia",
+  "ki" = "Kiribati",
+  "km" = "Comoros",
+  "kn" = "Saint Kitts and Nevis",
+  "kp" = "North Korea",
+  "kr" = "the Republic of Korea",
+  "kw" = "Kuwait",
+  "ky" = "the Cayman Islands",
+  "kz" = "Kazakhstan",
+  "la" = "Laos",
+  "lb" = "Lebanon",
+  "lc" = "Saint Lucia",
+  "li" = "Liechtenstein",
+  "lk" = "Sri Lanka",
+  "lr" = "Liberia",
+  "ls" = "Lesotho",
+  "lt" = "Lithuania",
+  "lu" = "Luxembourg",
+  "lv" = "Latvia",
+  "ly" = "Libya",
+  "ma" = "Morocco",
+  "mc" = "Monaco",
+  "md" = "the Republic of Moldova",
+  "me" = "Montenegro",
+  "mf" = "Saint Martin",
+  "mg" = "Madagascar",
+  "mh" = "the Marshall Islands",
+  "mk" = "Macedonia",
+  "ml" = "Mali",
+  "mm" = "Burma",
+  "mn" = "Mongolia",
+  "mo" = "Macau",
+  "mp" = "the Northern Mariana Islands",
+  "mq" = "Martinique",
+  "mr" = "Mauritania",
+  "ms" = "Montserrat",
+  "mt" = "Malta",
+  "mu" = "Mauritius",
+  "mv" = "the Maldives",
+  "mw" = "Malawi",
+  "mx" = "Mexico",
+  "my" = "Malaysia",
+  "mz" = "Mozambique",
+  "na" = "Namibia",
+  "nc" = "New Caledonia",
+  "ne" = "Niger",
+  "nf" = "Norfolk Island",
+  "ng" = "Nigeria",
+  "ni" = "Nicaragua",
+  "nl" = "the Netherlands",
+  "no" = "Norway",
+  "np" = "Nepal",
+  "nr" = "Nauru",
+  "nu" = "Niue",
+  "nz" = "New Zealand",
+  "om" = "Oman",
+  "pa" = "Panama",
+  "pe" = "Peru",
+  "pf" = "French Polynesia",
+  "pg" = "Papua New Guinea",
+  "ph" = "the Philippines",
+  "pk" = "Pakistan",
+  "pl" = "Poland",
+  "pm" = "Saint Pierre and Miquelon",
+  "pn" = "the Pitcairn Islands",
+  "pr" = "Puerto Rico",
+  "ps" = "the Palestinian Territory",
+  "pt" = "Portugal",
+  "pw" = "Palau",
+  "py" = "Paraguay",
+  "qa" = "Qatar",
+  "re" = "Reunion",
+  "ro" = "Romania",
+  "rs" = "Serbia",
+  "ru" = "Russia",
+  "rw" = "Rwanda",
+  "sa" = "Saudi Arabia",
+  "sb" = "the Solomon Islands",
+  "sc" = "the Seychelles",
+  "sd" = "Sudan",
+  "se" = "Sweden",
+  "sg" = "Singapore",
+  "sh" = "Saint Helena",
+  "si" = "Slovenia",
+  "sj" = "Svalbard and Jan Mayen",
+  "sk" = "Slovakia",
+  "sl" = "Sierra Leone",
+  "sm" = "San Marino",
+  "sn" = "Senegal",
+  "so" = "Somalia",
+  "sr" = "Suriname",
+  "st" = "São Tomé and Príncipe",
+  "sv" = "El Salvador",
+  "sy" = "the Syrian Arab Republic",
+  "sz" = "Swaziland",
+  "tc" = "Turks and Caicos Islands",
+  "td" = "Chad",
+  "tf" = "the French Southern Territories",
+  "tg" = "Togo",
+  "th" = "Thailand",
+  "tj" = "Tajikistan",
+  "tk" = "Tokelau",
+  "tl" = "East Timor",
+  "tm" = "Turkmenistan",
+  "tn" = "Tunisia",
+  "to" = "Tonga",
+  "tr" = "Turkey",
+  "tt" = "Trinidad and Tobago",
+  "tv" = "Tuvalu",
+  "tw" = "Taiwan",
+  "tz" = "the United Republic of Tanzania",
+  "ua" = "Ukraine",
+  "ug" = "Uganda",
+  "um" = "the United States Minor Outlying Islands",
+  "us" = "the United States",
+  "uy" = "Uruguay",
+  "uz" = "Uzbekistan",
+  "va" = "Vatican City",
+  "vc" = "Saint Vincent and the Grenadines",
+  "ve" = "Venezuela",
+  "vg" = "the British Virgin Islands",
+  "vi" = "the United States Virgin Islands",
+  "vn" = "Vietnam",
+  "vu" = "Vanuatu",
+  "wf" = "Wallis and Futuna",
+  "ws" = "Samoa",
+  "ye" = "Yemen",
+  "yt" = "Mayotte",
+  "za" = "South Africa",
+  "zm" = "Zambia",
+  "zw" = "Zimbabwe")
+
+# Look up the display name for a two-letter country code in countrylist.
+# Codes without an entry yield the placeholder "no-man's-land".
+countryname <- function(country) {
+  full_name <- countrylist[[country]]
+  if (!is.null(full_name)) {
+    return(full_name)
+  }
+  "no-man's-land"
+}
+
+# Plot directly connecting users per day and save the graph to a file.
+#
+# Reads direct-users.csv.gz from the working directory and, when events
+# are requested for a single country, direct-users-ranges.csv.gz as well.
+#
+#   start, end: first and last day to plot, as "YYYY-MM-DD" strings
+#   country:    name of the data column to plot (a two-letter country
+#               code), or "all"
+#   events:     "off" draws only the user line; any other value marks
+#               days above (blue) or below (red) the expected range with
+#               points, and "on" also draws that range as a gray ribbon;
+#               ignored when country is "all"
+#   path:       output file name, handed to ggsave()
+plot_direct_users <- function(start, end, country, events, path) {
+  u <- read.csv("direct-users.csv.gz", stringsAsFactors = FALSE)
+  # Dates are compared as strings, which is correct for ISO dates.
+  u <- u[u$date >= start & u$date <= end, ]
+  u <- melt(u, id.vars = "date")
+  # The "all" series tells us on which days any data was reported.
+  a <- u[u$variable == "all", ]
+  u <- u[u$variable == country, ]
+  # Keep dates as "YYYY-MM-DD" strings throughout.  Appending Date values
+  # to a character or factor column with rbind() would mangle them.
+  u <- data.frame(date = as.character(u$date), users = u$value,
+      stringsAsFactors = FALSE)
+  days <- format(seq(from = as.Date(start, "%Y-%m-%d"),
+      to = as.Date(end, "%Y-%m-%d"), by = "1 day"))
+  # Days without any reported data become gaps in the line (NA) ...
+  unreported <- setdiff(days, format(as.Date(a$date)))
+  if (length(unreported) > 0)
+    u <- rbind(u,
+        data.frame(date = unreported, users = NA,
+        stringsAsFactors = FALSE))
+  # ... whereas days with data but no entry for this country count as 0.
+  absent <- setdiff(days, format(as.Date(u$date)))
+  if (length(absent) > 0)
+    u <- rbind(u,
+        data.frame(date = absent, users = 0,
+        stringsAsFactors = FALSE))
+  title <- if (country == "all") {
+    "Directly connecting users from all countries\n"
+  } else {
+    paste0("Directly connecting users from ", countryname(country), "\n")
+  }
+  known <- u$users[!is.na(u$users)]
+  max_y <- if (length(known) == 0) 0 else max(known)
+  p <- ggplot(u, aes(x = as.Date(date, "%Y-%m-%d"), y = users))
+  if (length(known) > 0 && events != "off" && country != "all") {
+    r <- read.csv("direct-users-ranges.csv.gz", stringsAsFactors = FALSE)
+    r <- r[r$date >= start & r$date <= end & r$country == country,
+        c("date", "minusers", "maxusers")]
+    # Clamp negative lower bounds; skip NA so the assignment cannot fail.
+    r$minusers[!is.na(r$minusers) & r$minusers < 0] <- 0
+    # One row per date with columns date, users, minusers, maxusers.
+    r <- cast(rbind(melt(u, id.vars = "date"), melt(r, id.vars = "date")))
+    # NA comparisons must not select rows, or all-NA rows would appear.
+    is_upturn <- !is.na(r$users) & !is.na(r$maxusers) &
+        r$users > r$maxusers
+    is_downturn <- !is.na(r$users) & !is.na(r$minusers) &
+        r$users < r$minusers
+    upturns <- r[is_upturn, 1:2]
+    downturns <- r[is_downturn, 1:2]
+    if (events == "on" && any(!is.na(r$maxusers))) {
+      # Make room on the y axis for the upper edge of the ribbon.
+      max_y <- max(max_y, max(r$maxusers, na.rm = TRUE))
+      p <- p +
+        geom_ribbon(data = r, aes(ymin = minusers,
+            ymax = maxusers), fill = "gray")
+    }
+    if (length(upturns$date) > 0)
+      p <- p +
+          geom_point(data = upturns, aes(x = as.Date(date), y = users),
+          size = 5, colour = "dodgerblue2")
+    if (length(downturns$date) > 0)
+      p <- p +
+          geom_point(data = downturns, aes(x = as.Date(date), y = users),
+          size = 5, colour = "firebrick2")
+  }
+  p <- p +
+    geom_line(size = 1) +
+    scale_x_date(name = paste0("\nThe Tor Project - ",
+        "https://metrics.torproject.org/")) +
+    scale_y_continuous(name = "", limits = c(0, max_y)) +
+    ggtitle(title)
+  # Pass the plot explicitly instead of printing it first: printing only
+  # served to set ggsave()'s last_plot() default and, under Rscript,
+  # leaves a stray Rplots.pdf behind.
+  ggsave(filename = path, plot = p, width = 8, height = 5, dpi = 72)
+}
+
+# Plot bridge users per day and save the graph to a file.
+#
+# Reads bridge-users.csv.gz from the working directory.
+#
+#   start, end: first and last day to plot, as "YYYY-MM-DD" strings
+#   country:    name of the data column to plot (a two-letter country
+#               code), or "all"
+#   path:       output file name, handed to ggsave()
+plot_bridge_users <- function(start, end, country, path) {
+  b <- read.csv("bridge-users.csv.gz", stringsAsFactors = FALSE)
+  # Dates are compared as strings, which is correct for ISO dates.
+  b <- b[b$date >= start & b$date <= end, ]
+  b <- melt(b, id.vars = "date")
+  # Keep only the requested series.  Also keeping the "all" rows for a
+  # single country would draw two series as one zig-zag line.
+  b <- b[b$variable == country, ]
+  bridgeusers <- data.frame(date = as.Date(b$date), users = b$value)
+  dates <- seq(from = as.Date(start, "%Y-%m-%d"),
+      to = as.Date(end, "%Y-%m-%d"), by = "1 day")
+  # Days without data become NA so that the line shows a gap there.
+  # setdiff() may drop the Date class, hence the conversion with origin.
+  missing <- setdiff(dates, bridgeusers$date)
+  if (length(missing) > 0)
+    bridgeusers <- rbind(bridgeusers,
+        data.frame(date = as.Date(missing, origin = "1970-01-01"),
+        users = NA))
+  title <- if (country == "all") {
+    "Bridge users from all countries\n"
+  } else {
+    paste0("Bridge users from ", countryname(country), "\n")
+  }
+  known <- bridgeusers$users[!is.na(bridgeusers$users)]
+  max_y <- if (length(known) == 0) 0 else max(known)
+  p <- ggplot(bridgeusers, aes(x = as.Date(date, "%Y-%m-%d"), y = users)) +
+    geom_line(size = 1) +
+    scale_x_date(name = paste0("\nThe Tor Project - ",
+        "https://metrics.torproject.org/")) +
+    scale_y_continuous(name = "", limits = c(0, max_y)) +
+    ggtitle(title)
+  # Save this plot explicitly rather than relying on last_plot().
+  ggsave(filename = path, plot = p, width = 8, height = 5, dpi = 72)
+}
+
+# Dispatch on the requested graph type (first command-line argument).
+if (args[1] == "direct") {
+  plot_direct_users(start = args[2], end = args[3], country = args[4],
+                    events = args[5], path = args[6])
+} else if (args[1] == "bridge") {
+  plot_bridge_users(start = args[2], end = args[3], country = args[4],
+                    path = args[5])
+} else {
+  # Fail loudly rather than exit successfully without writing a graph.
+  stop("Unknown graph type \"", args[1],
+       "\": expected \"direct\" or \"bridge\".", call. = FALSE)
+}
+



More information about the tor-commits mailing list