commit c40db6ca3f95eabb843b1c65cebe7324430b7271 Author: Karsten Loesing karsten.loesing@gmx.net Date: Thu Apr 21 12:09:00 2011 +0200
Add statistics on relays by country. --- db/tordir.sql | 37 ++++++++++ etc/web.xml | 4 + rserve/csv.R | 12 +++ rserve/graphs.R | 59 +++++++++++++++ src/org/torproject/ernie/web/CsvServlet.java | 1 + .../ernie/web/GraphParameterChecker.java | 2 + web/WEB-INF/network.jsp | 77 ++++++++++++++++++++ 7 files changed, 192 insertions(+), 0 deletions(-)
diff --git a/db/tordir.sql b/db/tordir.sql index ab0482e..8efba09 100644 --- a/db/tordir.sql +++ b/db/tordir.sql @@ -146,6 +146,14 @@ CREATE TABLE network_size_hour ( CONSTRAINT network_size_hour_pkey PRIMARY KEY(validafter) );
+-- TABLE relay_countries +CREATE TABLE relay_countries ( + date DATE NOT NULL, + country CHARACTER(2) NOT NULL, + relays INTEGER NOT NULL, + CONSTRAINT relay_countries_pkey PRIMARY KEY(date, country) +); + -- TABLE relay_platforms CREATE TABLE relay_platforms ( date DATE NOT NULL, @@ -429,6 +437,34 @@ CREATE OR REPLACE FUNCTION refresh_network_size_hour() RETURNS INTEGER AS $$ END; $$ LANGUAGE plpgsql;
+-- FUNCTION refresh_relay_countries() +CREATE OR REPLACE FUNCTION refresh_relay_countries() RETURNS INTEGER AS $$ + BEGIN + + DELETE FROM relay_countries + WHERE date IN (SELECT date FROM updates); + + INSERT INTO relay_countries + (date, country, relays) + SELECT date, country, relays / count AS relays + FROM ( + SELECT DATE(validafter), + COALESCE(lower((geoip_lookup(address)).country), 'zz') + AS country, + COUNT(*) AS relays + FROM statusentry + WHERE isrunning = TRUE + AND DATE(validafter) >= (SELECT MIN(date) FROM updates) + AND DATE(validafter) <= (SELECT MAX(date) FROM updates) + AND DATE(validafter) IN (SELECT date FROM updates) + GROUP BY 1, 2 + ) b + NATURAL JOIN relay_statuses_per_day; + + RETURN 1; + END; +$$ LANGUAGE plpgsql; + -- FUNCTION refresh_relay_platforms() CREATE OR REPLACE FUNCTION refresh_relay_platforms() RETURNS INTEGER AS $$ BEGIN @@ -782,6 +818,7 @@ CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$ PERFORM refresh_relay_statuses_per_day(); PERFORM refresh_network_size(); PERFORM refresh_network_size_hour(); + PERFORM refresh_relay_countries(); PERFORM refresh_relay_platforms(); PERFORM refresh_relay_versions(); PERFORM refresh_total_bandwidth(); diff --git a/etc/web.xml b/etc/web.xml index 145472b..a9b36bc 100644 --- a/etc/web.xml +++ b/etc/web.xml @@ -173,6 +173,10 @@ </servlet-mapping> <servlet-mapping> <servlet-name>GraphImage</servlet-name> + <url-pattern>/relaycountries.png</url-pattern> + </servlet-mapping> + <servlet-mapping> + <servlet-name>GraphImage</servlet-name> <url-pattern>/relayflags.png</url-pattern> </servlet-mapping> <servlet-mapping> diff --git a/rserve/csv.R b/rserve/csv.R index e68971c..37a0856 100644 --- a/rserve/csv.R +++ b/rserve/csv.R @@ -15,6 +15,18 @@ export_networksize <- function(path) { write.csv(networksize, path, quote = FALSE, row.names = FALSE) }
+export_relaycountries <- function(path) { + drv <- dbDriver("PostgreSQL") + con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) + q <- paste("SELECT date, country, relays FROM relay_countries", + "ORDER BY date, country") + rs <- dbSendQuery(con, q) + relays <- fetch(rs, n = -1) + dbDisconnect(con) + dbUnloadDriver(drv) + write.csv(relays, path, quote = FALSE, row.names = FALSE) +} + export_versions <- function(path) { drv <- dbDriver("PostgreSQL") con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) diff --git a/rserve/graphs.R b/rserve/graphs.R index a37f9bd..3e56cc8 100644 --- a/rserve/graphs.R +++ b/rserve/graphs.R @@ -43,6 +43,65 @@ plot_networksize <- function(start, end, path, dpi) { ggsave(filename = path, width = 8, height = 5, dpi = as.numeric(dpi)) }
+plot_relaycountries <- function(start, end, country, path, dpi) { + drv <- dbDriver("PostgreSQL") + con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) + if (country == "all") { + q <- paste("SELECT date, avg_running AS relays FROM network_size ", + "WHERE date >= '", start, "' AND date <= '", end, + "' AND date < (SELECT MAX(date) FROM network_size)", sep = "") + } else { + q <- paste("SELECT date, relays FROM relay_countries ", + "WHERE date >= '", start, "' AND date <= '", end, + "' AND date < (SELECT MAX(date) FROM relay_countries) ", + "AND country = '", country, "'", sep = "") + } + rs <- dbSendQuery(con, q) + u <- fetch(rs, n = -1) + if (length(u$date) == 0) + u <- data.frame(date = as.Date(start), relays = 0) + dbDisconnect(con) + dbUnloadDriver(drv) + dates <- seq(from = as.Date(start, "%Y-%m-%d"), + to = as.Date(end, "%Y-%m-%d"), by="1 day") + missing <- setdiff(dates, u$date) + if (length(missing) > 0) + u <- rbind(u, + data.frame(date = as.Date(missing, origin = "1970-01-01"), + relays = NA)) + peoples <- data.frame(country = c("ae", "au", "bh", "br", "ca", "cn", + "cu", "de", "dj", "dz", "eg", "et", "fr", "gb", "il", "ir", "it", + "iq", "jo", "jp", "kp", "kr", "kw", "lb", "ly", "ma", "mm", "om", + "pl", "ps", "qa", "ru", "sa", "sd", "se", "sy", "tn", "tm", "us", + "uz", "vn", "ye"), + people = c("U.A.E.", "Australian", "Bahraini", "Brazilian", + "Canadian", "Chinese", "Cuban", "German", "Djiboutian", "Algerian", + "Egyptian", "Ethiopian", "French", "U.K.", "Israeli", "Iranian", + "Italian", "Iraqi", "Jordanian", "Japanese", "North Korean", + "South Korean", "Kuwaiti", "Lebanese", "Libyan", "Moroccan", + "Burmese", "Omani", "Polish", "Palestinian", "Qatari", "Russian", + "Saudi", "Sudanese", "Swedish", "Syrian", "Tunisian", "Turkmen", + "U.S.", "Uzbek", "Vietnamese", "Yemeni"), + stringsAsFactors = FALSE) + title <- ifelse(country == "all", + "Number of relays in all countries\n", + paste("Number of", peoples[peoples$country == country, "people"], + "relays\n")) + formatter <- function(x, ...) { format(x, scientific = FALSE, ...) } + ggplot(u, aes(x = as.Date(date, "%Y-%m-%d"), y = relays)) + + geom_line(size = 1) + + scale_x_date(name = paste("\nThe Tor Project - ", + "https://metrics.torproject.org/", sep = ""), format = + c("%d-%b", "%d-%b", "%b-%Y", "%b-%Y", "%Y", "%Y")[ + cut(as.numeric(max(as.Date(u$date, "%Y-%m-%d")) - + min(as.Date(u$date, "%Y-%m-%d"))), + c(0, 10, 56, 365, 730, 5000, Inf), labels=FALSE)]) + + scale_y_continuous(name = "", limits = c(0, max(u$relays, + na.rm = TRUE)), formatter = formatter) + + opts(title = title) + ggsave(filename = path, width = 8, height = 5, dpi = as.numeric(dpi)) +} + plot_versions <- function(start, end, path, dpi) { drv <- dbDriver("PostgreSQL") con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) diff --git a/src/org/torproject/ernie/web/CsvServlet.java b/src/org/torproject/ernie/web/CsvServlet.java index c68a2d0..0997f31 100644 --- a/src/org/torproject/ernie/web/CsvServlet.java +++ b/src/org/torproject/ernie/web/CsvServlet.java @@ -41,6 +41,7 @@ public class CsvServlet extends HttpServlet { this.availableCsvFiles.add("networksize"); this.availableCsvFiles.add("platforms"); this.availableCsvFiles.add("relays-monthly-snapshots"); + this.availableCsvFiles.add("relaycountries"); this.availableCsvFiles.add("relayflags"); this.availableCsvFiles.add("relayflags-hour"); this.availableCsvFiles.add("torperf"); diff --git a/src/org/torproject/ernie/web/GraphParameterChecker.java b/src/org/torproject/ernie/web/GraphParameterChecker.java index caeee06..2822cc4 100644 --- a/src/org/torproject/ernie/web/GraphParameterChecker.java +++ b/src/org/torproject/ernie/web/GraphParameterChecker.java @@ -40,6 +40,8 @@ public class GraphParameterChecker {
this.availableGraphs = new HashMap<String, String>(); this.availableGraphs.put("networksize", "start,end,filename,dpi"); + this.availableGraphs.put("relaycountries", + "start,end,country,filename,dpi"); this.availableGraphs.put("relayflags", "start,end,flag,granularity," + "filename,dpi"); this.availableGraphs.put("versions", "start,end,filename,dpi"); diff --git a/web/WEB-INF/network.jsp b/web/WEB-INF/network.jsp index 2f457d9..84d5f1d 100644 --- a/web/WEB-INF/network.jsp +++ b/web/WEB-INF/network.jsp @@ -44,6 +44,83 @@ bridges in the network.</p> <p><a href="csv/networksize.csv">CSV</a> file containing all data.</p> <br>
+<h3>Relays by country</h3> +<br> +<p>The following graph shows the average daily number of relays by +country.</p> +<a name="relaycountries"></a> +<img src="relaycountries.png${relaycountries_url}" + width="576" height="360" alt="Relay countries graph"> +<form action="network.html#relaycountries"> + <div class="formrow"> + <input type="hidden" name="graph" value="relaycountries"> + <p> + <label>Start date (yyyy-mm-dd):</label> + <input type="text" name="start" size="10" + value="${relaycountries_start[0]}"> + <label>End date (yyyy-mm-dd):</label> + <input type="text" name="end" size="10" + value="${relaycountries_end[0]}"> + </p><p> + Source: <select name="country"> + <option value="all"<c:if test="${relaycountries_country[0] eq 'all'}"> selected</c:if>>All relays</option> + <option value="dz"<c:if test="${relaycountries_country[0] eq 'dz'}"> selected</c:if>>Algeria</option> + <option value="au"<c:if test="${relaycountries_country[0] eq 'au'}"> selected</c:if>>Australia</option> + <option value="bh"<c:if test="${relaycountries_country[0] eq 'bh'}"> selected</c:if>>Bahrain</option> + <option value="br"<c:if test="${relaycountries_country[0] eq 'br'}"> selected</c:if>>Brazil</option> + <option value="mm"<c:if test="${relaycountries_country[0] eq 'mm'}"> selected</c:if>>Burma</option> + <option value="ca"<c:if test="${relaycountries_country[0] eq 'ca'}"> selected</c:if>>Canada</option> + <option value="cn"<c:if test="${relaycountries_country[0] eq 'cn'}"> selected</c:if>>China</option> + <option value="cu"<c:if test="${relaycountries_country[0] eq 'cu'}"> selected</c:if>>Cuba</option> + <option value="dj"<c:if test="${relaycountries_country[0] eq 'dj'}"> selected</c:if>>Djibouti</option> + <option value="eg"<c:if test="${relaycountries_country[0] eq 'eg'}"> selected</c:if>>Egypt</option> + <option value="et"<c:if test="${relaycountries_country[0] eq 'et'}"> selected</c:if>>Ethiopia</option> + <option value="fr"<c:if test="${relaycountries_country[0] eq 'fr'}"> selected</c:if>>France</option> + <option value="de"<c:if test="${relaycountries_country[0] eq 'de'}"> selected</c:if>>Germany</option> + <option value="ir"<c:if test="${relaycountries_country[0] eq 'ir'}"> selected</c:if>>Iran</option> + <option value="iq"<c:if test="${relaycountries_country[0] eq 'iq'}"> selected</c:if>>Iraq</option> + <option value="il"<c:if test="${relaycountries_country[0] eq 'il'}"> selected</c:if>>Israel</option> + <option value="it"<c:if test="${relaycountries_country[0] eq 'it'}"> selected</c:if>>Italy</option> + <option value="jp"<c:if test="${relaycountries_country[0] eq 'jp'}"> selected</c:if>>Japan</option> + <option value="jo"<c:if test="${relaycountries_country[0] eq 'jo'}"> selected</c:if>>Jordan</option> + <option value="kw"<c:if test="${relaycountries_country[0] eq 'kw'}"> selected</c:if>>Kuwait</option> + <option value="lb"<c:if test="${relaycountries_country[0] eq 'lb'}"> selected</c:if>>Lebanon</option> + <option value="ly"<c:if test="${relaycountries_country[0] eq 'ly'}"> selected</c:if>>Libya</option> + <option value="ma"<c:if test="${relaycountries_country[0] eq 'ma'}"> selected</c:if>>Morocco</option> + <option value="kp"<c:if test="${relaycountries_country[0] eq 'kp'}"> selected</c:if>>North Korea</option> + <option value="om"<c:if test="${relaycountries_country[0] eq 'om'}"> selected</c:if>>Oman</option> + <option value="ps"<c:if test="${relaycountries_country[0] eq 'ps'}"> selected</c:if>>Palestinian territories</option> + <option value="pl"<c:if test="${relaycountries_country[0] eq 'pl'}"> selected</c:if>>Poland</option> + <option value="qa"<c:if test="${relaycountries_country[0] eq 'qa'}"> selected</c:if>>Qatar</option> + <option value="ru"<c:if test="${relaycountries_country[0] eq 'ru'}"> selected</c:if>>Russia</option> + <option value="sa"<c:if test="${relaycountries_country[0] eq 'sa'}"> selected</c:if>>Saudi Arabia</option> + <option value="kr"<c:if test="${relaycountries_country[0] eq 'kr'}"> selected</c:if>>South Korea</option> + <option value="sd"<c:if test="${relaycountries_country[0] eq 'sd'}"> selected</c:if>>Sudan</option> + <option value="se"<c:if test="${relaycountries_country[0] eq 'se'}"> selected</c:if>>Sweden</option> + <option value="sy"<c:if test="${relaycountries_country[0] eq 'sy'}"> selected</c:if>>Syria</option> + <option value="tn"<c:if test="${relaycountries_country[0] eq 'tn'}"> selected</c:if>>Tunisia</option> + <option value="tm"<c:if test="${relaycountries_country[0] eq 'tm'}"> selected</c:if>>Turkmenistan</option> + <option value="ae"<c:if test="${relaycountries_country[0] eq 'ae'}"> selected</c:if>>U.A.E.</option> + <option value="gb"<c:if test="${relaycountries_country[0] eq 'gb'}"> selected</c:if>>U.K.</option> + <option value="us"<c:if test="${relaycountries_country[0] eq 'us'}"> selected</c:if>>U.S.A.</option> + <option value="uz"<c:if test="${relaycountries_country[0] eq 'uz'}"> selected</c:if>>Uzbekistan</option> + <option value="vn"<c:if test="${relaycountries_country[0] eq 'vn'}"> selected</c:if>>Vietnam</option> + <option value="ye"<c:if test="${relaycountries_country[0] eq 'ye'}"> selected</c:if>>Yemen</option> + </select> + </p><p> + Resolution: <select name="dpi"> + <option value="72"<c:if test="${relaycountries_dpi[0] eq '72'}"> selected</c:if>>Screen - 576x360</option> + <option value="150"<c:if test="${relaycountries_dpi[0] eq '150'}"> selected</c:if>>Print low - 1200x750</option> + <option value="300"<c:if test="${relaycountries_dpi[0] eq '300'}"> selected</c:if>>Print high - 2400x1500</option> + </select> + </p><p> + <input class="submit" type="submit" value="Update graph"> + </p> + </div> +</form> +<p><a href="csv/relaycountries.csv">CSV</a> file containing all data.</p> +<br> + <h3>Relays with Exit, Fast, Guard, and Stable flags</h3> <br> <p>The directory authorities assign certain flags to relays that clients