commit 9fb8d3ff54dcd461a579a39c3487ee5a4a542bf4 Author: Karsten Loesing karsten.loesing@gmx.net Date: Thu Dec 5 08:50:19 2013 +0100
Remove more of the old user stats code. --- db/tordir.sql | 9 - etc/web.xml | 6 - rserve/csv.R | 90 --- rserve/graphs.R | 118 ---- rserve/tables.R | 87 --- src/org/torproject/ernie/cron/Main.java | 23 - .../ernie/cron/users/BridgeStatsFileHandler.java | 718 -------------------- .../ernie/web/graphs/RObjectGenerator.java | 12 - 8 files changed, 1063 deletions(-)
diff --git a/db/tordir.sql b/db/tordir.sql index e62732d..6b31aee 100644 --- a/db/tordir.sql +++ b/db/tordir.sql @@ -904,15 +904,6 @@ CREATE TABLE dirreq_stats ( PRIMARY KEY (source, statsend, seconds, country) );
--- TABLE bridge_stats --- Contains daily bridge users by country. -CREATE TABLE bridge_stats ( - "date" DATE NOT NULL, - country CHARACTER(2) NOT NULL, - users INTEGER NOT NULL, - CONSTRAINT bridge_stats_pkey PRIMARY KEY ("date", country) -); - -- TABLE torperf_stats -- Quantiles and medians of daily torperf results. CREATE TABLE torperf_stats ( diff --git a/etc/web.xml b/etc/web.xml index 9b4f23a..3f49001 100644 --- a/etc/web.xml +++ b/etc/web.xml @@ -187,12 +187,6 @@ <url-pattern>/dirbytes.png</url-pattern> <url-pattern>/dirbytes.pdf</url-pattern> <url-pattern>/dirbytes.svg</url-pattern> - <url-pattern>/direct-users.png</url-pattern> - <url-pattern>/direct-users.pdf</url-pattern> - <url-pattern>/direct-users.svg</url-pattern> - <url-pattern>/bridge-users.png</url-pattern> - <url-pattern>/bridge-users.pdf</url-pattern> - <url-pattern>/bridge-users.svg</url-pattern> <url-pattern>/torperf.png</url-pattern> <url-pattern>/torperf.pdf</url-pattern> <url-pattern>/torperf.svg</url-pattern> diff --git a/rserve/csv.R b/rserve/csv.R index e93b90f..8150b17 100644 --- a/rserve/csv.R +++ b/rserve/csv.R @@ -138,39 +138,6 @@ export_relayflags <- function(path) { write.csv(relayflags, path, quote = FALSE, row.names = FALSE) }
-export_direct_users <- function(path) { - drv <- dbDriver("PostgreSQL") - con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) - q <- paste("SELECT date, country, r, bwp, brn, bwn, brp, bwr, brr", - "FROM user_stats WHERE date < current_date - 3", - "ORDER BY date, country") - rs <- dbSendQuery(con, q) - u <- fetch(rs, n = -1) - dbDisconnect(con) - dbUnloadDriver(drv) - directusers <- data.frame(date = u$date, country = u$country, - directusers = floor(u$r * (u$bwp * u$brn / u$bwn - u$brp) / - (u$bwr * u$brn / u$bwn - u$brr) / 10)) - directusers <- cast(directusers, date ~ country, value = "directusers") - names(directusers)[names(directusers) == "zy"] <- "all" - write.csv(directusers, path, quote = FALSE, row.names = FALSE) -} - -export_bridge_users <- function(path) { - drv <- dbDriver("PostgreSQL") - con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) - q <- paste("SELECT date, country, users AS bridgeusers", - "FROM bridge_stats WHERE date < current_date - 3", - "ORDER BY date, country") - rs <- dbSendQuery(con, q) - bridgeusers <- fetch(rs, n = -1) - dbDisconnect(con) - dbUnloadDriver(drv) - bridgeusers <- cast(bridgeusers, date ~ country, value = "bridgeusers") - names(bridgeusers)[names(bridgeusers) == "zy"] <- "all" - write.csv(bridgeusers, path, quote = FALSE, row.names = FALSE) -} - export_torperf <- function(path) { drv <- dbDriver("PostgreSQL") con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) @@ -196,43 +163,6 @@ export_torperf_failures <- function(path) { write.csv(torperf, path, quote = FALSE, row.names = FALSE) }
-help_export_monthly_users <- function(path, aggr_fun) { - drv <- dbDriver("PostgreSQL") - con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) - q <- paste("SELECT date, country, r, bwp, brn, bwn, brp, bwr, brr", - "FROM user_stats WHERE date < current_date - 3", - "ORDER BY date, country") - rs <- dbSendQuery(con, q) - u <- fetch(rs, n = -1) - direct <- data.frame(date = u$date, country = u$country, - users = u$r * (u$bwp * u$brn / u$bwn - u$brp) / - (u$bwr * u$brn / u$bwn - u$brr) / 10) - q <- paste("SELECT date, country, FLOOR(users) AS users", - "FROM bridge_stats WHERE date < current_date - 3", - "ORDER BY date, country") - rs <- dbSendQuery(con, q) - bridge <- fetch(rs, n = -1) - dbDisconnect(con) - dbUnloadDriver(drv) - users <- rbind(bridge, direct) - users <- aggregate(users$users, - by = list(date = users$date, country = users$country), sum) - users <- aggregate(users$x, by = list(month = substr(users$date, 1, 7), - country = users$country), aggr_fun) - users <- cast(users, country ~ month, value = "x") - users[users$country == "zy", 1] <- "all" - users[, 2:length(users)] <- floor(users[, 2:length(users)]) - write.csv(users, path, quote = FALSE, row.names = FALSE) -} - -export_monthly_users_peak <- function(path) { - help_export_monthly_users(path, max) -} - -export_monthly_users_average <- function(path) { - help_export_monthly_users(path, mean) -} - export_connbidirect <- function(path) { drv <- dbDriver("PostgreSQL") con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) @@ -248,26 +178,6 @@ export_connbidirect <- function(path) { quote = FALSE, row.names = FALSE) }
-export_dirreq_stats <- function(path) { - drv <- dbDriver("PostgreSQL") - con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) - q <- paste("SELECT date, r, bwp, brp, bwn, brn, bwr, brr ", - "FROM user_stats WHERE date < current_date - 3", - "AND country = 'zy' ORDER BY date", sep = "") - rs <- dbSendQuery(con, q) - u <- fetch(rs, n = -1) - dbDisconnect(con) - dbUnloadDriver(drv) - u <- data.frame(date = u$date, - requests = u$r, - fraction = (u$bwr * u$brn / u$bwn - u$brr) / - (u$bwp * u$brn / u$bwn - u$brp), - users = u$r * (u$bwp * u$brn / u$bwn - u$brp) / - (u$bwr * u$brn / u$bwn - u$brr) / 10) - write.csv(format(u, trim = TRUE, scientific = FALSE), path, - quote = FALSE, row.names = FALSE) -} - export_bandwidth_flags <- function(path) { drv <- dbDriver("PostgreSQL") con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) diff --git a/rserve/graphs.R b/rserve/graphs.R index 86eda59..8157d89 100644 --- a/rserve/graphs.R +++ b/rserve/graphs.R @@ -636,124 +636,6 @@ plot_relayflags <- function(start, end, flags, path) { ggsave(filename = path, width = 8, height = 5, dpi = 72) }
-plot_direct_users <- function(start, end, country, events, path) { - end <- min(end, as.character(Sys.Date() - 4)) - drv <- dbDriver("PostgreSQL") - con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) - q <- paste("SELECT date, r, bwp, brn, bwn, brp, bwr, brr, country ", - "FROM user_stats WHERE date >= '", start, "' AND date <= '", end, - "' AND (country = 'zy'", ifelse(country == "all", "", - paste(" OR country = '", country, "'", sep = "")), ")", sep = "") - rs <- dbSendQuery(con, q) - u <- fetch(rs, n = -1) - dbDisconnect(con) - dbUnloadDriver(drv) - a <- u[u$country == "zy", ] - if (country != "all") - u <- u[u$country == country, ] - u <- data.frame(date = u$date, - users = u$r * (u$bwp * u$brn / u$bwn - u$brp) / - (u$bwr * u$brn / u$bwn - u$brr) / 10) - dates <- seq(from = as.Date(start, "%Y-%m-%d"), - to = as.Date(end, "%Y-%m-%d"), by="1 day") - missing <- setdiff(dates, a$date) - if (length(missing) > 0) - u <- rbind(u, - data.frame(date = as.Date(missing, origin = "1970-01-01"), - users = NA)) - missing <- setdiff(dates, u$date) - if (length(missing) > 0) - u <- rbind(u, - data.frame(date = as.Date(missing, origin = "1970-01-01"), - users = 0)) - title <- ifelse(country == "all", - "Directly connecting users from all countries\n", - paste("Directly connecting users from ", countryname(country), "\n", - sep = "")) - formatter <- function(x, ...) { format(x, scientific = FALSE, ...) } - date_breaks <- date_breaks( - as.numeric(max(as.Date(u$date, "%Y-%m-%d")) - - min(as.Date(u$date, "%Y-%m-%d")))) - max_y <- ifelse(length(na.omit(u$users)) == 0, 0, - max(u$users, na.rm = TRUE)) - plot <- ggplot(u, aes(x = as.Date(date, "%Y-%m-%d"), y = users)) - if (length(na.omit(u$users)) > 0 & events != "off" & country != "all") { - r <- read.csv( - "/srv/metrics.torproject.org/web/detector/direct-users-ranges.csv", - stringsAsFactors = FALSE) - r <- r[r$date >= start & r$date <= end & r$country == country, - c("date", "minusers", "maxusers")] - r <- cast(rbind(melt(u, id.vars = "date"), melt(r, id.vars = "date"))) - upturns <- r[r$users > r$maxusers, 1:2] - downturns <- r[r$users < r$minusers, 1:2] - if (events == "on") { - if (length(r$maxusers) > 0) - max_y <- max(max_y, max(r$maxusers, na.rm = TRUE)) - r[r$minusers < 0, "minusers"] <- 0 - plot <- plot + - geom_ribbon(data = r, aes(ymin = minusers, - ymax = maxusers), fill = "gray") - } - if (length(upturns$date) > 0) - plot <- plot + - geom_point(data = upturns, aes(x = date, y = users), size = 5, - colour = "dodgerblue2") - if (length(downturns$date) > 0) - plot <- plot + - geom_point(data = downturns, aes(x = date, y = users), size = 5, - colour = "firebrick2") - } - plot <- plot + - geom_line(size = 1) + - scale_x_date(name = paste("\nThe Tor Project - ", - "https://metrics.torproject.org/", sep = ""), - format = date_breaks$format, major = date_breaks$major, - minor = date_breaks$minor) + - scale_y_continuous(name = "", limits = c(0, max_y), - formatter = formatter) + opts(title = title) - print(plot) - ggsave(filename = path, width = 8, height = 5, dpi = 72) -} - -plot_bridge_users <- function(start, end, country, path) { - end <- min(end, as.character(Sys.Date() - 4)) - drv <- dbDriver("PostgreSQL") - con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) - q <- paste("SELECT date, users FROM bridge_stats ", - "WHERE date >= '", start, "' AND date <= '", end, "' ", - "AND country = '", ifelse(country == "all", "zy", country), "'", - sep = "") - rs <- dbSendQuery(con, q) - bridgeusers <- fetch(rs, n = -1) - dbDisconnect(con) - dbUnloadDriver(drv) - dates <- seq(from = as.Date(start, "%Y-%m-%d"), - to = as.Date(end, "%Y-%m-%d"), by="1 day") - missing <- setdiff(dates, bridgeusers$date) - if (length(missing) > 0) - bridgeusers <- rbind(bridgeusers, - data.frame(date = as.Date(missing, origin = "1970-01-01"), - users = NA)) - title <- ifelse(country == "all", - "Bridge users from all countries\n", - paste("Bridge users from ", countryname(country), "\n", sep = "")) - formatter <- function(x, ...) { format(x, scientific = FALSE, ...) } - date_breaks <- date_breaks( - as.numeric(max(as.Date(bridgeusers$date, "%Y-%m-%d")) - - min(as.Date(bridgeusers$date, "%Y-%m-%d")))) - ggplot(bridgeusers, aes(x = as.Date(date, "%Y-%m-%d"), y = users)) + - geom_line(size = 1) + - scale_x_date(name = paste("\nThe Tor Project - ", - "https://metrics.torproject.org/", sep = ""), - format = date_breaks$format, major = date_breaks$major, - minor = date_breaks$minor) + - scale_y_continuous(name = "", limits = c(0, - ifelse(length(na.omit(bridgeusers$users)) == 0, 0, - max(bridgeusers$users, na.rm = TRUE))), formatter = formatter) + - opts(title = title) - ggsave(filename = path, width = 8, height = 5, dpi = 72) -} - plot_torperf <- function(start, end, source, filesize, path) { end <- min(end, as.character(Sys.Date() - 2)) drv <- dbDriver("PostgreSQL") diff --git a/rserve/tables.R b/rserve/tables.R index 59593d6..24de947 100644 --- a/rserve/tables.R +++ b/rserve/tables.R @@ -2,93 +2,6 @@ countrynames <- function(countries) { sapply(countries, countryname) }
-write_direct_users <- function(start, end, path) { - drv <- dbDriver("PostgreSQL") - con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) - q <- paste("SELECT date, country, r, bwp, brn, bwn, brp, bwr, brr ", - "FROM user_stats WHERE date >= '", start, "' AND date <= '", end, - "' AND date < current_date - 3 ORDER BY date, country", sep = "") - rs <- dbSendQuery(con, q) - u <- fetch(rs, n = -1) - dbDisconnect(con) - dbUnloadDriver(drv) - d <- data.frame(date = u$date, country = u$country, - directusers = floor(u$r * (u$bwp * u$brn / u$bwn - u$brp) / - (u$bwr * u$brn / u$bwn - u$brr) / 10)) - d <- aggregate(d$directusers, by = list(country = d$country), mean) - total <- d[d$country == "zy", "x"] - d <- d[!(d$country %in% c("zy", "??", "a1", "a2", "o1", "ap", "eu")), ] - d <- data.frame(country = d$country, directusers = d$x) - d <- d[order(d$directusers, decreasing = TRUE), ] - d <- d[1:10, ] - d <- data.frame( - cc = as.character(d$country), - country = sub('the ', '', countrynames(as.character(d$country))), - abs = round(d$directusers), - rel = round(100 * d$directusers / total, 2)) - write.csv(d, path, quote = FALSE, row.names = FALSE) -} - -write_censorship_events <- function(start, end, path) { - drv <- dbDriver("PostgreSQL") - con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) - q <- paste("SELECT date, country, r, bwp, brn, bwn, brp, bwr, brr ", - "FROM user_stats WHERE date >= '", start, "' AND date <= '", end, - "' AND date < current_date - 3", sep = "") - rs <- dbSendQuery(con, q) - u <- fetch(rs, n = -1) - dbDisconnect(con) - dbUnloadDriver(drv) - u <- data.frame(date = u$date, country = u$country, - users = u$r * (u$bwp * u$brn / u$bwn - u$brp) / - (u$bwr * u$brn / u$bwn - u$brr) / 10) - r <- read.csv( - "/srv/metrics.torproject.org/web/detector/direct-users-ranges.csv", - stringsAsFactors = FALSE) - r <- r[r$date >= start & r$date <= end, - c("date", "country", "minusers", "maxusers")] - r <- cast(rbind(melt(u, id.vars = c("date", "country")), - melt(r, id.vars = c("date", "country")))) - r <- na.omit(r[r$users < r$minusers | r$users > r$maxusers, ]) - r <- data.frame(date = r$date, country = r$country, - upturn = ifelse(r$users > r$maxusers, 1, 0), - downturn = ifelse(r$users < r$minusers, 1, 0)) - r <- aggregate(r[, c("upturn", "downturn")], - by = list(country = r$country), sum) - r <- r[!(r$country %in% c("zy", "??", "a1", "a2", "o1", "ap", "eu")), ] - r <- r[order(r$downturn, r$upturn, decreasing = TRUE), ] - r <- r[1:10, ] - r <- data.frame(cc = r$country, - country = sub('the ', '', countrynames(as.character(r$country))), - downturns = r$downturn, - upturns = r$upturn) - write.csv(r, path, quote = FALSE, row.names = FALSE) -} - -write_bridge_users <- function(start, end, path) { - drv <- dbDriver("PostgreSQL") - con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db) - q <- paste("SELECT date, country, users AS bridgeusers ", - "FROM bridge_stats WHERE date >= '", start, "' AND date <= '", end, - "' AND date < current_date - 3 ORDER BY date, country", sep = "") - rs <- dbSendQuery(con, q) - d <- fetch(rs, n = -1) - dbDisconnect(con) - dbUnloadDriver(drv) - d <- aggregate(d$bridgeusers, by = list(country = d$country), mean) - total <- d[d$country == "zy", "x"] - d <- d[!(d$country %in% c("zy", "??", "a1", "a2", "o1", "ap", "eu")), ] - d <- data.frame(country = d$country, bridgeusers = d$x) - d <- d[order(d$bridgeusers, decreasing = TRUE), ] - d <- d[1:10, ] - d <- data.frame( - cc = as.character(d$country), - country = sub('the ', '', countrynames(as.character(d$country))), - abs = round(d$bridgeusers), - rel = round(100 * d$bridgeusers / total, 2)) - write.csv(d, path, quote = FALSE, row.names = FALSE) -} - write_userstats <- function(start, end, node, path) { end <- min(end, as.character(Sys.Date())) u <- read.csv(paste("/srv/metrics.torproject.org/task-8462-graphs/", diff --git a/src/org/torproject/ernie/cron/Main.java b/src/org/torproject/ernie/cron/Main.java index c0eb435..5d561a6 100644 --- a/src/org/torproject/ernie/cron/Main.java +++ b/src/org/torproject/ernie/cron/Main.java @@ -8,7 +8,6 @@ import java.util.logging.Logger; import org.torproject.ernie.cron.network.ConsensusStatsFileHandler; import org.torproject.ernie.cron.performance.PerformanceStatsImporter; import org.torproject.ernie.cron.performance.TorperfProcessor; -import org.torproject.ernie.cron.users.BridgeStatsFileHandler;
/** * Coordinate downloading and parsing of descriptors and extraction of @@ -37,16 +36,6 @@ public class Main { // Define stats directory for temporary files File statsDirectory = new File("stats");
- // Prepare bridge stats file handler - BridgeStatsFileHandler bsfh = config.getWriteBridgeStats() ? - new BridgeStatsFileHandler( - config.getRelayDescriptorDatabaseJDBC(), - new File(config.getSanitizedBridgesDirectory()), - statsDirectory, config.getKeepSanitizedBridgesImportHistory(), - new File(config.getDirectoryArchivesDirectory()), - config.getKeepDirectoryArchiveImportHistory()) : - null; - // Import relay descriptors if (config.getImportDirectoryArchives()) { RelayDescriptorDatabaseImporter rddi = @@ -63,9 +52,6 @@ public class Main { if (rddi != null) { rddi.importRelayDescriptors(); } - if (bsfh != null) { - bsfh.importRelayDescriptors(); - } rddi.closeConnection();
// Import conn-bi-direct statistics. @@ -81,15 +67,6 @@ public class Main { psi.closeConnection(); }
- // Import sanitized bridges and write updated stats files to disk - if (bsfh != null) { - if (config.getImportSanitizedBridges()) { - bsfh.importSanitizedBridges(); - } - bsfh.writeFiles(); - bsfh = null; - } - // Prepare consensus stats file handler (used for stats on running // bridges only) ConsensusStatsFileHandler csfh = config.getWriteBridgeStats() ? diff --git a/src/org/torproject/ernie/cron/users/BridgeStatsFileHandler.java b/src/org/torproject/ernie/cron/users/BridgeStatsFileHandler.java deleted file mode 100644 index 14795e1..0000000 --- a/src/org/torproject/ernie/cron/users/BridgeStatsFileHandler.java +++ /dev/null @@ -1,718 +0,0 @@ -/* Copyright 2011, 2012 The Tor Project - * See LICENSE for licensing information */ -package org.torproject.ernie.cron.users; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.codec.digest.DigestUtils; -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorFile; -import org.torproject.descriptor.DescriptorReader; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.ExtraInfoDescriptor; -import org.torproject.descriptor.NetworkStatusEntry; -import org.torproject.descriptor.RelayNetworkStatusConsensus; -import org.torproject.descriptor.ServerDescriptor; - -/** - * Determines estimates of bridge users per country and day from the - * extra-info descriptors that bridges publish. In a first step, the - * number of unique IP addresses that bridges see are normalized to a - * 24-hour period. In the next step, all bridges are excluded that have - * been running as a relay. Finally, observations are summed up and - * written to <code>stats/bridge-stats</code>. - */ -public class BridgeStatsFileHandler { - - /** - * Two-letter country codes of known countries. - */ - private SortedSet<String> countries; - - /** - * Intermediate results file containing bridge user numbers by country - * as seen by single bridges, normalized to 24-hour periods. - */ - private File bridgeStatsRawFile; - - /** - * Temp file for writing intermediate results. - */ - private File bridgeStatsRawTempFile; - - /** - * Bridge user numbers by country as seen by single bridges on a given - * day. Map keys are bridge and date written as "bridge,date", map - * values are lines as read from <code>stats/bridge-stats-raw</code>. - */ - private SortedMap<String, Map<String, String>> bridgeUsersRaw; - - /** - * Helper file containing the hashed relay identities of all known - * relays. These hashes are compared to the bridge identity hashes to - * exclude bridges that have been known as relays from the statistics. - */ - private File hashedRelayIdentitiesFile; - - /** - * Known hashed relay identities used to exclude bridges that have been - * running as relays. - */ - private SortedSet<String> hashedRelays; - - /** - * Helper file containing extra-info descriptors published by 0.2.2.x - * bridges. If these descriptors contain geoip-stats, they are not - * included in the results, because stats are very likely broken. - */ - private File zeroTwoTwoDescriptorsFile; - - /** - * Extra-info descriptors published by 0.2.2.x bridges. If these - * descriptors contain geoip-stats, they are not included in the - * results, because stats are very likely broken. - */ - private SortedSet<String> zeroTwoTwoDescriptors; - - /** - * Final results file containing the number of bridge users per country - * and day. This file is not read in during initialization, but - * overwritten at the end of the execution. - */ - private File bridgeStatsFile; - - /** - * Logger for this class. - */ - private Logger logger; - - /* Database connection string. */ - private String connectionURL = null; - - private SimpleDateFormat dateTimeFormat; - - private File bridgesDir; - - private File statsDirectory; - - private boolean keepBridgeDescriptorImportHistory; - - private File archivesDirectory; - - private boolean keepRelayDescriptorImportHistory; - - /** - * Initializes this class, including reading in intermediate results - * files <code>stats/bridge-stats-raw</code> and - * <code>stats/hashed-relay-identities</code>. - */ - public BridgeStatsFileHandler(String connectionURL, - File bridgesDir, File statsDirectory, - boolean keepBridgeDescriptorImportHistory, File archivesDirectory, - boolean keepRelayDescriptorImportHistory) { - - if (bridgesDir == null || statsDirectory == null || - archivesDirectory == null || statsDirectory == null) { - throw new IllegalArgumentException(); - } - this.bridgesDir = bridgesDir; - this.statsDirectory = statsDirectory; - this.keepBridgeDescriptorImportHistory = - keepBridgeDescriptorImportHistory; - this.archivesDirectory = archivesDirectory; - this.keepRelayDescriptorImportHistory = - keepRelayDescriptorImportHistory; - - /* Initialize set of known countries. */ - this.countries = new TreeSet<String>(); - this.countries.add("zy"); - - /* Initialize local data structures to hold results. */ - this.bridgeUsersRaw = new TreeMap<String, Map<String, String>>(); - this.hashedRelays = new TreeSet<String>(); - this.zeroTwoTwoDescriptors = new TreeSet<String>(); - - /* Initialize file names for intermediate and final results. */ - this.bridgeStatsRawFile = new File("stats/bridge-stats-raw"); - this.bridgeStatsRawTempFile = new File("stats/bridge-stats-raw.tmp"); - this.bridgeStatsFile = new File("stats/bridge-stats"); - this.hashedRelayIdentitiesFile = new File( - "stats/hashed-relay-identities"); - this.zeroTwoTwoDescriptorsFile = new File( - "stats/v022-bridge-descriptors"); - - /* Initialize database connection string. */ - this.connectionURL = connectionURL; - - this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - - /* Initialize logger. */ - this.logger = Logger.getLogger( - BridgeStatsFileHandler.class.getName()); - - /* Read in bridge user numbers by country as seen by single bridges, - * normalized to 24-hour periods. */ - if (this.bridgeStatsRawFile.exists()) { - try { - this.logger.fine("Reading file " - + this.bridgeStatsRawFile.getAbsolutePath() + "..."); - BufferedReader br = new BufferedReader(new FileReader( - this.bridgeStatsRawFile)); - String line = br.readLine(); - if (line != null) { - /* The first line should contain headers that we need to parse - * in order to learn what countries we were interested in when - * writing this file. */ - if (!line.startsWith("bridge,date,time,")) { - this.logger.warning("Incorrect first line '" + line + "' in " - + this.bridgeStatsRawFile.getAbsolutePath() + "! This line " - + "should contain headers! Aborting to read in this " - + "file!"); - } else { - String[] headers = line.split(","); - for (int i = 3; i < headers.length; i++) { - if (!headers[i].equals("all")) { - this.countries.add(headers[i]); - } - } - /* Read in the rest of the file. */ - while ((line = br.readLine()) != null) { - String[] parts = line.split(","); - if (parts.length != headers.length) { - this.logger.warning("Corrupt line '" + line + "' in file " - + this.bridgeStatsRawFile.getAbsolutePath() - + "! Aborting to read this file!"); - break; - } - String hashedBridgeIdentity = parts[0]; - String date = parts[1]; - String time = parts[2]; - SortedMap<String, String> obs = - new TreeMap<String, String>(); - for (int i = 3; i < parts.length; i++) { - if (parts[i].equals("NA")) { - continue; - } - if (headers[i].equals("all")) { - obs.put("zy", parts[i]); - } else { - obs.put(headers[i], parts[i]); - } - } - long dateTimeMillis = dateTimeFormat.parse(date + " " - + time).getTime(); - this.addObs(hashedBridgeIdentity, dateTimeMillis, obs); - } - } - } - br.close(); - this.logger.fine("Finished reading file " - + this.bridgeStatsRawFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to read file " - + this.bridgeStatsRawFile.getAbsolutePath() + "!", e); - } catch (ParseException e) { - this.logger.log(Level.WARNING, "Failed to read file " - + this.bridgeStatsRawFile.getAbsolutePath() + "!", e); - } - } - - /* Read in known hashed relay identities used to exclude bridges that - * have been running as relays. */ - if (this.hashedRelayIdentitiesFile.exists()) { - try { - this.logger.fine("Reading file " - + this.hashedRelayIdentitiesFile.getAbsolutePath() + "..."); - BufferedReader br = new BufferedReader(new FileReader( - this.hashedRelayIdentitiesFile)); - String line = null; - /* Read in all lines from the file and memorize them. */ - while ((line = br.readLine()) != null) { - this.hashedRelays.add(line); - } - br.close(); - this.logger.fine("Finished reading file " - + this.hashedRelayIdentitiesFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to read file " - + this.hashedRelayIdentitiesFile.getAbsolutePath() + "!", e); - } - } - - /* Read in known extra-info descriptors published by 0.2.2.x - * bridges. */ - if (this.zeroTwoTwoDescriptorsFile.exists()) { - try { - this.logger.fine("Reading file " - + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "..."); - BufferedReader br = new BufferedReader(new FileReader( - this.zeroTwoTwoDescriptorsFile)); - String line = null; - /* Read in all lines from the file and memorize them. */ - while ((line = br.readLine()) != null) { - this.zeroTwoTwoDescriptors.add(line); - } - br.close(); - this.logger.fine("Finished reading file " - + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to read file " - + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "!", e); - } - } - } - - /** - * Adds a hashed relay identity string to the list of bridges that we - * are going to ignore in the future. If we counted user numbers from - * bridges that have been running as relays, our numbers would be far - * higher than what we think is correct. - */ - public void addHashedRelay(String hashedRelayIdentity) { - if (!this.hashedRelays.contains(hashedRelayIdentity)) { - this.logger.finer("Adding new hashed relay identity: " - + hashedRelayIdentity); - this.hashedRelays.add(hashedRelayIdentity); - } - } - - /** - * Adds an extra-info descriptor identifier published by an 0.2.2.x - * bridges. If this extra-info descriptor contains geoip-stats, they are - * not included in the results, because stats are very likely broken. - */ - public void addZeroTwoTwoDescriptor(String hashedBridgeIdentity, - long publishedMillis) { - String value = hashedBridgeIdentity.toUpperCase() + "," - + this.dateTimeFormat.format(publishedMillis). - replaceAll(" ", ","); - if (!this.zeroTwoTwoDescriptors.contains(value)) { - this.logger.finer("Adding new bridge 0.2.2.x extra-info " - + "descriptor: " + value); - this.zeroTwoTwoDescriptors.add(value); - } - } - - /** - * Returns whether the given fingerprint is a known hashed relay - * identity. <code>BridgeDescriptorParser</code> uses this information - * to decide whether to continue parsing a bridge extra-descriptor - * descriptor or not. - */ - public boolean isKnownRelay(String hashedBridgeIdentity) { - return this.hashedRelays.contains(hashedBridgeIdentity); - } - - /** - * Adds bridge user numbers by country as seen by a single bridge on a - * given date and time. Bridges can publish statistics on unique IP - * addresses multiple times a day, but we only want to include one - * observation per day. If we already have an observation from the given - * bridge and day, we keep the one with the later publication time and - * discard the other one. - */ - public void addObs(String hashedIdentity, long publishedMillis, - Map<String, String> obs) { - for (String country : obs.keySet()) { - this.countries.add(country); - } - String dateTime = this.dateTimeFormat.format(publishedMillis); - String date = dateTime.split(" ")[0]; - String time = dateTime.split(" ")[1]; - String shortKey = hashedIdentity + "," + date; - String longKey = shortKey + "," + time; - SortedMap<String, Map<String, String>> tailMap = - this.bridgeUsersRaw.tailMap(shortKey); - String nextKey = tailMap.isEmpty() ? null : tailMap.firstKey(); - if (nextKey == null || !nextKey.startsWith(shortKey)) { - this.logger.finer("Adding new bridge user numbers for key " - + longKey); - this.bridgeUsersRaw.put(longKey, obs); - } else if (longKey.compareTo(nextKey) > 0) { - this.logger.finer("Replacing existing bridge user numbers (" + - nextKey + " with new numbers: " + longKey); - this.bridgeUsersRaw.put(longKey, obs); - } else { - this.logger.finer("Not replacing existing bridge user numbers (" + - nextKey + " with new numbers (" + longKey + ")."); - } - } - - public void importSanitizedBridges() { - if (bridgesDir.exists()) { - logger.fine("Importing files in directory " + bridgesDir + "/..."); - DescriptorReader reader = - DescriptorSourceFactory.createDescriptorReader(); - reader.addDirectory(bridgesDir); - if (keepBridgeDescriptorImportHistory) { - reader.setExcludeFiles(new File(statsDirectory, - "bridge-stats-bridge-descriptor-history")); - } - Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - if (descriptorFile.getDescriptors() != null) { - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (descriptor instanceof ServerDescriptor) { - this.addServerDescriptor((ServerDescriptor) descriptor); - } else if (descriptor instanceof ExtraInfoDescriptor) { - this.addExtraInfoDescriptor( - (ExtraInfoDescriptor) descriptor); - } - } - } - } - logger.info("Finished importing bridge descriptors."); - } - } - - private void addServerDescriptor(ServerDescriptor descriptor) { - if (descriptor.getPlatform() != null && - descriptor.getPlatform().startsWith("Tor 0.2.2")) { - this.addZeroTwoTwoDescriptor(descriptor.getFingerprint(), - descriptor.getPublishedMillis()); - } - } - - private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) { - if (!this.isKnownRelay(descriptor.getFingerprint())) { - if (descriptor.getGeoipStartTimeMillis() >= 0 && - descriptor.getGeoipClientOrigins() != null) { - long seconds = (descriptor.getPublishedMillis() - - descriptor.getGeoipStartTimeMillis()) / 1000L; - double allUsers = 0.0D; - Map<String, String> obs = new HashMap<String, String>(); - for (Map.Entry<String, Integer> e : - descriptor.getGeoipClientOrigins().entrySet()) { - String country = e.getKey(); - double users = ((double) e.getValue() - 4) * 86400.0D - / ((double) seconds); - allUsers += users; - obs.put(country, String.format("%.2f", users)); - } - obs.put("zy", String.format("%.2f", allUsers)); - this.addObs(descriptor.getFingerprint(), - descriptor.getPublishedMillis(), obs); - } - if (descriptor.getBridgeStatsEndMillis() >= 0 && - descriptor.getBridgeIps() != null) { - double allUsers = 0.0D; - Map<String, String> obs = new HashMap<String, String>(); - for (Map.Entry<String, Integer> e : - descriptor.getBridgeIps().entrySet()) { - String country = e.getKey(); - double users = (double) e.getValue() - 4; - allUsers += users; - obs.put(country, String.format("%.2f", users)); - } - obs.put("zy", String.format("%.2f", allUsers)); - this.addObs(descriptor.getFingerprint(), - descriptor.getBridgeStatsEndMillis(), obs); - } - } - } - - public void importRelayDescriptors() { - if (archivesDirectory.exists()) { - logger.fine("Importing files in directory " + archivesDirectory - + "/..."); - DescriptorReader reader = - DescriptorSourceFactory.createDescriptorReader(); - reader.addDirectory(archivesDirectory); - if (keepRelayDescriptorImportHistory) { - reader.setExcludeFiles(new File(statsDirectory, - "bridge-stats-relay-descriptor-history")); - } - Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors(); - while (descriptorFiles.hasNext()) { - DescriptorFile descriptorFile = descriptorFiles.next(); - if (descriptorFile.getDescriptors() != null) { - for (Descriptor descriptor : descriptorFile.getDescriptors()) { - if (descriptor instanceof RelayNetworkStatusConsensus) { - this.addRelayNetworkStatusConsensus( - (RelayNetworkStatusConsensus) descriptor); - } - } - } - } - } - - logger.info("Finished importing relay descriptors."); - } - - private void addRelayNetworkStatusConsensus( - RelayNetworkStatusConsensus consensus) { - for (NetworkStatusEntry statusEntry : - consensus.getStatusEntries().values()) { - try { - this.addHashedRelay(DigestUtils.shaHex(Hex.decodeHex( - statusEntry.getFingerprint().toCharArray())).toUpperCase()); - } catch (DecoderException e) { - } - } - } - - /** - * Writes the list of hashed relay identities and bridge user numbers as - * observed by single bridges to disk, aggregates per-day statistics for - * all bridges, and writes those to disk, too. - */ - public void writeFiles() { - - /* Write hashed relay identities to disk. */ - try { - this.logger.fine("Writing file " - + this.hashedRelayIdentitiesFile.getAbsolutePath() + "..."); - this.hashedRelayIdentitiesFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.hashedRelayIdentitiesFile)); - for (String hashedRelay : this.hashedRelays) { - bw.append(hashedRelay + "\n"); - } - bw.close(); - this.logger.fine("Finished writing file " - + this.hashedRelayIdentitiesFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to write " - + this.hashedRelayIdentitiesFile.getAbsolutePath() + "!", e); - } - - /* Write bridge extra-info descriptor identifiers to disk. */ - try { - this.logger.fine("Writing file " - + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "..."); - this.zeroTwoTwoDescriptorsFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.zeroTwoTwoDescriptorsFile)); - for (String descriptorIdentifier : this.zeroTwoTwoDescriptors) { - bw.append(descriptorIdentifier + "\n"); - } - bw.close(); - this.logger.fine("Finished writing file " - + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to write " - + this.zeroTwoTwoDescriptorsFile.getAbsolutePath() + "!", e); - } - - /* Write observations made by single bridges to disk. */ - try { - this.logger.fine("Writing file " - + this.bridgeStatsRawFile.getAbsolutePath() + " (using " - + this.bridgeStatsRawTempFile.getAbsolutePath() + " as temp " - + "file)..."); - this.bridgeStatsRawTempFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.bridgeStatsRawTempFile)); - bw.append("bridge,date,time"); - for (String c : this.countries) { - if (c.equals("zy")) { - bw.append(",all"); - } else { - bw.append("," + c); - } - } - bw.append("\n"); - for (Map.Entry<String, Map<String, String>> e : - this.bridgeUsersRaw.entrySet()) { - String longKey = e.getKey(); - String[] parts = longKey.split(","); - String hashedBridgeIdentity = parts[0]; - if (!this.hashedRelays.contains(hashedBridgeIdentity) && - !this.zeroTwoTwoDescriptors.contains(longKey)) { - Map<String, String> obs = e.getValue(); - StringBuilder sb = new StringBuilder(longKey); - for (String c : this.countries) { - sb.append("," + (obs.containsKey(c) && - !obs.get(c).startsWith("-") ? obs.get(c) : "NA")); - } - String line = sb.toString(); - bw.append(line + "\n"); - } - } - bw.close(); - if (!this.bridgeStatsRawTempFile.renameTo( - this.bridgeStatsRawFile)) { - this.logger.fine("Failed to rename " - + this.bridgeStatsRawTempFile.getAbsolutePath() + " to " - + this.bridgeStatsRawFile.getAbsolutePath() + "."); - } - this.logger.fine("Finished writing file " - + this.bridgeStatsRawFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to write " - + this.bridgeStatsRawFile.getAbsolutePath() + " (using " - + this.bridgeStatsRawTempFile.getAbsolutePath() + " as temp " - + "file)!", e); - } - - /* Aggregate per-day statistics. */ - SortedMap<String, double[]> bridgeUsersPerDay = - new TreeMap<String, double[]>(); - for (Map.Entry<String, Map<String, String>> e : - this.bridgeUsersRaw.entrySet()) { - String longKey = e.getKey(); - String[] parts = longKey.split(","); - String hashedBridgeIdentity = parts[0]; - String date = parts[1]; - if (!this.hashedRelays.contains(hashedBridgeIdentity) && - !this.zeroTwoTwoDescriptors.contains(longKey)) { - double[] users = bridgeUsersPerDay.get(date); - Map<String, String> obs = e.getValue(); - if (users == null) { - users = new double[this.countries.size()]; - bridgeUsersPerDay.put(date, users); - } - int i = 0; - for (String c : this.countries) { - if (obs.containsKey(c) && !obs.get(c).startsWith("-")) { - users[i] += Double.parseDouble(obs.get(c)); - } - i++; - } - } - } - - /* Write final results of bridge users per day and country to - * <code>stats/bridge-stats</code>. */ - try { - this.logger.fine("Writing file " - + this.bridgeStatsRawFile.getAbsolutePath() + "..."); - this.bridgeStatsFile.getParentFile().mkdirs(); - BufferedWriter bw = new BufferedWriter(new FileWriter( - this.bridgeStatsFile)); - bw.append("date"); - for (String c : this.countries) { - if (c.equals("zy")) { - bw.append(",all"); - } else { - bw.append("," + c); - } - } - bw.append("\n"); - - /* Write current observation. */ - for (Map.Entry<String, double[]> e : bridgeUsersPerDay.entrySet()) { - String date = e.getKey(); - bw.append(date); - double[] users = e.getValue(); - for (int i = 0; i < users.length; i++) { - bw.append("," + String.format("%.2f", users[i])); - } - bw.append("\n"); - } - bw.close(); - this.logger.fine("Finished writing file " - + this.bridgeStatsFile.getAbsolutePath() + "."); - } catch (IOException e) { - this.logger.log(Level.WARNING, "Failed to write " - + this.bridgeStatsFile.getAbsolutePath() + "!", e); - } - - /* Add daily bridge users to database. */ - if (connectionURL != null) { - try { - List<String> countryList = new ArrayList<String>(); - for (String c : this.countries) { - countryList.add(c); - } - Map<String, Integer> insertRows = new HashMap<String, Integer>(), - updateRows = new HashMap<String, Integer>(); - for (Map.Entry<String, double[]> e : - bridgeUsersPerDay.entrySet()) { - String date = e.getKey(); - double[] users = e.getValue(); - for (int i = 0; i < users.length; i++) { - int usersInt = (int) users[i]; - if (usersInt < 1) { - continue; - } - String country = countryList.get(i); - String key = date + "," + country; - insertRows.put(key, usersInt); - } - } - Connection conn = DriverManager.getConnection(connectionURL); - conn.setAutoCommit(false); - Statement statement = conn.createStatement(); - ResultSet rs = statement.executeQuery( - "SELECT date, country, users FROM bridge_stats"); - while (rs.next()) { - String date = rs.getDate(1).toString(); - String country = rs.getString(2); - String key = date + "," + country; - if (insertRows.containsKey(key)) { - int insertRow = insertRows.remove(key); - int oldUsers = rs.getInt(3); - if (oldUsers != insertRow) { - updateRows.put(key, insertRow); - } - } - } - rs.close(); - PreparedStatement psU = conn.prepareStatement( - "UPDATE bridge_stats SET users = ? " - + "WHERE date = ? AND country = ?"); - for (Map.Entry<String, Integer> e : updateRows.entrySet()) { - String[] keyParts = e.getKey().split(","); - java.sql.Date date = java.sql.Date.valueOf(keyParts[0]); - String country = keyParts[1]; - int users = e.getValue(); - psU.clearParameters(); - psU.setInt(1, users); - psU.setDate(2, date); - psU.setString(3, country); - psU.executeUpdate(); - } - PreparedStatement psI = conn.prepareStatement( - "INSERT INTO bridge_stats (users, date, country) " - + "VALUES (?, ?, ?)"); - for (Map.Entry<String, Integer> e : insertRows.entrySet()) { - String[] keyParts = e.getKey().split(","); - java.sql.Date date = java.sql.Date.valueOf(keyParts[0]); - String country = keyParts[1]; - int users = e.getValue(); - psI.clearParameters(); - psI.setInt(1, users); - psI.setDate(2, date); - psI.setString(3, country); - psI.executeUpdate(); - } - conn.commit(); - conn.close(); - } catch (SQLException e) { - logger.log(Level.WARNING, "Failed to add daily bridge users to " - + "database.", e); - } - } - } -} - diff --git a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java index 2fa0cc6..927b5c4 100644 --- a/src/org/torproject/ernie/web/graphs/RObjectGenerator.java +++ b/src/org/torproject/ernie/web/graphs/RObjectGenerator.java @@ -58,15 +58,10 @@ public class RObjectGenerator implements ServletContextListener { this.availableCsvFiles = new TreeSet<String>(); this.availableCsvFiles.add("bandwidth"); this.availableCsvFiles.add("bandwidth-flags"); - this.availableCsvFiles.add("bridge-users"); this.availableCsvFiles.add("bwhist-flags"); this.availableCsvFiles.add("connbidirect"); this.availableCsvFiles.add("cloudbridges"); - this.availableCsvFiles.add("direct-users"); - this.availableCsvFiles.add("dirreq-stats"); this.availableCsvFiles.add("dirbytes"); - this.availableCsvFiles.add("monthly-users-average"); - this.availableCsvFiles.add("monthly-users-peak"); this.availableCsvFiles.add("monthly-userstats-average"); this.availableCsvFiles.add("monthly-userstats-peak"); this.availableCsvFiles.add("networksize"); @@ -80,9 +75,6 @@ public class RObjectGenerator implements ServletContextListener { this.availableCsvFiles.add("versions");
this.availableTables = new HashMap<String, String>(); - this.availableTables.put("direct-users", "start,end,filename"); - this.availableTables.put("censorship-events", "start,end,filename"); - this.availableTables.put("bridge-users", "start,end,filename"); this.availableTables.put("userstats-relay", "start,end,filename"); this.availableTables.put("userstats-bridge", "start,end,filename"); this.availableTables.put("userstats-censorship-events", @@ -102,10 +94,6 @@ public class RObjectGenerator implements ServletContextListener { this.availableGraphs.put("bandwidth-flags", "start,end,filename"); this.availableGraphs.put("bwhist-flags", "start,end,filename"); this.availableGraphs.put("dirbytes", "start,end,filename"); - this.availableGraphs.put("direct-users", - "start,end,country,events,filename"); - this.availableGraphs.put("bridge-users", - "start,end,country,filename"); this.availableGraphs.put("torperf", "start,end,source,filesize,filename"); this.availableGraphs.put("torperf-failures",
tor-commits@lists.torproject.org