commit 4426cf9cfa1d8965057c3fe34184c5ce459fa390 Author: Karsten Loesing karsten.loesing@gmx.net Date: Tue Aug 9 15:52:38 2011 +0200
Remove outdated censorship detector code (#2718). --- task-2718/README | 27 -------------------- task-2718/convert-dirreqs-sql.R | 8 ------ task-2718/detect-censorship.R | 52 --------------------------------------- 3 files changed, 0 insertions(+), 87 deletions(-)
diff --git a/task-2718/README b/task-2718/README deleted file mode 100644 index 8c60847..0000000 --- a/task-2718/README +++ /dev/null @@ -1,27 +0,0 @@ -Here's how you run the censorship detector prototype: - - $ wget https://metrics.torproject.org/csv/direct-users.csv - $ R --slave -f detect-censorship.R - -------------------------------------------------------------------------- - -Extracting raw directory requests from the metrics-web database: - -- Export dirreq_stats table from the metrics-web database via psql: - - # \f ',' - # \a - # \t - # \o dirreqs-sql.csv - # SELECT * FROM dirreq_stats ORDER BY statsend; - # \o - # \t - # \a - -- Transform the huge (!) CSV file (104M) from long to wide format. Note - that this takes a while: - - $ R --slave -f convert-dirreqs-sql.R - -- The result is in dirreqs.csv (8.8M). - diff --git a/task-2718/convert-dirreqs-sql.R b/task-2718/convert-dirreqs-sql.R deleted file mode 100644 index e330307..0000000 --- a/task-2718/convert-dirreqs-sql.R +++ /dev/null @@ -1,8 +0,0 @@ -library(ggplot2) -data <- read.csv("dirreqs-sql.csv", header = FALSE) -data <- data.frame(fingerprint = data$V1, statsend = data$V2, - seconds = data$V3, country = data$V4, requests = data$V5) -data <- cast(data, fingerprint + statsend + seconds ~ country, - value = "requests") -write.csv(data, file = "dirreqs.csv", quote = FALSE, row.names = FALSE) - diff --git a/task-2718/detect-censorship.R b/task-2718/detect-censorship.R deleted file mode 100644 index f27e863..0000000 --- a/task-2718/detect-censorship.R +++ /dev/null @@ -1,52 +0,0 @@ -# Tor Censorship Detector -# Usage: R --slave < detect-censorship.R - -# Read CSV file containing daily user number estimates. -direct <- read.csv("direct-users.csv") - -# Start plotting everything to a single PDF (with multiple pages). -pdf("detect-censorship.pdf") - -# Convert the column containing daily Iranian users to a time series -# object, starting on the 263rd day of 2009 with a frequency of 365 days. 
-# We're probably off by a day or two, but this should be fine for now. -all <- ts(direct$ir, start = c(2009, 263), frequency = 365) - -# Uncomment to print the time series values. -#print(all) - -# Let's try our approach for the last 365 days to see if we detect any -# blocking in that time period. In the final version of this script, we'd -# only have a single run with i = 1. -for (i in 365:1) { - idx <- length(direct$date) - i - - # Convert the daily Iranian users until i days in the past to a time - # series object. - x <- ts(direct$ir[1:idx], start = c(2009, 263), frequency = 365) - - # Apply an ARIMA(1, 0, 1) model to the time series. - x.fit = arima(x, order = c(1, 0, 1)) - - # Predict 10 dates ahead. - x.fore=predict(x.fit, n.ahead=10) - - # Calculate a lower bound. Here we use the predicted value minus three - # standard errors. - L = x.fore$pred - 3*x.fore$se - - # If the observed daily user number is lower than our predicted lower - # bound, plot the data and lower bound. - if (direct$ir[idx + 1] < L[1]) { - - # Plot the full time series. - ts.plot(all) - - # Add a line for the ten predicted values. - lines(L, col = "red", lwd = 2) - } -} - -# Close the PDF device. -dev.off() -