[tor-commits] [metrics-tasks/master] Add detect-censorship.R script for #2718.

karsten at torproject.org karsten at torproject.org
Sat Mar 12 07:27:53 UTC 2011


commit 667d9a51b739f76808e5d84abca72be4b7f84dc6
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Sat Mar 12 08:26:55 2011 +0100

    Add detect-censorship.R script for #2718.
---
 task-2718/.gitignore          |    3 ++
 task-2718/README              |    5 ++++
 task-2718/detect-censorship.R |   52 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 0 deletions(-)

diff --git a/task-2718/.gitignore b/task-2718/.gitignore
new file mode 100644
index 0000000..673fa5f
--- /dev/null
+++ b/task-2718/.gitignore
@@ -0,0 +1,3 @@
+direct-users.csv
+*.pdf
+
diff --git a/task-2718/README b/task-2718/README
new file mode 100644
index 0000000..807142a
--- /dev/null
+++ b/task-2718/README
@@ -0,0 +1,5 @@
+Here's how you run the censorship detector prototype:
+
+  $ wget https://metrics.torproject.org/csv/direct-users.csv
+  $ R --slave -f detect-censorship.R
+
diff --git a/task-2718/detect-censorship.R b/task-2718/detect-censorship.R
new file mode 100644
index 0000000..f27e863
--- /dev/null
+++ b/task-2718/detect-censorship.R
@@ -0,0 +1,52 @@
+# Tor Censorship Detector
+# Usage: R --slave -f detect-censorship.R
+
+# Read CSV file containing daily user number estimates.
+direct <- read.csv("direct-users.csv")
+
+# Start plotting everything to a single PDF (with multiple pages).
+pdf("detect-censorship.pdf")
+
+# Convert the column containing daily Iranian users to a time series
+# object, starting on the 263rd day of 2009 with a frequency of 365 days.
+# We're probably off by a day or two, but this should be fine for now.
+all <- ts(direct$ir, start = c(2009, 263), frequency = 365)
+
+# Uncomment to print the time series values.
+#print(all)
+
+# Let's try our approach for the last 365 days to see if we detect any
+# blocking in that time period.  In the final version of this script, we'd
+# only have a single run with i = 1.
+for (i in 365:1) {
+  idx <- length(direct$date) - i
+
+  # Convert the daily Iranian users until i days in the past to a time
+  # series object.
+  x <- ts(direct$ir[1:idx], start = c(2009, 263), frequency = 365)
+
+  # Apply an ARIMA(1, 0, 1) model to the time series.
+  x.fit <- arima(x, order = c(1, 0, 1))
+
+  # Predict 10 days ahead.
+  x.fore <- predict(x.fit, n.ahead = 10)
+
+  # Calculate a lower bound.  Here we use the predicted value minus three
+  # standard errors.
+  L <- x.fore$pred - 3 * x.fore$se
+
+  # If the observed daily user number is lower than our predicted lower
+  # bound, plot the data and lower bound.  isTRUE() skips days where the
+  # observation or the bound is NA instead of aborting with an error.
+  if (isTRUE(direct$ir[idx + 1] < L[1])) {
+    # Plot the full time series.
+    ts.plot(all)
+
+    # Add a line for the ten lower-bound values.
+    lines(L, col = "red", lwd = 2)
+  }
+}
+
+# Close the PDF device.
+dev.off()
+





More information about the tor-commits mailing list