[tor-commits] [metrics-tasks/master] Add code to extract and transform dirreqs from the metrics-web database.
karsten at torproject.org
karsten at torproject.org
Fri Mar 25 12:45:58 UTC 2011
commit 34ff5894093706ceee424968c1ed18cd729ed7d9
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Fri Mar 25 13:45:27 2011 +0100
Add code to extract and transform dirreqs from the metrics-web database.
---
task-2718/.gitignore | 2 +-
task-2718/README | 22 ++++++++++++++++++++++
task-2718/convert-dirreqs-sql.R | 8 ++++++++
3 files changed, 31 insertions(+), 1 deletions(-)
diff --git a/task-2718/.gitignore b/task-2718/.gitignore
index 673fa5f..917bb1b 100644
--- a/task-2718/.gitignore
+++ b/task-2718/.gitignore
@@ -1,3 +1,3 @@
-direct-users.csv
+*.csv
*.pdf
diff --git a/task-2718/README b/task-2718/README
index 807142a..8c60847 100644
--- a/task-2718/README
+++ b/task-2718/README
@@ -3,3 +3,25 @@ Here's how you run the censorship detector prototype:
$ wget https://metrics.torproject.org/csv/direct-users.csv
$ R --slave -f detect-censorship.R
+-------------------------------------------------------------------------
+
+Extracting raw directory requests from the metrics-web database:
+
+- Export dirreq_stats table from the metrics-web database via psql:
+
+ # \f ','
+ # \a
+ # \t
+ # \o dirreqs-sql.csv
+ # SELECT * FROM dirreq_stats ORDER BY statsend;
+ # \o
+ # \t
+ # \a
+
+- Transform the huge (!) CSV file (104M) from long to wide format. Note
+ that this takes a while:
+
+ $ R --slave -f convert-dirreqs-sql.R
+
+- The result is in dirreqs.csv (8.8M).
+
diff --git a/task-2718/convert-dirreqs-sql.R b/task-2718/convert-dirreqs-sql.R
new file mode 100644
index 0000000..e330307
--- /dev/null
+++ b/task-2718/convert-dirreqs-sql.R
@@ -0,0 +1,8 @@
+library(reshape)  # cast() comes from reshape; load it directly, not via ggplot2
+data <- read.csv("dirreqs-sql.csv", header = FALSE)  # psql export, no header
+data <- data.frame(fingerprint = data$V1, statsend = data$V2,
+  seconds = data$V3, country = data$V4, requests = data$V5)
+data <- cast(data, fingerprint + statsend + seconds ~ country,
+  value = "requests")  # long -> wide: one requests column per country
+write.csv(data, file = "dirreqs.csv", quote = FALSE, row.names = FALSE)
+
More information about the tor-commits
mailing list