commit 34ff5894093706ceee424968c1ed18cd729ed7d9
Author: Karsten Loesing karsten.loesing@gmx.net
Date:   Fri Mar 25 13:45:27 2011 +0100

    Add code to extract and transform dirreqs from the metrics-web database.
---
 task-2718/.gitignore            |    2 +-
 task-2718/README                |   22 ++++++++++++++++++++++
 task-2718/convert-dirreqs-sql.R |    8 ++++++++
 3 files changed, 31 insertions(+), 1 deletions(-)

diff --git a/task-2718/.gitignore b/task-2718/.gitignore
index 673fa5f..917bb1b 100644
--- a/task-2718/.gitignore
+++ b/task-2718/.gitignore
@@ -1,3 +1,3 @@
-direct-users.csv
+*.csv
 *.pdf
diff --git a/task-2718/README b/task-2718/README
index 807142a..8c60847 100644
--- a/task-2718/README
+++ b/task-2718/README
@@ -3,3 +3,25 @@ Here's how you run the censorship detector prototype:
   $ wget https://metrics.torproject.org/csv/direct-users.csv
   $ R --slave -f detect-censorship.R
+-------------------------------------------------------------------------
+
+Extracting raw directory requests from the metrics-web database:
+
+- Export dirreq_stats table from the metrics-web database via psql:
+
+  # \f ','
+  # \a
+  # \t
+  # \o dirreqs-sql.csv
+  # SELECT * FROM dirreq_stats ORDER BY statsend;
+  # \o
+  # \t
+  # \a
+
+- Transform the huge (!) CSV file (104M) from long to wide format. Note
+  that this takes a while:
+
+  $ R --slave -f convert-dirreqs-sql.R
+
+- The result is in dirreqs.csv (8.8M).
+
diff --git a/task-2718/convert-dirreqs-sql.R b/task-2718/convert-dirreqs-sql.R
new file mode 100644
index 0000000..e330307
--- /dev/null
+++ b/task-2718/convert-dirreqs-sql.R
@@ -0,0 +1,8 @@
+library(ggplot2)
+data <- read.csv("dirreqs-sql.csv", header = FALSE)
+data <- data.frame(fingerprint = data$V1, statsend = data$V2,
+  seconds = data$V3, country = data$V4, requests = data$V5)
+data <- cast(data, fingerprint + statsend + seconds ~ country,
+  value = "requests")
+write.csv(data, file = "dirreqs.csv", quote = FALSE, row.names = FALSE)
+
tor-commits@lists.torproject.org