commit 856c04702b0c99ea9f3f192b6ebbb1a4be5fb2f3 Author: Karsten Loesing karsten.loesing@gmx.net Date: Sat May 9 10:54:22 2015 +0200
Remove all unused graphing code. --- Makefile | 7 +-- README | 9 --- metrics/HOWTO | 109 ------------------------------------ metrics/filter.R | 149 -------------------------------------------------- metrics/timematrix.R | 36 ------------ plot_results.R | 146 ------------------------------------------------- 6 files changed, 1 insertion(+), 455 deletions(-)
diff --git a/Makefile b/Makefile index e85b8d2..2cafa76 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,6 @@ ### See LICENSE for licensing information
CC=gcc -Wall -Werror -ggdb -R=R CMD BATCH --vanilla -IMAGES=first-download.png first-local.png first-net.png second-download.png second-local.png second-net.png
all: trivsocks-client
@@ -17,8 +15,5 @@ test: trivsocks-client ./trivsocks-client -4 tor.eff.org / ./trivsocks-client -5 tor.eff.org /
-$(IMAGES): plot_results.R - $(R) $< - clean: - rm -f *~ *.o trivsocks-client *.png *.Rout + rm -f *~ *.o trivsocks-client diff --git a/README b/README index 48b9617..71d4ce5 100644 --- a/README +++ b/README @@ -12,7 +12,6 @@ Contents Makefile: Builds and tests trivsocks-client
[run_test.py: Script to automate running of trivsocks-client -- defect] -[plot_results.R: Plot the results from run_test.py -- defect]
measurements-HOWTO: Instructions for setting up Torperf example_start-tors_fastslow.txt: Example start script for Torperfs with @@ -29,11 +28,3 @@ Contents LICENSE: The Tor license (3-clause BSD) README: This file
-Subdirectory /metrics ------------- -------- - -A set of utilities for filtering and graphing Tor performance data. - - filter.R: filters torperf data and prepares it for graphing - timematrix.R: graphs tordata for interpretation and visualization - HOWTO: documentation and examples diff --git a/metrics/HOWTO b/metrics/HOWTO deleted file mode 100644 index ce6f7eb..0000000 --- a/metrics/HOWTO +++ /dev/null @@ -1,109 +0,0 @@ -HOWTO -- How to generate nifty graphs of tor performance - -Welcome traveler! You have reached the howto for some tor performance -and metrics stuff. You will find here some techniques and scripts -developed during several tasks including: -#1919; in which we examine torperfs with fixed entry guards -#2543; in which we create graphs of #1919 data -#2563; in which we generalize techniques from #2543 for the future - -The remainder of this HOWTO will walk you through what you need to do -to use the generalized techniques to generate graphs from performance -data. We will use #2543 as an example, because it is from this -example that the generalized technique was derived. This is intended -to be a living document. If something is unclear, or if you wish to -request a feature, please open a ticket: -https://trac.torproject.org/projects/tor/newticket - -As far as I know, this document was written by Karsten, Mike Perry, -and Tom Benjamin. If you are also an author of this document, please -add yourself to this list. - -Step 1: Download Torperf request files --------------------------------------- - -The 15 Torperf request files are available here: - - https://metrics.torproject.org/data.html#performance - -The wget commands to download all of them are: - - wget https://metrics.torproject.org/data/torperf-50kb.data - wget https://metrics.torproject.org/data/torperf-1mb.data - wget https://metrics.torproject.org/data/torperf-5mb.data - wget https://metrics.torproject.org/data/torperffastratio-50kb.data - wget https://metrics.torproject.org/data/torperffastratio-1mb.data - wget https://metrics.torproject.org/data/torperffastratio-5mb.data - wget https://metrics.torproject.org/data/torperffast-50kb.data - wget https://metrics.torproject.org/data/torperffast-1mb.data - wget https://metrics.torproject.org/data/torperffast-5mb.data - wget https://metrics.torproject.org/data/torperfslow-50kb.data - wget https://metrics.torproject.org/data/torperfslow-1mb.data - wget https://metrics.torproject.org/data/torperfslow-5mb.data - wget https://metrics.torproject.org/data/torperfslowratio-50kb.data - wget https://metrics.torproject.org/data/torperfslowratio-1mb.data - wget https://metrics.torproject.org/data/torperfslowratio-5mb.data - -Note that the torperf-*.data files are quite big already (25M+). - - -Step 2: Install R and ggplot2 ------------------------------ - -Install R 2.8 or higher. - -Run R as user and install ggplot2, quit R, start R again and try to load -ggplot2: - - $ R - > install.packages("ggplot2") - > q() # No need to save the workspace image, ever. - $ R - > library(ggplot2) - > q() - - -Step 3: Filter the data ------------------------ - -Before actually graphing the Torperf data, we should filter it to avoid -reading 29M of data for each graph. filter.R is a script that -accomplishes this task, writing it's output to filtered.csv -It is used as follows: - -1) Decide which files you are interested in. If you only want graphs -based on the fast guard nodes, you only need to crunch those files. - -2) Decide what date range you are interested in. The default is to -include all data since 2001-02-01 until 2099-12-31, by which time I -expect this script may be obsolete. - -usage: R --slave -f filter.R --args [-start=DATE] [-end=DATE] FILENAME(S) - -filename must be of the form guardname-basesizeSUFFIX.data -where SUFFIX is one of kb, mb, gb, tb - eg: R --slave -f filter.R --args -start=2011-02-01 -end=2099-12-31 *.data - eg: R --slave -f filter.R --args torperf-50kb.data - -So, to filter all data from #1919 you would execute: - $ R --slave -f filter.R --args *.data - -The script may take some time to run if the data files are large. - - -Step 4: Visualize the data --------------------------- - -Let's start with plotting a matrix of completion times graphs for every -file size and guard selection. - - $ R --slave -f timematrix.R - -This execution may take around 15 seconds. - - -Step 5: Find a more useful visualization of the data ----------------------------------------------------- - -... TODO ... - diff --git a/metrics/filter.R b/metrics/filter.R deleted file mode 100644 index f069856..0000000 --- a/metrics/filter.R +++ /dev/null @@ -1,149 +0,0 @@ -## A new and "improved" genericised version of the old filter script -## This version was created for task 2563 -## See HOWTO to put this in context -## -## usage: R -f filter.R --args [-start=DATE] [-end=DATE] FILENAME(S) -## filename must be of the form guardname-basesizeSUFFIX.data -## where SUFFIX is one of kb, mb, gb, tb -## -## eg: R -f filter.R --args -start=2011-02-01 -end=2099-12-31 *.data -## eg: R -f filter.R --args torperf-50kb.data -## -## This R script reads in Torperf files as specified on the command line -## and writes a filtered version to filtered.csv for later processing. - -FilterMain <- function(ARGV) { - kDebug <- FALSE # set TRUE for debugging output - kVersion <- 0.3 - if (kDebug) { cat("filter.R version ", kVersion, "\n\n") } - files <- NULL # files is a list of torperfFiles as definied below - setClass("torperfFile", - representation( - filename = "character", - guardLabel = "character", - filesizeLabel = "character", - filesize = "numeric" - ) - ) - - ## default values - ## cutoff dates for observations - start <- as.POSIXct("2011-02-01", origin = "1970-01-01") - end <- as.POSIXct("2099-12-31", origin = "1970-01-01") - - ## process command line arguments - args <- unlist(strsplit(ARGV, " ")) - - ## there are better ways to process command line args, but this works for me :-) - for (arg in args) { - if (kDebug) { cat('arg: ', arg, "\n") } - ## if start date specified - if (length(splitArgL <- unlist(strsplit(arg, "-start="))) == 2) { - if (kDebug) { cat('Starting from ', splitArgL[2], '\n') } - start <- as.POSIXct(splitArgL[2], origin = "1970-01-01") - next - } - ## if end date specified - if (length(splitArgL <- unlist(strsplit(arg, "-end="))) == 2) { - if (kDebug) { cat('Ending at ', splitArgL[2], '\n') } - end <- as.POSIXct(splitArgL[2], origin = "1970-01-01") - next - } - ## if the argument is -start= or -end= we will not reach this line - ## now, if it isn't a parameter add it to the file list - ## parse filename for metadata... - ## examples: - ## "torperf-50kb.data" should result in - ## filename = "torperf-50kb.data" - ## guardLabel = "torperf" - ## filesizeLabel = "50kb" - ## filesize = 50 * 1024 - my.file <- new("torperfFile", filename = arg) - - ## get base filename (strip out leading parts of filename such as dirname) - baseFilename <- basename(my.file@filename) - parseFileStr <- unlist(strsplit(baseFilename, "-")) ## split the two parts of the filename string - if (length(parseFileStr) != 2) { - cat("error: filenames must be of the form guard-filesize.data, you said "", baseFilename, ""\n") - quit("no", 1) - } - my.file@guardLabel <- parseFileStr[1] - cdr <- parseFileStr[2] - parseFilesize <- unlist(strsplit(cdr, "\.")) - if (length(parseFilesize) != 2) { - cat("error: tail of filename must be filesize.data, you said "", cdr, ""\n") - quit("no", 1) - } - my.file@filesizeLabel <- tolower(parseFilesize[1]) ## smash case to make our life easier - - fileBaseSize <- as.integer(unlist(strsplit(my.file@filesizeLabel, "[a-z]"))[1]) - fileSizeMultiplierStr <- unlist(strsplit(my.file@filesizeLabel, '[0-9]')) - fileSizeMultiplierStr <- fileSizeMultiplierStr[length(fileSizeMultiplierStr)] - fileSizeMultiplier <- 1 ## assume no suffix - if (fileSizeMultiplierStr == "kb") { fileSizeMultiplier <- 1024 } - if (fileSizeMultiplierStr == "mb") { fileSizeMultiplier <- 1024 * 1024 } - if (fileSizeMultiplierStr == "gb") { fileSizeMultiplier <- 1024 * 1024 * 1024} - ## yeah right, like we are really pushing TB of data - if (fileSizeMultiplierStr == "tb") { fileSizeMultiplier <- 1024 * 1024 * 1024 * 1024 } - my.file@filesize <- fileBaseSize * fileSizeMultiplier - - if (kDebug) { - cat("i will read file: ", my.file@filename, ' ', - my.file@guardLabel, ' ', - my.file@filesizeLabel, ' ', - my.file@filesize, "\n") - } - - files <- c(files, my.file) - } - - ## sanity check arguments - if (start >= end) { - cat("error: start date must be before end date\n"); - quit("no", 1) - } - if (length(files) == 0) { - cat("error: input files must be specified as arguments\n") - quit("no", 1) ## terminate with non-zero errlev - } - - if (kDebug) { - cat("filtering from ", as.character.POSIXt(start), " to ", - as.character.POSIXt(end), "\n") - } - - ## Turn a given Torperf file into a data frame with the information we care - ## about. - read <- function(filename, guards, filesize, bytes) { - x <- read.table(filename) - x <- x[as.POSIXct(x$V1, origin = "1970-01-01") >= start & - as.POSIXct(x$V1, origin = "1970-01-01") <= end, ] - if (length(x$V1) == 0) - NULL - else - data.frame( - started = as.POSIXct(x$V1, origin = "1970-01-01"), - timeout = x$V17 == 0, - failure = x$V17 > 0 & x$V20 < bytes, - completemillis = ifelse(x$V17 > 0 & x$V20 >= bytes, - round((x$V17 * 1000 + x$V18 / 1000) - - (x$V1 * 1000 + x$V19 / 1000), 0), NA), - guards = guards, - filesize = filesize) - } - - ## Read in files and bind them to a single data frame. - filtered <- NULL - for (file in files) { - if (kDebug) { cat('Processing ', file@filename, "...\n") } - filtered <- rbind(filtered, - read(file@filename, file@guardLabel, file@filesizeLabel, file@filesize) - ) - } - - # Write data frame to a csv file for later processing. - write.csv(filtered, "filtered.csv", quote = FALSE, row.names = FALSE) - -} - -FilterMain(commandArgs(TRUE)) diff --git a/metrics/timematrix.R b/metrics/timematrix.R deleted file mode 100644 index ec01a25..0000000 --- a/metrics/timematrix.R +++ /dev/null @@ -1,36 +0,0 @@ -# Load ggplot library without printing out stupid warnings. -options(warn = -1) -suppressPackageStartupMessages(library("ggplot2")) - -# Read in filtered data. -data <- read.csv("filtered.csv", stringsAsFactors = FALSE) - -# Remove NA's -data <- na.omit(data) - -# Remove "outliers" -data <- data[(data$filesize == "50kb" & data$completemillis < 60000) | - (data$filesize == "1mb" & data$completemillis < 120000) | - (data$filesize == "5mb" & data$completemillis < 300000), ] - -# Plot a matrix of scatter plots; the first step is to define which data -# we want to plot (here: data) and what to put on x and y axis. -ggplot(data, aes(x = as.POSIXct(started), y = completemillis / 1000)) + - -# Draw a point for every observation, but with an alpha value of 1/10 to -# reduce overplotting -geom_point(alpha = 1/10) + - -# Draw a matrix of these graphs with different filesizes and different -# guards. -facet_grid(filesize ~ guards, scales = "free_y") + - -# Rename y axis. -scale_y_continuous(name = "Completion time in seconds") + - -# Rename x axis. -scale_x_datetime(name = "Starting time") - -# Save the result to a large PNG file. -ggsave("timematrix.png", width = 10, height = 10, dpi = 150) - diff --git a/plot_results.R b/plot_results.R deleted file mode 100644 index a291414..0000000 --- a/plot_results.R +++ /dev/null @@ -1,146 +0,0 @@ -### Copyright 2007 Steven J. Murdoch -### See LICENSE for licensing information - -### XXX This script is broken, but we should look at it some more and -### maybe reuse parts of it when implementing Trac ticket #2563 (Add R -### code for processing Torperf data to the Torperf repository) - -UFACTOR = 1e6 - -## Subtract to timevals, maintaining precision -todelta <- function(startsec, startusec, tsec, tusec) { - tsec[tsec == 0] <- NA - dsec <- tsec - startsec - dusec <- tusec - startusec - return(dsec*UFACTOR + dusec) -} - - -parsedata <- function(filename, size) { - - filename <- paste("data/run2/", filename, sep="") - - t = read.table(filename, header=TRUE) - - tStart <- t$startsec*UFACTOR + t$startusec - dSocket <- todelta(t$startsec, t$startusec, t$socketsec, t$socketusec) - dConnect <- todelta(t$startsec, t$startusec, t$connectsec, t$connectusec) - dNegotiate <- todelta(t$startsec, t$startusec, t$negotiatesec, t$negotiateusec) - dRequest <- todelta(t$startsec, t$startusec, t$requestsec, t$requestusec) - dResponse <- todelta(t$startsec, t$startusec, t$responsesec, t$responseusec) - dDRequest <- todelta(t$startsec, t$startusec, t$datarequestsec, t$datarequestusec) - dDResponse <- todelta(t$startsec, t$startusec, t$dataresponsesec, t$dataresponseusec) - dDComplete <- todelta(t$startsec, t$startusec, t$datacompletesec, t$datacompleteusec) - cbWrite <- t$writebytes - cbRead <- t$readbytes - - results <- data.frame(tStart, dSocket, dConnect, - dNegotiate, dRequest, dResponse, - dDRequest, dDResponse, dDComplete, - cbWrite, cbRead) - - invalid <- abs(results$cbRead - size) > 64 - results[invalid,] <- NA - - return(results) -} - -plotdist <- function(data, factor, labels, title, ylim=c(NA,NA)) { - ## Scale units - if (factor == 1e6) - ylab <- "Time (s)" - else if (factor == 1e3) - ylab <- "Time (ms)" - else { - ylab <- "Time (us)" - factor <- 1 - } - - d <- na.omit(data)/factor - - ## Find plotting range - MinY<- NULL - MaxY <- NULL - - range <- 1.5 - - for (col in d) { - s <- summary(col) - Q1 <- as.vector(s[2]) - Q3 <- as.vector(s[5]) - InterQ <- Q3-Q1 - a <- Q1 - range*InterQ - b <- Q3 + range*InterQ - - if (is.null(MinY) || a<MinY) - MinY <- a - - if (is.null(MaxY) || b>MaxY) - MaxY <- b - } - - if (!is.na(ylim[1])) - MinY <- ylim[1] - - if (!is.na(ylim[2])) - MaxY <- ylim[2] - - ## Find how many points this will cause to be skipped - skipped <- vector() - for (i in (1:length(d))) { - col <- d[[i]] - isSkipped <- col<MinY | col>MaxY - d[[i]][isSkipped] <- NA - s <- length(which(isSkipped)) - ss <- paste("(",s,")",sep="") - skipped <- append(skipped, ss) - } - - labels <- mapply(paste, labels, skipped) - if (length(d)>1) - title <- paste(title, " (", length(d[[1]]), " runs)", sep="") - else - title <- paste(title, " (", length(d[[1]]), " runs, ", s, " skipped)", sep="") - - ## Plot the data - boxplot(names=labels, d, frame.plot=FALSE, ylab=ylab, range=range, - ylim=c(MinY, MaxY), xlab="Event (# points omitted)", main=title, - pars=list(show.names=TRUE, boxwex = 0.8, staplewex = 0.5, outwex = 0.5)) -} - -first <- parsedata("first-big.data", 1048869) -second <- parsedata("second-big.data", 1048868) - -EventNames <- c("start", - "socket()", "connect()", "auth", "SOCKS req", "SOCKS resp", - "HTTP req", "HTTP resp", "HTTP done") - -png("first-local.png", width=800, height=533, bg="transparent") -par(mar=c(4.3,4.1,3.1,0.1)) -plotdist(first[2:5], 1e3, EventNames[2:5], "Local events -- first request", c(0,2)) -dev.off() - -png("second-local.png", width=800, height=533, bg="transparent") -par(mar=c(4.3,4.1,5.1,0.1)) -plotdist(second[2:5], 1e3, EventNames[2:5], "Local events -- second request", c(0,2)) -dev.off() - -png("first-net.png", width=800, height=533, bg="transparent") -par(mar=c(4.3,4.1,3.1,0.1)) -plotdist(first[6:8], 1e6, EventNames[6:8], "Network events -- first request", c(0,8)) -dev.off() - -png("second-net.png", width=800, height=533, bg="transparent") -par(mar=c(4.3,4.1,5.1,0.1)) -plotdist(second[6:8], 1e6, EventNames[6:8], "Network events -- second request", c(0,8)) -dev.off() - -png("first-download.png", width=600, height=533, bg="transparent") -par(mar=c(0.3,4.1,3.1,0.1)) -plotdist(first[9], 1e6, EventNames[9], "HTTP download -- first request", c(0,150)) -dev.off() - -png("second-download.png", width=600, height=533, bg="transparent") -par(mar=c(0.3,4.1,3.1,0.1)) -plotdist(second[9], 1e6, EventNames[9], "HTTP download -- second request", c(0,150)) -dev.off()