commit 8608c2345de5b5c3d94452d231bd5b69262e52c7 Author: Karsten Loesing karsten.loesing@gmx.net Date: Mon Aug 27 09:08:03 2012 +0200
Make torflow report look like a Tor Tech Report. --- 2009/torflow/torflow.bib | 53 ++---------------- 2009/torflow/torflow.tex | 125 ++++++++++++++++++++---------------------- 2009/torflow/tortechrep.cls | 1 + 3 files changed, 66 insertions(+), 113 deletions(-)
diff --git a/2009/torflow/torflow.bib b/2009/torflow/torflow.bib index c8c667e..711a947 100644 --- a/2009/torflow/torflow.bib +++ b/2009/torflow/torflow.bib @@ -10,7 +10,7 @@ @Misc{path-spec, author = {Roger Dingledine and Nick Mathewson}, title = {{Tor Path Specifications}}, - note = {\url{https://git.torproject.org/checkout/tor/master/doc/spec/path-spec.txt}}, + note = {\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/path-spec.txt}}, }
@Misc{nickm-iocp, @@ -34,47 +34,16 @@ note = {\url{http://www.blackhat.com/presentations/bh-usa-07/Perry/Presentation/bh-usa-07... }
-@Misc{perry-ssh-ortalk, - key = {perry-ssh-ortalk}, - title = {{SSH Key Spoofing}}, - author = {Mike Perry}, - note = {\url{http://archives.seul.org/or/talk/Jan-2007/msg00030.html}} -} - @Misc{control-spec, author = {Roger Dingledine and Nick Mathewson}, title = {Tor Control Protocol Specifications}, - note = {\url{https://git.torproject.org/checkout/tor/master/doc/spec/control-spec.txt}}, + note = {\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/control-spec.txt}}, }
@Misc{dir-spec, author = {Roger Dingledine and Nick Mathewson}, - title = {Tor Control Protocol Specifications}, - note = {\url{https://git.torproject.org/checkout/tor/master/doc/spec/dir-spec.txt}}, -} - -@Misc{Elixir, - key = {Elixir}, - title = {{Elixir}}, - note = {\url{http://elixir.ematia.de/trac/wiki}} -} - -@Misc{SQLAlchemy, - key = {SQLALchemy}, - title = {{SQLAlchemy Database Toolkit for Python}}, - note = {\url{http://www.sqlalchemy.org/}} -} - -@Misc{BeautifulSoup, - key = {BeautifulSoup}, - title = {{Beautiful Soup: Elixir and Tonic}}, - note = {\url{http://www.crummy.com/software/BeautifulSoup/}} -} - -@Misc{Javascript.g, - key = {Javascript.g}, - title = {{Antlr Javascript Grammar}}, - note = {\url{http://www.antlr.org/grammar/1206736738015/JavaScript.g}} + title = {Tor Directory Protocol Specifications}, + note = {\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt}}, }
@mastersthesis{renner-thesis, @@ -88,19 +57,7 @@ @Misc{tor-spec, author = {Roger Dingledine and Nick Mathewson}, title = {{Tor Protocol Specifications}}, - note = {\url{https://git.torproject.org/checkout/tor/master/doc/spec/tor-spec.txt}}, -} - -@Misc{bug440, - author = {Mike Perry}, - title = {{Guard Nodes Not Weighted By Bandwidth}}, - note = {\url{http://bugs.torproject.org/flyspray/index.php?do=details\&id=440}} -} - -@Misc{perry-balancing, - author = {Mike Perry}, - title = {{Exit Balancing Patch}}, - note = {\url{http://archives.seul.org/or/dev/Jul-2007/msg00021.html}} + note = {\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/tor-spec.txt}}, }
@Misc{ads-malware, diff --git a/2009/torflow/torflow.tex b/2009/torflow/torflow.tex index 1abaebd..75924bb 100644 --- a/2009/torflow/torflow.tex +++ b/2009/torflow/torflow.tex @@ -1,38 +1,21 @@ -% XXX: Change to llncs 11pt aka -%\documentclass{llncs} -\documentclass[letterpaper,11pt]{llncs} -%\documentclass{article} % llncs +\documentclass{tortechrep}
-\usepackage{usenix} \usepackage{url} -\usepackage{graphics} +\usepackage{graphicx} \usepackage{amsmath} \usepackage{listings} - -\setlength{\textwidth}{5.9in} -\setlength{\textheight}{8.4in} -\setlength{\topmargin}{.5cm} -\setlength{\oddsidemargin}{1cm} -\setlength{\evensidemargin}{1cm} - -\newenvironment{tightlist}{\begin{list}{$\bullet$}{ - \setlength{\itemsep}{0mm} - \setlength{\parsep}{0mm} - % \setlength{\labelsep}{0mm} - % \setlength{\labelwidth}{0mm} - % \setlength{\topsep}{0mm} - }}{\end{list}} +\usepackage{courier}
\begin{document}
\title{TorFlow: Tor Network Analysis} - -\author{Mike Perry \ The Internet \ mikeperry@fscked.org} - -%\institute{The Internet} - +\author{Mike Perry} +\contact{mikeperry@fscked.org} +\reportid{2009-08-003\footnote{This report was presented at 2nd Hot Topics +in Privacy Enhancing Technologies (HotPETs 2009), Seattle, WA, USA, August +2009.}} +\date{August 7, 2009} \maketitle -\pagestyle{plain}
\begin{abstract} The Tor Network is a low-latency anonymity, privacy, and censorship @@ -74,7 +57,8 @@ misconfiguration, or much less often, due to malice. This most frequently comes in the form of truncating TCP streams or failing DNS, but occasionally presents itself as SSL spoofing or interception by the upstream ISP. On rare occasion, SSH hijacking and web content injection have also been -observed.~\cite{perry-ssh-ortalk}. +observed.% +\footnote{\url{https://lists.torproject.org/pipermail/tor-talk/2007-January/007352.html}}. % XXX: There's another ref for this involving web injection
\section{Overview} @@ -106,18 +90,18 @@ that provides well-formed information on Tor client status and events and optionally enables control over circuit construction and association of SOCKS streams to individual circuits.
-\begin{figure}[htp] +\begin{figure}[ht] \centering -\includegraphics{ControlPort2.pdf} +\includegraphics[width=.8\textwidth]{ControlPort2.pdf} \caption{Example Tor Control Port connection with representative Tor Traffic.} \label{fig:ControlPort2.pdf} \end{figure}
\subsection{TorCtl Organization}
-\begin{figure}[htp] +\begin{figure}[ht] \centering -\includegraphics{PathSupport.pdf} +\includegraphics[width=.9\textwidth]{PathSupport.pdf} \caption{TorCtl Core Class Diagram} \label{fig:PathSupport.pdf} \end{figure} @@ -172,8 +156,11 @@ circuit creation time and failure reason, and stream capacity and failure reason.
The second is a SQL-based system that stores circuit and/or stream events in -SQL tables. The SQL system uses Elixir~\cite{Elixir} and -SQLAlchemy~\cite{SQLAlchemy}, so the backend database can be any that is +SQL tables. The SQL system uses Elixir% +\footnote{\url{http://elixir.ematia.de/trac/wiki}} and +SQLAlchemy% +\footnote{\url{http://www.sqlalchemy.org/}}, +so the backend database can be any that is supported by SQLAlchemy (which includes just about every modern database backend).
@@ -219,9 +206,9 @@ Loesing as being a result of our rate limiting algorithm emptying its token buckets in sync across the network at the top of each second as opposed to continuously. When this is addressed, the Pareto fit should improve.
-\begin{figure}[htp] +\begin{figure}[ht] \centering -\includegraphics{0-93-100000-buildtimes-res100.pdf} +\includegraphics[width=.9\textwidth]{0-93-100000-buildtimes-res100.pdf} \caption{Network-wide bandwidth-weighted circuit build time distribution (ms).} \label{fig:buildtimes} \end{figure} @@ -232,7 +219,7 @@ recalibrated Tor's circuit timeout in the client.
\subsection{Guard Node Rebalancing}
-\begin{figure}[htp] +\begin{figure}[ht] \centering \includegraphics{ExtendsBar.pdf} \includegraphics{ExtendsBar2.pdf} @@ -245,7 +232,7 @@ responsiveness and reliability of 5\% slices of the network (lower percentiles indicate higher advertised bandwidth). Repeated measurement showed that nodes became progressively less responsive and more failure prone as they got slower, up until the 50\% mark, at which point the pattern suddenly stopped. -This pattern can be seen in the left side of Figures \ref{fig:Extends} and +This pattern can be seen in the left side of Figures~\ref{fig:Extends} and \ref{fig:Failure}.
This 50\% mark was the same point where nodes ceased to be considered for @@ -255,13 +242,16 @@ We eventually discovered that client guard node selection, instead of being weighted based on bandwidth, was actually uniform. We developed a new algorithm to fix this, as well as to properly account for weighting both guards and exits according to their scarcity when being selected for other positions in -the network~\cite{bug440,perry-balancing}. +the network.% +\footnote{\url{https://trac.torproject.org/projects/tor/ticket/440}} +\textsuperscript{,}% +\footnote{\url{https://lists.torproject.org/pipermail/tor-dev/2007-July/001255.html}}
Without an autoupdater, it took over a year for enough clients to upgrade for the results to be visible in our scans, but it appears that at least among guards, the load is now considerably more uniform.
-\begin{figure}[htp] +\begin{figure}[ht] \centering \includegraphics{CircFailure.pdf} \includegraphics{CircFailure2.pdf} @@ -270,7 +260,7 @@ guards, the load is now considerably more uniform. \end{figure}
However, it is obvious that irregularities still remain. Interestingly, the -points of very low failure rates in Figure \ref{fig:Failure} correspond to the +points of very low failure rates in Figure~\ref{fig:Failure} correspond to the periods between 01:00 and 03:00 PST, when most of the US is asleep, and consistently appeared at that time in numerous scans. This seems to suggest we should avoid capacity scans during those hours. It also suggests that circuit @@ -280,7 +270,7 @@ crypto operations fast enough, it begins dropping circuit creation cells. This could explain the sharp difference in high load vs low load conditions for circuit failure, but not for stream capacity.
-Furthermore, it appears in the right side of Figure \ref{fig:Failure} as +Furthermore, it appears in the right side of Figure~\ref{fig:Failure} as though the slower 50\% of the network is now exhibiting significantly higher failure percentages than the first 50\%. In order to explore this, we ran a number of additional circuit failure scans utilizing TorFlow's Node @@ -302,7 +292,7 @@ After more investigation and many scans, two consistent failure classes emerged: Windows nodes, and non-bandwidth limited nodes, each of which seemed to perform a bit worse as Guard and Exit nodes than as the middle nodes.
-\begin{figure}[htp] +\begin{figure}[ht] \centering \includegraphics{CircFailure-Win2.pdf} \includegraphics{CircFailure-WinMid.pdf} @@ -312,7 +302,7 @@ to perform a bit worse as Guard and Exit nodes than as the middle nodes.
The Windows node result is not entirely surprising, as it is known that these nodes will have difficulty servicing large numbers of sockets using normal -WinSock~\cite{nickm-iocp}. As can be seen in Figure \ref{fig:WinFail}, these +WinSock~\cite{nickm-iocp}. As can be seen in Figure~\ref{fig:WinFail}, these nodes exhibit significantly higher circuit failure rates than non-Windows nodes, and also predictably fare worse in either the Guard or Exit position, where they have to maintain significantly more TCP sockets for clients and @@ -322,7 +312,7 @@ There are some aberrations. In particular, the high-end Windows nodes seem to be on par with their peers. This is likely due to the higher socket limits of server editions of Windows as compared to desktop.
-\begin{figure}[htp] +\begin{figure}[ht] \centering \includegraphics{CircFailure-BwLimit2.pdf} \includegraphics{Extends-BwLimit2.pdf} @@ -331,7 +321,7 @@ nodes} \label{fig:BwLimited} \end{figure}
-Interestingly, as can be seen in the left side of Figure \ref{fig:BwLimited}, +Interestingly, as can be seen in the left side of Figure~\ref{fig:BwLimited}, nodes that have configured a specific bandwidth rate limit are considerably more reliable than those that set no limit and just fill their upstream to the max. One potential reason for this could be that due to Tor's multiplexing of @@ -340,7 +330,7 @@ ability of circuit creation cells to get through in time. Other possibilities include asymmetric bandwidth limits and OS and CPU limits being easier to hit, causing failure as opposed to smooth throttling.
-Also of interest from the right side of Figure \ref{fig:BwLimited} is the fact +Also of interest from the right side of Figure~\ref{fig:BwLimited} is the fact that despite only emptying their queues once per second, the circuit extend latencies of bandwidth-limited nodes are still typically less than their non-limited neighbors. This indicates that most of these rate limited nodes @@ -357,7 +347,7 @@ ability to make a TCP connection for non-limited nodes, and is possibly tied to the ability to transfer a create cell through the network and also implicate TCP flow control issues.
-\begin{figure}[htp] +\begin{figure}[ht] \centering \includegraphics{CircFailure-LimitWin.pdf} \includegraphics{CircFailure-WinLimit.pdf} @@ -367,15 +357,15 @@ implicate TCP flow control issues.
It is also the case that many of the Windows nodes also do not set bandwidth limits for themselves. This led us to perform four scans to compare -the effect of Windows, the results of which are shown in Figure \ref{fig:LimitFail}. +the effect of Windows, the results of which are shown in Figure~\ref{fig:LimitFail}.
-On the left side of Figure \ref{fig:LimitFail}, it can be seen that while +On the left side of Figure~\ref{fig:LimitFail}, it can be seen that while non-Win32 non-limited nodes do exhibit higher failure rates than the limited -nodes in Figure \ref{fig:BwLimited}, the bulk of the failure is due to the +nodes in Figure~\ref{fig:BwLimited}, the bulk of the failure is due to the Windows non-limited nodes. Furthermore, on the right of Figure \ref{fig:LimitFail}, it can be seen that Limited Windows nodes perform significantly better than non-limited, though again not quite on par with -limited nodes from Figure \ref{fig:BwLimited}. This could be due to the +limited nodes from Figure~\ref{fig:BwLimited}. This could be due to the limited nodes' operators ensuring that they set their bandwidth rate below the point at which Tor begins to experience performance problems or otherwise slow down their system. @@ -385,7 +375,7 @@ bandwidth limits below their connection's capacity, and that we need to ensure that the Vidalia UI is clear enough for Windows users to be able to set limits properly, and understand the importance of doing so.
-\begin{figure}[htp] +\begin{figure}[ht] \centering \includegraphics{BadNodes.pdf} \includegraphics{BadNodesWin.pdf} @@ -398,7 +388,7 @@ proportion of non-limited and Windows nodes in each percentile slice in Figure \ref{fig:BadNodes}. On the left is the combination of non-limited and Windows nodes, and on the right is Windows nodes that are non-limited. It can be seen that the amount of Windows non-limited nodes roughly correspond to the level -of failure rates from the right side of Figure \ref{fig:Failure}. Of course, +of failure rates from the right side of Figure~\ref{fig:Failure}. Of course, correlation does not imply causation, but it does give us a starting point to work with.
@@ -413,7 +403,7 @@ It would be much better if we could use a balancing metric or metrics, and use them directly to alter client load allocation to correct for arbitrary unbalancing.
-\begin{figure}[htp] +\begin{figure}[ht] \centering \includegraphics{StreamBwBar2.pdf} \caption{Stream bandwidth capacity as measured by recent feedback scan} @@ -421,7 +411,7 @@ unbalancing. \end{figure}
In a well-balanced network, all streams should receive the same bandwidth. -It can clearly be seen from Figure \ref{fig:StreamBw} that this is not the +It can clearly be seen from Figure~\ref{fig:StreamBw} that this is not the case, and that some segments of the network are providing clients with much better capacities than others.
@@ -500,8 +490,9 @@ purposes of creating botnets or mining account credentials.
\subsection{General Methodology}
-\lstset{language=Python} -\begin{lstlisting}[frame=single] +\begin{figure} +\lstset{basicstyle=\footnotesize\ttfamily,language=Python} +\begin{lstlisting} TorResult = PerformFetch(Tor, URL, TorAuthSet) NonTorResult1 = PerformFetch(NonTorIP1, URL, NonTorAuthSet) if IsPrefix(TorResult, NonTorResult1): @@ -524,8 +515,12 @@ purposes of creating botnets or mining account credentials. return OK return FAIL_MODIFICATION \end{lstlisting} +\caption{Pseudocode that all scans follow} +\label{fig:pseudocode} +\end{figure}
-In general, all scans follow the pattern in the above pseudocode: +In general, all scans follow the pattern in the pseudocode in +Figure~\ref{fig:pseudocode}: First they perform an operation without Tor. They then perform that same operation through Tor. If the relevant content matches, it is a success. Otherwise, they perform the operation again from a new Non-Tor IP but using @@ -543,7 +538,8 @@ subclassifications of these.
\subsection{HTML and JavaScript Scanning}
-In the case of HTML scanning, we use Beautiful Soup~\cite{BeautifulSoup} to +In the case of HTML scanning, we use Beautiful Soup% +\footnote{\url{http://www.crummy.com/software/BeautifulSoup/}} to strip content down to only tags that can contain plugins, script, or CSS in order to eliminate localization and content changes from comparison and to obtain a set of page script, iframe, object, and link tags for recursion. @@ -561,7 +557,9 @@ Inspector script post-scan.
If the HTML Difference Pruner finds no new Tor differences after pruning, we rerun the unpruned fetches through a JavaScript Difference Pruner that uses a -Javascript parser from the Antlr Project~\cite{Javascript.g} to prune +Javascript parser from the Antlr Project% +\footnote{\url{http://www.antlr.org/grammar/1206736738015/JavaScript.g}} +to prune differences from an AST. This is done to ensure we haven't pruned a tag or attribute that varies because of minor Javascript differences (such as unique identifiers embedded in script). If no Tor differences remain, the node has @@ -687,7 +685,7 @@ circuit scans first. However, certain characteristics may affect circuit failure and not stream capacity and vice-versa, so all of the pertinent results should ideally be repeated with stream bandwidth scans.
-\section{Acknowledgments} +\section*{Acknowledgments}
We'd like to thank all of our Google Summer of Code students who have contributed various features to TorFlow: Johannes Renner for his GeoIP-based @@ -705,9 +703,6 @@ Lastly, we'd like to thank Roger and Nick for having the foresight to design such a flexible control mechanism for Tor, for their thorough efforts at documenting it and the rest of Tor, and for Tor in general.
-\bibliographystyle{plain} \bibliography{torflow} - -\clearpage -\appendix +\bibliography{torflow}
\end{document} diff --git a/2009/torflow/tortechrep.cls b/2009/torflow/tortechrep.cls new file mode 120000 index 0000000..4c24db2 --- /dev/null +++ b/2009/torflow/tortechrep.cls @@ -0,0 +1 @@ +../../tortechrep.cls \ No newline at end of file