[tor-commits] [tech-reports/master] Make torflow report look like a Tor Tech Report.

Thu Sep 13 13:43:00 UTC 2012

commit 8608c2345de5b5c3d94452d231bd5b69262e52c7
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Mon Aug 27 09:08:03 2012 +0200

    Make torflow report look like a Tor Tech Report.
---
 2009/torflow/torflow.bib    |   53 ++----------------
 2009/torflow/torflow.tex    |  125 ++++++++++++++++++++----------------------
 2009/torflow/tortechrep.cls |    1 +
 3 files changed, 66 insertions(+), 113 deletions(-)

diff --git a/2009/torflow/torflow.bib b/2009/torflow/torflow.bib
index c8c667e..711a947 100644
--- a/2009/torflow/torflow.bib
+++ b/2009/torflow/torflow.bib
@@ -10,7 +10,7 @@
 @Misc{path-spec,
    author =      {Roger Dingledine and Nick Mathewson},
    title =       {{Tor Path Specifications}},
-   note = {\url{https://git.torproject.org/checkout/tor/master/doc/spec/path-spec.txt}},
+   note = {\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/path-spec.txt}},
 }
 
 @Misc{nickm-iocp,
@@ -34,47 +34,16 @@
   note =         {\url{http://www.blackhat.com/presentations/bh-usa-07/Perry/Presentation/bh-usa-07-perry.pdf}}
 }
 
-@Misc{perry-ssh-ortalk,
-  key =          {perry-ssh-ortalk},
-  title =        {{SSH Key Spoofing}},
-  author =       {Mike Perry},
-  note =         {\url{http://archives.seul.org/or/talk/Jan-2007/msg00030.html}}
-}
-
 @Misc{control-spec,
    author =      {Roger Dingledine and Nick Mathewson},
    title =       {Tor Control Protocol Specifications},
-   note = {\url{https://git.torproject.org/checkout/tor/master/doc/spec/control-spec.txt}},
+   note = {\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/control-spec.txt}},
 }
 
 @Misc{dir-spec,
    author =      {Roger Dingledine and Nick Mathewson},
-   title =       {Tor Control Protocol Specifications},
-   note = {\url{https://git.torproject.org/checkout/tor/master/doc/spec/dir-spec.txt}},
-}
-
-@Misc{Elixir,
-  key =          {Elixir},
-  title =        {{Elixir}},
-  note =         {\url{http://elixir.ematia.de/trac/wiki}}
-}
-
-@Misc{SQLAlchemy,
-  key =          {SQLALchemy},
-  title =        {{SQLAlchemy Database Toolkit for Python}},
-  note =         {\url{http://www.sqlalchemy.org/}}
-}
-
-@Misc{BeautifulSoup,
-  key =          {BeautifulSoup},
-  title =        {{Beautiful Soup: Elixir and Tonic}},
-  note =         {\url{http://www.crummy.com/software/BeautifulSoup/}}
-}
-
-@Misc{Javascript.g,
-  key =          {Javascript.g},
-  title =        {{Antlr Javascript Grammar}},
-  note =         {\url{http://www.antlr.org/grammar/1206736738015/JavaScript.g}}
+   title =       {Tor Directory Protocol Specifications},
+   note = {\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt}},
 }
 
 @mastersthesis{renner-thesis,
@@ -88,19 +57,7 @@
 @Misc{tor-spec,
    author =      {Roger Dingledine and Nick Mathewson},
    title =       {{Tor Protocol Specifications}},
-   note = {\url{https://git.torproject.org/checkout/tor/master/doc/spec/tor-spec.txt}},
-}
-
-@Misc{bug440,
-   author = {Mike Perry},
-   title = {{Guard Nodes Not Weighted By Bandwidth}},
-   note = {\url{http://bugs.torproject.org/flyspray/index.php?do=details\&id=440}}
-}
-
-@Misc{perry-balancing,
-   author = {Mike Perry},
-   title = {{Exit Balancing Patch}},
-   note = {\url{http://archives.seul.org/or/dev/Jul-2007/msg00021.html}}
+   note = {\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/tor-spec.txt}},
 }
 
 @Misc{ads-malware,
diff --git a/2009/torflow/torflow.tex b/2009/torflow/torflow.tex
index 1abaebd..75924bb 100644
--- a/2009/torflow/torflow.tex
+++ b/2009/torflow/torflow.tex
@@ -1,38 +1,21 @@
-% XXX: Change to llncs 11pt aka 
-%\documentclass{llncs}
-\documentclass[letterpaper,11pt]{llncs}
-%\documentclass{article} % llncs
+\documentclass{tortechrep}
 
-\usepackage{usenix}
 \usepackage{url}
-\usepackage{graphics}
+\usepackage{graphicx}
 \usepackage{amsmath}
 \usepackage{listings}  
-
-\setlength{\textwidth}{5.9in}
-\setlength{\textheight}{8.4in}
-\setlength{\topmargin}{.5cm}
-\setlength{\oddsidemargin}{1cm}
-\setlength{\evensidemargin}{1cm}
-
-\newenvironment{tightlist}{\begin{list}{$\bullet$}{
-  \setlength{\itemsep}{0mm}
-    \setlength{\parsep}{0mm}
-    %  \setlength{\labelsep}{0mm}
-    %  \setlength{\labelwidth}{0mm}
-    %  \setlength{\topsep}{0mm}
-    }}{\end{list}}
+\usepackage{courier}  
 
 \begin{document}
 
 \title{TorFlow: Tor Network Analysis}
-
-\author{Mike Perry \\ The Internet \\ mikeperry at fscked.org}
-
-%\institute{The Internet}
-
+\author{Mike Perry}
+\contact{mikeperry at fscked.org}
+\reportid{2009-08-003\footnote{This report was presented at the 2nd Hot
+Topics in Privacy Enhancing Technologies (HotPETs 2009), Seattle, WA, USA,
+August 2009.}}
+\date{August 7, 2009}
 \maketitle
-\pagestyle{plain}
 
 \begin{abstract}
   The Tor Network is a low-latency anonymity, privacy, and censorship
@@ -74,7 +57,8 @@ misconfiguration, or much less often, due to malice. This most frequently
 comes in the form of truncating TCP streams or failing DNS, but occasionally
 presents itself as SSL spoofing or interception by the upstream ISP. On rare
 occasion, SSH hijacking and web content injection have also been
-observed.~\cite{perry-ssh-ortalk}.
+observed.%
+\footnote{\url{https://lists.torproject.org/pipermail/tor-talk/2007-January/007352.html}}
 % XXX: There's another ref for this involving web injection
 
 \section{Overview}
@@ -106,18 +90,18 @@ that provides well-formed information on Tor client status and events and
 optionally enables control over circuit construction and association of SOCKS
 streams to individual circuits.
 
-\begin{figure}[htp]
+\begin{figure}[ht]
 \centering
-\includegraphics{ControlPort2.pdf}
+\includegraphics[width=.8\textwidth]{ControlPort2.pdf}
 \caption{Example Tor Control Port connection with representative Tor Traffic.}
 \label{fig:ControlPort2.pdf}
 \end{figure}
 
 \subsection{TorCtl Organization}
 
-\begin{figure}[htp]
+\begin{figure}[ht]
 \centering
-\includegraphics{PathSupport.pdf}
+\includegraphics[width=.9\textwidth]{PathSupport.pdf}
 \caption{TorCtl Core Class Diagram}
 \label{fig:PathSupport.pdf}
 \end{figure}
@@ -172,8 +156,11 @@ circuit creation time and failure reason, and stream capacity and failure
 reason.
 
 The second is a SQL-based system that stores circuit and/or stream events in
-SQL tables. The SQL system uses Elixir~\cite{Elixir} and
-SQLAlchemy~\cite{SQLAlchemy}, so the backend database can be any that is
+SQL tables. The SQL system uses Elixir%
+\footnote{\url{http://elixir.ematia.de/trac/wiki}} and
+SQLAlchemy,%
+\footnote{\url{http://www.sqlalchemy.org/}}
+so the backend database can be any that is
 supported by SQLAlchemy (which includes just about every modern database
 backend).
 
@@ -219,9 +206,9 @@ Loesing as being a result of our rate limiting algorithm emptying its token
 buckets in sync across the network at the top of each second as opposed to
 continuously. When this is addressed, the Pareto fit should improve.
 
-\begin{figure}[htp]
+\begin{figure}[ht]
 \centering
-\includegraphics{0-93-100000-buildtimes-res100.pdf}
+\includegraphics[width=.9\textwidth]{0-93-100000-buildtimes-res100.pdf}
 \caption{Network-wide bandwidth-weighted circuit build time distribution (ms).}
 \label{fig:buildtimes}
 \end{figure}
@@ -232,7 +219,7 @@ recalibrated Tor's circuit timeout in the client.
 
 \subsection{Guard Node Rebalancing}
 
-\begin{figure}[htp]
+\begin{figure}[ht]
 \centering
 \includegraphics{ExtendsBar.pdf}
 \includegraphics{ExtendsBar2.pdf}
@@ -245,7 +232,7 @@ responsiveness and reliability of 5\% slices of the network (lower percentiles
 indicate higher advertised bandwidth). Repeated measurement showed that nodes
 became progressively less responsive and more failure prone as they got
 slower, up until the 50\% mark, at which point the pattern suddenly stopped.
-This pattern can be seen in the left side of Figures \ref{fig:Extends} and
+This pattern can be seen in the left side of Figures~\ref{fig:Extends} and
 \ref{fig:Failure}.
 
 This 50\% mark was the same point where nodes ceased to be considered for
@@ -255,13 +242,16 @@ We eventually discovered that client guard node selection, instead of being
 weighted based on bandwidth, was actually uniform. We developed a new algorithm
 to fix this, as well as to properly account for weighting both guards and
 exits according to their scarcity when being selected for other positions in
-the network~\cite{bug440,perry-balancing}.
+the network.%
+\footnote{\url{https://trac.torproject.org/projects/tor/ticket/440}}
+\textsuperscript{,}%
+\footnote{\url{https://lists.torproject.org/pipermail/tor-dev/2007-July/001255.html}}
 
 Without an autoupdater, it took over a year for enough clients to upgrade for
 the results to be visible in our scans, but it appears that at least among
 guards, the load is now considerably more uniform.
 
-\begin{figure}[htp]
+\begin{figure}[ht]
 \centering
 \includegraphics{CircFailure.pdf}
 \includegraphics{CircFailure2.pdf}
@@ -270,7 +260,7 @@ guards, the load is now considerably more uniform.
 \end{figure}
 
 However, it is obvious that irregularities still remain. Interestingly, the
-points of very low failure rates in Figure \ref{fig:Failure} correspond to the
+points of very low failure rates in Figure~\ref{fig:Failure} correspond to the
 periods between 01:00 and 03:00 PST, when most of the US is asleep, and
 consistently appeared at that time in numerous scans. This seems to suggest we
 should avoid capacity scans during those hours. It also suggests that circuit
@@ -280,7 +270,7 @@ crypto operations fast enough, it begins dropping circuit creation cells. This
 could explain the sharp difference in high load vs low load conditions for
 circuit failure, but not for stream capacity.
 
-Furthermore, it appears in the right side of Figure \ref{fig:Failure} as
+Furthermore, it appears in the right side of Figure~\ref{fig:Failure} as
 though the slower 50\% of the network is now exhibiting significantly higher
 failure percentages than the first 50\%. In order to explore this, we ran a
 number of additional circuit failure scans utilizing TorFlow's Node
@@ -302,7 +292,7 @@ After more investigation and many scans, two consistent failure classes
 emerged: Windows nodes, and non-bandwidth limited nodes, each of which seemed
 to perform a bit worse as Guard and Exit nodes than as the middle nodes.
 
-\begin{figure}[htp]
+\begin{figure}[ht]
 \centering
 \includegraphics{CircFailure-Win2.pdf}
 \includegraphics{CircFailure-WinMid.pdf}
@@ -312,7 +302,7 @@ to perform a bit worse as Guard and Exit nodes than as the middle nodes.
 
 The Windows node result is not entirely surprising, as it is known that these
 nodes will have difficulty servicing large numbers of sockets using normal
-WinSock~\cite{nickm-iocp}. As can be seen in Figure \ref{fig:WinFail}, these
+WinSock~\cite{nickm-iocp}. As can be seen in Figure~\ref{fig:WinFail}, these
 nodes exhibit significantly higher circuit failure rates than non-Windows
 nodes, and also predictably fare worse in either the Guard or Exit position,
 where they have to maintain significantly more TCP sockets for clients and
@@ -322,7 +312,7 @@ There are some aberrations. In particular, the high-end Windows nodes seem to
 be on par with their peers. This is likely due to the higher socket limits of
 server editions of Windows as compared to desktop.
 
-\begin{figure}[htp]
+\begin{figure}[ht]
 \centering
 \includegraphics{CircFailure-BwLimit2.pdf}
 \includegraphics{Extends-BwLimit2.pdf}
@@ -331,7 +321,7 @@ nodes}
 \label{fig:BwLimited}
 \end{figure}
 
-Interestingly, as can be seen in the left side of Figure \ref{fig:BwLimited},
+Interestingly, as can be seen in the left side of Figure~\ref{fig:BwLimited},
 nodes that have configured a specific bandwidth rate limit are considerably
 more reliable than those that set no limit and just fill their upstream to the
 max. One potential reason for this could be that due to Tor's multiplexing of
@@ -340,7 +330,7 @@ ability of circuit creation cells to get through in time. Other possibilities
 include asymmetric bandwidth limits and OS and CPU limits being easier to hit,
 causing failure as opposed to smooth throttling.
 
-Also of interest from the right side of Figure \ref{fig:BwLimited} is the fact
+Also of interest from the right side of Figure~\ref{fig:BwLimited} is the fact
 that despite only emptying their queues once per second, the circuit extend
 latencies of bandwidth-limited nodes are still typically less than their
 non-limited neighbors. This indicates that most of these rate limited nodes
@@ -357,7 +347,7 @@ ability to make a TCP connection for non-limited nodes, and is possibly tied
 to the ability to transfer a create cell through the network and also
 implicate TCP flow control issues.
 
-\begin{figure}[htp]
+\begin{figure}[ht]
 \centering
 \includegraphics{CircFailure-LimitWin.pdf}
 \includegraphics{CircFailure-WinLimit.pdf}
@@ -367,15 +357,15 @@ implicate TCP flow control issues.
 
 It is also the case that many of the Windows nodes also do not set
 bandwidth limits for themselves. This led us to perform four scans to compare
-the effect of Windows, the results of which are shown in Figure \ref{fig:LimitFail}.
+the effect of Windows, the results of which are shown in Figure~\ref{fig:LimitFail}.
 
-On the left side of Figure \ref{fig:LimitFail}, it can be seen that while
+On the left side of Figure~\ref{fig:LimitFail}, it can be seen that while
 non-Win32 non-limited nodes do exhibit higher failure rates than the limited
-nodes in Figure \ref{fig:BwLimited}, the bulk of the failure is due to the
+nodes in Figure~\ref{fig:BwLimited}, the bulk of the failure is due to the
 Windows non-limited nodes. Furthermore, on the right of Figure
 \ref{fig:LimitFail}, it can be seen that Limited Windows nodes perform
 significantly better than non-limited, though again not quite on par with
-limited nodes from Figure \ref{fig:BwLimited}. This could be due to the
+limited nodes from Figure~\ref{fig:BwLimited}. This could be due to the
 limited nodes' operators ensuring that they set their bandwidth rate below the
 point at which Tor begins to experience performance problems or otherwise slow
 down their system.
@@ -385,7 +375,7 @@ bandwidth limits below their connection's capacity, and that we need to ensure
 that the Vidalia UI is clear enough for Windows users to be able to set 
 limits properly, and understand the importance of doing so.
 
-\begin{figure}[htp]
+\begin{figure}[ht]
 \centering
 \includegraphics{BadNodes.pdf}
 \includegraphics{BadNodesWin.pdf}
@@ -398,7 +388,7 @@ proportion of non-limited and Windows nodes in each percentile slice in Figure
 \ref{fig:BadNodes}. On the left is the combination of non-limited and Windows
 nodes, and on the right is Windows nodes that are non-limited. It can be seen
 that the amount of Windows non-limited nodes roughly correspond to the level
-of failure rates from the right side of Figure \ref{fig:Failure}. Of course,
+of failure rates from the right side of Figure~\ref{fig:Failure}. Of course,
 correlation does not imply causation, but it does give us a starting point to
 work with.
 
@@ -413,7 +403,7 @@ It would be much better if we could use a balancing metric or metrics, and use
 them directly to alter client load allocation to correct for arbitrary
 unbalancing.
 
-\begin{figure}[htp]
+\begin{figure}[ht]
 \centering
 \includegraphics{StreamBwBar2.pdf}
 \caption{Stream bandwidth capacity as measured by recent feedback scan}
@@ -421,7 +411,7 @@ unbalancing.
 \end{figure}
 
 In a well-balanced network, all streams should receive the same bandwidth. 
-It can clearly be seen from Figure \ref{fig:StreamBw} that this is not the
+It can clearly be seen from Figure~\ref{fig:StreamBw} that this is not the
 case, and that some segments of the network are providing clients with 
 much better capacities than others.
 
@@ -500,8 +490,9 @@ purposes of creating botnets or mining account credentials.
 
 \subsection{General Methodology}
 
-\lstset{language=Python}
-\begin{lstlisting}[frame=single] 
+\begin{figure}
+\lstset{basicstyle=\footnotesize\ttfamily,language=Python}
+\begin{lstlisting}
   TorResult = PerformFetch(Tor, URL, TorAuthSet)
   NonTorResult1 = PerformFetch(NonTorIP1, URL, NonTorAuthSet)
   if IsPrefix(TorResult, NonTorResult1):
@@ -524,8 +515,12 @@ purposes of creating botnets or mining account credentials.
     return OK
   return FAIL_MODIFICATION
 \end{lstlisting}
+\caption{Pseudocode that all scans follow}
+\label{fig:pseudocode}
+\end{figure}
 
-In general, all scans follow the pattern in the above pseudocode:
+In general, all scans follow the pattern in the pseudocode in
+Figure~\ref{fig:pseudocode}:
 First they perform an operation without Tor. They then perform that same
 operation through Tor. If the relevant content matches, it is a success.
 Otherwise, they perform the operation again from a new Non-Tor IP but using
@@ -543,7 +538,8 @@ subclassifications of these.
 
 \subsection{HTML and JavaScript Scanning}
 
-In the case of HTML scanning, we use Beautiful Soup~\cite{BeautifulSoup} to
+In the case of HTML scanning, we use Beautiful Soup%
+\footnote{\url{http://www.crummy.com/software/BeautifulSoup/}} to
 strip content down to only tags that can contain plugins, script, or CSS in
 order to eliminate localization and content changes from comparison and to
 obtain a set of page script, iframe, object, and link tags for recursion. 
@@ -561,7 +557,9 @@ Inspector script post-scan.
 
 If the HTML Difference Pruner finds no new Tor differences after pruning, we
 rerun the unpruned fetches through a JavaScript Difference Pruner that uses a
-Javascript parser from the Antlr Project~\cite{Javascript.g} to prune
+Javascript parser from the Antlr Project%
+\footnote{\url{http://www.antlr.org/grammar/1206736738015/JavaScript.g}}
+to prune
 differences from an AST. This is done to ensure we haven't pruned a tag or
 attribute that varies because of minor Javascript differences (such as unique
 identifiers embedded in script). If no Tor differences remain, the node has
@@ -687,7 +685,7 @@ circuit scans first. However, certain characteristics may affect circuit
 failure and not stream capacity and vice-versa, so all of the pertinent
 results should ideally be repeated with stream bandwidth scans.
 
-\section{Acknowledgments}
+\section*{Acknowledgments}
 
 We'd like to thank all of our Google Summer of Code students who have
 contributed various features to TorFlow: Johannes Renner for his GeoIP-based
@@ -705,9 +703,6 @@ Lastly, we'd like to thank Roger and Nick for having the foresight to design
 such a flexible control mechanism for Tor, for their thorough efforts at
 documenting it and the rest of Tor, and for Tor in general.
 
-\bibliographystyle{plain} \bibliography{torflow}
-
-\clearpage
-\appendix
+\bibliography{torflow}
 
 \end{document}
diff --git a/2009/torflow/tortechrep.cls b/2009/torflow/tortechrep.cls
new file mode 120000
index 0000000..4c24db2
--- /dev/null
+++ b/2009/torflow/tortechrep.cls
@@ -0,0 +1 @@
+../../tortechrep.cls
\ No newline at end of file