commit 532e0ba5ba8acf98e034e546e3e619e749ecd545 Author: Philipp Winter identity.function@gmail.com Date: Wed Feb 6 01:30:43 2013 +0100
Add new technical report discussing the Tor censorship analysis tool. --- .../censorship-analysis-tool.bib | 59 +++++ .../censorship-analysis-tool.tex | 244 ++++++++++++++++++++ 2013/censorship-analysis-tool/tortechrep.cls | 1 + 3 files changed, 304 insertions(+), 0 deletions(-)
diff --git a/2013/censorship-analysis-tool/censorship-analysis-tool.bib b/2013/censorship-analysis-tool/censorship-analysis-tool.bib new file mode 100644 index 0000000..4a30e60 --- /dev/null +++ b/2013/censorship-analysis-tool/censorship-analysis-tool.bib @@ -0,0 +1,59 @@ +@misc{censorwiki, + title = {{Censorship Wiki}}, + note = {URL: \url{https://censorshipwiki.torproject.org}}, +} + +@misc{iran, + author = {phobos}, + title = {{Update on Internet censorship in Iran}}, + note = {URL: \url{https://blog.torproject.org/blog/update-internet-censorship-iran}}, +} + +@inproceedings{Wright2011, + address = {San Francisco, CA, USA}, + author = {Wright, Joss and de Souza, Tulio and Brown, Ian}, + booktitle = {Free and Open Communications on the Internet}, + publisher = {USENIX Association}, + title = {{Fine-Grained Censorship Mapping: Information Sources, Legality and Ethics}}, + year = {2011}, + note = {URL: \url{http://static.usenix.org/event/foci11/tech/final_files/Wright.pdf}} +} + +@inproceedings{Filasto2012, + address = {Bellevue, WA, USA}, + author = {Filast\`{o}, Arturo and Appelbaum, Jacob}, + booktitle = {Free and Open Communications on the Internet}, + publisher = {USENIX Association}, + title = {{OONI: Open Observatory of Network Interference}}, + year = {2012}, + note = {URL: \url{https://www.usenix.org/system/files/conference/foci12/foci12-final12.pdf}}
+} + +@misc{tlshistory, + author = {Nick Mathewson}, + title = {{TLSHistory}}, + note = {URL: \url{https://trac.torproject.org/projects/tor/wiki/org/projects/Tor/TLSHistory}}, +} + +@misc{daphne, + title = {daphne}, + note = {URL: \url{https://trac.torproject.org/projects/tor/wiki/doc/OONI/Tests/daphne}}, +} + +@misc{mirrors, + author = {{The Tor Project}}, + title = {{Tor: Mirrors}}, + note = {URL: \url{https://www.torproject.org/getinvolved/mirrors.html.en}}, +} + +@misc{obfsproxy, + author = {{The Tor Project}}, + title = {obfsproxy}, + note = {URL: \url{https://www.torproject.org/projects/obfsproxy.html.en}} +} + +@misc{gettor, + author = {{The Tor Project}}, + title = {{GetTor e-mail autoresponder}}, + note = {URL: \url{https://www.torproject.org/projects/gettor.html.en}} +} diff --git a/2013/censorship-analysis-tool/censorship-analysis-tool.tex b/2013/censorship-analysis-tool/censorship-analysis-tool.tex new file mode 100644 index 0000000..73f479f --- /dev/null +++ b/2013/censorship-analysis-tool/censorship-analysis-tool.tex @@ -0,0 +1,244 @@ +\documentclass{tortechrep} +\usepackage{url} +\usepackage{graphicx} + +\begin{document} + +\title{ + Design Requirements for a \\ + Tor Censorship Analysis Tool +} + +\author{Philipp Winter} + +\contact{\href{mailto:phw@torproject.org}{phw@torproject.org}} +\reportid{2013-02-001} +\date{\today} + +\maketitle + +\section{Introduction} +% motivation +The Tor network is documented to be blocked in several countries +\cite{censorwiki}. Analyzing and circumventing these blocks typically requires +detailed \emph{packet traces} or access to \emph{machines inside censoring +countries}. Both, however, are not always easy to acquire: + +\begin{enumerate} + % why network traces are not so good + \item Network traces are problematic for two reasons. First, they are + difficult to obtain since they require the cooperation of users within + censoring countries. 
Second, they are hard to anonymize and must not + fall into the wrong hands. Derived information, such as flow + diagrams\footnote{See, for example, the blog post discussing a Tor + block in Iran \cite{iran}.}, is typically safe to publish but + frequently lacks important details. + % why access inside countries is hard + \item The alternative to network traces is to gain access to machines inside the + censoring regime. This approach turns out to be difficult as well, mostly + due to the lack of volunteers who could provide machines or the lack of VPS + providers and open SOCKS proxies. +\end{enumerate} + +% where we are heading +These problems show that there is a strong need for a lightweight tool which can +assist in analyzing censorship events. This tool should be run by censored users +and perform several tests to gain a rough understanding of how and if Tor could +be blocked in the respective network. The results of these tests should make it +back to the Tor project and be used to improve circumvention technology such as +obfsproxy \cite{obfsproxy} and to document censorship \cite{censorwiki}. + +% what this techreport does +This technical report discusses the design requirements for such a censorship +analysis tool. We list the desired features, discuss how they can be implemented, +and give a rough overview of the software design. Ultimately, this +technical report should serve as a basis for the development and deployment of the +censorship analysis tool. + +\section{Feature Requirements} +\label{sec:features} +The following list enumerates the features which are desirable in a censorship +analyzer. Naturally, certain features are harder to implement than others, so +the list is organized in ascending order based on the difficulty of the +respective feature. + +\begin{enumerate} + \item \textbf{Capture debugging process}: The tool should be able to create + a pcap file of the network debugging process to allow further + inspection. 
While very handy, pcaps are sensitive data and would require + Administrator/root permissions. + + \item \textbf{User-friendly output}: While the censorship analyzer is meant + to assist Tor developers in debugging censorship incidents, + user-friendly log messages are easy to add and can give users an idea of + why their Tor fails to connect. Based on the gathered data, the analyzer + could give the user suggestions on what to try next. This might even + slightly reduce the help desk's load. + + \item \textbf{Obfuscate tests}: Censors might be interested in identifying + the Tor censorship analyzer and try to actively falsify the tests. + Therefore, the analyzer should make an effort to stay under the radar. + In particular, the analyzer should implement: + \begin{enumerate} + \item Random sleep periods between (and perhaps during) tests. + \item Randomize the order of executed tests. + \item Use random IP addresses for tests such as the relay + reachability discussed below. + \end{enumerate} + Note that it is not possible to completely hide the analyzer's + existence. Rather, this feature should be understood as hiding + all too obvious network activity. + + \item \textbf{Leave no traces behind}: The analyzer should not leave any + traces on the user's hard disk. Ideally, the analyzer should generate a + single report file which is placed in the same directory as the analyzer + itself. That would make it possible for users to conveniently delete all + traces. Temporary analysis files should be deleted after the report was + generated. + + \item \textbf{DirAuth reachability}: Try to connect to the directory + authorities and download the consensus. If this fails, check if: + \begin{enumerate} + \item The authorities respond to ICMP echo requests to see if the + IP addresses are blocked. + \item Run traceroutes to the directory authorities as well as to + other---hopefully unblocked---hosts in the same subnet as the + directory authorities. 
This could yield the location of + censoring boxes and serve as proof that the IP addresses are, in + fact, blocked. + \end{enumerate} + + \item \textbf{Web site reachability}: Try to connect to + \url{https://www.torproject.org} and fetch the index page. If the web + site fails to load, check if: + \begin{enumerate} + \item One of the official Tor mirrors \cite{mirrors} + works\footnote{In particular, mirrors without the strings ``tor'' + or ``torproject'' in the domain should be given a try.}. + \item The domain \texttt{www.torproject.org} resolves to the correct, + non-poisoned IP addresses. + \item A simple TCP connection to + \texttt{www.torproject.org} succeeds. If so, in the subsequent + step, a TLS session could be established. That way, it is possible + to find out whether DPI boxes are inspecting the SNI in the TLS + client hello. + \item The hosts behind \texttt{www.torproject.org} respond to ICMP + echo requests. + \end{enumerate} + + \item \textbf{Bridge distribution}: Try to connect to + \url{https://bridges.torproject.org} and fetch the index page. If the + web site loads, it is safe to assume that obfsproxy bridges can be + fetched as well. If the web site fails to load, check if: + \begin{enumerate} + \item The domain \texttt{bridges.torproject.org} resolves to the + correct, non-poisoned IP address. + \item A simple TCP connection to + \texttt{bridges.torproject.org} succeeds. If so, in the subsequent + step, a TLS session could be established. That way, it is possible + to find out whether DPI boxes are inspecting the SNI in the TLS + client hello. + \item The host behind \texttt{bridges.torproject.org} responds to + ICMP echo requests. + \end{enumerate} + + \item \textbf{Relay reachability}: Try to connect to a number of Tor relays + listed in the consensus. Typically, clients connect to entry guards. 
+ However, it would also be interesting to learn whether connections to + pure middle or exit relays succeed\footnote{This could be an indicator + that a censor is blindly blacklisting all IP addresses found in the + consensus.}. If this fails, check if: + \begin{enumerate} + \item A Tor-specific TLS client hello can be sent to + \texttt{mail.google.com:443} -- assuming that this host is + reachable. If the connection is closed in a non-clean fashion, + this could be an indicator that fields in the TLS client hello + are subject to filtering. + \item (Private) bridges and their censorship-resistant + variants (brdgrd, obfs2, obfs3, flashproxies) are reachable. + % TODO - all bridges we put in the censorship analyzer bundle + % will eventually get blocked. + \end{enumerate} + + \item \textbf{Gather debug information}: Censorship is typically not + homogeneous across a country and varies depending on provinces, + autonomous systems or ISPs \cite{Wright2011}. As a result, we are + interested in information which can help shed light on the respective + censorship infrastructure. Also, this would help ruling out + interferences and prevent jumping to wrong conclusions. Of interest + would be: + \begin{enumerate} + \item What ISP does the user have? + \item What is the autonomous system number? + \item Is the user behind a captive portal? + \item Is all traffic forced to go through an HTTP proxy? + \end{enumerate} + + \item \textbf{Debug the TLS handshake}: Tor is frequently blocked based on + identifying information in its TLS handshake + \cite{tlshistory,censorwiki}. Debugging the exact fingerprint used by + DPI boxes to identify Tor can be of great value. This is, however, a + very hard problem which requires a client server architecture to infer + fingerprints. The tool daphne was started with this goal in mind + \cite{daphne}. +\end{enumerate} + +\section{Software Architecture} +The following list enumerates software-specific aspects of the censorship +analyzer. 
+ +\begin{enumerate} + \item \textbf{Ease of use}: It is crucial that the analyzer is as easy to + use as possible. Ideally, it should be a self-contained click-and-go + executable, just like the Tor Browser Bundle. After all, the target + group consists mostly of ordinary users rather than developers. + + Ease of use also involves the analyzer's \emph{bundle size}. Ideally, + the analyzer would only be a few megabytes in size which would also make + it suitable for distribution via GetTor \cite{gettor}. + + \item \textbf{Configurable during build}: It should be possible to pass + configuration parameters to the analyzer during the build process. That + is necessary because certain information such as IP addresses of relays + to test or of \url{www.torproject.org} change over time or might be + white-listed by censors. As a result, it is not a good idea to hard-code + these things in the source code. + + \item \textbf{Least privilege}: Ideally, the analyzer should not require + Administrator/root access. + + \item \textbf{Existing framework}: There is no need to reinvent the wheel. + The analyzer should be implemented as tests for the open observatory of + network interference (OONI) \cite{Filasto2012}. OONI provides a Python + API which can be used to develop all of the above mentioned features. + + \item \textbf{Data delivery}: Eventually, the Tor project has to learn about + test results. There are two possible ways: + \begin{enumerate} + \item The analyzer could automatically transmit gathered data to + the Tor project. Automated uploads must require the user's + informed consent and the user must be given the choice to + review the report prior to submission. + \item The analyzer can create a report and then ask the user to + send the report to censorship-analyzer@torproject.org (which + does not exist yet). This could still be a fallback plan if an + automated upload fails. 
+ \end{enumerate} + Further, the report should contain a \emph{message digest} which is + built over the report. This is particularly important when the report is + being sent over e-mail since it allows us to detect if the report is + incomplete or the user accidentally changed parts of it. + + \item \textbf{Testability}: All the features discussed in Section + \ref{sec:features} should be testable in an automated way. Otherwise, we + might end up shipping code which does not work in real environments or + we might not notice if improvements break existing code. +\end{enumerate} + +\section*{Acknowledgments} +Arturo Filast\`{o}, George Kadianakis, Karsten Loesing and Runa A. Sandvik +provided valuable feedback for this technical report. + +\bibliography{censorship-analysis-tool} + +\end{document} diff --git a/2013/censorship-analysis-tool/tortechrep.cls b/2013/censorship-analysis-tool/tortechrep.cls new file mode 120000 index 0000000..4c24db2 --- /dev/null +++ b/2013/censorship-analysis-tool/tortechrep.cls @@ -0,0 +1 @@ +../../tortechrep.cls \ No newline at end of file
tor-commits@lists.torproject.org