tor-commits

31 Aug '12
commit 59ced2a86af9414115a594b499f7307bc7821b69
Author: David Fifield <david(a)bamsoftware.com>
Date: Thu Aug 30 12:31:50 2012 -0700
Move facilitator files into a subdirectory.
---
Makefile | 3 +-
fac.py | 182 ----------------------
facilitator | 330 ----------------------------------------
facilitator-test | 124 ---------------
facilitator.cgi | 115 --------------
facilitator/Makefile | 17 ++
facilitator/fac.py | 182 ++++++++++++++++++++++
facilitator/facilitator | 330 ++++++++++++++++++++++++++++++++++++++++
facilitator/facilitator-test | 124 +++++++++++++++
facilitator/facilitator.cgi | 115 ++++++++++++++
facilitator/init.d/facilitator | 67 ++++++++
init.d/facilitator | 67 --------
12 files changed, 836 insertions(+), 820 deletions(-)
diff --git a/Makefile b/Makefile
index 3a15e54..78de922 100644
--- a/Makefile
+++ b/Makefile
@@ -10,14 +10,13 @@ all:
install:
mkdir -p $(BINDIR)
- cp -f flashproxy-client flashproxy-reg-http facilitator $(BINDIR)
+ cp -f flashproxy-client flashproxy-reg-http $(BINDIR)
clean:
rm -f *.pyc
rm -rf dist
test:
- ./facilitator-test
./flashproxy-client-test
./flashproxy-test.js
diff --git a/fac.py b/fac.py
deleted file mode 100644
index b9ad435..0000000
--- a/fac.py
+++ /dev/null
@@ -1,182 +0,0 @@
-import re
-import socket
-
-def parse_addr_spec(spec, defhost = None, defport = None, resolve = False):
- """Parse a host:port specification and return a 2-tuple ("host", port) as
- understood by the Python socket functions.
- >>> parse_addr_spec("192.168.0.1:9999")
- ('192.168.0.1', 9999)
-
- If defhost or defport are given, those parts of the specification may be
- omitted; if so, they will be filled in with defaults.
- >>> parse_addr_spec("192.168.0.2:8888", defhost="192.168.0.1", defport=9999)
- ('192.168.0.2', 8888)
- >>> parse_addr_spec(":8888", defhost="192.168.0.1", defport=9999)
- ('192.168.0.1', 9999)
- >>> parse_addr_spec("192.168.0.2:", defhost="192.168.0.1", defport=9999)
- ('192.168.0.2', 9999)
- >>> parse_addr_spec(":", defhost="192.168.0.1", defport=9999)
- ('192.168.0.1', 9999)
-
- If resolve is true, then the host in the specification or the defhost may be
- a domain name, which will be resolved. If resolve is false, then the host
- must be a numeric IPv4 or IPv6 address.
-
- IPv6 addresses must be enclosed in square brackets."""
- host = None
- port = None
- m = None
- # IPv6 syntax.
- if not m:
- m = re.match(ur'^\[(.+)\]:(\d+)$', spec)
- if m:
- host, port = m.groups()
- af = socket.AF_INET6
- if not m:
- m = re.match(ur'^\[(.+)\]:?$', spec)
- if m:
- host, = m.groups()
- af = socket.AF_INET6
- # IPv4 syntax.
- if not m:
- m = re.match(ur'^(.+):(\d+)$', spec)
- if m:
- host, port = m.groups()
- af = socket.AF_INET
- if not m:
- m = re.match(ur'^:?(\d+)$', spec)
- if m:
- port, = m.groups()
- af = 0
- if not m:
- host = spec
- af = 0
- host = host or defhost
- port = port or defport
- if host is None or port is None:
- raise ValueError("Bad address specification \"%s\"" % spec)
-
- # Now we have split around the colon and have a guess at the address family.
- # Forward-resolve the name into an addrinfo struct. Real DNS resolution is
- # done only if resolve is true; otherwise the address must be numeric.
- if resolve:
- flags = 0
- else:
- flags = socket.AI_NUMERICHOST
- try:
- addrs = socket.getaddrinfo(host, port, af, socket.SOCK_STREAM, socket.IPPROTO_TCP, flags)
- except socket.gaierror, e:
- raise ValueError("Bad host or port: \"%s\" \"%s\": %s" % (host, port, str(e)))
- if not addrs:
- raise ValueError("Bad host or port: \"%s\" \"%s\"" % (host, port))
-
- # Convert the result of socket.getaddrinfo (which is a 2-tuple for IPv4 and
- # a 4-tuple for IPv6) into a (host, port) 2-tuple.
- host, port = socket.getnameinfo(addrs[0][4], socket.NI_NUMERICHOST | socket.NI_NUMERICSERV)
- port = int(port)
- return host, port
-
-def format_addr(addr):
- host, port = addr
- host_str = u""
- port_str = u""
- if host is not None:
- # Numeric IPv6 address?
- try:
- addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM, socket.IPPROTO_TCP, socket.AI_NUMERICHOST)
- af = addrs[0][0]
- except socket.gaierror, e:
- af = 0
- if af == socket.AF_INET6:
- host_str = u"[%s]" % host
- else:
- host_str = u"%s" % host
- if port is not None:
- if not (0 < port <= 65535):
- raise ValueError("port must be between 1 and 65535 (is %d)" % port)
- port_str = u":%d" % port
-
- if not host_str and not port_str:
- raise ValueError("host and port may not both be None")
- return u"%s%s" % (host_str, port_str)
-
-def skip_space(pos, line):
- """Skip a (possibly empty) sequence of space characters (the ASCII character
- '\x20' exactly). Returns a pair (pos, num_skipped)."""
- begin = pos
- while pos < len(line) and line[pos] == "\x20":
- pos += 1
- return pos, pos - begin
-
-TOKEN_CHARS = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-")
-def get_token(pos, line):
- begin = pos
- while pos < len(line) and line[pos] in TOKEN_CHARS:
- pos += 1
- if begin == pos:
- raise ValueError("No token found at position %d" % pos)
- return pos, line[begin:pos]
-
-def get_quoted_string(pos, line):
- chars = []
- if not (pos < len(line) and line[pos] == '"'):
- raise ValueError("Expected '\"' at beginning of quoted string.")
- pos += 1
- while pos < len(line) and line[pos] != '"':
- if line[pos] == '\\':
- pos += 1
- if not (pos < len(line)):
- raise ValueError("End of line after backslash in quoted string")
- chars.append(line[pos])
- pos += 1
- if not (pos < len(line) and line[pos] == '"'):
- raise ValueError("Expected '\"' at end of quoted string.")
- pos += 1
- return pos, "".join(chars)
-
-def parse_transaction(line):
- """A transaction is a command followed by zero or more key-value pairs. Like so:
- COMMAND KEY="VALUE" KEY="\"ESCAPED\" VALUE"
- Values must be quoted. Any byte value may be escaped with a backslash.
- Returns a pair: (COMMAND, ((KEY1, VALUE1), (KEY2, VALUE2), ...)).
- """
- pos = 0
- pos, skipped = skip_space(pos, line)
- pos, command = get_token(pos, line)
-
- pairs = []
- while True:
- pos, skipped = skip_space(pos, line)
- if not (pos < len(line)):
- break
- if skipped == 0:
- raise ValueError("Expected space before key-value pair")
- pos, key = get_token(pos, line)
- if not (pos < len(line) and line[pos] == '='):
- raise ValueError("No '=' found after key")
- pos += 1
- pos, value = get_quoted_string(pos, line)
- pairs.append((key, value))
- return command, tuple(pairs)
-
-def param_first(key, params):
- for k, v in params:
- if key == k:
- return v
- return None
-
-def quote_string(s):
- chars = []
- for c in s:
- if c == "\\":
- c = "\\\\"
- elif c == "\"":
- c = "\\\""
- chars.append(c)
- return "\"" + "".join(chars) + "\""
-
-def render_transaction(command, *params):
- parts = [command]
- for key, value in params:
- parts.append("%s=%s" % (key, quote_string(value)))
- return " ".join(parts)
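The wire format that fac.py implements is a command token followed by zero or more quoted KEY="VALUE" pairs, with backslash and double quote escaped by a backslash. The file above is Python 2; as a quick illustration, here is a minimal Python 3 sketch of the quoting and rendering side (an illustrative re-implementation, not part of the commit):

```python
# Minimal Python 3 sketch of the facilitator's key="value" wire format.
# Mirrors the escaping rule in fac.py above: backslash and double quote
# are escaped with a backslash; every other byte passes through.

def quote_string(s: str) -> str:
    out = []
    for c in s:
        if c in ('\\', '"'):
            out.append('\\')
        out.append(c)
    return '"' + ''.join(out) + '"'

def render_transaction(command, *params):
    parts = [command]
    for key, value in params:
        parts.append('%s=%s' % (key, quote_string(value)))
    return ' '.join(parts)

line = render_transaction("PUT", ("CLIENT", "1.2.3.4:9000"))
print(line)  # PUT CLIENT="1.2.3.4:9000"
```

A proxy or client sends one such line per transaction, terminated by a newline, and the server's parse_transaction reverses the process.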
diff --git a/facilitator b/facilitator
deleted file mode 100755
index 3695b52..0000000
--- a/facilitator
+++ /dev/null
@@ -1,330 +0,0 @@
-#!/usr/bin/env python
-
-import SocketServer
-import errno
-import getopt
-import os
-import socket
-import sys
-import threading
-import time
-
-import fac
-
-LISTEN_ADDRESS = "127.0.0.1"
-DEFAULT_LISTEN_PORT = 9002
-DEFAULT_RELAY_PORT = 9001
-DEFAULT_LOG_FILENAME = "facilitator.log"
-
-# Don't indulge clients for more than this many seconds.
-CLIENT_TIMEOUT = 1.0
-# Buffer no more than this many bytes when trying to read a line.
-READLINE_MAX_LENGTH = 10240
-
-LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
-
-class options(object):
- listen_port = DEFAULT_LISTEN_PORT
- log_filename = DEFAULT_LOG_FILENAME
- log_file = sys.stdout
- relay_spec = None
- daemonize = True
- pid_filename = None
- safe_logging = True
-
- @staticmethod
- def set_relay_spec(spec):
- spec = fac.parse_addr_spec(spec, defport = DEFAULT_RELAY_PORT, resolve = True)
- options.relay_spec = fac.format_addr(spec)
-
-def usage(f = sys.stdout):
- print >> f, """\
-Usage: %(progname)s -r RELAY <OPTIONS>
-Flash proxy facilitator: Register client addresses and serve them out
-again. Listen on 127.0.0.1 and port PORT (by default %(port)d).
- -d, --debug don't daemonize, log to stdout.
- -h, --help show this help.
- -l, --log FILENAME write log to FILENAME (default \"%(log)s\").
- -p, --port PORT listen on PORT (by default %(port)d).
- --pidfile FILENAME write PID to FILENAME after daemonizing.
- -r, --relay RELAY send RELAY (host:port) to proxies as the relay to use.
- --unsafe-logging don't scrub IP addresses from logs.\
-""" % {
- "progname": sys.argv[0],
- "port": DEFAULT_LISTEN_PORT,
- "log": DEFAULT_LOG_FILENAME,
-}
-
-def safe_str(s):
- """Return s if options.safe_logging is true, and "[scrubbed]" otherwise."""
- if options.safe_logging:
- return "[scrubbed]"
- else:
- return s
-
-log_lock = threading.Lock()
-def log(msg):
- log_lock.acquire()
- try:
- print >> options.log_file, (u"%s %s" % (time.strftime(LOG_DATE_FORMAT), msg)).encode("UTF-8")
- options.log_file.flush()
- finally:
- log_lock.release()
-
-class TCPReg(object):
- def __init__(self, host, port):
- self.host = host
- self.port = port
-
- def __unicode__(self):
- return fac.format_addr((self.host, self.port))
-
- def __str__(self):
- return unicode(self).encode("UTF-8")
-
- def __cmp__(self, other):
- if isinstance(other, TCPReg):
- return cmp((self.host, self.port), (other.host, other.port))
- else:
- return False
-
-class Reg(object):
- @staticmethod
- def parse(spec, defhost = None, defport = None):
- host, port = fac.parse_addr_spec(spec, defhost, defport)
- return TCPReg(host, port)
-
-class RegSet(object):
- def __init__(self):
- self.set = []
- self.cv = threading.Condition()
-
- def add(self, reg):
- self.cv.acquire()
- try:
- if reg not in list(self.set):
- self.set.append(reg)
- self.cv.notify()
- return True
- else:
- return False
- finally:
- self.cv.release()
-
- def fetch(self):
- self.cv.acquire()
- try:
- if not self.set:
- return None
- return self.set.pop(0)
- finally:
- self.cv.release()
-
- def __len__(self):
- self.cv.acquire()
- try:
- return len(self.set)
- finally:
- self.cv.release()
-
-# A decorator to ignore "broken pipe" errors.
-def catch_epipe(fn):
- def ret(self, *args):
- try:
- return fn(self, *args)
- except socket.error, e:
- try:
- err_num = e.errno
- except AttributeError:
- # Before Python 2.6, exception can be a pair.
- err_num, errstr = e
- except:
- raise
- if err_num != errno.EPIPE:
- raise
- return ret
-
-class Handler(SocketServer.StreamRequestHandler):
- def __init__(self, *args, **kwargs):
- self.deadline = time.time() + CLIENT_TIMEOUT
- # Buffer for readline.
- self.buffer = ""
- SocketServer.StreamRequestHandler.__init__(self, *args, **kwargs)
-
- def recv(self):
- timeout = self.deadline - time.time()
- self.connection.settimeout(timeout)
- return self.connection.recv(1024)
-
- def readline(self):
- # A line already buffered?
- i = self.buffer.find("\n")
- if i >= 0:
- line = self.buffer[:i+1]
- self.buffer = self.buffer[i+1:]
- return line
-
- auxbuf = []
- buflen = len(self.buffer)
- while True:
- data = self.recv()
- if not data:
- if self.buffer or auxbuf:
- raise socket.error("readline: stream does not end with a newline")
- else:
- return ""
- i = data.find("\n")
- if i >= 0:
- line = self.buffer + "".join(auxbuf) + data[:i+1]
- self.buffer = data[i+1:]
- return line
- else:
- auxbuf.append(data)
- buflen += len(data)
- if buflen >= READLINE_MAX_LENGTH:
- raise socket.error("readline: refusing to buffer %d bytes (last read was %d bytes)" % (buflen, len(data)))
-
- @catch_epipe
- def handle(self):
- num_lines = 0
- while True:
- try:
- line = self.readline()
- if not line:
- break
- num_lines += 1
- except socket.error, e:
- log("socket error after reading %d lines: %s" % (num_lines, str(e)))
- break
- if not self.handle_line(line):
- break
-
- def handle_line(self, line):
- if not (len(line) > 0 and line[-1] == '\n'):
- raise ValueError("No newline at end of string returned by readline")
- try:
- command, params = fac.parse_transaction(line[:-1])
- except ValueError, e:
- log("fac.parse_transaction: %s" % e)
- self.send_error()
- return False
-
- if command == "GET":
- return self.do_GET(params)
- if command == "PUT":
- return self.do_PUT(params)
- else:
- self.send_error()
- return False
-
- def send_ok(self):
- print >> self.wfile, "OK"
-
- def send_error(self):
- print >> self.wfile, "ERROR"
-
- def do_GET(self, params):
- reg = REGS.fetch()
- if reg:
- log(u"proxy gets %s, relay %s (now %d)" %
- (safe_str(unicode(reg)), options.relay_spec, len(REGS)))
- print >> self.wfile, fac.render_transaction("OK", ("CLIENT", str(reg)), ("RELAY", options.relay_spec))
- else:
- log(u"proxy gets none")
- print >> self.wfile, fac.render_transaction("NONE")
- return True
-
- def do_PUT(self, params):
- client_spec = fac.param_first("CLIENT", params)
- if client_spec is None:
- log(u"PUT missing CLIENT param")
- self.send_error()
- return False
-
- # FROM
-
- try:
- reg = Reg.parse(client_spec, self.client_address[0])
- except ValueError, e:
- log(u"syntax error in %s: %s" % (safe_str(repr(client_spec)), repr(str(e))))
- self.send_error()
- return False
-
- if REGS.add(reg):
- log(u"client %s (now %d)" % (safe_str(unicode(reg)), len(REGS)))
- else:
- log(u"client %s (already present, now %d)" % (safe_str(unicode(reg)), len(REGS)))
-
- self.send_ok()
- return True
-
- finish = catch_epipe(SocketServer.StreamRequestHandler.finish)
-
-class Server(SocketServer.ThreadingMixIn, SocketServer.TCPServer):
- allow_reuse_address = True
-
-REGS = RegSet()
-
-def main():
- opts, args = getopt.gnu_getopt(sys.argv[1:], "dhl:p:r:",
- ["debug", "help", "log=", "port=", "pidfile=", "relay=", "unsafe-logging"])
- for o, a in opts:
- if o == "-d" or o == "--debug":
- options.daemonize = False
- options.log_filename = None
- elif o == "-h" or o == "--help":
- usage()
- sys.exit()
- elif o == "-l" or o == "--log":
- options.log_filename = a
- elif o == "-p" or o == "--port":
- options.listen_port = int(a)
- elif o == "--pidfile":
- options.pid_filename = a
- elif o == "-r" or o == "--relay":
- try:
- options.set_relay_spec(a)
- except socket.gaierror, e:
- print >> sys.stderr, u"Can't resolve relay %s: %s" % (repr(a), str(e))
- sys.exit(1)
- elif o == "--unsafe-logging":
- options.safe_logging = False
-
- if not options.relay_spec:
- print >> sys.stderr, """\
-The -r option is required. Give it the relay that will be sent to proxies.
- -r HOST[:PORT]\
- """
- sys.exit(1)
-
- if options.log_filename:
- options.log_file = open(options.log_filename, "a")
- # Send error tracebacks to the log.
- sys.stderr = options.log_file
- else:
- options.log_file = sys.stdout
-
- addrinfo = socket.getaddrinfo(LISTEN_ADDRESS, options.listen_port, 0, socket.SOCK_STREAM, socket.IPPROTO_TCP)[0]
-
- server = Server(addrinfo[4], Handler)
-
- log(u"start on %s" % fac.format_addr(addrinfo[4]))
- log(u"using relay address %s" % options.relay_spec)
-
- if options.daemonize:
- log(u"daemonizing")
- pid = os.fork()
- if pid != 0:
- if options.pid_filename:
- f = open(options.pid_filename, "w")
- print >> f, pid
- f.close()
- sys.exit(0)
-
- try:
- server.serve_forever()
- except KeyboardInterrupt:
- sys.exit(0)
-
-if __name__ == "__main__":
- main()
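The Handler.readline logic above is the interesting defensive piece of the server: it reads in chunks, hands back one newline-terminated line at a time, and refuses to buffer more than READLINE_MAX_LENGTH bytes without seeing a newline. A Python 3 sketch of the same idea over an in-memory stream (class and names here are illustrative, not from the commit):

```python
# Python 3 sketch of the bounded readline used by the facilitator's Handler:
# buffer partial reads, return one line at a time, and cap how much may be
# buffered without a newline (READLINE_MAX_LENGTH in the original).
import io

READLINE_MAX = 10240

class BoundedLineReader:
    def __init__(self, stream, limit=READLINE_MAX):
        self.stream = stream
        self.limit = limit
        self.buffer = b""

    def readline(self):
        # A complete line already buffered?
        i = self.buffer.find(b"\n")
        if i >= 0:
            line, self.buffer = self.buffer[:i+1], self.buffer[i+1:]
            return line
        chunks = [self.buffer]
        buflen = len(self.buffer)
        while True:
            data = self.stream.read(1024)
            if not data:
                if buflen:
                    raise IOError("stream does not end with a newline")
                return b""
            i = data.find(b"\n")
            if i >= 0:
                chunks.append(data[:i+1])
                self.buffer = data[i+1:]
                return b"".join(chunks)
            chunks.append(data)
            buflen += len(data)
            if buflen >= self.limit:
                raise IOError("refusing to buffer %d bytes" % buflen)

r = BoundedLineReader(io.BytesIO(b'GET FROM="1.2.3.4"\nleftover'))
print(r.readline())  # b'GET FROM="1.2.3.4"\n'
```

Combined with the CLIENT_TIMEOUT deadline on recv, this bounds both the time and the memory a misbehaving peer can consume.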
diff --git a/facilitator-test b/facilitator-test
deleted file mode 100755
index b06f5d7..0000000
--- a/facilitator-test
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/usr/bin/env python
-
-import socket
-import subprocess
-import time
-import unittest
-
-import fac
-
-FACILITATOR_HOST = "127.0.0.1"
-FACILITATOR_PORT = 9002
-
-def gimme_socket(host, port):
- addrinfo = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM, socket.IPPROTO_TCP)[0]
- s = socket.socket(addrinfo[0], addrinfo[1], addrinfo[2])
- s.settimeout(10.0)
- s.connect(addrinfo[4])
- return s
-
-class FacilitatorTest(unittest.TestCase):
- def gimme_socket(self):
- return gimme_socket(FACILITATOR_HOST, FACILITATOR_PORT)
-
- def setUp(self):
- self.process = subprocess.Popen(["./facilitator", "-d", "-p", str(FACILITATOR_PORT), "-r", "0.0.1.0:1", "-l", "/dev/null"])
- time.sleep(0.1)
-
- def tearDown(self):
- self.process.terminate()
-
- def test_timeout(self):
- """Test that the socket will not accept slow writes indefinitely.
- Successive sends should not reset the timeout counter."""
- s = self.gimme_socket()
- time.sleep(0.3)
- s.send("w")
- time.sleep(0.3)
- s.send("w")
- time.sleep(0.3)
- s.send("w")
- time.sleep(0.3)
- s.send("w")
- time.sleep(0.3)
- self.assertRaises(socket.error, s.send, "w")
-
- def test_readline_limit(self):
- """Test that reads won't buffer indefinitely."""
- s = self.gimme_socket()
- buflen = 0
- try:
- while buflen + 1024 < 200000:
- s.send("X" * 1024)
- buflen += 1024
- self.fail("should have raised a socket error")
- except socket.error:
- pass
-
-# def test_same_proxy(self):
-# """Test that the same proxy doesn't get the same client when asking
-# twice."""
-# self.fail()
-#
-# def test_num_clients(self):
-# """Test that the same proxy can pick up up to five different clients but
-# no more. Test that a proxy ceasing to handle a client allows the proxy
-# to handle another, different client."""
-# self.fail()
-#
-# def test_num_proxies(self):
-# """Test that a single client is handed out to five different proxies but
-# no more. Test that a proxy ceasing to handle a client reduces its count
-# so another proxy can handle it."""
-# self.fail()
-#
-# def test_proxy_timeout(self):
-# """Test that a proxy ceasing to connect for some time period causes that
-# proxy's clients to be unhandled by that proxy."""
-# self.fail()
-#
-# def test_localhost_only(self):
-# """Test that the facilitator doesn't listen on any external
-# addresses."""
-# self.fail()
-#
-# def test_hostname(self):
-# """Test that the facilitator rejects hostnames."""
-# self.fail()
-
-class ParseTransactionTest(unittest.TestCase):
- def test_empty_string(self):
- self.assertRaises(ValueError, fac.parse_transaction, "")
-
- def test_correct(self):
- self.assertEqual(fac.parse_transaction("COMMAND"), ("COMMAND", ()))
- self.assertEqual(fac.parse_transaction("COMMAND X=\"\""), ("COMMAND", (("X", ""),)))
- self.assertEqual(fac.parse_transaction("COMMAND X=\"ABC\""), ("COMMAND", (("X", "ABC"),)))
- self.assertEqual(fac.parse_transaction("COMMAND X=\"\\A\\B\\C\""), ("COMMAND", (("X", "ABC"),)))
- self.assertEqual(fac.parse_transaction("COMMAND X=\"\\\\\\\"\""), ("COMMAND", (("X", "\\\""),)))
- self.assertEqual(fac.parse_transaction("COMMAND X=\"ABC\" Y=\"DEF\""), ("COMMAND", (("X", "ABC"), ("Y", "DEF"))))
- self.assertEqual(fac.parse_transaction("COMMAND KEY-NAME=\"ABC\""), ("COMMAND", (("KEY-NAME", "ABC"),)))
- self.assertEqual(fac.parse_transaction("COMMAND KEY_NAME=\"ABC\""), ("COMMAND", (("KEY_NAME", "ABC"),)))
-
- def test_missing_command(self):
- self.assertRaises(ValueError, fac.parse_transaction, "X=\"ABC\"")
- self.assertRaises(ValueError, fac.parse_transaction, " X=\"ABC\"")
-
- def test_missing_space(self):
- self.assertRaises(ValueError, fac.parse_transaction, "COMMAND/X=\"ABC\"")
- self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"ABC\"Y=\"DEF\"")
-
- def test_bad_quotes(self):
- self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"")
- self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"ABC")
- self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"ABC\" Y=\"ABC")
- self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"ABC\\")
-
- def test_truncated(self):
- self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=")
-
- def test_newline(self):
- self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"ABC\" \nY=\"DEF\"")
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/facilitator.cgi b/facilitator.cgi
deleted file mode 100755
index 39566d3..0000000
--- a/facilitator.cgi
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/env python
-
-import cgi
-import os
-import socket
-import sys
-import urllib
-
-import fac
-
-FACILITATOR_ADDR = ("127.0.0.1", 9002)
-
-def exit_error(status):
- print """\
-Status: %d\r
-\r""" % status
- sys.exit()
-
-def fac_socket():
- return socket.create_connection(FACILITATOR_ADDR, 1.0).makefile()
-
-def transact(f, command, *params):
- transaction = fac.render_transaction(command, *params)
- print >> f, transaction
- f.flush()
- line = f.readline()
- if not (len(line) > 0 and line[-1] == '\n'):
- raise ValueError("No newline at end of string returned by facilitator")
- return fac.parse_transaction(line[:-1])
-
-def put_reg(client_addr, registrant_addr):
- f = fac_socket()
- try:
- command, params = transact(f, "PUT", ("CLIENT", fac.format_addr(client_addr)), ("FROM", fac.format_addr(registrant_addr)))
- finally:
- f.close()
- if command == "OK":
- pass
- else:
- exit_error(500)
-
-def get_reg(proxy_addr):
- f = fac_socket()
- try:
- command, params = transact(f, "GET", ("FROM", fac.format_addr(proxy_addr)))
- finally:
- f.close()
- if command == "NONE":
- return {
- "client": ""
- }
- elif command == "OK":
- client_spec = fac.param_first("CLIENT", params)
- relay_spec = fac.param_first("RELAY", params)
- if not client_spec or not relay_spec:
- exit_error(500)
- try:
- # Check the syntax returned by the backend.
- client = fac.parse_addr_spec(client_spec)
- relay = fac.parse_addr_spec(relay_spec)
- except ValueError:
- exit_error(500)
- return {
- "client": fac.format_addr(client),
- "relay": fac.format_addr(relay),
- }
- else:
- exit_error(500)
-
-method = os.environ.get("REQUEST_METHOD")
-proxy_addr = (os.environ.get("REMOTE_ADDR"), None)
-
-if not method or not proxy_addr[0]:
- exit_error(400)
-
-fs = cgi.FieldStorage()
-
-def do_get():
- try:
- reg = get_reg(proxy_addr) or ""
- except:
- exit_error(500)
- # Allow XMLHttpRequest from any domain. http://www.w3.org/TR/cors/.
- print """\
-Status: 200\r
-Content-Type: application/x-www-form-urlencoded\r
-Cache-Control: no-cache\r
-Access-Control-Allow-Origin: *\r
-\r"""
- sys.stdout.write(urllib.urlencode(reg))
-
-def do_post():
- client_specs = fs.getlist("client")
- if len(client_specs) != 1:
- exit_error(400)
- client_spec = client_specs[0]
- try:
- client_addr = fac.parse_addr_spec(client_spec, defhost=proxy_addr[0])
- except ValueError:
- exit_error(400)
- try:
- put_reg(client_addr, proxy_addr)
- except:
- raise
- exit_error(500)
- print """\
-Status: 200\r
-\r"""
-
-if method == "GET":
- do_get()
-elif method == "POST":
- do_post()
-else:
- exit_error(405)
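The CGI front end answers a proxy's GET with a urlencoded form body, where an empty "client" value means no registration is available. A Python 3 sketch of that response encoding (illustrative; the script above uses Python 2's urllib):

```python
# Python 3 sketch of the response body facilitator.cgi returns to a proxy's
# GET: a urlencoded form mapping "client" and "relay" to host:port specs.
from urllib.parse import urlencode, parse_qs

reg = {"client": "1.2.3.4:9000", "relay": "10.0.0.1:9001"}
body = urlencode(reg)
print(body)  # client=1.2.3.4%3A9000&relay=10.0.0.1%3A9001
```

The browser-side proxy then parses this form body to learn which client to serve and which relay to connect it to.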
diff --git a/facilitator/Makefile b/facilitator/Makefile
new file mode 100644
index 0000000..1d9cb54
--- /dev/null
+++ b/facilitator/Makefile
@@ -0,0 +1,17 @@
+PREFIX = /usr/local
+BINDIR = $(PREFIX)/bin
+
+all:
+ :
+
+install:
+ mkdir -p $(BINDIR)
+ cp -f facilitator facilitator.cgi fac.py $(BINDIR)
+
+clean:
+ rm -f *.pyc
+
+test:
+ ./facilitator-test
+
+.PHONY: all install clean test
diff --git a/facilitator/fac.py b/facilitator/fac.py
new file mode 100644
index 0000000..b9ad435
--- /dev/null
+++ b/facilitator/fac.py
@@ -0,0 +1,182 @@
+import re
+import socket
+
+def parse_addr_spec(spec, defhost = None, defport = None, resolve = False):
+ """Parse a host:port specification and return a 2-tuple ("host", port) as
+ understood by the Python socket functions.
+ >>> parse_addr_spec("192.168.0.1:9999")
+ ('192.168.0.1', 9999)
+
+ If defhost or defport are given, those parts of the specification may be
+ omitted; if so, they will be filled in with defaults.
+ >>> parse_addr_spec("192.168.0.2:8888", defhost="192.168.0.1", defport=9999)
+ ('192.168.0.2', 8888)
+ >>> parse_addr_spec(":8888", defhost="192.168.0.1", defport=9999)
+ ('192.168.0.1', 9999)
+ >>> parse_addr_spec("192.168.0.2:", defhost="192.168.0.1", defport=9999)
+ ('192.168.0.2', 9999)
+ >>> parse_addr_spec(":", defhost="192.168.0.1", defport=9999)
+ ('192.168.0.1', 9999)
+
+ If resolve is true, then the host in the specification or the defhost may be
+ a domain name, which will be resolved. If resolve is false, then the host
+ must be a numeric IPv4 or IPv6 address.
+
+ IPv6 addresses must be enclosed in square brackets."""
+ host = None
+ port = None
+ m = None
+ # IPv6 syntax.
+ if not m:
+ m = re.match(ur'^\[(.+)\]:(\d+)$', spec)
+ if m:
+ host, port = m.groups()
+ af = socket.AF_INET6
+ if not m:
+ m = re.match(ur'^\[(.+)\]:?$', spec)
+ if m:
+ host, = m.groups()
+ af = socket.AF_INET6
+ # IPv4 syntax.
+ if not m:
+ m = re.match(ur'^(.+):(\d+)$', spec)
+ if m:
+ host, port = m.groups()
+ af = socket.AF_INET
+ if not m:
+ m = re.match(ur'^:?(\d+)$', spec)
+ if m:
+ port, = m.groups()
+ af = 0
+ if not m:
+ host = spec
+ af = 0
+ host = host or defhost
+ port = port or defport
+ if host is None or port is None:
+ raise ValueError("Bad address specification \"%s\"" % spec)
+
+ # Now we have split around the colon and have a guess at the address family.
+ # Forward-resolve the name into an addrinfo struct. Real DNS resolution is
+ # done only if resolve is true; otherwise the address must be numeric.
+ if resolve:
+ flags = 0
+ else:
+ flags = socket.AI_NUMERICHOST
+ try:
+ addrs = socket.getaddrinfo(host, port, af, socket.SOCK_STREAM, socket.IPPROTO_TCP, flags)
+ except socket.gaierror, e:
+ raise ValueError("Bad host or port: \"%s\" \"%s\": %s" % (host, port, str(e)))
+ if not addrs:
+ raise ValueError("Bad host or port: \"%s\" \"%s\"" % (host, port))
+
+ # Convert the result of socket.getaddrinfo (which is a 2-tuple for IPv4 and
+ # a 4-tuple for IPv6) into a (host, port) 2-tuple.
+ host, port = socket.getnameinfo(addrs[0][4], socket.NI_NUMERICHOST | socket.NI_NUMERICSERV)
+ port = int(port)
+ return host, port
+
+def format_addr(addr):
+ host, port = addr
+ host_str = u""
+ port_str = u""
+ if host is not None:
+ # Numeric IPv6 address?
+ try:
+ addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM, socket.IPPROTO_TCP, socket.AI_NUMERICHOST)
+ af = addrs[0][0]
+ except socket.gaierror, e:
+ af = 0
+ if af == socket.AF_INET6:
+ host_str = u"[%s]" % host
+ else:
+ host_str = u"%s" % host
+ if port is not None:
+ if not (0 < port <= 65535):
+ raise ValueError("port must be between 1 and 65535 (is %d)" % port)
+ port_str = u":%d" % port
+
+ if not host_str and not port_str:
+ raise ValueError("host and port may not both be None")
+ return u"%s%s" % (host_str, port_str)
+
+def skip_space(pos, line):
+ """Skip a (possibly empty) sequence of space characters (the ASCII character
+ '\x20' exactly). Returns a pair (pos, num_skipped)."""
+ begin = pos
+ while pos < len(line) and line[pos] == "\x20":
+ pos += 1
+ return pos, pos - begin
+
+TOKEN_CHARS = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-")
+def get_token(pos, line):
+ begin = pos
+ while pos < len(line) and line[pos] in TOKEN_CHARS:
+ pos += 1
+ if begin == pos:
+ raise ValueError("No token found at position %d" % pos)
+ return pos, line[begin:pos]
+
+def get_quoted_string(pos, line):
+ chars = []
+ if not (pos < len(line) and line[pos] == '"'):
+ raise ValueError("Expected '\"' at beginning of quoted string.")
+ pos += 1
+ while pos < len(line) and line[pos] != '"':
+ if line[pos] == '\\':
+ pos += 1
+ if not (pos < len(line)):
+ raise ValueError("End of line after backslash in quoted string")
+ chars.append(line[pos])
+ pos += 1
+ if not (pos < len(line) and line[pos] == '"'):
+ raise ValueError("Expected '\"' at end of quoted string.")
+ pos += 1
+ return pos, "".join(chars)
+
+def parse_transaction(line):
+ """A transaction is a command followed by zero or more key-value pairs. Like so:
+ COMMAND KEY="VALUE" KEY="\"ESCAPED\" VALUE"
+ Values must be quoted. Any byte value may be escaped with a backslash.
+ Returns a pair: (COMMAND, ((KEY1, VALUE1), (KEY2, VALUE2), ...)).
+ """
+ pos = 0
+ pos, skipped = skip_space(pos, line)
+ pos, command = get_token(pos, line)
+
+ pairs = []
+ while True:
+ pos, skipped = skip_space(pos, line)
+ if not (pos < len(line)):
+ break
+ if skipped == 0:
+ raise ValueError("Expected space before key-value pair")
+ pos, key = get_token(pos, line)
+ if not (pos < len(line) and line[pos] == '='):
+ raise ValueError("No '=' found after key")
+ pos += 1
+ pos, value = get_quoted_string(pos, line)
+ pairs.append((key, value))
+ return command, tuple(pairs)
+
+def param_first(key, params):
+ for k, v in params:
+ if key == k:
+ return v
+ return None
+
+def quote_string(s):
+ chars = []
+ for c in s:
+ if c == "\\":
+ c = "\\\\"
+ elif c == "\"":
+ c = "\\\""
+ chars.append(c)
+ return "\"" + "".join(chars) + "\""
+
+def render_transaction(command, *params):
+ parts = [command]
+ for key, value in params:
+ parts.append("%s=%s" % (key, quote_string(value)))
+ return " ".join(parts)
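parse_addr_spec above does its validation by round-tripping the parsed pieces through getaddrinfo/getnameinfo, with AI_NUMERICHOST when DNS resolution is not wanted. A simplified Python 3 sketch of that technique, without the default-host/port handling (function name and regexes here are illustrative):

```python
# Python 3 sketch of the address parsing in parse_addr_spec above,
# simplified: no defaults, no DNS resolution (AI_NUMERICHOST only).
import re
import socket

def parse_numeric_addr(spec):
    m = re.match(r'^\[(.+)\]:(\d+)$', spec)      # [ipv6]:port
    if not m:
        m = re.match(r'^([^:]+):(\d+)$', spec)   # ipv4:port
    if not m:
        raise ValueError('bad address spec: %r' % spec)
    host, port = m.group(1), int(m.group(2))
    # Validate numerically via getaddrinfo, then normalize with getnameinfo,
    # as fac.py does.
    addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM,
                               socket.IPPROTO_TCP, socket.AI_NUMERICHOST)
    h, p = socket.getnameinfo(addrs[0][4],
                              socket.NI_NUMERICHOST | socket.NI_NUMERICSERV)
    return h, int(p)

print(parse_numeric_addr("[::1]:9002"))  # ('::1', 9002)
print(parse_numeric_addr("127.0.0.1:9002"))
```

Delegating validation to the socket library avoids hand-rolled IPv4/IPv6 syntax checks and yields addresses already normalized for later comparison.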
diff --git a/facilitator/facilitator b/facilitator/facilitator
new file mode 100755
index 0000000..3695b52
--- /dev/null
+++ b/facilitator/facilitator
@@ -0,0 +1,330 @@
+#!/usr/bin/env python
+
+import SocketServer
+import errno
+import getopt
+import os
+import socket
+import sys
+import threading
+import time
+
+import fac
+
+LISTEN_ADDRESS = "127.0.0.1"
+DEFAULT_LISTEN_PORT = 9002
+DEFAULT_RELAY_PORT = 9001
+DEFAULT_LOG_FILENAME = "facilitator.log"
+
+# Don't indulge clients for more than this many seconds.
+CLIENT_TIMEOUT = 1.0
+# Buffer no more than this many bytes when trying to read a line.
+READLINE_MAX_LENGTH = 10240
+
+LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
+
+class options(object):
+ listen_port = DEFAULT_LISTEN_PORT
+ log_filename = DEFAULT_LOG_FILENAME
+ log_file = sys.stdout
+ relay_spec = None
+ daemonize = True
+ pid_filename = None
+ safe_logging = True
+
+ @staticmethod
+ def set_relay_spec(spec):
+ spec = fac.parse_addr_spec(spec, defport = DEFAULT_RELAY_PORT, resolve = True)
+ options.relay_spec = fac.format_addr(spec)
+
+def usage(f = sys.stdout):
+ print >> f, """\
+Usage: %(progname)s -r RELAY <OPTIONS>
+Flash proxy facilitator: Register client addresses and serve them out
+again. Listen on 127.0.0.1 and port PORT (by default %(port)d).
+ -d, --debug don't daemonize, log to stdout.
+ -h, --help show this help.
+ -l, --log FILENAME write log to FILENAME (default \"%(log)s\").
+ -p, --port PORT listen on PORT (by default %(port)d).
+ --pidfile FILENAME write PID to FILENAME after daemonizing.
+ -r, --relay RELAY send RELAY (host:port) to proxies as the relay to use.
+ --unsafe-logging don't scrub IP addresses from logs.\
+""" % {
+ "progname": sys.argv[0],
+ "port": DEFAULT_LISTEN_PORT,
+ "log": DEFAULT_LOG_FILENAME,
+}
+
+def safe_str(s):
+ """Return "[scrubbed]" if options.safe_logging is true, and s otherwise."""
+ if options.safe_logging:
+ return "[scrubbed]"
+ else:
+ return s
+
+log_lock = threading.Lock()
+def log(msg):
+ log_lock.acquire()
+ try:
+ print >> options.log_file, (u"%s %s" % (time.strftime(LOG_DATE_FORMAT), msg)).encode("UTF-8")
+ options.log_file.flush()
+ finally:
+ log_lock.release()
+
+class TCPReg(object):
+ def __init__(self, host, port):
+ self.host = host
+ self.port = port
+
+ def __unicode__(self):
+ return fac.format_addr((self.host, self.port))
+
+ def __str__(self):
+ return unicode(self).encode("UTF-8")
+
+ def __cmp__(self, other):
+ if isinstance(other, TCPReg):
+ return cmp((self.host, self.port), (other.host, other.port))
+ else:
+ return NotImplemented
+
+class Reg(object):
+ @staticmethod
+ def parse(spec, defhost = None, defport = None):
+ host, port = fac.parse_addr_spec(spec, defhost, defport)
+ return TCPReg(host, port)
+
+class RegSet(object):
+ def __init__(self):
+ self.set = []
+ self.cv = threading.Condition()
+
+ def add(self, reg):
+ self.cv.acquire()
+ try:
+ if reg not in list(self.set):
+ self.set.append(reg)
+ self.cv.notify()
+ return True
+ else:
+ return False
+ finally:
+ self.cv.release()
+
+ def fetch(self):
+ self.cv.acquire()
+ try:
+ if not self.set:
+ return None
+ return self.set.pop(0)
+ finally:
+ self.cv.release()
+
+ def __len__(self):
+ self.cv.acquire()
+ try:
+ return len(self.set)
+ finally:
+ self.cv.release()
+
+# A decorator to ignore "broken pipe" errors.
+def catch_epipe(fn):
+ def ret(self, *args):
+ try:
+ return fn(self, *args)
+ except socket.error, e:
+ try:
+ err_num = e.errno
+ except AttributeError:
+ # Before Python 2.6, exception can be a pair.
+ err_num, errstr = e
+ except:
+ raise
+ if err_num != errno.EPIPE:
+ raise
+ return ret
+
+class Handler(SocketServer.StreamRequestHandler):
+ def __init__(self, *args, **kwargs):
+ self.deadline = time.time() + CLIENT_TIMEOUT
+ # Buffer for readline.
+ self.buffer = ""
+ SocketServer.StreamRequestHandler.__init__(self, *args, **kwargs)
+
+ def recv(self):
+ timeout = self.deadline - time.time()
+ self.connection.settimeout(timeout)
+ return self.connection.recv(1024)
+
+ def readline(self):
+ # A line already buffered?
+ i = self.buffer.find("\n")
+ if i >= 0:
+ line = self.buffer[:i+1]
+ self.buffer = self.buffer[i+1:]
+ return line
+
+ auxbuf = []
+ buflen = len(self.buffer)
+ while True:
+ data = self.recv()
+ if not data:
+ if self.buffer or auxbuf:
+ raise socket.error("readline: stream does not end with a newline")
+ else:
+ return ""
+ i = data.find("\n")
+ if i >= 0:
+ line = self.buffer + "".join(auxbuf) + data[:i+1]
+ self.buffer = data[i+1:]
+ return line
+ else:
+ auxbuf.append(data)
+ buflen += len(data)
+ if buflen >= READLINE_MAX_LENGTH:
+ raise socket.error("readline: refusing to buffer %d bytes (last read was %d bytes)" % (buflen, len(data)))
+
+ @catch_epipe
+ def handle(self):
+ num_lines = 0
+ while True:
+ try:
+ line = self.readline()
+ if not line:
+ break
+ num_lines += 1
+ except socket.error, e:
+ log("socket error after reading %d lines: %s" % (num_lines, str(e)))
+ break
+ if not self.handle_line(line):
+ break
+
+ def handle_line(self, line):
+ if not (len(line) > 0 and line[-1] == '\n'):
+ raise ValueError("No newline at end of string returned by readline")
+ try:
+ command, params = fac.parse_transaction(line[:-1])
+ except ValueError, e:
+ log("fac.parse_transaction: %s" % e)
+ self.send_error()
+ return False
+
+ if command == "GET":
+ return self.do_GET(params)
+ if command == "PUT":
+ return self.do_PUT(params)
+ else:
+ self.send_error()
+ return False
+
+ def send_ok(self):
+ print >> self.wfile, "OK"
+
+ def send_error(self):
+ print >> self.wfile, "ERROR"
+
+ def do_GET(self, params):
+ reg = REGS.fetch()
+ if reg:
+ log(u"proxy gets %s, relay %s (now %d)" %
+ (safe_str(unicode(reg)), options.relay_spec, len(REGS)))
+ print >> self.wfile, fac.render_transaction("OK", ("CLIENT", str(reg)), ("RELAY", options.relay_spec))
+ else:
+ log(u"proxy gets none")
+ print >> self.wfile, fac.render_transaction("NONE")
+ return True
+
+ def do_PUT(self, params):
+ client_spec = fac.param_first("CLIENT", params)
+ if client_spec is None:
+ log(u"PUT missing CLIENT param")
+ self.send_error()
+ return False
+
+ # FROM
+
+ try:
+ reg = Reg.parse(client_spec, self.client_address[0])
+ except ValueError, e:
+ log(u"syntax error in %s: %s" % (safe_str(repr(client_spec)), repr(str(e))))
+ self.send_error()
+ return False
+
+ if REGS.add(reg):
+ log(u"client %s (now %d)" % (safe_str(unicode(reg)), len(REGS)))
+ else:
+ log(u"client %s (already present, now %d)" % (safe_str(unicode(reg)), len(REGS)))
+
+ self.send_ok()
+ return True
+
+ finish = catch_epipe(SocketServer.StreamRequestHandler.finish)
+
+class Server(SocketServer.ThreadingMixIn, SocketServer.TCPServer):
+ allow_reuse_address = True
+
+REGS = RegSet()
+
+def main():
+ opts, args = getopt.gnu_getopt(sys.argv[1:], "dhl:p:r:",
+ ["debug", "help", "log=", "port=", "pidfile=", "relay=", "unsafe-logging"])
+ for o, a in opts:
+ if o == "-d" or o == "--debug":
+ options.daemonize = False
+ options.log_filename = None
+ elif o == "-h" or o == "--help":
+ usage()
+ sys.exit()
+ elif o == "-l" or o == "--log":
+ options.log_filename = a
+ elif o == "-p" or o == "--port":
+ options.listen_port = int(a)
+ elif o == "--pidfile":
+ options.pid_filename = a
+ elif o == "-r" or o == "--relay":
+ try:
+ options.set_relay_spec(a)
+ except socket.gaierror, e:
+ print >> sys.stderr, u"Can't resolve relay %s: %s" % (repr(a), str(e))
+ sys.exit(1)
+ elif o == "--unsafe-logging":
+ options.safe_logging = False
+
+ if not options.relay_spec:
+ print >> sys.stderr, """\
+The -r option is required. Give it the relay that will be sent to proxies.
+ -r HOST[:PORT]\
+ """
+ sys.exit(1)
+
+ if options.log_filename:
+ options.log_file = open(options.log_filename, "a")
+ # Send error tracebacks to the log.
+ sys.stderr = options.log_file
+ else:
+ options.log_file = sys.stdout
+
+ addrinfo = socket.getaddrinfo(LISTEN_ADDRESS, options.listen_port, 0, socket.SOCK_STREAM, socket.IPPROTO_TCP)[0]
+
+ server = Server(addrinfo[4], Handler)
+
+ log(u"start on %s" % fac.format_addr(addrinfo[4]))
+ log(u"using relay address %s" % options.relay_spec)
+
+ if options.daemonize:
+ log(u"daemonizing")
+ pid = os.fork()
+ if pid != 0:
+ if options.pid_filename:
+ f = open(options.pid_filename, "w")
+ print >> f, pid
+ f.close()
+ sys.exit(0)
+
+ try:
+ server.serve_forever()
+ except KeyboardInterrupt:
+ sys.exit(0)
+
+if __name__ == "__main__":
+ main()
diff --git a/facilitator/facilitator-test b/facilitator/facilitator-test
new file mode 100755
index 0000000..b06f5d7
--- /dev/null
+++ b/facilitator/facilitator-test
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+
+import socket
+import subprocess
+import time
+import unittest
+
+import fac
+
+FACILITATOR_HOST = "127.0.0.1"
+FACILITATOR_PORT = 9002
+
+def gimme_socket(host, port):
+ addrinfo = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM, socket.IPPROTO_TCP)[0]
+ s = socket.socket(addrinfo[0], addrinfo[1], addrinfo[2])
+ s.settimeout(10.0)
+ s.connect(addrinfo[4])
+ return s
+
+class FacilitatorTest(unittest.TestCase):
+ def gimme_socket(self):
+ return gimme_socket(FACILITATOR_HOST, FACILITATOR_PORT)
+
+ def setUp(self):
+ self.process = subprocess.Popen(["./facilitator", "-d", "-p", str(FACILITATOR_PORT), "-r", "0.0.1.0:1", "-l", "/dev/null"])
+ time.sleep(0.1)
+
+ def tearDown(self):
+ self.process.terminate()
+
+ def test_timeout(self):
+ """Test that the socket will not accept slow writes indefinitely.
+ Successive sends should not reset the timeout counter."""
+ s = self.gimme_socket()
+ time.sleep(0.3)
+ s.send("w")
+ time.sleep(0.3)
+ s.send("w")
+ time.sleep(0.3)
+ s.send("w")
+ time.sleep(0.3)
+ s.send("w")
+ time.sleep(0.3)
+ self.assertRaises(socket.error, s.send, "w")
+
+ def test_readline_limit(self):
+ """Test that reads won't buffer indefinitely."""
+ s = self.gimme_socket()
+ buflen = 0
+ try:
+ while buflen + 1024 < 200000:
+ s.send("X" * 1024)
+ buflen += 1024
+ self.fail("should have raised a socket error")
+ except socket.error:
+ pass
+
+# def test_same_proxy(self):
+# """Test that the same proxy doesn't get the same client when asking
+# twice."""
+# self.fail()
+#
+# def test_num_clients(self):
+# """Test that the same proxy can pick up up to five different clients but
+# no more. Test that a proxy ceasing to handle a client allows the proxy
+# to handle another, different client."""
+# self.fail()
+#
+# def test_num_proxies(self):
+# """Test that a single client is handed out to five different proxies but
+# no more. Test that a proxy ceasing to handle a client reduces its count
+# so another proxy can handle it."""
+# self.fail()
+#
+# def test_proxy_timeout(self):
+# """Test that a proxy ceasing to connect for some time period causes that
+# proxy's clients to be unhandled by that proxy."""
+# self.fail()
+#
+# def test_localhost_only(self):
+# """Test that the facilitator doesn't listen on any external
+# addresses."""
+# self.fail()
+#
+# def test_hostname(self):
+# """Test that the facilitator rejects hostnames."""
+# self.fail()
+
+class ParseTransactionTest(unittest.TestCase):
+ def test_empty_string(self):
+ self.assertRaises(ValueError, fac.parse_transaction, "")
+
+ def test_correct(self):
+ self.assertEqual(fac.parse_transaction("COMMAND"), ("COMMAND", ()))
+ self.assertEqual(fac.parse_transaction("COMMAND X=\"\""), ("COMMAND", (("X", ""),)))
+ self.assertEqual(fac.parse_transaction("COMMAND X=\"ABC\""), ("COMMAND", (("X", "ABC"),)))
+ self.assertEqual(fac.parse_transaction("COMMAND X=\"\\A\\B\\C\""), ("COMMAND", (("X", "ABC"),)))
+ self.assertEqual(fac.parse_transaction("COMMAND X=\"\\\\\\\"\""), ("COMMAND", (("X", "\\\""),)))
+ self.assertEqual(fac.parse_transaction("COMMAND X=\"ABC\" Y=\"DEF\""), ("COMMAND", (("X", "ABC"), ("Y", "DEF"))))
+ self.assertEqual(fac.parse_transaction("COMMAND KEY-NAME=\"ABC\""), ("COMMAND", (("KEY-NAME", "ABC"),)))
+ self.assertEqual(fac.parse_transaction("COMMAND KEY_NAME=\"ABC\""), ("COMMAND", (("KEY_NAME", "ABC"),)))
+
+ def test_missing_command(self):
+ self.assertRaises(ValueError, fac.parse_transaction, "X=\"ABC\"")
+ self.assertRaises(ValueError, fac.parse_transaction, " X=\"ABC\"")
+
+ def test_missing_space(self):
+ self.assertRaises(ValueError, fac.parse_transaction, "COMMAND/X=\"ABC\"")
+ self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"ABC\"Y=\"DEF\"")
+
+ def test_bad_quotes(self):
+ self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"")
+ self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"ABC")
+ self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"ABC\" Y=\"ABC")
+ self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"ABC\\")
+
+ def test_truncated(self):
+ self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=")
+
+ def test_newline(self):
+ self.assertRaises(ValueError, fac.parse_transaction, "COMMAND X=\"ABC\" \nY=\"DEF\"")
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/facilitator/facilitator.cgi b/facilitator/facilitator.cgi
new file mode 100755
index 0000000..39566d3
--- /dev/null
+++ b/facilitator/facilitator.cgi
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+
+import cgi
+import os
+import socket
+import sys
+import urllib
+
+import fac
+
+FACILITATOR_ADDR = ("127.0.0.1", 9002)
+
+def exit_error(status):
+ print """\
+Status: %d\r
+\r""" % status
+ sys.exit()
+
+def fac_socket():
+ return socket.create_connection(FACILITATOR_ADDR, 1.0).makefile()
+
+def transact(f, command, *params):
+ transaction = fac.render_transaction(command, *params)
+ print >> f, transaction
+ f.flush()
+ line = f.readline()
+ if not (len(line) > 0 and line[-1] == '\n'):
+ raise ValueError("No newline at end of string returned by facilitator")
+ return fac.parse_transaction(line[:-1])
+
+def put_reg(client_addr, registrant_addr):
+ f = fac_socket()
+ try:
+ command, params = transact(f, "PUT", ("CLIENT", fac.format_addr(client_addr)), ("FROM", fac.format_addr(registrant_addr)))
+ finally:
+ f.close()
+ if command == "OK":
+ pass
+ else:
+ exit_error(500)
+
+def get_reg(proxy_addr):
+ f = fac_socket()
+ try:
+ command, params = transact(f, "GET", ("FROM", fac.format_addr(proxy_addr)))
+ finally:
+ f.close()
+ if command == "NONE":
+ return {
+ "client": ""
+ }
+ elif command == "OK":
+ client_spec = fac.param_first("CLIENT", params)
+ relay_spec = fac.param_first("RELAY", params)
+ if not client_spec or not relay_spec:
+ exit_error(500)
+ try:
+ # Check the syntax returned by the backend.
+ client = fac.parse_addr_spec(client_spec)
+ relay = fac.parse_addr_spec(relay_spec)
+ except ValueError:
+ exit_error(500)
+ return {
+ "client": fac.format_addr(client),
+ "relay": fac.format_addr(relay),
+ }
+ else:
+ exit_error(500)
+
+method = os.environ.get("REQUEST_METHOD")
+proxy_addr = (os.environ.get("REMOTE_ADDR"), None)
+
+if not method or not proxy_addr[0]:
+ exit_error(400)
+
+fs = cgi.FieldStorage()
+
+def do_get():
+ try:
+ reg = get_reg(proxy_addr) or ""
+ except:
+ exit_error(500)
+ # Allow XMLHttpRequest from any domain. http://www.w3.org/TR/cors/.
+ print """\
+Status: 200\r
+Content-Type: application/x-www-form-urlencoded\r
+Cache-Control: no-cache\r
+Access-Control-Allow-Origin: *\r
+\r"""
+ sys.stdout.write(urllib.urlencode(reg))
+
+def do_post():
+ client_specs = fs.getlist("client")
+ if len(client_specs) != 1:
+ exit_error(400)
+ client_spec = client_specs[0]
+ try:
+ client_addr = fac.parse_addr_spec(client_spec, defhost=proxy_addr[0])
+ except ValueError:
+ exit_error(400)
+ try:
+ put_reg(client_addr, proxy_addr)
+ except:
+ exit_error(500)
+ print """\
+Status: 200\r
+\r"""
+
+if method == "GET":
+ do_get()
+elif method == "POST":
+ do_post()
+else:
+ exit_error(405)
diff --git a/facilitator/init.d/facilitator b/facilitator/init.d/facilitator
new file mode 100755
index 0000000..5510d11
--- /dev/null
+++ b/facilitator/init.d/facilitator
@@ -0,0 +1,67 @@
+#!/bin/sh
+#
+# facilitator This shell script takes care of starting and stopping
+# the flash proxy facilitator.
+#
+# chkconfig: 2345 90 10
+# description: Flash proxy facilitator.
+# processname: facilitator
+# pidfile: /var/flashproxy/facilitator.pid
+
+# Installation instructions:
+# cp facilitator /etc/init.d/facilitator
+# chkconfig --add facilitator
+# service facilitator start
+
+# Source function library.
+. /etc/rc.d/init.d/functions
+
+# Replace this with the address of a Tor relay with a websocket pluggable
+# transport. You can use host:port syntax.
+RELAY=tor1.bamsoftware.com:9901
+
+BINDIR=/usr/local/bin
+VARDIR=/var/flashproxy
+PROG=$BINDIR/facilitator
+PIDFILE=$VARDIR/facilitator.pid
+USER=flashproxy
+
+# See how we were called.
+case "$1" in
+ start)
+ [ -x $PROG ] || exit 1
+ echo -n $"Starting flash proxy facilitator: "
+ cd $VARDIR && daemon --user $USER --pidfile $PIDFILE $PROG --pidfile $PIDFILE -r $RELAY
+ RETVAL=$?
+ echo
+ [ $RETVAL -eq 0 ] && touch /var/lock/subsys/facilitator
+ ;;
+ stop)
+ # Stop daemon.
+ echo -n $"Shutting down flash proxy facilitator: "
+ killproc -p $PIDFILE
+ RETVAL=$?
+ echo
+ if [ $RETVAL -eq 0 ]; then
+ rm -f /var/lock/subsys/facilitator
+ rm -f $PIDFILE
+ fi
+ ;;
+ status)
+ status -p $PIDFILE facilitator
+ RETVAL=$?
+ ;;
+ restart|reload)
+ $0 stop
+ $0 start
+ ;;
+ condrestart)
+ [ -f /var/lock/subsys/facilitator ] && restart || :
+ ;;
+ *)
+ echo $"Usage: $0 {start|stop|status|restart}"
+ RETVAL=3
+ ;;
+esac
+
+exit $RETVAL
diff --git a/init.d/facilitator b/init.d/facilitator
deleted file mode 100755
index 5510d11..0000000
--- a/init.d/facilitator
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/sh
-#
-# facilitator This shell script takes care of starting and stopping
-# the flash proxy facilitator.
-#
-# chkconfig: 2345 90 10
-# description: Flash proxy facilitator.
-# processname: facilitator
-# pidfile: /var/flashproxy/facilitator.pid
-
-# Installation instructions:
-# cp facilitator /etc/init.d/facilitator
-# chkconfig --add facilitator
-# service facilitator start
-
-# Source function library.
-. /etc/rc.d/init.d/functions
-
-# Replace this with the address of a Tor relay with a websocket pluggable
-# transport. You can use host:port syntax.
-RELAY=tor1.bamsoftware.com:9901
-
-BINDIR=/usr/local/bin
-VARDIR=/var/flashproxy
-PROG=$BINDIR/facilitator
-PIDFILE=$VARDIR/facilitator.pid
-USER=flashproxy
-
-# See how we were called.
-case "$1" in
- start)
- [ -x $PROG ] || exit 1
- echo -n $"Starting flash proxy facilitator: "
- cd $VARDIR && daemon --user $USER --pidfile $PIDFILE $PROG --pidfile $PIDFILE -r $RELAY
- RETVAL=$?
- echo
- [ $RETVAL -eq 0 ] && touch /var/lock/subsys/facilitator
- ;;
- stop)
- # Stop daemon.
- echo -n $"Shutting down flash proxy facilitator: "
- killproc -p $PIDFILE
- RETVAL=$?
- echo
- if [ $RETVAL -eq 0 ]; then
- rm -f /var/lock/subsys/facilitator
- rm -f $PIDFILE
- fi
- ;;
- status)
- status -p $PIDFILE facilitator
- RETVAL=$?
- ;;
- restart|reload)
- $0 stop
- $0 start
- ;;
- condrestart)
- [ -f /var/lock/subsys/facilitator ] && restart || :
- ;;
- *)
- echo $"Usage: $0 {start|stop|status|restart}"
- RETVAL=3
- ;;
-esac
-
-exit $RETVAL

[flashproxy/master] Make instructions for running a facilitator on Debian.
by dcf@torproject.org 31 Aug '12
commit 14fa361ab133009f9f52c3ba89f4cb429d496388
Author: David Fifield <david(a)bamsoftware.com>
Date: Thu Aug 30 14:19:14 2012 -0700
Make instructions for running a facilitator on Debian.
---
README | 57 ++++-------------
doc/facilitator-howto.txt | 147 +++++++++++++++++++++++++++++++++++++++++++++
facilitator/README | 3 +
3 files changed, 164 insertions(+), 43 deletions(-)
diff --git a/README b/README
index 0db5973..f5654cf 100644
--- a/README
+++ b/README
@@ -100,12 +100,20 @@ re-register:
== How to run a relay
The relay runs with a WebSocket proxy in external proxy mode.
- git clone git://github.com/kanaka/websockify.git
- git apply ~/flashproxy/patches/websockify-binary-default.patch
-This command proxies WebSocket on port 9901 to a local ORPort on 9001.
- ./websockify :9901 127.0.0.1:9001
-The init.d script described under "Installing so as to restart at boot"
-does this automatically.
+ $ git clone git://github.com/kanaka/websockify.git
+Install websockify as a daemon that will restart at boot. These
+instructions are for CentOS 5.
+ $ cd websockify
+ $ git apply ~/flashproxy/patches/websockify-binary-default.patch
+ $ su
+ # cp websockify websocket.py /usr/local/bin
+ # useradd -d /dev/null -s /bin/false websockify
+ # mkdir /var/websockify
+ # chown -R websockify.websockify /var/websockify
+ # cp ~/flashproxy/init.d/websockify /etc/init.d/websockify
+ # chkconfig --add websockify
+ # service websockify start
Add this line to torrc to make the relay use the external proxy:
ServerTransportPlugin websocket proxy 127.0.0.1:9901
@@ -115,40 +123,3 @@ Add this line to torrc to make the relay use the external proxy:
Paste in this HTML where you want the badge to appear:
<iframe src="//crypto.stanford.edu/flashproxy/embed.html" width="80" height="15" frameborder="0" scrolling="no"></iframe>
-
-
-== For developers
-
-See design.txt for some more technical information.
-
-=== Running a facilitator
-
-On the facilitator, run
- $ facilitator -r <relay-ip>
-You can use "tor1.bamsoftware.com" for <relay-ip>. The facilitator runs
-on port 9002 by default.
-
-=== Installing so as to restart at boot
-
-The directory init.d contains scripts to start the facilitator on
-CentOS. "make install" copies files to /usr/local/bin.
-
- # make install
- # useradd -d /dev/null -s /bin/false flashproxy
- # mkdir /var/flashproxy
- # chown -R flashproxy.flashproxy /var/flashproxy/
-
- # cp init.d/facilitator /etc/init.d/facilitator
- -- Edit /etc/init.d/facilitator to set $RELAY.
- # chkconfig --add facilitator
- # service facilitator start
-
- # cd websockify
- # git apply ~/flashproxy/patches/websockify-binary-default.patch
- # cp websockify websocket.py /usr/local/bin
- # useradd -d /dev/null -s /bin/false websockify
- # mkdir /var/websockify
- # chown -R websockify.websockify /var/websockify
- # cp ~/flashproxy/init.d/websockify /etc/init.d/websockify
- # chkconfig --add websockify
- # service websockify start
diff --git a/doc/facilitator-howto.txt b/doc/facilitator-howto.txt
new file mode 100644
index 0000000..c46902b
--- /dev/null
+++ b/doc/facilitator-howto.txt
@@ -0,0 +1,147 @@
+This document describes how to run a flash proxy facilitator on Debian 6.
+We will use the domain name tor-facilitator.example.com.
+
+== Overview
+
+The facilitator consists of three parts: an HTTP server, a CGI program,
+and a backend. The HTTP server is responsible for speaking TLS and
+invoking the CGI program. The CGI program receives client registrations
+and proxy requests for clients, parses them, and forwards them to the
+backend. The backend is stateful and is responsible for assigning
+clients to proxies and remembering client registrations.
+
+We use Apache 2 as the HTTP server. The CGI script is facilitator.cgi.
+The backend is facilitator. fac.py is a Python module containing code
+common to facilitator.cgi and facilitator.
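The division of labor above can be sketched with a toy in-memory model of the backend (illustrative only; the class name, addresses, and reply strings are invented, and the real backend is the facilitator program described below): PUT stores a client registration once, GET hands registrations out first-in first-out along with the configured relay.

```python
class MiniBackend:
    """Toy model of the stateful backend; not the real facilitator."""
    def __init__(self, relay):
        self.relay = relay   # relay spec handed to every proxy
        self.regs = []       # FIFO queue of registered client specs

    def put(self, client_spec):
        # Duplicate registrations are ignored, so re-registering is harmless.
        if client_spec not in self.regs:
            self.regs.append(client_spec)
        return "OK"

    def get(self):
        # Hand out the oldest registration, or NONE when empty.
        if not self.regs:
            return "NONE"
        client = self.regs.pop(0)
        return 'OK CLIENT="%s" RELAY="%s"' % (client, self.relay)

backend = MiniBackend("203.0.113.1:9001")
backend.put("1.2.3.4:9000")      # a client registers (via the CGI's POST)
print(backend.get())             # a proxy polls (via the CGI's GET)
print(backend.get())
```

The real backend adds timeouts, locking, and logging around this core, but the PUT/GET exchange is the whole protocol between the CGI program and the backend.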
+
+== Basic and security setup
+
+Install some essential packages and configure a firewall.
+
+ # cat >/etc/apt/apt.conf.d/90suggests<<EOF
+APT::Install-Recommends "0";
+APT::Install-Suggests "0";
+EOF
+ # apt-get remove nfs-common portmap
+ # apt-get update
+ # apt-get upgrade
+ # apt-get install shorewall shorewall6
+
+Away from the facilitator, generate an SSH key for authentication:
+
+ $ ssh-keygen -f ~/.ssh/tor-facilitator
+ $ ssh-copy-id -i ~/.ssh/tor-facilitator.pub root(a)tor-facilitator.example.com
+
+Then log in and edit /etc/ssh/sshd_config to disable password
+authentication:
+
+ PasswordAuthentication no
+
+Configure the firewall to allow only SSH and HTTPS.
+
+ # cd /etc/shorewall
+ # cp /usr/share/doc/shorewall/default-config/{interfaces,policy,rules,zones} .
+ Edit /etc/shorewall/interfaces:
+net eth0 - tcpflags,norfc1918,routefilter,nosmurfs,logmartians
+ Edit /etc/shorewall/policy:
+$FW all ACCEPT
+net $FW DROP
+all all DROP
+ Edit /etc/shorewall/rules:
+SECTION NEW
+SSH/ACCEPT all $FW
+HTTPS/ACCEPT all $FW
+ Edit /etc/shorewall/zones:
+fw firewall
+net ipv4
+
+ # cd /etc/shorewall6
+ # cp /usr/share/doc/shorewall6/default-config/{interfaces,policy,rules,zones} .
+ Edit /etc/shorewall6/interfaces:
+net eth0 - tcpflags
+ Edit /etc/shorewall6/policy:
+$FW all ACCEPT
+net $FW DROP
+all all DROP
+ Edit /etc/shorewall6/rules:
+SECTION NEW
+SSH/ACCEPT all $FW
+HTTPS/ACCEPT all $FW
+ Edit /etc/shorewall6/zones:
+fw firewall
+net ipv6
+
+Edit /etc/default/shorewall and /etc/default/shorewall6 and set
+
+ startup=1
+
+Restart servers.
+
+ # /etc/init.d/ssh restart
+ # /etc/init.d/shorewall start
+ # /etc/init.d/shorewall6 start
+
+== Facilitator program installation
+
+ # apt-get install git
+ # git clone https://git.torproject.org/flashproxy.git
+ # cd flashproxy/facilitator
+ # make install
+
+This installs facilitator.cgi, facilitator, and fac.py to
+/usr/local/bin. It also installs a System V init file to
+/etc/init.d/facilitator.
+
+ # update-rc.d facilitator defaults
+ # /etc/init.d/facilitator start
+
+== HTTP server setup
+
+Apache is the web server that runs the CGI program.
+
+ # apt-get install apache2
+ # a2enmod ssl
+
+Edit /etc/apache2/ports.conf and comment out the port 80 configuration.
+
+ # NameVirtualHost *:80
+ # Listen 80
+
+Create a new file /etc/apache2/sites-available/tor-facilitator with
+these contents:
+ <VirtualHost *:443>
+ ServerName tor-facilitator.example.com
+ DocumentRoot /dev/null
+ ScriptAliasMatch ^(.*) /usr/local/bin/facilitator.cgi$1
+ ErrorLog ${APACHE_LOG_DIR}/error.log
+ LogLevel warn
+ SSLEngine on
+ SSLCertificateFile /etc/apache2/tor-facilitator.pem
+ </VirtualHost>
+
+We purposely do not include a CustomLog directive so as not to log
+client and proxy IP addresses.
+
+=== HTTPS setup
+
+The HTTP server should serve only over HTTPS and not unencrypted HTTP.
+You will need a certificate and private key from a certificate
+authority. An article on making a certificate signing request and
+getting it signed is here:
+ http://www.debian-administration.org/articles/284
+The instructions below assume you have an offline private key
+in tor-facilitator.key and a certificate in tor-facilitator.crt.
+
+Remove any passphrase from the private key.
+
+ $ openssl rsa -in tor-facilitator.key -out tor-facilitator.pem
+
+Make a file containing both the private key and a certificate.
+
+ $ cat tor-facilitator.crt >> tor-facilitator.pem
+ $ chmod 400 tor-facilitator.pem
+
+Copy the new tor-facilitator.pem to the facilitator server as
+/etc/apache2/tor-facilitator.pem.
+
+ # /etc/init.d/apache2 restart
diff --git a/facilitator/README b/facilitator/README
new file mode 100644
index 0000000..d3c5593
--- /dev/null
+++ b/facilitator/README
@@ -0,0 +1,3 @@
+This directory contains files needed to run a flash proxy facilitator.
+Normal users don't need any of these files. For instructions on setting
+up a facilitator, see doc/facilitator-howto.txt.
commit 8a6084dff33fcdb9ae9cf610112fc047cf4f657c
Author: David Fifield <david(a)bamsoftware.com>
Date: Fri Aug 31 04:38:11 2012 -0700
Increase version to 0.4.
---
Makefile | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/Makefile b/Makefile
index 78de922..f79918f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
PREFIX = /usr/local
BINDIR = $(PREFIX)/bin
-VERSION = 0.3
+VERSION = 0.4
CLIENT_DIST_FILES = flashproxy-client flashproxy-reg-http README LICENSE torrc
commit 82736119d8bad82ecd3ee5db4a006f28e8647306
Author: David Fifield <david(a)bamsoftware.com>
Date: Thu Aug 30 16:15:53 2012 -0700
Use HTTPS in flashproxy.js.
---
flashproxy.js | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/flashproxy.js b/flashproxy.js
index 6d70fe7..b0bdf21 100644
--- a/flashproxy.js
+++ b/flashproxy.js
@@ -51,7 +51,7 @@
var DEFAULT_FACILITATOR_ADDR = {
host: "tor-facilitator.bamsoftware.com",
- port: 9002
+ port: 443
};
var DEFAULT_MAX_NUM_PROXY_PAIRS = 10;
@@ -372,7 +372,7 @@ function FlashProxy() {
return;
}
- fac_url = "http://" + encodeURIComponent(this.fac_addr.host)
+ fac_url = "https://" + encodeURIComponent(this.fac_addr.host)
+ ":" + encodeURIComponent(this.fac_addr.port) + "/";
xhr = new XMLHttpRequest();
try {
commit cde2e638c1aa03826cf662cc1ccd57e3ca28e24b
Author: David Fifield <david(a)bamsoftware.com>
Date: Fri Jul 6 18:10:10 2012 -0700
Add skeleton facilitator.cgi.
---
facilitator.cgi | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 50 insertions(+), 0 deletions(-)
diff --git a/facilitator.cgi b/facilitator.cgi
new file mode 100755
index 0000000..061b847
--- /dev/null
+++ b/facilitator.cgi
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+import cgi
+import sys
+import os
+import urllib
+
+FACILITATOR_ADDR = ("127.0.0.1", 9002)
+
+def exit_error(status):
+ print """\
+Status: %d\r
+\r""" % status
+ sys.exit()
+
+def put_reg(client_addr, registrant_addr):
+ # Pretending to register client_addr as reported by registrant_addr.
+ pass
+
+def get_reg(proxy_addr):
+ # Pretending to ask for a client for the proxy at proxy_addr.
+ return {
+ "client": "2.2.2.2:2222",
+ "relay": "199.1.1.1:9001",
+ }
+
+method = os.environ.get("REQUEST_METHOD")
+proxy_addr = (os.environ.get("REMOTE_ADDR"), None)
+
+if not method or not proxy_addr[0]:
+ exit_error(400)
+
+def do_get():
+ try:
+ reg = get_reg(proxy_addr) or ""
+ except:
+ exit_error(500)
+ # Allow XMLHttpRequest from any domain. http://www.w3.org/TR/cors/.
+ print """\
+Status: 200\r
+Content-Type: application/x-www-form-urlencoded\r
+Cache-Control: no-cache\r
+Access-Control-Allow-Origin: *\r
+\r"""
+ print urllib.urlencode(reg)
+
+if method == "GET":
+ do_get()
+else:
+ exit_error(405)

[research-web/master] Add five more reports (co-)authored by Roger.
by karsten@torproject.org 31 Aug '12
commit 4aa30817da37b14bbc8379ddfdf2624d6fbf581a
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Aug 31 12:34:58 2012 +0200
Add five more reports (co-)authored by Roger.
---
techreports.html | 37 +++++++++++++++++++++++++++++++++++++
1 files changed, 37 insertions(+), 0 deletions(-)
diff --git a/techreports.html b/techreports.html
index bbe6942..b249c33 100644
--- a/techreports.html
+++ b/techreports.html
@@ -148,9 +148,22 @@ Tor Tech Report 2010-11-001.
November 30, 2010.
<a href="techreports/countingusers-2010-11-30.pdf">PDF</a>.</p>
+<p>Roger Dingledine.
+<i>Adaptive throttling of Tor clients by entry guards.</i>
+Tor Tech Report 2010-09-001.
+September 19, 2010.
+<a href="techreports/adaptive-throttling-tor-clients-entry-guards-2010-09-19.pdf">PDF</a>.</p>
+
<h3>2009</h3>
<br>
+<p>Roger Dingledine and Steven J. Murdoch.
+<i>Performance Improvements on Tor or, Why Tor is slow and what we're
+going to do about it.</i>
+Tor Tech Report 2009-11-001.
+November 9, 2009.
+<a href="techreports/performance-2009-11-09.pdf">PDF</a>.</p>
+
<p>Karsten Loesing.
<i>Comparison of GeoIP Databases for Tor.</i>
Tor Tech Report 2009-10-001.
@@ -202,6 +215,30 @@ Tor Tech Report 2009-04-001.
April 11, 2009.
<a href="techreports/flagrequirements-2009-04-11.pdf">PDF</a>.</p>
+<p>Roger Dingledine.
+<i>Overhead from directory info: past, present, future.</i>
+Tor Tech Report 2009-02-001.
+February 16, 2009.
+<a href="techreports/overhead-directory-info-2009-02-16.pdf">PDF</a>.</p>
+
+<h3>2006</h3>
+<br>
+
+<p>Roger Dingledine and Nick Mathewson.
+<i>Design of a blocking-resistant anonymity system.</i>
+Tor Tech Report 2006-11-001.
+November 2006.
+<a href="techreports/blocking-2006-11.pdf">PDF</a>.</p>
+
+<h3>2005</h3>
+<br>
+
+<p>Roger Dingledine, Nick Mathewson, and Paul Syverson.
+<i>Challenges in deploying low-latency anonymity.</i>
+Tor Tech Report 2005-02-001.
+February 2005.
+<a href="techreports/challenges-2005-02.pdf">PDF</a>.</p>
+
</div>
</div>
</body>

[tech-reports/master] Add adaptive-throttling-tor-clients-entry-guards blog post.
by karsten@torproject.org 31 Aug '12
commit b1f18355f738b64a5ac56e171f10694365914114
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Aug 30 10:22:08 2012 +0200
Add adaptive-throttling-tor-clients-entry-guards blog post.
---
.../.gitignore | 3 +
.../1750-torperf-experiment-ecdf2.png | Bin 0 -> 31301 bytes
.../1750-torperf-experiment3.png | Bin 0 -> 62659 bytes
...daptive-throttling-tor-clients-entry-guards.bib | 25 ++
...daptive-throttling-tor-clients-entry-guards.tex | 231 ++++++++++++++++++++
.../tortechrep.cls | 1 +
6 files changed, 260 insertions(+), 0 deletions(-)
diff --git a/2010/adaptive-throttling-tor-clients-entry-guards/.gitignore b/2010/adaptive-throttling-tor-clients-entry-guards/.gitignore
new file mode 100644
index 0000000..fe7c582
--- /dev/null
+++ b/2010/adaptive-throttling-tor-clients-entry-guards/.gitignore
@@ -0,0 +1,3 @@
+adaptive-throttling-tor-clients-entry-guards.pdf
+adaptive-throttling-tor-clients-entry-guards-2010-09-19.pdf
+
diff --git a/2010/adaptive-throttling-tor-clients-entry-guards/1750-torperf-experiment-ecdf2.png b/2010/adaptive-throttling-tor-clients-entry-guards/1750-torperf-experiment-ecdf2.png
new file mode 100644
index 0000000..00f0218
Binary files /dev/null and b/2010/adaptive-throttling-tor-clients-entry-guards/1750-torperf-experiment-ecdf2.png differ
diff --git a/2010/adaptive-throttling-tor-clients-entry-guards/1750-torperf-experiment3.png b/2010/adaptive-throttling-tor-clients-entry-guards/1750-torperf-experiment3.png
new file mode 100644
index 0000000..3ac6fae
Binary files /dev/null and b/2010/adaptive-throttling-tor-clients-entry-guards/1750-torperf-experiment3.png differ
diff --git a/2010/adaptive-throttling-tor-clients-entry-guards/adaptive-throttling-tor-clients-entry-guards.bib b/2010/adaptive-throttling-tor-clients-entry-guards/adaptive-throttling-tor-clients-entry-guards.bib
new file mode 100644
index 0000000..1188107
--- /dev/null
+++ b/2010/adaptive-throttling-tor-clients-entry-guards/adaptive-throttling-tor-clients-entry-guards.bib
@@ -0,0 +1,25 @@
+@inproceedings{ccs10-scheduling,
+ title = {An Improved Algorithm for {T}or Circuit Scheduling},
+ author = {Can Tang and Ian Goldberg},
+ booktitle = {Proceedings of the 2010 ACM Conference on Computer and
+ Communications Security, CCS 2010, Chicago, Illinois, USA, October
+ 4--8, 2010},
+ year = {2010},
+ editor = {Angelos D. Keromytis and Vitaly Shmatikov},
+ publisher = {ACM},
+}
+
+@inproceedings{timing-esorics10,
+ author = {Sambuddho Chakravarty and Angelos Stavrou and Angelos D.
+ Keromytis},
+ title = {Traffic analysis against low-latency anonymity networks using
+ available bandwidth estimation},
+ booktitle = {Proceedings of the 15th European conference on Research in
+ computer security},
+ series = {ESORICS'10},
+ year = {2010},
+ pages = {249--267},
+ publisher = {Springer-Verlag},
+ address = {Berlin, Heidelberg},
+}
+
diff --git a/2010/adaptive-throttling-tor-clients-entry-guards/adaptive-throttling-tor-clients-entry-guards.tex b/2010/adaptive-throttling-tor-clients-entry-guards/adaptive-throttling-tor-clients-entry-guards.tex
new file mode 100644
index 0000000..a66b788
--- /dev/null
+++ b/2010/adaptive-throttling-tor-clients-entry-guards/adaptive-throttling-tor-clients-entry-guards.tex
@@ -0,0 +1,231 @@
+\documentclass{tortechrep}
+\usepackage{graphicx}
+\begin{document}
+
+\author{Roger Dingledine}
+\contact{arma(a)torproject.org}
+\reportid{2010-09-001}
+\date{September 19, 2010}
+\title{Adaptive throttling of Tor clients by entry guards}
+\maketitle
+
+\begin{abstract}
+Looking for a paper topic (or a thesis topic)? Here's a Tor research area
+that needs more attention.
+The short version is: if we prevent the really loud users from using too
+much of the Tor network, how much can it help?
+
+We've instrumented Tor's entry relays so they can rate-limit connections
+from users, and we've instrumented the directory authorities so they can
+change the rate-limiting parameters globally across the network.
+Which parameter values improve performance for the Tor network as a whole?
+How should relays adapt their rate-limiting parameters based on their
+capacity and based on the network load they see, and what rate-limiting
+algorithms will work best?
+
+We'd love to work with you to help answer these questions.
+\end{abstract}
+
+One of the reasons why Tor is slow is that some people use it for
+file-sharing or other high-volume transfers.
+That means if you want to get your instant message cell through, it
+sometimes needs to wait in line behind a pile of other cells---leading to
+high latency and, maybe even worse, highly variable latency.
+
+One way to improve the situation is Can and Goldberg's ``An Improved
+Algorithm for Tor Circuit Scheduling''~\cite{ccs10-scheduling}, which was
+integrated into Tor as of 0.2.2.11-alpha.
+The idea is to track how many cells the relay has handled for each circuit
+lately, and give priority to cells from quieter circuits.
+
+But while that puts some cells in front of others, it can only work so
+many miracles: if many cells have been placed in front of your cell it
+still has to wait, and the more overall load there is in the network, the
+more often that will happen.
+
+Which leads to the research problem: if we work to keep the really loud
+flows off the network in the first place, how much can it help?
+
+Tor 0.2.2.15-alpha lets you set the PerConnBWRate and PerConnBWBurst
+config options in your relay, to use token buckets%
+\footnote{\url{https://en.wikipedia.org/wiki/Token\_bucket}} to rate limit
+connections from non-relays.
+Tor 0.2.2.16-alpha added the capability for the directory authorities to
+broadcast network-wide token bucket parameters, so we can change how much
+throttling there is and then observe the results on the network.
+
+So the first question is to model how this should work to improve
+performance at a single entry guard.
+Say we do periodic performance tests fetching files of size 50KB, 1MB, and
+5MB using that relay as our first hop, and say the relay sets
+PerConnBWRate to 5KB/s and PerConnBWBurst to 2MB.
+That is, bursts of up to 2 megabytes on the connection are unthrottled,
+but after that it's squeezed to a long-term average of 5 kilobytes per
+second, until the flow lets up at which point the allowed burst slowly
+builds back up to 2MB.
+We would expect the 5MB tests to show horrible performance, since they'll
+need at least $3000/5=600$ seconds to fetch the last 3MB of the file.
+We would expect the 50KB and 1MB tests to look \emph{better}, though: if
+we're squeezing out the really big flows, there's more space for what's
+left.
+
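The arithmetic above can be checked with a minimal numeric model of the token bucket (a sketch only: the function name and the assumed unthrottled link speed are illustrative, not Tor's implementation):

```python
# Hypothetical model of PerConnBWRate/PerConnBWBurst throttling: the
# first `burst` bytes drain at the unthrottled link speed, and the
# remainder is squeezed to the long-term token rate. Not Tor's code.

def seconds_to_fetch(size_bytes, rate=5_000, burst=2_000_000,
                     link_rate=500_000):
    """Rough lower bound on fetch time through a throttled guard."""
    fast = min(size_bytes, burst)   # covered by the initial burst
    slow = size_bytes - fast        # drains at `rate` bytes/s
    return fast / link_rate + slow / rate

# The last 3 MB of a 5 MB fetch drain at 5 KB/s: at least 600 s extra.
print(seconds_to_fetch(5_000_000) - seconds_to_fetch(2_000_000))  # 600.0
```

A 50 KB fetch, by contrast, stays entirely inside the burst and finishes in a fraction of a second, which is why the small-file tests are expected to improve rather than degrade.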
+We actually performed this experiment, using Sebastian's relay fluxe3.
+You can see his performance graphs over time in
+Figure~\ref{fig:torperf-experiment3}.
+The black dots are individual fetches, the y axis is how many seconds it
+took to fetch the file, and the x axis is time (the green-shaded areas are
+when the feature is turned on).
+Don't pay too much attention to the blue smoothing line; it's probably not
+matching the actual distribution well.
+
+\begin{figure}
+\centering
+\includegraphics[width=0.8\textwidth]{1750-torperf-experiment3.png}
+\caption{Influence of PerConnBW* settings on user-perceived Tor
+performance over time}
+\label{fig:torperf-experiment3}
+\end{figure}
+
+Figure~\ref{fig:torperf-experiment-ecdf2} has the cumulative distribution
+functions for the same data.
+The performance gets significantly better both for the 50KB and the 1MB
+downloads, and as expected gets significantly worse for the 5MB downloads.
+
+\begin{figure}
+\centering
+\includegraphics[width=0.8\textwidth]{1750-torperf-experiment-ecdf2.png}
+\caption{Empirical cumulative distribution functions of the influence of
+PerConnBW* settings on user-perceived Tor performance}
+\label{fig:torperf-experiment-ecdf2}
+\end{figure}
+
+% Here's the raw data: 50kb, 1mb, 5mb. See the Torperf howto for help interpreting it.
+%% As of August 30, the links http://siv.sunet.se/sebastian/[50kb.data,
+%% 1mb.data,5mb.data] don't work anymore.
+
+So far so good; we've done the proof of concept, and now we need a
+research group to step in, make it rigorous, and help tackle the real
+research questions.
+
+The next question is: how well does this trick work under various
+conditions?
+It would seem that if there's plenty of spare bandwidth on the relay, it
+should have little effect.
+If we choose parameters that are too lenient, it would also have little
+effect.
+But choosing parameters that are too low will hurt normal web browsing
+users too.
+Are there rate and burst values that would cleanly separate web browsers
+from bulk downloaders?
+Will certain relays (that is, relays with certain characteristics) provide
+clearer performance improvements than others?
+
+How often is it the case, for various relay capacities and various user
+loads, that at least one connection is being throttled?
+The CDFs appear to show improved performance spread out pretty evenly
+relative to download time.
+What statistics should we make relays track over time so we can get a
+better intuition about the load they're really seeing?
+
+Now is where it gets tricky.
+If we're only thinking about one relay turning on this feature in a
+vacuum, then if that relay has enough capacity to handle all its flows, it
+should---no sense slowing down \emph{anybody} if you can handle all the
+traffic.
+But instead think of the entire Tor network as a system: your Tor requests
+might also slow down because a loud Tor flow entered at some other relay
+and is colliding with yours somewhere along the path.
+So there's a reason to squeeze incoming connections even if you have
+enough capacity to handle them: to reduce the effects of bottlenecks
+elsewhere in the system.
+If all relays squeeze client connections, what changes in the performance
+test results do we expect to see for various rate limiting parameter
+values?
+
+We have the capability of doing network-wide experiments to validate your
+theory, by putting the rate and burst in the hourly networkstatus
+consensus and letting relays pick it up from there.
+That is, you can modify the global network-wide parameters and then
+observe the effects in the network.
+Note that the experiment will be complicated by the fact that only relays
+running a sufficiently recent version of Tor will honor the parameters;
+that fraction should increase significantly when Tor 0.2.2.x becomes the
+new stable release (sometime in late 2010).
+
+And finally, once you have a good intuition about how throttling flows at
+point X affects flow performance at point Y, we get to the really hard
+research questions.
+It's probably the case that no fixed network-wide rate limiting parameters
+are going to provide optimal behavior: the parameters ought to be a
+function of the load patterns on the network and of the capacity of the
+given relay.
+Should relays track the flows they've seen recently and adapt their
+throttling parameters over time?
+Is there some flow distribution that relays can learn the parameters of,
+such that we e.g.\ throttle the loudest 10\% of flows?
+Can we approximate optimal behavior when relays base their parameters on
+only the local behavior they see, or do we need a more global view of the
+system to choose good parameters?
+And what does ``optimal'' mean here anyway?
+Or said another way, how much flexibility do we have to define optimal, or
+do the facts here railroad us into preferring certain definitions?
+
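As a toy illustration of the ``throttle the loudest 10\% of flows'' idea, a relay could place its rate cap at a quantile of the per-connection rates it has recently observed. Everything here (function name, sample data) is hypothetical, not a recommendation:

```python
# Hypothetical sketch: choose a throttling rate so that roughly the
# loudest fraction of recently observed flows fall above it.

def rate_for_quantile(flow_rates, quantile=0.90):
    """Rate (bytes/s) at `quantile` of observed per-connection rates;
    flows faster than this would be throttled."""
    ranked = sorted(flow_rates)
    idx = int(quantile * (len(ranked) - 1))
    return ranked[idx]

# Nine web-browsing-like flows and one bulk downloader (bytes/s):
observed = [2_000, 3_000, 4_000, 5_000, 6_000,
            7_000, 8_000, 9_000, 10_000, 900_000]
print(rate_for_quantile(observed))  # 10000: only the bulk flow exceeds it
```

Whether any fixed quantile cleanly separates web browsers from bulk downloaders, and whether a purely local view suffices, are exactly the open questions posed above.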
+What are the anonymity implications of letting the behavior of user A
+influence the performance of user B, in the local-view case or the
+global-view case?
+We could imagine active attacks that aim to influence the parameters; can
+those impact anonymity?
+
+As a nice side effect, this feature may provide a defense against
+Sambuddho's bandwidth-based link congestion attack~\cite{timing-esorics10},
+which relies on a sustained high-volume flow---if the attack ever gets
+precise enough that we can try out our defense and compare.
+
+Here are some other constraints to keep in mind:
+
+\begin{itemize}
+\item We need the Burst to be big enough to handle directory fetches (up
+to a megabyte or two), or things will get really ugly because clients will
+get slowed down while bootstrapping.
+
+\item Mike's bandwidth authority%
+\footnote{\url{https://blog.torproject.org/blog/torflow-node-capacity-integrity-and-reliability-measurements-hotpets}}
+measurement scripts%
+\footnote{\url{https://gitweb.torproject.org/torflow.git/blob/HEAD:/NetworkScanners/BwAuthority/README.BwAuthorities}}
+send traffic over relays to discover their actual capacity relative to
+their advertised capacity.
+The larger the claimed capacity, the more they send---up to several
+megabytes for the fast relays.
+If relays throttle these bandwidth tests, the directory authorities will
+assign less traffic to them, making them appear to provide better
+performance when in fact they're just being less useful to the network.
+This is an example of a case where it would be easy to misinterpret your
+results if you don't understand the rest of the Tor system.
+A short-term workaround would be to turn the bandwidth authority Tors into
+relays so they don't get throttled.
+
+\item You'll want to do experiments on an established relay with the Guard
+flag, or it probably won't see many client connections except for clients
+fetching directory updates.
+Note that the longer a relay has had the Guard flag, the more users it
+will have attracted; but after a month this effect falls off.
+
+\item Thinking from an economic perspective, it may turn out that the
+performance for a given user doesn't actually get better if we throttle
+the high-volume users, yet we've still improved things.
+That is, if the available capacity of the Tor network improves and thus it
+is supporting more users, we've made a better Tor network even if we
+didn't make it faster for individual users.
+Fortunately, this feedback effect shouldn't happen for short-term
+experiments; but it's something to keep in mind for the long term.
+
+\item It sure would be nice if whatever rate limiting algorithm you
+recommend is efficient to calculate and update over time.
+Some relays are seeing tens of thousands of users, and can't afford much
+processing time on each.
+\end{itemize}
+
+\bibliography{adaptive-throttling-tor-clients-entry-guards}
+
+\end{document}
+
diff --git a/2010/adaptive-throttling-tor-clients-entry-guards/tortechrep.cls b/2010/adaptive-throttling-tor-clients-entry-guards/tortechrep.cls
new file mode 120000
index 0000000..4c24db2
--- /dev/null
+++ b/2010/adaptive-throttling-tor-clients-entry-guards/tortechrep.cls
@@ -0,0 +1 @@
+../../tortechrep.cls
\ No newline at end of file

[tech-reports/master] Make blocking report conform with tortechrep style.
by karsten@torproject.org 31 Aug '12
commit 4b28ccd3a96f5eacc3a7e7d893960d711fc3ccbd
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Aug 30 11:38:04 2012 +0200
Make blocking report conform with tortechrep style.
---
2006/blocking/blocking.tex | 39 +++++++++++++++++----------------------
2006/blocking/tortechrep.cls | 1 +
2006/blocking/usenixsubmit.cls | 7 -------
3 files changed, 18 insertions(+), 29 deletions(-)
diff --git a/2006/blocking/blocking.tex b/2006/blocking/blocking.tex
index 3d46ef1..acbf90c 100644
--- a/2006/blocking/blocking.tex
+++ b/2006/blocking/blocking.tex
@@ -1,5 +1,6 @@
+\documentclass{tortechrep}
%\documentclass{llncs}
-\documentclass{usenixsubmit}
+%-\documentclass{usenixsubmit}
%\documentclass[twocolumn]{article}
%usepackage{usenix}
@@ -7,12 +8,6 @@
\usepackage{amsmath}
\usepackage{epsfig}
-\setlength{\textwidth}{6.0in}
-\setlength{\textheight}{8.5in}
-\setlength{\topmargin}{.5cm}
-\setlength{\oddsidemargin}{1cm}
-\setlength{\evensidemargin}{1cm}
-
\newenvironment{tightlist}{\begin{list}{$\bullet$}{
\setlength{\itemsep}{0mm}
\setlength{\parsep}{0mm}
@@ -24,16 +19,16 @@
\newcommand{\workingnote}[1]{} % The version that hides the note.
%\newcommand{\workingnote}[1]{(**#1)} % makes the note visible.
-\date{}
-\title{Design of a blocking-resistant anonymity system\\Tor Project technical report, Nov 2006}
+\date{November 2006}
+\title{Design of a blocking-resistant anonymity system}
%\author{Roger Dingledine\inst{1} \and Nick Mathewson\inst{1}}
-\author{Roger Dingledine \\ The Tor Project \\ arma(a)torproject.org \and
-Nick Mathewson \\ The Tor Project \\ nickm(a)torproject.org}
+\author{Roger Dingledine and Nick Mathewson}
+\contact{arma@torproject.org,nickm@torproject.org}
+\reportid{2006-11-001}
\begin{document}
\maketitle
-\pagestyle{plain}
\begin{abstract}
@@ -140,7 +135,7 @@ adversaries will be in different stages of the arms race at each location,
so an address blocked in one locale can still be useful in others.
We focus on an attacker with somewhat complex goals:
-\begin{tightlist}
+\begin{itemize}
\item The attacker would like to restrict the flow of certain kinds of
information, particularly when this information is seen as embarrassing to
those in power (such as information about rights violations or corruption),
@@ -185,12 +180,12 @@ We focus on an attacker with somewhat complex goals:
(like the bulk of a newspaper's reporting) in order to censor other content
distributed through the same channels (like that newspaper's coverage of
the censored country).
-\end{tightlist}
+\end{itemize}
We assume there are three main technical network attacks in use by censors
currently~\cite{clayton:pet2006}:
-\begin{tightlist}
+\begin{itemize}
\item Block a destination or type of traffic by automatically searching for
certain strings or patterns in TCP packets. Offending packets can be
dropped, or can trigger a response like closing the
@@ -199,7 +194,7 @@ currently~\cite{clayton:pet2006}:
firewall or other routing control point.
\item Intercept DNS requests and give bogus responses for certain
destination hostnames.
-\end{tightlist}
+\end{itemize}
We assume the network firewall has limited CPU and memory per
connection~\cite{clayton:pet2006}. Against an adversary who could carefully
@@ -265,14 +260,14 @@ with particular emphasis to how we can take advantage of these properties
for a blocking-resistance design.
Tor aims to provide three security properties:
-\begin{tightlist}
-\item 1. A local network attacker can't learn, or influence, your
+\begin{enumerate}
+\item A local network attacker can't learn, or influence, your
destination.
-\item 2. No single router in the Tor network can link you to your
+\item No single router in the Tor network can link you to your
destination.
-\item 3. The destination, or somebody watching the destination,
+\item The destination, or somebody watching the destination,
can't learn your location.
-\end{tightlist}
+\end{enumerate}
For blocking-resistance, we care most clearly about the first
property. But as the arms race progresses, the second property
@@ -1411,7 +1406,7 @@ We also need to examine how entry guards fit in. Entry guards
step in a circuit) help protect against certain attacks
where the attacker runs a few Tor relays and waits for
the user to choose these relays as the beginning and end of her
-circuit\footnote{\url{http://wiki.noreply.org/noreply/TheOnionRouter/TorFAQ#EntryGuards}}.
+circuit\footnote{\url{http://wiki.noreply.org/noreply/TheOnionRouter/TorFAQ\#EntryGuards}}.
If the blocked user doesn't use the bridge's entry guards, then the bridge
doesn't gain as much cover benefit. On the other hand, what design changes
are needed for the blocked user to use the bridge's entry guards without
diff --git a/2006/blocking/tortechrep.cls b/2006/blocking/tortechrep.cls
new file mode 120000
index 0000000..4c24db2
--- /dev/null
+++ b/2006/blocking/tortechrep.cls
@@ -0,0 +1 @@
+../../tortechrep.cls
\ No newline at end of file
diff --git a/2006/blocking/usenixsubmit.cls b/2006/blocking/usenixsubmit.cls
deleted file mode 100644
index 743ffcf..0000000
--- a/2006/blocking/usenixsubmit.cls
+++ /dev/null
@@ -1,7 +0,0 @@
-% Created by Anil Somayaji
-
-\ProvidesClass{usenixsubmit}
-\LoadClass[11pt,letterpaper]{article}
-\usepackage{times}
-\usepackage[margin=1in]{geometry}
-

[tech-reports/master] Add unchanged blocking tech report sources.
by karsten@torproject.org 31 Aug '12
commit 59a46d6edd7bea6df8fb5cb9779054340ac1a408
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Aug 30 11:26:49 2012 +0200
Add unchanged blocking tech report sources.
---
2006/blocking/.gitignore | 3 +
2006/blocking/blocking.tex | 1894 ++++++++++++++++++++++++++++++++++++++++
2006/blocking/tor-design.bib | 1493 +++++++++++++++++++++++++++++++
2006/blocking/usenixsubmit.cls | 7 +
4 files changed, 3397 insertions(+), 0 deletions(-)
diff --git a/2006/blocking/.gitignore b/2006/blocking/.gitignore
new file mode 100644
index 0000000..cca8809
--- /dev/null
+++ b/2006/blocking/.gitignore
@@ -0,0 +1,3 @@
+blocking.pdf
+blocking-2006-11.pdf
+
diff --git a/2006/blocking/blocking.tex b/2006/blocking/blocking.tex
new file mode 100644
index 0000000..3d46ef1
--- /dev/null
+++ b/2006/blocking/blocking.tex
@@ -0,0 +1,1894 @@
+%\documentclass{llncs}
+\documentclass{usenixsubmit}
+%\documentclass[twocolumn]{article}
+%usepackage{usenix}
+
+\usepackage{url}
+\usepackage{amsmath}
+\usepackage{epsfig}
+
+\setlength{\textwidth}{6.0in}
+\setlength{\textheight}{8.5in}
+\setlength{\topmargin}{.5cm}
+\setlength{\oddsidemargin}{1cm}
+\setlength{\evensidemargin}{1cm}
+
+\newenvironment{tightlist}{\begin{list}{$\bullet$}{
+ \setlength{\itemsep}{0mm}
+ \setlength{\parsep}{0mm}
+ % \setlength{\labelsep}{0mm}
+ % \setlength{\labelwidth}{0mm}
+ % \setlength{\topsep}{0mm}
+ }}{\end{list}}
+
+\newcommand{\workingnote}[1]{} % The version that hides the note.
+%\newcommand{\workingnote}[1]{(**#1)} % makes the note visible.
+
+\date{}
+\title{Design of a blocking-resistant anonymity system\\Tor Project technical report, Nov 2006}
+
+%\author{Roger Dingledine\inst{1} \and Nick Mathewson\inst{1}}
+\author{Roger Dingledine \\ The Tor Project \\ arma(a)torproject.org \and
+Nick Mathewson \\ The Tor Project \\ nickm(a)torproject.org}
+
+\begin{document}
+\maketitle
+\pagestyle{plain}
+
+\begin{abstract}
+
+Internet censorship is on the rise as websites around the world are
+increasingly blocked by government-level firewalls. Although popular
+anonymizing networks like Tor were originally designed to keep attackers from
+tracing people's activities, many people are also using them to evade local
+censorship. But if the censor simply denies access to the Tor network
+itself, blocked users can no longer benefit from the security Tor offers.
+
+Here we describe a design that builds upon the current Tor network
+to provide an anonymizing network that resists blocking
+by government-level attackers. % We have implemented and deployed this
+%design, and talk briefly about early use.
+
+\end{abstract}
+
+\section{Introduction}
+
+Anonymizing networks like Tor~\cite{tor-design} bounce traffic around a
+network of encrypting relays. Unlike encryption, which hides only {\it what}
+is said, these networks also aim to hide who is communicating with whom, which
+users are using which websites, and so on. These systems have a
+broad range of users, including ordinary citizens who want to avoid being
+profiled for targeted advertisements, corporations who don't want to reveal
+information to their competitors, and law enforcement and government
+intelligence agencies who need to do operations on the Internet without being
+noticed.
+
+Historical anonymity research has focused on an
+attacker who monitors the user (call her Alice) and tries to discover her
+activities, yet lets her reach any piece of the network. In more modern
+threat models such as Tor's, the adversary is allowed to perform active
+attacks such as modifying communications to trick Alice
+into revealing her destination, or intercepting some connections
+to run a man-in-the-middle attack. But these systems still assume that
+Alice can eventually reach the anonymizing network.
+
+An increasing number of users are using the Tor software
+less for its anonymity properties than for its censorship
+resistance properties---if they use Tor to access Internet sites like
+Wikipedia
+and Blogspot, they are no longer affected by local censorship
+and firewall rules. In fact, an informal user study
+%(described in Appendix~\ref{app:geoip})
+showed that a few hundred thousand people access the Tor network
+each day, with about 20\% of them coming from China. %~\cite{something}.
+
+The current Tor design is easy to block if the attacker controls Alice's
+connection to the Tor network---by blocking the directory authorities,
+by blocking all the relay IP addresses in the directory, or by filtering
+based on the network fingerprint of the Tor TLS handshake. Here we
+describe an
+extended design that builds upon the current Tor network to provide an
+anonymizing
+network that resists censorship as well as anonymity-breaking attacks.
+In section~\ref{sec:adversary} we discuss our threat model---that is,
+the assumptions we make about our adversary. Section~\ref{sec:current-tor}
+describes the components of the current Tor design and how they can be
+leveraged for a new blocking-resistant design. Section~\ref{sec:related}
+explains the features and drawbacks of the currently deployed solutions.
+In sections~\ref{sec:bridges} through~\ref{sec:discovery}, we explore the
+components of our designs in detail. Section~\ref{sec:security} considers
+security implications and Section~\ref{sec:reachability} presents other
+issues with maintaining connectivity and sustainability for the design.
+%Section~\ref{sec:future} speculates about future more complex designs,
+Finally section~\ref{sec:conclusion} summarizes our next steps and
+recommendations.
+
+% The other motivation is for places where we're concerned they will
+% try to enumerate a list of Tor users. So even if they're not blocking
+% the Tor network, it may be smart to not be visible as connecting to it.
+
+%And adding more different classes of users and goals to the Tor network
+%improves the anonymity for all Tor users~\cite{econymics,usability:weis2006}.
+
+% Adding use classes for countering blocking as well as anonymity has
+% benefits too. Should add something about how providing undetected
+% access to Tor would facilitate people talking to, e.g., govt. authorities
+% about threats to public safety etc. in an environment where Tor use
+% is not otherwise widespread and would make one stand out.
+
+\section{Adversary assumptions}
+\label{sec:adversary}
+
+To design an effective anti-censorship tool, we need a good model for the
+goals and resources of the censors we are evading. Otherwise, we risk
+spending our effort on keeping the adversaries from doing things they have no
+interest in doing, and thwarting techniques they do not use.
+The history of blocking-resistance designs is littered with conflicting
+assumptions about what adversaries to expect and what problems are
+in the critical path to a solution. Here we describe our best
+understanding of the current situation around the world.
+
+In the traditional security style, we aim to defeat a strong
+attacker---if we can defend against this attacker, we inherit protection
+against weaker attackers as well. After all, we want a general design
+that will work for citizens of China, Thailand, and other censored
+countries; for
+whistleblowers in firewalled corporate networks; and for people in
+unanticipated oppressive situations. In fact, by designing with
+a variety of adversaries in mind, we can take advantage of the fact that
+adversaries will be in different stages of the arms race at each location,
+so an address blocked in one locale can still be useful in others.
+We focus on an attacker with somewhat complex goals:
+
+\begin{tightlist}
+\item The attacker would like to restrict the flow of certain kinds of
+ information, particularly when this information is seen as embarrassing to
+ those in power (such as information about rights violations or corruption),
+ or when it enables or encourages others to oppose them effectively (such as
+ information about opposition movements or sites that are used to organize
+ protests).
+\item As a second-order effect, censors aim to chill citizens' behavior by
+ creating an impression that their online activities are monitored.
+\item In some cases, censors make a token attempt to block a few sites for
+ obscenity, blasphemy, and so on, but their efforts here are mainly for
+ show. In other cases, they really do try hard to block such content.
+\item Complete blocking (where nobody at all can ever download censored
+ content) is not a
+ goal. Attackers typically recognize that perfect censorship is not only
+ impossible, it is unnecessary: if ``undesirable'' information is known only
+ to a small few, further censoring efforts can be focused elsewhere.
+\item Similarly, the censors do not attempt to shut down or block {\it
+ every} anti-censorship tool---merely the tools that are popular and
+ effective (because these tools impede the censors' information restriction
+ goals) and those tools that are highly visible (thus making the censors
+ look ineffectual to their citizens and their bosses).
+\item Reprisal against {\it most} passive consumers of {\it most} kinds of
+ blocked information is also not a goal, given the broadness of most
+ censorship regimes. This seems borne out by fact.\footnote{So far in places
+ like China, the authorities mainly go after people who publish materials
+ and coordinate organized movements~\cite{mackinnon-personal}.
+ If they find that a
+ user happens to be reading a site that should be blocked, the typical
+ response is simply to block the site. Of course, even with an encrypted
+ connection, the adversary may be able to distinguish readers from
+ publishers by observing whether Alice is mostly downloading bytes or mostly
+ uploading them---we discuss this issue more in
+ Section~\ref{subsec:upload-padding}.}
+\item Producers and distributors of targeted information are in much
+ greater danger than consumers; the attacker would like to not only block
+ their work, but identify them for reprisal.
+\item The censors (or their governments) would like to have a working, useful
+ Internet. There are economic, political, and social factors that prevent
+ them from ``censoring'' the Internet by outlawing it entirely, or by
+ blocking access to all but a tiny list of sites.
+ Nevertheless, the censors {\it are} willing to block innocuous content
+ (like the bulk of a newspaper's reporting) in order to censor other content
+ distributed through the same channels (like that newspaper's coverage of
+ the censored country).
+\end{tightlist}
+
+We assume there are three main technical network attacks in use by censors
+currently~\cite{clayton:pet2006}:
+
+\begin{tightlist}
+\item Block a destination or type of traffic by automatically searching for
+ certain strings or patterns in TCP packets. Offending packets can be
+ dropped, or can trigger a response like closing the
+ connection.
+\item Block certain IP addresses or destination ports at a
+ firewall or other routing control point.
+\item Intercept DNS requests and give bogus responses for certain
+ destination hostnames.
+\end{tightlist}
+
+We assume the network firewall has limited CPU and memory per
+connection~\cite{clayton:pet2006}. Against an adversary who could carefully
+examine the contents of every packet and correlate the packets in every
+stream on the network, we would need some stronger mechanism such as
+steganography, which introduces its own
+problems~\cite{active-wardens,tcpstego}. But we make a ``weak
+steganography'' assumption here: to remain unblocked, it is necessary to
+remain unobservable only by computational resources on par with a modern
+router, firewall, proxy, or IDS.
+
+We assume that while various different regimes can coordinate and share
+notes, there will be a time lag between one attacker learning how to overcome
+a facet of our design and other attackers picking it up. (The most common
+vector of transmission seems to be commercial providers of censorship tools:
+once a provider adds a feature to meet one country's needs or requests, the
+feature is available to all of the provider's customers.) Conversely, we
+assume that insider attacks become a higher risk only after the early stages
+of network development, once the system has reached a certain level of
+success and visibility.
+
+We do not assume that government-level attackers are always uniform
+across the country. For example, users of different ISPs in China
+experience different censorship policies and mechanisms. %~\cite{china-ccs07}.
+%there is no single centralized place in China
+%that coordinates its specific censorship decisions and steps.
+
+We assume that the attacker may be able to use political and economic
+resources to secure the cooperation of extraterritorial or multinational
+corporations and entities in investigating information sources.
+For example, the censors can threaten the service providers of
+troublesome blogs with economic reprisals if they do not reveal the
+authors' identities.
+
+We assume that our users have control over their hardware and
+software---they don't have any spyware installed, there are no
+cameras watching their screens, etc. Unfortunately, in many situations
+these threats are real~\cite{zuckerman-threatmodels}; yet
+software-based security systems like ours are poorly equipped to handle
+a user who is entirely observed and controlled by the adversary. See
+Section~\ref{subsec:cafes-and-livecds} for more discussion of what little
+we can do about this issue.
+
+Similarly, we assume that the user will be able to fetch a genuine
+version of Tor, rather than one supplied by the adversary; see
+Section~\ref{subsec:trust-chain} for discussion on helping the user
+confirm that he has a genuine version and that he can connect to the
+real Tor network.
+
+\section{Adapting the current Tor design to anti-censorship}
+\label{sec:current-tor}
+
+Tor is popular and sees a lot of use---it's the largest anonymity
+network of its kind, and has
+attracted more than 1500 volunteer-operated routers from around the
+world. Tor protects each user by routing their traffic through a multiply
+encrypted ``circuit'' built of a few randomly selected relays, each of which
+can remove only a single layer of encryption. Each relay sees only the step
+before it and the step after it in the circuit, and so no single relay can
+learn the connection between a user and her chosen communication partners.
+In this section, we examine some of the reasons why Tor has become popular,
+with particular emphasis on how we can take advantage of these properties
+for a blocking-resistance design.
+
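The layered encryption just described can be illustrated with a toy sketch. This is not Tor's actual cell protocol (which uses TLS links and AES in counter mode negotiated per hop); here a SHA-256 keystream XOR stands in for each relay's cipher, and the key names are made up. Each hop strips exactly one layer and learns nothing beyond the next step:

```python
import hashlib
import itertools

def keystream_xor(key: bytes, data: bytes) -> bytes:
    # Derive a deterministic keystream from the key (SHA-256 in counter
    # mode) and XOR it with the data; XOR makes wrap and peel symmetric.
    stream = bytearray()
    for counter in itertools.count():
        if len(stream) >= len(data):
            break
        stream += hashlib.sha256(key + counter.to_bytes(8, "big")).digest()
    return bytes(a ^ b for a, b in zip(data, stream))

def wrap(message: bytes, path_keys: list[bytes]) -> bytes:
    # Apply layers innermost-first, so the first relay in the path
    # removes the outermost layer and sees only the next hop's data.
    cell = message
    for key in reversed(path_keys):
        cell = keystream_xor(key, cell)
    return cell

path = [b"guard-key", b"middle-key", b"exit-key"]   # illustrative keys
cell = wrap(b"GET /index.html", path)
for key in path:               # each relay strips exactly one layer
    cell = keystream_xor(key, cell)
assert cell == b"GET /index.html"
```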
+Tor aims to provide three security properties:
+\begin{tightlist}
+\item 1. A local network attacker can't learn, or influence, your
+destination.
+\item 2. No single router in the Tor network can link you to your
+destination.
+\item 3. The destination, or somebody watching the destination,
+can't learn your location.
+\end{tightlist}
+
+For blocking-resistance, we care most clearly about the first
+property. But as the arms race progresses, the second property
+will become important---for example, to discourage an adversary
+from volunteering a relay in order to learn that Alice is reading
+or posting to certain websites. The third property helps keep users safe from
+collaborating websites: consider websites and other Internet services
+that have been pressured
+recently into revealing the identity of bloggers
+%~\cite{arrested-bloggers}
+or treating clients differently depending on their network
+location~\cite{netauth}.
+%~\cite{google-geolocation}.
+
+The Tor design provides other features as well that are not typically
+present in manual or ad hoc circumvention techniques.
+
+First, Tor has a well-analyzed and well-understood way to distribute
+information about relays.
+Tor directory authorities automatically aggregate, test,
+and publish signed summaries of the available Tor routers. Tor clients
+can fetch these summaries to learn which routers are available and
+which routers are suitable for their needs. Directory information is cached
+throughout the Tor network, so once clients have bootstrapped they never
+need to interact with the authorities directly. (To tolerate a minority
+of compromised directory authorities, we use a threshold trust scheme---
+see Section~\ref{subsec:trust-chain} for details.)
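The threshold-trust idea can be sketched in a few lines: a client accepts a directory summary only if enough of its preconfigured authorities vouch for it. HMAC stands in for the authorities' real public-key signatures, and the authority names and threshold below are illustrative:

```python
import hashlib
import hmac

# Hypothetical authority identities; real authorities use public keys.
AUTHORITY_KEYS = {"auth1": b"k1", "auth2": b"k2", "auth3": b"k3"}
THRESHOLD = 2  # tolerate a minority of compromised authorities

def sign(name: str, document: bytes) -> bytes:
    return hmac.new(AUTHORITY_KEYS[name], document, hashlib.sha256).digest()

def accept(document: bytes, signatures: dict) -> bool:
    # Count valid signatures from known authorities; constant-time compare.
    valid = sum(
        1 for name, sig in signatures.items()
        if name in AUTHORITY_KEYS
        and hmac.compare_digest(sig, sign(name, document))
    )
    return valid >= THRESHOLD

doc = b"signed router summary ..."
sigs = {"auth1": sign("auth1", doc), "auth3": sign("auth3", doc)}
assert accept(doc, sigs)            # two of three authorities agree
assert not accept(b"tampered", sigs)
```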
+
+Second, the list of directory authorities is not hard-wired.
+Clients use the default authorities if no others are specified,
+but it's easy to start a separate (or even overlapping) Tor network just
+by running a different set of authorities and convincing users to prefer
+a modified client. For example, we could launch a distinct Tor network
+inside China; some users could even use an aggregate network made up of
+both the main network and the China network. (But we should not be too
+quick to create other Tor networks---part of Tor's anonymity comes from
+users behaving like other users, and there are many unsolved anonymity
+questions if different users know about different pieces of the network.)
+
+Third, in addition to automatically learning from the chosen directories
+which Tor routers are available and working, Tor takes care of building
+paths through the network and rebuilding them as needed. So the user
+never has to know how paths are chosen, never has to manually pick
+working proxies, and so on. More generally, at its core the Tor protocol
+is simply a tool that can build paths given a set of routers. Tor is
+quite flexible about how it learns about the routers and how it chooses
+the paths. Harvard's Blossom project~\cite{blossom-thesis} makes this
+flexibility more concrete: Blossom makes use of Tor not for its security
+properties but for its reachability properties. It runs a separate set
+of directory authorities, its own set of Tor routers (called the Blossom
+network), and uses Tor's flexible path-building to let users view Internet
+resources from any point in the Blossom network.
+
+Fourth, Tor separates the role of \emph{internal relay} from the
+role of \emph{exit relay}. That is, some volunteers choose just to relay
+traffic between Tor users and Tor routers, and others choose to also allow
+connections to external Internet resources. Because we don't force all
+volunteers to play both roles, we end up with more relays. This increased
+diversity in turn is what gives Tor its security: the more options the
+user has for her first hop, and the more options she has for her last hop,
+the less likely it is that a given attacker will be watching both ends
+of her circuit~\cite{tor-design}. As a bonus, because our design attracts
+more internal relays that want to help out but don't want to deal with
+being an exit relay, we end up providing more options for the first
+hop---the one most critical to being able to reach the Tor network.
+
+Fifth, Tor is sustainable. Zero-Knowledge Systems offered the commercial
+but now defunct Freedom Network~\cite{freedom21-security}, a design with
+security comparable to Tor's, but its funding model relied on collecting
+money from users to pay relay operators. Modern commercial proxy systems
+similarly
+need to keep collecting money to support their infrastructure. On the
+other hand, Tor has built a self-sustaining community of volunteers who
+donate their time and resources. This community trust is rooted in Tor's
+open design: we tell the world exactly how Tor works, and we provide all
+the source code. Users can decide for themselves, or pay any security
+expert to decide, whether it is safe to use. Further, Tor's modularity
+as described above, along with its open license, means that its impact
+will continue to grow.
+
+Sixth, Tor has an established user base of hundreds of
+thousands of people from around the world. This diversity of
+users contributes to sustainability as above: Tor is used by
+ordinary citizens, activists, corporations, law enforcement, and
+even government and military users,
+%\footnote{\url{https://www.torproject.org/overview}}
+and they can
+only achieve their security goals by blending together in the same
+network~\cite{econymics,usability:weis2006}. This user base also provides
+something else: hundreds of thousands of different and often-changing
+addresses that we can leverage for our blocking-resistance design.
+
+Finally and perhaps most importantly, Tor provides anonymity and prevents any
+single relay from linking users to their communication partners. Despite
+initial appearances, {\it distributed-trust anonymity is critical for
+anti-censorship efforts}. If any single relay can expose dissident bloggers
+or compile a list of users' behavior, the censors can profitably compromise
+that relay's operator, perhaps by applying economic pressure to their
+employers,
+breaking into their computer, pressuring their family (if they have relatives
+in the censored area), or so on. Furthermore, in designs where any relay can
+expose its users, the censors can spread suspicion that they are running some
+of the relays and use this belief to chill use of the network.
+
+We discuss and adapt these components further in
+Section~\ref{sec:bridges}. But first we examine the strengths and
+weaknesses of other blocking-resistance approaches, so we can expand
+our repertoire of building blocks and ideas.
+
+\section{Current proxy solutions}
+\label{sec:related}
+
+Relay-based blocking-resistance schemes generally have two main
+components: a relay component and a discovery component. The relay part
+encompasses the process of establishing a connection, sending traffic
+back and forth, and so on---everything that's done once the user knows
+where she's going to connect. Discovery is the step before that: the
+process of finding one or more usable relays.
+
+For example, we can divide the pieces of Tor in the previous section
+into the process of building paths and sending
+traffic over them (relay) and the process of learning from the directory
+authorities about what routers are available (discovery). With this
+distinction
+in mind, we now examine several categories of relay-based schemes.
+
+\subsection{Centrally-controlled shared proxies}
+
+Existing commercial anonymity solutions (like Anonymizer.com) are based
+on a set of single-hop proxies. In these systems, each user connects to
+a single proxy, which then relays traffic between the user and her
+destination. These public proxy
+systems are typically characterized by two features: they control and
+operate the proxies centrally, and many different users get assigned
+to each proxy.
+
+In terms of the relay component, single proxies provide weak security
+compared to systems that distribute trust over multiple relays, since a
+compromised proxy can trivially observe all of its users' actions, and
+an eavesdropper only needs to watch a single proxy to perform timing
+correlation attacks against all its users' traffic and thus learn where
+everyone is connecting. Worse, all users
+need to trust the proxy company to have good security itself as well as
+to not reveal user activities.
+
+On the other hand, single-hop proxies are easier to deploy, and they
+can provide better performance than distributed-trust designs like Tor,
+since traffic only goes through one relay. They're also more convenient
+from the user's perspective---since users entirely trust the proxy,
+they can just use their web browser directly.
+
+Whether public proxy schemes are more or less scalable than Tor is
+still up for debate: commercial anonymity systems can use some of their
+revenue to provision more bandwidth as they grow, whereas volunteer-based
+anonymity systems can attract thousands of fast relays to spread the load.
+
+The discovery piece can take several forms. Most commercial anonymous
+proxies have one or a handful of commonly known websites, and their users
+log in to those websites and relay their traffic through them. When
+these websites get blocked (generally soon after the company becomes
+popular), if the company cares about users in the blocked areas, they
+start renting lots of disparate IP addresses and rotating through them
+as they get blocked. They notify their users of new addresses (by email,
+for example). It's an arms race, since attackers can sign up to receive the
+email too, but operators have one nice trick available to them: because they
+have a list of paying subscribers, they can notify certain subscribers
+about updates earlier than others.
+
+Access control systems on the proxy let them provide service only to
+users with certain characteristics, such as paying customers or people
+from certain IP address ranges.
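Such an access check might be sketched as follows, assuming a subscriber list and a set of allowed address ranges (all names and values here are made up for illustration):

```python
import ipaddress

# Hypothetical policy data for a commercial proxy operator.
ALLOWED_NETS = [ipaddress.ip_network("203.0.113.0/24")]
SUBSCRIBERS = {"alice@example.com"}

def may_use_proxy(client_ip, account=None):
    # Paying customers get in regardless of source address.
    if account in SUBSCRIBERS:
        return True
    # Otherwise, admit only clients from the allowed ranges.
    addr = ipaddress.ip_address(client_ip)
    return any(addr in net for net in ALLOWED_NETS)

assert may_use_proxy("203.0.113.7")                      # allowed range
assert not may_use_proxy("198.51.100.1")                 # outside ranges
assert may_use_proxy("198.51.100.1", "alice@example.com")  # subscriber
```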
+
+Discovery in the face of a government-level firewall is a complex and
+unsolved
+topic, and we're stuck in this same arms race ourselves; we explore it
+in more detail in Section~\ref{sec:discovery}. But first we examine the
+other end of the spectrum---getting volunteers to run the proxies,
+and telling only a few people about each proxy.
+
+\subsection{Independent personal proxies}
+
+Personal proxies such as Circumventor~\cite{circumventor} and
+CGIProxy~\cite{cgiproxy} use the same technology as the public ones as
+far as the relay component goes, but they use a different strategy for
+discovery. Rather than managing a few centralized proxies and constantly
+getting new addresses for them as the old addresses are blocked, they
+aim to have a large number of entirely independent proxies, each managing
+its own (much smaller) set of users.
+
+As the Circumventor site explains, ``You don't
+actually install the Circumventor \emph{on} the computer that is blocked
+from accessing Web sites. You, or a friend of yours, has to install the
+Circumventor on some \emph{other} machine which is not censored.''
+
+This tactic has great advantages in terms of blocking-resistance---recall
+our assumption in Section~\ref{sec:adversary} that the attention
+a system attracts from the attacker is proportional to its number of
+users and level of publicity. If each proxy only has a few users, and
+there is no central list of proxies, most of them will never get noticed by
+the censors.
+
+On the other hand, there's a huge scalability question that so far has
+prevented these schemes from being widely useful: how does the fellow
+in China find a person in Ohio who will run a Circumventor for him? In
+some cases he may know and trust some people on the outside, but in many
+cases he's just out of luck. Just as hard, how does a new volunteer in
+Ohio find a person in China who needs one?
+
+% another key feature of a proxy run by your uncle is that you
+% self-censor, so you're unlikely to bring abuse complaints onto
+% your uncle. self-censoring clearly has a downside too, though.
+
+This challenge leads to a hybrid design---centrally-distributed
+personal proxies---which we will investigate in more detail in
+Section~\ref{sec:discovery}.
+
+\subsection{Open proxies}
+
+Yet another currently used approach to bypassing firewalls is to locate
+open and misconfigured proxies on the Internet. A quick Google search
+for ``open proxy list'' yields a wide variety of freely available lists
+of HTTP, HTTPS, and SOCKS proxies. Many small companies have sprung up
+providing more refined lists to paying customers.
+
+There are some downsides to using these open proxies though. First,
+the proxies are of widely varying quality in terms of bandwidth and
+stability, and many of them are entirely unreachable. Second, unlike
+networks of volunteers like Tor, the legality of routing traffic through
+these proxies is questionable: it's widely believed that most of them
+don't realize what they're offering, and probably wouldn't allow it if
+they realized. Third, in many cases the connection to the proxy is
+unencrypted, so firewalls that filter based on keywords in IP packets
+will not be hindered. Fourth, in many countries (including China), the
+firewall authorities hunt for open proxies as well, to preemptively
+block them. And last, many users are suspicious that some
+open proxies are a little \emph{too} convenient: are they run by the
+adversary, in which case they get to monitor all the user's requests
+just as single-hop proxies can?
+
+A distributed-trust design like Tor resolves each of these issues for
+the relay component, but a constantly changing set of thousands of open
+relays is clearly a useful idea for a discovery component. For example,
+users might be able to make use of these proxies to bootstrap their
+first introduction into the Tor network.
+
+\subsection{Blocking resistance and JAP}
+
+K\"{o}psell and Hilling's Blocking Resistance
+design~\cite{koepsell:wpes2004} is probably
+the closest related work, and is the starting point for the design in this
+paper. In this design, the JAP anonymity system~\cite{web-mix} is used
+as a base instead of Tor. Volunteers operate a large number of access
+points that relay traffic to the core JAP
+network, which in turn anonymizes users' traffic. The software to run these
+relays is, as in our design, included in the JAP client software and enabled
+only when the user decides to enable it. Discovery is handled with a
+CAPTCHA-based mechanism; users prove that they aren't an automated process,
+and are given the address of an access point. (The problem of a determined
+attacker with enough manpower to launch many requests and enumerate all the
+access points is not considered in depth.) There is also some suggestion
+that information about access points could spread through existing social
+networks.
+
+\subsection{Infranet}
+
+The Infranet design~\cite{infranet} uses one-hop relays to deliver web
+content, but disguises its communications as ordinary HTTP traffic. Requests
+are split into multiple requests for URLs on the relay, which then encodes
+its responses in the content it returns. The relay needs to be an actual
+website with plausible content and a number of URLs which the user might want
+to access---if the Infranet software produced its own cover content, it would
+be far easier for censors to identify. To keep the censors from noticing
+that cover content changes depending on what data is embedded, Infranet needs
+the cover content to have an innocuous reason for changing frequently: the
+paper recommends watermarked images and webcams.
+
+The attacker and relay operators in Infranet's threat model are significantly
+different than in ours. Unlike our attacker, Infranet's censor can't be
+bypassed with encrypted traffic (presumably because the censor blocks
+encrypted traffic, or at least considers it suspicious), and has more
+computational resources to devote to each connection than ours (so it can
+notice subtle patterns over time). Unlike our bridge operators, Infranet's
+operators (and users) have more bandwidth to spare; the overhead in typical
+steganography schemes is far higher than Tor's.
+
+The Infranet design does not include a discovery element. Discovery,
+however, is a critical point: if whatever mechanism allows users to learn
+about relays also allows the censor to do so, he can trivially discover and
+block their addresses, even if the steganography would prevent mere traffic
+observation from revealing the relays' addresses.
+
+\subsection{RST-evasion and other packet-level tricks}
+
+In their analysis of China's firewall's content-based blocking, Clayton,
+Murdoch and Watson discovered that rather than blocking all packets in a TCP
+stream once a forbidden word was noticed, the firewall was simply forging
+RST packets to make the communicating parties believe that the connection was
+closed~\cite{clayton:pet2006}. They proposed altering operating systems
+to ignore forged RST packets. This approach might work in some cases, but
+in practice it appears that many firewalls start filtering by IP address
+once a sufficient number of RST packets have been sent.
+
+Other packet-level responses to filtering include splitting
+sensitive words across multiple TCP packets, so that the censors'
+firewalls can't notice them without performing expensive stream
+reconstruction~\cite{ptacek98insertion}. This technique relies on the
+same insight as our weak steganography assumption.
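The evasion can be demonstrated in a few lines: a filter that inspects each packet in isolation misses a keyword that straddles a packet boundary, while full stream reassembly (costly for the censor at scale) still catches it. The keyword and packet contents below are illustrative:

```python
FORBIDDEN = b"falun"  # stand-in for a filtered keyword

def per_packet_filter(packets):
    # Cheap censor: scan each TCP packet payload independently.
    return any(FORBIDDEN in p for p in packets)

def stream_filter(packets):
    # Expensive censor: reassemble the stream, then scan it.
    return FORBIDDEN in b"".join(packets)

packets = [b"...fal", b"un..."]         # keyword split across two packets
assert not per_packet_filter(packets)   # naive per-packet scan misses it
assert stream_filter(packets)           # reassembled stream reveals it
```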
+
+%\subsection{Internal caching networks}
+
+%Freenet~\cite{freenet-pets00} is an anonymous peer-to-peer data store.
+%Analyzing Freenet's security can be difficult, as its design is in flux as
+%new discovery and routing mechanisms are proposed, and no complete
+%specification has (to our knowledge) been written. Freenet servers relay
+%requests for specific content (indexed by a digest of the content)
+%``toward'' the server that hosts it, and then cache the content as it
+%follows the same path back to
+%the requesting user. If Freenet's routing mechanism is successful in
+%allowing nodes to learn about each other and route correctly even as some
+%node-to-node links are blocked by firewalls, then users inside censored areas
+%can ask a local Freenet server for a piece of content, and get an answer
+%without having to connect out of the country at all. Of course, operators of
+%servers inside the censored area can still be targeted, and the addresses of
+%external servers can still be blocked.
+
+%\subsection{Skype}
+
+%The popular Skype voice-over-IP software uses multiple techniques to tolerate
+%restrictive networks, some of which allow it to continue operating in the
+%presence of censorship. By switching ports and using encryption, Skype
+%attempts to resist trivial blocking and content filtering. Even if no
+%encryption were used, it would still be expensive to scan all voice
+%traffic for sensitive words. Also, most current keyloggers are unable to
+%store voice traffic. Nevertheless, Skype can still be blocked, especially at
+%its central login server.
+
+%*sjmurdoch* "we consider the login server to be the only central component in
+%the Skype p2p network."
+%*sjmurdoch* http://www1.cs.columbia.edu/~salman/publications/skype1_4.pdf
+%-> *sjmurdoch* ok. what is the login server's role?
+%-> *sjmurdoch* and do you need to reach it directly to use skype?
+%*sjmurdoch* It checks the username and password
+%*sjmurdoch* It is necessary in the current implementation, but I don't know if
+%it is a fundemental limitation of the architecture
+
+\subsection{Tor itself}
+
+And last, we include Tor itself in the list of current solutions
+to firewalls. Tens of thousands of people use Tor from countries that
+routinely filter their Internet. Tor's website has been blocked in most
+of them. But why hasn't the Tor network been blocked yet?
+
+We have several theories. The first is the most straightforward: tens of
+thousands of people are simply too few to matter. It may help that Tor is
+perceived to be for experts only, and thus not worth attention yet. The
+more subtle variant on this theory is that we've positioned Tor in the
+public eye as a tool for retaining civil liberties in more free countries,
+so perhaps blocking authorities don't view it as a threat. (We revisit
+this idea when we consider whether and how to publicize a Tor variant
+that improves blocking-resistance---see Section~\ref{subsec:publicity}
+for more discussion.)
+
+The broader explanation is that the maintenance of most government-level
+filters is aimed at stopping widespread information flow and at appearing to be
+in control, not at the impossible goal of blocking all possible ways to bypass
+censorship. Censors realize that there will always
+be ways for a few people to get around the firewall, and as long as Tor
+has not publicly threatened their control, they see no urgent need to
+block it yet.
+
+We should recognize that we're \emph{already} in the arms race. These
+constraints can give us insight into the priorities and capabilities of
+our various attackers.
+
+\section{The relay component of our blocking-resistant design}
+\label{sec:bridges}
+
+Section~\ref{sec:current-tor} describes many reasons why Tor is
+well-suited as a building block in our context, but several changes will
+allow the design to resist blocking better. The most critical changes are
+to get more relay addresses, and to distribute them to users differently.
+
+%We need to address three problems:
+%- adapting the relay component of Tor so it resists blocking better.
+%- Discovery.
+%- Tor's network fingerprint.
+
+%Here we describe the new pieces we need to add to the current Tor design.
+
+\subsection{Bridge relays}
+
+Today, Tor relays operate on a few thousand distinct IP addresses;
+an adversary
+could enumerate and block them all with little trouble. To provide a
+means of ingress to the network, we need a larger set of entry points, most
+of which an adversary won't be able to enumerate easily. Fortunately, we
+have such a set: the Tor users.
+
+Hundreds of thousands of people around the world use Tor. We can leverage
+our already self-selected user base to produce a list of thousands of
+frequently-changing IP addresses. Specifically, we can give them a little
+button in the GUI that says ``Tor for Freedom'', and users who click
+the button will turn into \emph{bridge relays} (or just \emph{bridges}
+for short). They can rate limit relayed connections to 10 KB/s (almost
+nothing for a broadband user in a free country, but plenty for a user
+who otherwise has no access at all), and since they are just relaying
+bytes back and forth between blocked users and the main Tor network, they
+won't need to make any external connections to Internet sites. Because
+of this separation of roles, and because we're making use of software
+that the volunteers have already installed for their own use, we expect
+our scheme to attract and maintain more volunteers than previous schemes.
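The rate limiting described above can be sketched as a token bucket; the class, rate, and burst values here are illustrative, not Tor's actual bandwidth-accounting code:

```python
import time

class TokenBucket:
    """Toy token bucket: tokens are bytes, refilled at a fixed rate."""

    def __init__(self, rate_bytes_per_sec, burst):
        self.rate = rate_bytes_per_sec
        self.capacity = burst
        self.tokens = burst
        self.last = time.monotonic()

    def allow(self, nbytes):
        # Refill tokens for the elapsed time, capped at the burst size.
        now = time.monotonic()
        self.tokens = min(self.capacity,
                          self.tokens + (now - self.last) * self.rate)
        self.last = now
        if nbytes <= self.tokens:
            self.tokens -= nbytes
            return True
        return False  # caller must queue or drop until tokens refill

# A bridge limited to roughly 10 KB/s with a 10 KB burst allowance.
bucket = TokenBucket(rate_bytes_per_sec=10_000, burst=10_000)
assert bucket.allow(8_000)       # within the burst allowance
assert not bucket.allow(8_000)   # bucket nearly empty; must wait
```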
+
+As usual, there are new anonymity and security implications from running a
+bridge relay, particularly from letting people relay traffic through your
+Tor client; but we leave this discussion for Section~\ref{sec:security}.
+
+%...need to outline instructions for a Tor config that will publish
+%to an alternate directory authority, and for controller commands
+%that will do this cleanly.
+
+\subsection{The bridge directory authority}
+
+How do the bridge relays advertise their existence to the world? We
+introduce a second new component of the design: a specialized directory
+authority that aggregates and tracks bridges. Bridge relays periodically
+publish relay descriptors (summaries of their keys, locations, etc,
+signed by their long-term identity key), just like the relays in the
+``main'' Tor network, but in this case they publish them only to the
+bridge directory authorities.
+
+The main difference between bridge authorities and the directory
+authorities for the main Tor network is that the main authorities provide
+a list of every known relay, but the bridge authorities only give
+out a relay descriptor if you already know its identity key. That is,
+you can keep up-to-date on a bridge's location and other information
+once you know about it, but you can't just grab a list of all the bridges.
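A minimal sketch of this give-out-only-on-exact-match rule follows; the class and field names are hypothetical, not the bridge authority's real interface. Descriptors are indexed by identity-key fingerprint and served only on an exact query, so there is no operation that enumerates the table:

```python
import hashlib

class BridgeAuthority:
    def __init__(self):
        self._descriptors = {}   # fingerprint -> latest descriptor

    def publish(self, identity_key: bytes, descriptor: str):
        # Index the descriptor under the identity key's fingerprint.
        fp = hashlib.sha1(identity_key).hexdigest()
        self._descriptors[fp] = descriptor

    def lookup(self, fingerprint: str):
        # Answer only exact-match queries; never return the whole list.
        return self._descriptors.get(fingerprint)

auth = BridgeAuthority()
auth.publish(b"bridge-identity-key", "203.0.113.5:443 ...")
fp = hashlib.sha1(b"bridge-identity-key").hexdigest()
assert auth.lookup(fp) == "203.0.113.5:443 ..."   # known key: full info
assert auth.lookup("0" * 40) is None              # guessing gets nothing
```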
+
+The identity key, IP address, and directory port for each bridge
+authority ship by default with the Tor software, so the bridge relays
+can be confident they're publishing to the right location, and the
+blocked users can establish an encrypted authenticated channel. See
+Section~\ref{subsec:trust-chain} for more discussion of the public key
+infrastructure and trust chain.
+
+Bridges use Tor to publish their descriptors privately and securely,
+so even an attacker monitoring the bridge directory authority's network
+can't make a list of all the addresses contacting the authority.
+Bridges may publish to only a subset of the
+authorities, to limit the potential impact of an authority compromise.
+
+
+%\subsection{A simple matter of engineering}
+%
+%Although we've described bridges and bridge authorities in simple terms
+%above, some design modifications and features are needed in the Tor
+%codebase to add them. We describe the four main changes here.
+%
+%Firstly, we need to get smarter about rate limiting:
+%Bandwidth classes
+%
+%Secondly, while users can in fact configure which directory authorities
+%they use, we need to add a new type of directory authority and teach
+%bridges to fetch directory information from the main authorities while
+%publishing relay descriptors to the bridge authorities. We're most of
+%the way there, since we can already specify attributes for directory
+%authorities:
+%add a separate flag named ``blocking''.
+%
+%Thirdly, need to build paths using bridges as the first
+%hop. One more hole in the non-clique assumption.
+%
+%Lastly, since bridge authorities don't answer full network statuses,
+%we need to add a new way for users to learn the current status for a
+%single relay or a small set of relays---to answer such questions as
+%``is it running?'' or ``is it behaving correctly?'' We describe in
+%Section~\ref{subsec:enclave-dirs} a way for the bridge authority to
+%publish this information without resorting to signing each answer
+%individually.
+
+\subsection{Putting them together}
+\label{subsec:relay-together}
+
+If a blocked user knows the identity keys of a set of bridge relays, and
+he has correct address information for at least one of them, he can use
+that one to make a secure connection to the bridge authority and update
+his knowledge about the other bridge relays. He can also use it to make
+secure connections to the main Tor network and directory authorities, so he
+can build circuits and connect to the rest of the Internet. All of these
+updates happen in the background: from the blocked user's perspective,
+he just accesses the Internet via his Tor client like always.
+
+So now we've reduced the problem from how to circumvent the firewall
+for all transactions (and how to know that the pages you get have not
+been modified by the local attacker) to how to learn about a working
+bridge relay.
+
+There's another catch though. We need to make sure that the network
+traffic we generate by simply connecting to a bridge relay doesn't stand
+out too much.
+
+%The following section describes ways to bootstrap knowledge of your first
+%bridge relay, and ways to maintain connectivity once you know a few
+%bridge relays.
+
+% (See Section~\ref{subsec:first-bridge} for a discussion
+%of exactly what information is sufficient to characterize a bridge relay.)
+
+
+
+\section{Hiding Tor's network fingerprint}
+\label{sec:network-fingerprint}
+\label{subsec:enclave-dirs}
+
+Currently, Tor uses two protocols for its network communications. The
+main protocol uses TLS for encrypted and authenticated communication
+between Tor instances. The second protocol is standard HTTP, used for
+fetching directory information. All Tor relays listen on their ``ORPort''
+for TLS connections, and some of them opt to listen on their ``DirPort''
+as well, to serve directory information. Tor relays choose whatever port
+numbers they like; the relay descriptor they publish to the directory
+tells users where to connect.
+
+One format for communicating address information about a bridge relay is
+its IP address and DirPort. From there, the user can ask the bridge's
+directory cache for an up-to-date copy of its relay descriptor, and
+learn its current circuit keys, its ORPort, and so on.
+
+However, connecting directly to the directory cache involves a plaintext
+HTTP request. A censor could create a network fingerprint (known as a
+\emph{signature} in the intrusion detection field) for the request
+and/or its response, thus preventing these connections. To resolve this
+vulnerability, we've modified the Tor protocol so that users can connect
+to the directory cache via the main Tor port---they establish a TLS
+connection with the bridge as normal, and then send a special ``begindir''
+relay command to establish an internal connection to its directory cache.
+
+Therefore a better way to summarize a bridge's address is by its IP
+address and ORPort, so all communications between the client and the
+bridge will use ordinary TLS. But there are other details that need
+more investigation.
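A bridge address summarized this way can be parsed as a host:port pair plus an optional identity fingerprint; the exact line syntax below is illustrative (and, for simplicity, handles only IPv4 addresses):

```python
import ipaddress

def parse_bridge_line(line: str):
    # Hypothetical format: "IP:ORPort [fingerprint]".
    parts = line.split()
    host, _, port = parts[0].rpartition(":")
    addr = ipaddress.ip_address(host)          # validates the IP address
    fingerprint = parts[1] if len(parts) > 1 else None
    return str(addr), int(port), fingerprint

assert parse_bridge_line("203.0.113.5:443 A1B2C3") == \
    ("203.0.113.5", 443, "A1B2C3")
assert parse_bridge_line("203.0.113.5:443") == ("203.0.113.5", 443, None)
```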
+
+What port should bridges pick for their ORPort? We currently recommend
+that they listen on port 443 (the default HTTPS port) if they want to
+be most useful, because clients behind standard firewalls will have
+the best chance to reach them. Is this the best choice in all cases,
+or should we encourage some fraction of them to pick random ports, or other
+ports commonly permitted through firewalls like 53 (DNS) or 110
+(POP)? Or perhaps we should use other ports where TLS traffic is
+expected, like 993 (IMAPS) or 995 (POP3S)?
+potential users, and their current and anticipated firewall restrictions.
+
+Furthermore, we need to look at the specifics of Tor's TLS handshake.
+Right now Tor uses some predictable strings in its TLS handshakes. For
+example, it sets the X.509 organizationName field to ``Tor'', and it puts
+the Tor relay's nickname in the certificate's commonName field. We
+should tweak the handshake protocol so it doesn't rely on any unusual details
+in the certificate, yet it remains secure; the certificate itself
+should be made to resemble an ordinary HTTPS certificate. We should also try
+to make our advertised cipher-suites closer to what an ordinary web server
+would support.
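+
To make the fingerprinting risk concrete, here is a minimal sketch of the kind of trivial rule a censor could write against Tor's current certificates. The field names follow X.509 conventions, and the nickname pattern (alphanumeric, up to 19 characters) matches Tor's nickname syntax; the check itself is our illustration, not a real DPI rule.

```python
import re

def looks_like_tor_cert(subject: dict) -> bool:
    # A censor's trivial signature on Tor's current handshake: the
    # organizationName is "Tor" and the commonName is nickname-shaped.
    common_name = subject.get("commonName", "")
    return (subject.get("organizationName") == "Tor"
            and re.fullmatch(r"[A-Za-z0-9]{1,19}", common_name) is not None)
```

Any rule this simple is exactly what the handshake changes above are meant to defeat: once the certificate resembles an ordinary HTTPS certificate, the censor has nothing this cheap to match on.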
+
+Tor's TLS handshake uses two-certificate chains: one certificate
+contains the self-signed identity key for
+the router, and the second contains a current TLS key, signed by the
+identity key. We use these to authenticate that we're talking to the right
+router, and to limit the impact of TLS-key exposure. Most (though far from
+all) consumer-oriented HTTPS services provide only a single certificate.
+These extra certificates may help identify Tor's TLS handshake; instead,
+bridges should consider using only a single TLS key certificate signed by
+their identity key, and providing the full value of the identity key in an
+early handshake cell. More significantly, Tor currently has all clients
+present certificates, so that clients are harder to distinguish from relays.
+But in a blocking-resistance environment, clients should not present
+certificates at all.
+
+Last, what if the adversary starts observing the network traffic even
+more closely? Even if our TLS handshake looks innocent, our traffic timing
+and volume still look different than a user making a secure web connection
+to his bank. The same techniques used in the growing trend to build
+tools that recognize encrypted BitTorrent traffic
+%~\cite{bt-traffic-shaping}
+could be used to identify Tor communication and recognize bridge
+relays. Rather than trying to look like encrypted web traffic, we may be
+better off trying to blend with some other encrypted network protocol. The
+first step is to compare typical network behavior for a Tor client to
+typical network behavior for various other protocols. This statistical
+cat-and-mouse game is made more complex by the fact that Tor transports a
+variety of protocols, and we'll want to automatically handle web browsing
+differently from, say, instant messaging.
+
+% Tor cells are 512 bytes each. So TLS records will be roughly
+% multiples of this size? How bad is this? -RD
+% Look at ``Inferring the Source of Encrypted HTTP Connections''
+% by Marc Liberatore and Brian Neil Levine (CCS 2006)
+% They substantially flesh out the numbers for the web fingerprinting
+% attack. -PS
+% Yes, but I meant detecting the fingerprint of Tor traffic itself, not
+% learning what websites we're going to. I wouldn't be surprised to
+% learn that these are related problems, but it's not obvious to me. -RD
+
+\subsection{Identity keys as part of addressing information}
+\label{subsec:id-address}
+
+We have described a way for the blocked user to bootstrap into the
+network once he knows the IP address and ORPort of a bridge. What about
+local spoofing attacks? That is, since we never learned an identity
+key fingerprint for the bridge, a local attacker could intercept our
+connection and pretend to be the bridge we had in mind. It turns out
+that giving false information isn't that bad---since the Tor client
+ships with trusted keys for the bridge directory authority and the Tor
+network directory authorities, the user can learn whether he's being
+given a real connection to the bridge authorities or not. (After all,
+if the adversary intercepts every connection the user makes and gives
+him a bad connection each time, there's nothing we can do.)
+
+What about anonymity-breaking attacks from observing traffic, if the
+blocked user doesn't start out knowing the identity key of his intended
+bridge? The vulnerabilities aren't so bad in this case either---the
+adversary could do similar attacks just by monitoring the network
+traffic.
+% cue paper by steven and george
+
+Once the Tor client has fetched the bridge's relay descriptor, it should
+remember the identity key fingerprint for that bridge relay. Thus if
+the bridge relay moves to a new IP address, the client can query the
+bridge directory authority to look up a fresh relay descriptor using
+this fingerprint.
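+
The client-side bookkeeping is straightforward; the sketch below illustrates it with a hypothetical `authority_lookup` callback standing in for the actual query to the bridge directory authority (the class and method names are ours, not Tor's).

```python
class BridgeCache:
    def __init__(self, authority_lookup):
        # authority_lookup: hypothetical callback that asks the bridge
        # authority for a fresh (ip, orport) pair by identity fingerprint.
        self.authority_lookup = authority_lookup
        self.known = {}  # fingerprint -> (ip, orport)

    def remember(self, fingerprint: str, ip: str, orport: int):
        self.known[fingerprint] = (ip, orport)

    def refresh(self, fingerprint: str):
        # If the bridge moved, the authority's fresh answer supersedes
        # whatever address we had cached.
        fresh = self.authority_lookup(fingerprint)
        if fresh is not None:
            self.known[fingerprint] = fresh
        return self.known.get(fingerprint)
```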
+
+So we've shown that it's \emph{possible} to bootstrap into the network
+just by learning the IP address and ORPort of a bridge, but are there
+situations where it's more convenient or more secure to learn the bridge's
+identity fingerprint as well, or instead, while bootstrapping? We keep
+that question in mind as we next investigate bootstrapping and discovery.
+
+\section{Discovering working bridge relays}
+\label{sec:discovery}
+
+Tor's modular design means that we can develop a better relay component
+independently of developing the discovery component. This modularity's
+great promise is that we can pick any discovery approach we like; but the
+unfortunate fact is that we have no magic bullet for discovery. We're
+in the same arms race as all the other designs we described in
+Section~\ref{sec:related}.
+
+In this section we describe a variety of approaches to adding discovery
+components for our design.
+
+\subsection{Bootstrapping: finding your first bridge}
+\label{subsec:first-bridge}
+
+In Section~\ref{subsec:relay-together}, we showed that a user who knows
+a working bridge address can use it to reach the bridge authority and
+to stay connected to the Tor network. But how do new users reach the
+bridge authority in the first place? After all, the bridge authority
+will be one of the first addresses that a censor blocks.
+
+First, we should recognize that most government firewalls are not
+perfect. That is, they may allow connections to Google cache or some
+open proxy servers, or they let file-sharing traffic, Skype, instant
+messaging, or World-of-Warcraft connections through. Different users will
+have different mechanisms for bypassing the firewall initially. Second,
+we should remember that most people don't operate in a vacuum; users will
+hopefully know other people who are in other situations or have other
+resources available. In the rest of this section we develop a toolkit
+of different options and mechanisms, so that we can enable users in a
+diverse set of contexts to bootstrap into the system.
+
+(Users who can't use any of these techniques will hopefully know
+a friend who can---for example, perhaps the friend already knows some
+bridge relay addresses. If they can't get around it at all, then we
+can't help them---they should go meet more people or learn more about
+the technology running the firewall in their area.)
+
+By deploying all the schemes in the toolkit at once, we let bridges and
+blocked users employ the discovery approach that is most appropriate
+for their situation.
+
+\subsection{Independent bridges, no central discovery}
+
+The first design is simply to have no centralized discovery component at
+all. Volunteers run bridges, and we assume they have some blocked users
+in mind and communicate their address information to them out-of-band
+(for example, through Gmail). This design allows for small personal
+bridges that have only one or a handful of users in mind, but it can
+also support an entire community of users. For example, Citizen Lab's
+upcoming Psiphon single-hop proxy tool~\cite{psiphon} plans to use this
+\emph{social network} approach as its discovery component.
+
+There are several ways to do bootstrapping in this design. In the simple
+case, the operator of the bridge informs each chosen user about his
+bridge's address information and/or keys. A different approach involves
+blocked users introducing new blocked users to the bridges they know.
+That is, somebody in the blocked area can pass along a bridge's address to
+somebody else they trust. This scheme brings in appealing but complex game
+theoretic properties: the blocked user making the decision has an incentive
+only to delegate to trustworthy people, since an adversary who learns
+the bridge's address and filters it makes it unavailable for both of them.
+Also, delegating known bridges to members of your social network can be
+dangerous: an adversary who can learn who knows which bridges may
+be able to reconstruct the social network.
+
+Note that a central set of bridge directory authorities can still be
+compatible with a decentralized discovery process. That is, how users
+first learn about bridges is entirely up to the bridges, but the process
+of fetching up-to-date descriptors for them can still proceed as described
+in Section~\ref{sec:bridges}. Of course, creating a central place that
+knows about all the bridges may not be smart, especially if every other
+piece of the system is decentralized. Further, if a user only knows
+about one bridge and he loses track of it, it may be quite a hassle to
+reach the bridge authority. We address these concerns next.
+
+\subsection{Families of bridges, no central discovery}
+
+Because the blocked users are running our software too, we have many
+opportunities to improve usability or robustness. Our second design builds
+on the first by encouraging volunteers to run several bridges at once
+(or coordinate with other bridge volunteers), such that some
+of the bridges are likely to be available at any given time.
+
+The blocked user's Tor client would periodically fetch an updated set of
+recommended bridges from any of the working bridges. Now the client can
+learn new additions to the bridge pool, and can expire abandoned bridges
+or bridges that the adversary has blocked, without the user ever needing
+to care. To simplify maintenance of the community's bridge pool, each
+community could run its own bridge directory authority---reachable via
+the available bridges, and also mirrored at each bridge.
+
+\subsection{Public bridges with central discovery}
+
+What about people who want to volunteer as bridges but don't know any
+suitable blocked users? What about people who are blocked but don't
+know anybody on the outside? Here we describe how to make use of these
+\emph{public bridges} in a way that still makes it hard for the attacker
+to learn all of them.
+
+The basic idea is to divide public bridges into a set of pools based on
+identity key. Each pool corresponds to a \emph{distribution strategy}:
+an approach to distributing its bridge addresses to users. Each strategy
+is designed to exercise a different scarce resource or property of
+the user.
+
+How do we divide bridges between these strategy pools such that they're
+evenly distributed and the allocation is hard to influence or predict,
+but also in a way that's amenable to creating more strategies later
+on without reshuffling all the pools? We assign a given bridge
+to a strategy pool by hashing the bridge's identity key along with a
+secret that only the bridge authority knows: the first $n$ bits of this
+hash dictate the strategy pool number, where $n$ is a parameter that
+describes how many strategy pools we want at this point. We choose $n=3$
+to start, so we divide bridges between 8 pools; but as we later invent
+new distribution strategies, we can increment $n$ to split the 8 into
+16. Since a bridge can't predict the next bit in its hash, it can't
+anticipate which identity key will correspond to a certain new pool
+when the pools are split. Further, since the bridge authority doesn't
+provide any feedback to the bridge about which strategy pool it's in,
+an adversary who signs up bridges with the goal of filling a certain
+pool~\cite{casc-rep} will be hindered.
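+
As a concrete sketch of this allocation (the function names and the choice of SHA-256 are our illustration, not part of the deployed design), the pool number is just the top $n$ bits of a keyed hash:

```python
import hashlib

def pool_number(identity_key: bytes, authority_secret: bytes, n_bits: int = 3) -> int:
    # Hash the bridge's identity key together with the authority's secret;
    # the top n_bits of the digest pick the pool, so the assignment is
    # uniform and unpredictable to the bridge operator.
    digest = hashlib.sha256(authority_secret + identity_key).digest()
    return digest[0] >> (8 - n_bits)
```

Note the splitting property: going from $n$ bits to $n+1$ bits divides each existing pool cleanly in two, since the longer prefix extends the shorter one.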
+
+% This algorithm is not ideal. When we split pools, each existing
+% pool is cut in half, where half the bridges remain with the
+% old distribution policy, and half will be under what the new one
+% is. So the new distribution policy inherits a bunch of blocked
+% bridges if the old policy was too loose, or a bunch of unblocked
+% bridges if its policy was still secure. -RD
+%
+% I think it should be more chordlike.
+% Bridges are allocated to wherever on the ring which is divided
+% into arcs (buckets).
+% If a bucket gets too full, you can just split it.
+% More on this below. -PFS
+
+The first distribution strategy (used for the first pool) publishes bridge
+addresses in a time-release fashion. The bridge authority divides the
+available bridges into partitions, and each partition is deterministically
+available only in certain time windows. That is, over the course of a
+given time slot (say, an hour), each requester is given a random bridge
+from within that partition. When the next time slot arrives, a new set
+of bridges from the pool becomes available for discovery. Thus some bridge
+address is always available when a new
+user arrives, but to learn about all bridges the attacker needs to fetch
+all new addresses at every new time slot. By varying the length of the
+time slots, we can make it harder for the attacker to guess when to check
+back. We expect these bridges will be the first to be blocked, but they'll
+help the system bootstrap until they \emph{do} get blocked. Further,
+remember that we're dealing with different blocking regimes around the
+world that will progress at different rates---so this pool will still
+be useful to some users even as the arms races progress.
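+
A minimal sketch of the time-release mechanics (the fixed interleaved split of the pool and the parameter choices here are assumptions for illustration): the current time slot deterministically selects a partition, and each request draws a random bridge from it.

```python
import random

def active_partition(now: int, slot_seconds: int, num_partitions: int) -> int:
    # The current time slot deterministically selects which partition
    # of the pool is being handed out right now.
    return (now // slot_seconds) % num_partitions

def bridge_for_request(bridges: list, now: int,
                       slot_seconds: int = 3600, num_partitions: int = 4) -> str:
    part = active_partition(now, slot_seconds, num_partitions)
    partition = bridges[part::num_partitions]  # fixed split of the pool
    return random.choice(partition)  # random bridge within the active partition
```

An attacker who wants the whole pool must come back every slot; a user who arrives at any time still gets a working answer.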
+
+The second distribution strategy publishes bridge addresses based on the IP
+address of the requesting user. Specifically, the bridge authority will
+divide the available bridges in the pool into a bunch of partitions
+(as in the first distribution scheme), hash the requester's IP address
+with a secret of its own (as in the above allocation scheme for creating
+pools), and give the requester a random bridge from the appropriate
+partition. To raise the bar, we should discard the last octet of the
+IP address before inputting it to the hash function, so an attacker
+who only controls a single ``/24'' network only counts as one user. A
+large attacker like China will still be able to control many addresses,
+but the hassle of establishing connections from each network (or spoofing
+TCP connections) may still slow them down. Similarly, as a special case,
+we should treat IP addresses that are Tor exit nodes as all being on
+the same network.
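+
The /24 trick can be sketched as follows (again, the hash construction is our own illustration of the scheme, not the deployed code): the requester's address is mapped onto its /24 network before hashing, so every host in that network sees the same partition.

```python
import hashlib
import ipaddress

def partition_for_requester(ip: str, secret: bytes, num_partitions: int) -> int:
    # Drop the last octet by mapping the address onto its /24 network,
    # so an attacker controlling a single /24 counts as one user.
    net = ipaddress.ip_network(ip + "/24", strict=False)
    digest = hashlib.sha256(secret + net.network_address.packed).digest()
    return int.from_bytes(digest[:4], "big") % num_partitions
```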
+
+The third strategy combines the time-based and location-based
+strategies to further constrain and rate-limit the available bridge
+addresses. Specifically, the bridge address provided in a given time
+slot to a given network location is deterministic within the partition,
+rather than chosen randomly each time from the partition. Thus, repeated
+requests during that time slot from a given network are given the same
+bridge address as the first request.
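+
Combining the two dimensions amounts to hashing the (time slot, network) pair into an index within the partition, as in this sketch (names and hash choice are illustrative):

```python
import hashlib

def bridge_for(slot: int, network: bytes, partition: list, secret: bytes) -> str:
    # The (slot, network) pair deterministically indexes into the partition,
    # so repeated requests from one network during one time slot always
    # yield the same bridge address.
    digest = hashlib.sha256(secret + slot.to_bytes(8, "big") + network).digest()
    return partition[int.from_bytes(digest[:4], "big") % len(partition)]
```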
+
+The fourth strategy is based on Circumventor's discovery strategy.
+The Circumventor project, realizing that its adoption will remain limited
+if it has no central coordination mechanism, has started a mailing list to
+distribute new proxy addresses every few days. From experimentation it
+seems they have concluded that sending updates every three or four days
+is sufficient to stay ahead of the current attackers.
+
+The fifth strategy provides an alternative approach to a mailing list:
+users provide an email address and receive an automated response
+listing an available bridge address. We could limit one response per
+email address. To further rate limit queries, we could require a CAPTCHA
+solution
+%~\cite{captcha}
+in each case too. In fact, we wouldn't need to
+implement the CAPTCHA on our side: if we only deliver bridge addresses
+to Yahoo or Gmail addresses, we can leverage the rate-limiting schemes
+that other parties already impose for account creation.
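+
The autoresponder logic reduces to two checks plus a deterministic pick, as in this sketch (the domain allowlist and the in-memory `seen` set are simplifying assumptions; a real deployment would persist state and rotate which bridges it hands out):

```python
import hashlib

ALLOWED_DOMAINS = {"gmail.com", "yahoo.com"}  # providers that already rate-limit signups

class AutoResponder:
    def __init__(self, bridges: list):
        self.bridges = bridges
        self.seen = set()

    def respond(self, address: str):
        address = address.strip().lower()
        domain = address.rsplit("@", 1)[-1]
        if domain not in ALLOWED_DOMAINS:
            return None  # outsource rate limiting to the mail provider
        if address in self.seen:
            return None  # one response per email address
        self.seen.add(address)
        idx = int(hashlib.sha256(address.encode()).hexdigest(), 16) % len(self.bridges)
        return self.bridges[idx]
```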
+
+The sixth strategy ties in the social network design with public
+bridges and a reputation system. We pick some seeds---trusted people in
+blocked areas---and give them each a few dozen bridge addresses and a few
+\emph{delegation tokens}. We run a website next to the bridge authority,
+where users can log in (they connect via Tor, and they don't need to
+provide actual identities, just persistent pseudonyms). Users can delegate
+trust to other people they know by giving them a token, which can be
+exchanged for a new account on the website. Accounts in ``good standing''
+then accrue new bridge addresses and new tokens. As usual, reputation
+schemes bring in a host of new complexities~\cite{rep-anon}: how do we
+decide that an account is in good standing? We could tie reputation
+to whether the bridges they're told about have been blocked---see
+Section~\ref{subsec:geoip} below for initial thoughts on how to discover
+whether bridges have been blocked. We could track reputation between
+accounts (if you delegate to somebody who screws up, it impacts you too),
+or we could use blinded delegation tokens~\cite{chaum-blind} to prevent
+the website from mapping the seeds' social network. We put off deeper
+discussion of the social network reputation strategy for future work.
+
+Pools seven and eight are held in reserve, in case our currently deployed
+tricks all fail at once and the adversary blocks all those bridges---so
+we can adapt and move to new approaches quickly, and have some bridges
+immediately available for the new schemes. New strategies might be based
+on some other scarce resource, such as relaying traffic for others or
+other proof of energy spent. (We might also worry about the incentives
+for bridges that sign up and get allocated to the reserve pools: will they
+be unhappy that they're not being used? But this is a transient problem:
+if Tor users are bridges by default, nobody will mind not being used yet.
+See also Section~\ref{subsec:incentives}.)
+
+%Is it useful to load balance which bridges are handed out? The above
+%pool concept makes some bridges wildly popular and others less so.
+%But I guess that's the point.
+
+\subsection{Public bridges with coordinated discovery}
+
+We presented the above discovery strategies in the context of a single
+bridge directory authority, but in practice we will want to distribute the
+operations over several bridge authorities---a single point of failure
+or attack is a bad move. The first answer is to run several independent
+bridge directory authorities, and bridges gravitate to one based on
+their identity key. The better answer would be some federation of bridge
+authorities that work together to provide redundancy but don't introduce
+new security issues. We could even imagine designs where the bridge
+authorities have encrypted versions of the bridge's relay descriptors,
+and the users learn a decryption key that they keep private when they
+first hear about the bridge---this way the bridge authorities would not
+be able to learn the IP address of the bridges.
+
+We leave this design question for future work.
+
+\subsection{Assessing whether bridges are useful}
+
+Learning whether a bridge is useful is important in the bridge authority's
+decision to include it in responses to blocked users. For example, if
+we end up with a list of thousands of bridges and only a few dozen of
+them are reachable right now, most blocked users will not end up knowing
+about working bridges.
+
+There are three components for assessing how useful a bridge is. First,
+is it reachable from the public Internet? Second, what proportion of
+the time is it available? Third, is it blocked in certain jurisdictions?
+
+The first component can be tested just as we test reachability of
+ordinary Tor relays. Specifically, the bridges do a self-test---connect
+to themselves via the Tor network---before they are willing to
+publish their descriptor, to make sure they're not obviously broken or
+misconfigured. Once the bridges publish, the bridge authority also tests
+reachability to make sure they're not confused or outright lying.
+
+The second component can be measured and tracked by the bridge authority.
+By doing periodic reachability tests, we can get a sense of how often the
+bridge is available. More complex tests will involve bandwidth-intensive
+checks to force the bridge to commit resources in order to be counted as
+available. We need to evaluate how uptime percentage should weigh into
+our choice of which bridges to advertise. We leave
+this to future work.
+
+The third component is perhaps the trickiest: with many different
+adversaries out there, how do we keep track of which adversaries have
+blocked which bridges, and how do we learn about new blocks as they
+occur? We examine this problem next.
+
+\subsection{How do we know if a bridge relay has been blocked?}
+\label{subsec:geoip}
+
+There are two main mechanisms for testing whether bridges are reachable
+from inside each blocked area: active testing via users, and passive
+testing via bridges.
+
+In the case of active testing, certain users inside each area
+sign up as testing relays. The bridge authorities can then use a
+Blossom-like~\cite{blossom-thesis} system to build circuits through them
+to each bridge and see if it can establish the connection. But how do
+we pick the users? If we ask random users to do the testing (or if we
+solicit volunteers from the users), the adversary will sign up so he
+can enumerate the bridges we test. Indeed, even if we hand-select our
+testers, the adversary might still discover their location and monitor
+their network activity to learn bridge addresses.
+
+Another answer is not to measure directly, but rather let the bridges
+report whether they're being used.
+%If they periodically report to their
+%bridge directory authority how much use they're seeing, perhaps the
+%authority can make smart decisions from there.
+Specifically, bridges should install a GeoIP database such as the public
+IP-To-Country list~\cite{ip-to-country}, and then periodically report to the
+bridge authorities which countries they're seeing use from. This data
+would help us track which countries are making use of the bridge design,
+and can also let us learn about new steps the adversary has taken in
+the arms race. (The compressed GeoIP database is only several hundred
+kilobytes, and we could even automate the update process by serving it
+from the bridge authorities.)
+More analysis of this passive reachability
+testing design is needed to resolve its many edge cases: for example,
+if a bridge stops seeing use from a certain area, does that mean the
+bridge is blocked or does that mean those users are asleep?
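+
The bridge-side bookkeeping is simple; the sketch below uses a `lookup` callback as a stand-in for a real GeoIP database query (hypothetical interface: IP string to two-letter country code), and reports only aggregate per-country counts, never addresses.

```python
from collections import Counter

class UsageReporter:
    def __init__(self, lookup):
        self.lookup = lookup  # stand-in for a GeoIP database query
        self.counts = Counter()

    def note_connection(self, ip: str):
        # Tally the connecting client's country; the address itself
        # is never stored or reported.
        self.counts[self.lookup(ip)] += 1

    def publish_report(self):
        # Hand the per-country tallies to the bridge authority and reset.
        report, self.counts = dict(self.counts), Counter()
        return report
```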
+
+There are many more problems with the general concept of detecting whether
+bridges are blocked. First, different zones of the Internet are blocked
+in different ways, and the actual firewall jurisdictions do not match
+country borders. Our bridge scheme could help us map out the topology
+of the censored Internet, but this is a huge task. More generally,
+if a bridge relay isn't reachable, is that because of a network block
+somewhere, because of a problem at the bridge relay, or just a temporary
+outage somewhere in between? And last, an attacker could poison our
+bridge database by signing up already-blocked bridges. In this case,
+if we're stingy giving out bridge addresses, users in that country won't
+learn working bridges.
+
+All of these issues are made more complex when we try to integrate this
+testing into our social network reputation system above.
+Since in that case we punish or reward users based on whether bridges
+get blocked, the adversary has new attacks to trick or bog down the
+reputation tracking. Indeed, the bridge authority doesn't even know
+what zone the blocked user is in, so do we blame him for any possible
+censored zone, or what?
+
+Clearly more analysis is required. The eventual solution will probably
+involve a combination of passive measurement via GeoIP and active
+measurement from trusted testers. More generally, we can use the passive
+feedback mechanism to track usage of the bridge network as a whole---which
+would let us respond to attacks and adapt the design, and it would also
+let the general public track the progress of the project.
+
+%Worry: the adversary could choose not to block bridges but just record
+%connections to them. So be it, I guess.
+
+\subsection{Advantages of deploying all solutions at once}
+
+For once, we're not in the position of the defender: we don't have to
+defend against every possible filtering scheme; we just have to defend
+against at least one. On the flip side, the attacker is forced to guess
+how to allocate his resources to defend against each of these discovery
+strategies. So by deploying all of our strategies at once, we not only
+increase our chances of finding one that the adversary has difficulty
+blocking, but we actually make \emph{all} of the strategies more robust
+in the face of an adversary with limited resources.
+
+%\subsection{Remaining unsorted notes}
+
+%In the first subsection we describe how to find a first bridge.
+
+%Going to be an arms race. Need a bag of tricks. Hard to say
+%which ones will work. Don't spend them all at once.
+
+%Some techniques are sufficient to get us an IP address and a port,
+%and others can get us IP:port:key. Lay out some plausible options
+%for how users can bootstrap into learning their first bridge.
+
+%\section{The account / reputation system}
+%\section{Social networks with directory-side support}
+%\label{sec:accounts}
+
+%One answer is to measure based on whether the bridge addresses
+%we give it end up blocked. But how do we decide if they get blocked?
+
+%Perhaps each bridge should be known by a single bridge directory
+%authority. This makes it easier to trace which users have learned about
+%it, so easier to blame or reward. It also makes things more brittle,
+%since loss of that authority means its bridges aren't advertised until
+%they switch, and means its bridge users are sad too.
+%(Need a slick hash algorithm that will map our identity key to a
+%bridge authority, in a way that's sticky even when we add bridge
+%directory authorities, but isn't sticky when our authority goes
+%away. Does this exist?)
+% [[Ian says: What about just using something like hash table chaining?
+% This should work, so long as the client knows which authorities currently
+% exist.]]
+
+%\subsection{Discovery based on social networks}
+
+%A token that can be exchanged at the bridge authority (assuming you
+%can reach it) for a new bridge address.
+
+%The account server runs as a Tor controller for the bridge authority.
+
+%Users can establish reputations, perhaps based on social network
+%connectivity, perhaps based on not getting their bridge relays blocked,
+
+%Probably the most critical lesson learned in past work on reputation
+%systems in privacy-oriented environments~\cite{rep-anon} is the need for
+%verifiable transactions. That is, the entity computing and advertising
+%reputations for participants needs to actually learn in a convincing
+%way that a given transaction was successful or unsuccessful.
+
+%(Lesson from designing reputation systems~\cite{rep-anon}: easy to
+%reward good behavior, hard to punish bad behavior.
+
+\section{Security considerations}
+\label{sec:security}
+
+\subsection{Possession of Tor in oppressed areas}
+
+Many people speculate that installing and using a Tor client in areas with
+particularly extreme firewalls is a high risk---and the risk increases
+as the firewall gets more restrictive. This notion certainly has merit, but
+there's
+a counter pressure as well: as the firewall gets more restrictive, more
+ordinary people behind it end up using Tor for more mainstream activities,
+such as learning
+about Wall Street prices or looking at pictures of women's ankles. So
+as the restrictive firewall pushes up the number of Tor users, the
+``typical'' Tor user becomes more mainstream, and therefore mere
+use or possession of the Tor software is not so surprising.
+
+It's hard to say which of these pressures will ultimately win out,
+but we should keep both sides of the issue in mind.
+
+%Nick, want to rewrite/elaborate on this section?
+
+%Ian suggests:
+% Possession of Tor: this is totally of-the-cuff, and there are lots of
+% security issues to think about, but what about an ActiveX version of
+% Tor? The magic you learn (as opposed to a bridge address) is a plain
+% old HTTPS server, which feeds you an ActiveX applet pre-configured with
+% some bridge address (possibly on the same machine). For bonus points,
+% somehow arrange that (a) the applet is signed in some way the user can
+% reliably check, but (b) don't end up with anything like an incriminating
+% long-term cert stored on the user's computer. This may be marginally
+% useful in some Internet-cafe situations as well, though (a) is even
+% harder to get right there.
+
+
+\subsection{Observers can tell who is publishing and who is reading}
+\label{subsec:upload-padding}
+
+Tor encrypts traffic on the local network, and it obscures the eventual
+destination of the communication, but it doesn't do much to obscure the
+traffic volume. In particular, a user publishing a home video will have a
+different network fingerprint than a user reading an online news article.
+Based on our assumption in Section~\ref{sec:adversary} that users who
+publish material are in more danger, should we work to improve Tor's
+security in this situation?
+
+In the general case this is an extremely challenging task:
+effective \emph{end-to-end traffic confirmation attacks}
+are known where the adversary observes the origin and the
+destination of traffic and confirms that they are part of the
+same communication~\cite{danezis:pet2004,e2e-traffic}. Related are
+\emph{website fingerprinting attacks}, where the adversary downloads
+a few hundred popular websites, makes a set of ``fingerprints'' for each
+site, and then observes the target Tor client's traffic to look for
+a match~\cite{pet05-bissias,defensive-dropping}. But can we do better
+against a limited adversary who just does coarse-grained sweeps looking
+for unusually prolific publishers?
+
+One answer is for bridge users to automatically send bursts of padding
+traffic periodically. (This traffic can be implemented in terms of
+long-range drop cells, which are already part of the Tor specification.)
+Of course, convincingly simulating an actual human publishing interesting
+content is a difficult arms race, but it may be worthwhile to at least
+start the race. More research remains.
+
+\subsection{Anonymity effects from acting as a bridge relay}
+
+Against some attacks, relaying traffic for others can improve
+anonymity. The simplest example is an attacker who owns a small number
+of Tor relays. He will see a connection from the bridge, but he won't
+be able to know whether the connection originated there or was relayed
+from somebody else. More generally, the mere uncertainty of whether the
+traffic originated from that user may be helpful.
+
+There are some cases where it doesn't seem to help: if an attacker can
+watch all of the bridge's incoming and outgoing traffic, then it's easy
+to learn which connections were relayed and which started there. (In this
+case he still doesn't know the final destinations unless he is watching
+them too, but in this case bridges are no better off than if they were
+an ordinary client.)
+
+There are also some potential downsides to running a bridge. First, while
+we try to make it hard to enumerate all bridges, it's still possible to
+learn about some of them, and for some people just the fact that they're
+running one might signal to an attacker that they place a higher value
+on their anonymity. Second, there are some more esoteric attacks on Tor
+relays that are not as well-understood or well-tested---for example, an
+attacker may be able to ``observe'' whether the bridge is sending traffic
+even if he can't actually watch its network, by relaying traffic through
+it and noticing changes in traffic timing~\cite{attack-tor-oak05}. On
+the other hand, it may be that limiting the bandwidth the bridge is
+willing to relay will allow this sort of attacker to determine if it's
+being used as a bridge but not easily learn whether it is adding traffic
+of its own.
+
+We also need to examine how entry guards fit in. Entry guards
+(a small set of nodes that are always used for the first
+step in a circuit) help protect against certain attacks
+where the attacker runs a few Tor relays and waits for
+the user to choose these relays as the beginning and end of her
+circuit\footnote{\url{http://wiki.noreply.org/noreply/TheOnionRouter/TorFAQ#EntryGuards}}.
+If the blocked user doesn't use the bridge's entry guards, then the bridge
+doesn't gain as much cover benefit. On the other hand, what design changes
+are needed for the blocked user to use the bridge's entry guards without
+learning what they are (this seems hard), and even if we solve that,
+do they then need to use the guards' guards and so on down the line?
+
+It is an open research question whether the benefits of running a bridge
+outweigh the risks. A lot of the decision rests on which attacks the
+users are most worried about. For most users, we don't think running a
+bridge relay will be that damaging, and it could help quite a bit.
+
+\subsection{Trusting local hardware: Internet cafes and LiveCDs}
+\label{subsec:cafes-and-livecds}
+
+Assuming that users have their own trusted hardware is not
+always reasonable.
+
+For Internet cafe Windows computers that let you attach your own USB key,
+a USB-based Tor image would be smart. There's Torpark, and hopefully
+there will be more thoroughly analyzed and trustworthy options down the
+road. Worries remain about hardware or software keyloggers and other
+spyware, as well as physical surveillance.
+
+If the system lets you boot from a CD or from a USB key, you can gain
+a bit more security by bringing a privacy LiveCD with you. (This
+approach isn't foolproof either, of course, since hardware
+keyloggers and physical surveillance are still a worry.)
+
+In fact, LiveCDs are also useful if it's your own hardware, since it's
+easier to avoid leaving private data and logs scattered around the
+system.
+
+%\subsection{Forward compatibility and retiring bridge authorities}
+%
+%Eventually we'll want to change the identity key and/or location
+%of a bridge authority. How do we do this mostly cleanly?
+
+\subsection{The trust chain}
+\label{subsec:trust-chain}
+
+Tor's ``public key infrastructure'' provides a chain of trust to
+let users verify that they're actually talking to the right relays.
+There are four pieces to this trust chain.
+
+First, when Tor clients are establishing circuits, at each step
+they demand that the next Tor relay in the path prove knowledge of
+its private key~\cite{tor-design}. This step prevents the first node
+in the path from just spoofing the rest of the path. Second, the
+Tor directory authorities provide a signed list of relays along with
+their public keys---so unless the adversary can control a threshold
+of directory authorities, he can't trick the Tor client into using other
+Tor relays. Third, the locations and keys of the directory authorities,
+in turn, are hard-coded in the Tor source code---so as long as the user
+got a genuine version of Tor, he can know that he is using the genuine
+Tor network. And last, the source code and other packages are signed
+with the GPG keys of the Tor developers, so users can confirm that they
+did in fact download a genuine version of Tor.
+
+In the case of blocked users contacting bridges and bridge directory
+authorities, the same logic applies in parallel: the blocked users fetch
+information from both the bridge authorities and the directory authorities
+for the `main' Tor network, and they combine this information locally.
+
+How can a user in an oppressed country know that he has the correct
+key fingerprints for the developers? As with other security systems, it
+ultimately comes down to human interaction. The keys are signed by dozens
+of people around the world, and we have to hope that our users have met
+enough people in the PGP web of trust
+%~\cite{pgp-wot}
+that they can learn
+the correct keys. For users that aren't connected to the global security
+community, though, this question remains a critical weakness.
+
+%\subsection{Security through obscurity: publishing our design}
+
+%Many other schemes like dynaweb use the typical arms race strategy of
+%not publishing their plans. Our goal here is to produce a design---a
+%framework---that can be public and still secure. Where's the tradeoff?
+
+%\section{Performance improvements}
+%\label{sec:performance}
+%
+%\subsection{Fetch relay descriptors just-in-time}
+%
+%I guess we should encourage most places to do this, so blocked
+%users don't stand out.
+%
+%
+%network-status and directory optimizations. caching better. partitioning
+%issues?
+
+\section{Maintaining reachability}
+\label{sec:reachability}
+
+\subsection{How many bridge relays should you know about?}
+
+The strategies described in Section~\ref{sec:discovery} talked about
+learning one bridge address at a time. But if most bridges are ordinary
+Tor users on cable modem or DSL connections, many of them will disappear
+and/or move periodically. How many bridge relays should a blocked user
+know about so that she is likely to have at least one reachable at any
+given point? This is already a challenging problem if we only consider
+natural churn: the best approach is to see what bridges we attract in
+reality and measure their churn. We may also need to factor in a parameter
+for how quickly bridges get discovered and blocked by the attacker;
+we leave this for future work after we have more deployment experience.
+
+A related question is: if the bridge relays change IP addresses
+periodically, how often does the blocked user need to fetch updates in
+order to keep from being cut out of the loop?
+
+Once we have more experience and intuition, we should explore technical
+solutions to this problem too. For example, if the discovery strategies
+give out $k$ bridge addresses rather than a single bridge address, perhaps
+we can improve robustness from the user perspective without significantly
+aiding the adversary. Rather than giving out a new random subset of $k$
+addresses at each point, we could bind them together into \emph{bridge
+families}, so all users that learn about one member of the bridge family
+are told about the rest as well.
+
+This scheme may also help defend against attacks to map the set of
+bridges. That is, if all blocked users learn a random subset of bridges,
+the attacker can learn about a few bridges, monitor the country-level
+firewall for connections to them, watch those users to see what
+other bridges they use, and repeat. By segmenting the bridge address
+space, we can limit the exposure of other users.
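The bridge-family idea above can be sketched concretely. This is a minimal illustration, not part of any deployed Tor design: it assumes a hypothetical `assign_family` helper that deterministically hashes a user identifier onto one family of bridges, so a censor posing as many requests under one identity still only enumerates a single family.

```python
import hashlib

def assign_family(user_id: str, families: list[list[str]]) -> list[str]:
    # Hash the user identifier to pick one bridge family; the same
    # identifier always maps to the same family, so repeated queries
    # reveal no additional segments of the bridge address space.
    digest = hashlib.sha256(user_id.encode()).digest()
    index = int.from_bytes(digest[:8], "big") % len(families)
    return families[index]

# Hypothetical family layout for illustration only.
families = [["bridgeA", "bridgeB"], ["bridgeC", "bridgeD"]]
fam = assign_family("alice", families)
assert fam == assign_family("alice", families)  # stable per user
assert fam in families
```

In practice the identifier would come from the discovery channel (e.g. the requester's email address or IP prefix), which is exactly the design question Section~\ref{sec:discovery} leaves open.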
+
+\subsection{Cablemodem users don't usually provide important websites}
+\label{subsec:block-cable}
+
+Another attack we might be concerned about is the attacker simply
+blocking all DSL and cablemodem network addresses, on the theory that
+they don't run any important services anyway. If most of our bridges
+are on these networks, this attack could really hurt.
+
+The first answer is to aim to get volunteers both from traditionally
+``consumer'' networks and also from traditionally ``producer'' networks.
+Since bridges don't need to be Tor exit nodes, as we improve our usability
+it seems quite feasible to get a lot of websites helping out.
+
+The second answer (not as practical) would be to encourage more use of
+consumer networks for popular and useful Internet services.
+%(But P2P exists;
+%minor websites exist; gaming exists; IM exists; ...)
+
+A related attack we might worry about is based on large countries putting
+economic pressure on companies that want to expand their business. For
+example, what happens if Verizon wants to sell services in China, and
+China pressures Verizon to discourage its users in the free world from
+running bridges?
+
+\subsection{Scanning resistance: making bridges more subtle}
+
+If it's trivial to verify that a given address is operating as a bridge,
+and most bridges run on a predictable port, then it's conceivable our
+attacker could scan the whole Internet looking for bridges. (In fact,
+he can just concentrate on scanning likely networks like cablemodem
+and DSL services---see Section~\ref{subsec:block-cable} above for
+related attacks.) It would be nice to slow down this attack. It would
+be even nicer to make it hard to learn whether we're a bridge without
+first knowing some secret. We call this general property \emph{scanning
+resistance}, and it goes along with normalizing Tor's TLS handshake and
+network fingerprint.
+
+We could provide a password to the blocked user, and she (or her Tor
+client) would provide a nonced hash of this password when she connects. We'd
+need to give her an ID key for the bridge too (in addition to the IP
+address and port---see Section~\ref{subsec:id-address}), and wait to
+present the password until we've finished the TLS handshake, else it
+would look unusual. If Alice can authenticate the bridge before she
+tries to send her password, we can resist an adversary who pretends
+to be the bridge and launches a man-in-the-middle attack to learn the
+password. But even if she can't, we still resist against widespread
+scanning.
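The nonced-hash exchange above can be sketched as follows. This is a toy illustration of the idea, not the Tor implementation: the function names and the choice of HMAC-SHA256 are assumptions made for the example.

```python
import hashlib
import hmac
import os

def make_auth_response(password: bytes, nonce: bytes) -> bytes:
    # Client side: prove knowledge of the shared password without
    # revealing it; the fresh nonce prevents replay of old responses.
    return hmac.new(password, nonce, hashlib.sha256).digest()

def bridge_check(password: bytes, nonce: bytes, response: bytes) -> bool:
    # Bridge side: constant-time comparison so response timing doesn't
    # leak how many bytes of the attempt were correct.
    expected = hmac.new(password, nonce, hashlib.sha256).digest()
    return hmac.compare_digest(expected, response)

# A scanner that doesn't know the password cannot produce a valid
# response, so the bridge can fall back to looking like a plain server.
nonce = os.urandom(16)
resp = make_auth_response(b"bridge-password", nonce)
assert bridge_check(b"bridge-password", nonce, resp)
assert not bridge_check(b"bridge-password", nonce, os.urandom(32))
```

Note that, as the text says, this only resists man-in-the-middle password capture if Alice can authenticate the bridge's identity key first.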
+
+How should the bridge behave if accessed without the correct
+authorization? Perhaps it should act like an unconfigured HTTPS server
+(``welcome to the default Apache page''), or maybe it should mirror
+and act like common websites, or websites randomly chosen from Google.
+
+We might assume that the attacker can recognize HTTPS connections that
+use self-signed certificates. (This process would be resource-intensive
+but not out of the realm of possibility.) But even in this case,
+blocking them all would also block the many popular websites around the
+Internet that use self-signed or just plain broken SSL certificates.
+
+%to unknown servers. It can then attempt to connect to them and block
+%connections to servers that seem suspicious. It may be that password
+%protected web sites will not be suspicious in general, in which case
+%that may be the easiest way to give controlled access to the bridge.
+%If such sites that have no other overt features are automatically
+%blocked when detected, then we may need to be more subtle.
+%Possibilities include serving an innocuous web page if a TLS encrypted
+%request is received without the authorization needed to access the Tor
+%network and only responding to a requested access to the Tor network
+%if proper authentication is given. If an unauthenticated request to
+%access the Tor network is sent, the bridge should respond as if
+%it has received a message it does not understand (as would be the
+%case were it not a bridge).
+
+% Ian suggests a ``socialist millionaires'' protocol here, for something.
+
+% Did we once mention knocking here? it's a good idea, but we should clarify
+% what we mean. Ian also notes that knocking itself is very fingerprintable,
+% and we should beware.
+
+\subsection{How to motivate people to run bridge relays}
+\label{subsec:incentives}
+
+One of the traditional ways to get people to run software that benefits
+others is to give them motivation to install it themselves. An often
+suggested approach is to install it as a stunning screensaver so everybody
+will be pleased to run it. We take a similar approach here, by leveraging
+the fact that these users are already interested in protecting their
+own Internet traffic, so they will install and run the software.
+
+Eventually, we may be able to make all Tor users become bridges if they
+pass their self-reachability tests---the software and installers need
+more work on usability first, but we're making progress.
+
+In the mean time, we can make a snazzy network graph with
+Vidalia\footnote{\url{http://vidalia-project.net/}} that
+emphasizes the connections the bridge user is currently relaying.
+%(Minor
+%anonymity implications, but hey.) (In many cases there won't be much
+%activity, so this may backfire. Or it may be better suited to full-fledged
+%Tor relay.)
+
+% Also consider everybody-a-relay. Many of the scalability questions
+% are easier when you're talking about making everybody a bridge.
+
+%\subsection{What if the clients can't install software?}
+
+%[this section should probably move to the related work section,
+%or just disappear entirely.]
+
+%Bridge users without Tor software
+
+%Bridge relays could always open their socks proxy. This is bad though,
+%first
+%because bridges learn the bridge users' destinations, and second because
+%we've learned that open socks proxies tend to attract abusive users who
+%have no idea they're using Tor.
+
+%Bridges could require passwords in the socks handshake (not supported
+%by most software including Firefox). Or they could run web proxies
+%that require authentication and then pass the requests into Tor. This
+%approach is probably a good way to help bootstrap the Psiphon network,
+%if one of its barriers to deployment is a lack of volunteers willing
+%to exit directly to websites. But it clearly drops some of the nice
+%anonymity and security features Tor provides.
+
+%A hybrid approach where the user gets his anonymity from Tor but his
+%software-less use from a web proxy running on a trusted machine on the
+%free side.
+
+\subsection{Publicity attracts attention}
+\label{subsec:publicity}
+
+Many people working in this field want to publicize the existence
+and extent of censorship concurrently with the deployment of their
+circumvention software. The easy reason for this two-pronged push is
+to attract volunteers to run proxies on their systems; but in many
+cases their main goal is not to sign up more users,
+but rather to educate the rest of the world about the
+censorship. The media also tries to do its part by broadcasting the
+existence of each new circumvention system.
+
+But at the same time, this publicity attracts the attention of the
+censors. We can slow down the arms race by not attracting as much
+attention, and just spreading by word of mouth. If our goal is to
+establish a solid social network of bridges and bridge users before
+the adversary gets involved, does this extra attention work to our
+disadvantage?
+
+\subsection{The Tor website: how to get the software}
+
+One of the first censoring attacks against a system like ours is to
+block the website and make the software itself hard to find. Our system
+should work well once the user is running an authentic
+copy of Tor and has found a working bridge, but to get to that point
+we rely on her individual skills and ingenuity.
+
+Right now, most countries that block access to Tor block only the main
+website and leave mirrors and the network itself untouched.
+Falling back on word-of-mouth is always a good last resort, but we should
+also take steps to make sure it's relatively easy for users to get a copy,
+such as publicizing the mirrors more and making copies available through
+other media. We might also mirror the latest version of the software on
+each bridge, so users who hear about an honest bridge can get a good
+copy.
+See Section~\ref{subsec:first-bridge} for more discussion.
+
+% Ian suggests that we have every tor relay distribute a signed copy of the
+% software.
+
+\section{Next Steps}
+\label{sec:conclusion}
+
+Technical solutions won't solve the whole censorship problem. After all,
+the firewalls in places like China are \emph{socially} very
+successful, even if technologies and tricks exist to get around them.
+However, having a strong technical solution is still necessary as one
+important piece of the puzzle.
+
+In this paper, we have shown that Tor provides a great set of building
+blocks to start from. The next steps are to deploy prototype bridges and
+bridge authorities, implement some of the proposed discovery strategies,
+and then observe the system in operation and get more intuition about
+the actual requirements and adversaries we're up against.
+
+\bibliographystyle{plain} \bibliography{tor-design}
+
+%\appendix
+
+%\section{Counting Tor users by country}
+%\label{app:geoip}
+
+\end{document}
+
+
+
+\section{Future designs}
+\label{sec:future}
+
+\subsection{Bridges inside the blocked network too}
+
+Assuming that actually crossing the firewall is the risky part of the
+operation, can we have some bridge relays inside the blocked area too,
+so that more established users can use them as relays and don't need to
+communicate over the firewall directly at all? A simple example here is
+to make new blocked users into internal bridges also---so they sign up
+on the bridge authority as part of doing their query, and we give out
+their addresses
+rather than (or along with) the external bridge addresses. This design
+is a lot trickier because it brings in the complexity of whether the
+internal bridges will remain available, can maintain reachability with
+the outside world, etc.
+
+More complex future designs involve operating a separate Tor network
+inside the blocked area, and using \emph{hidden service bridges}---bridges
+that can be accessed by users of the internal Tor network but whose
+addresses are not published or findable, even by these users---to get
+from inside the firewall to the rest of the Internet. But this design
+requires directory authorities to run inside the blocked area too,
+and they would be a fine target to take down the network.
+
+% Hidden services as bridge directory authorities.
+
+
+------------------------------------------
+
+ship geoip db to bridges. they look up users who tls to them in the db,
+and upload a signed list of countries and number-of-users each day. the
+bridge authority aggregates them and publishes stats.
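The per-country reporting note above can be sketched as follows. This is an illustrative outline only (signing is omitted, and `geoip_lookup` stands in for whatever geolocation database the bridges would ship with); the key property is that bridges report aggregate counts per country, never client addresses.

```python
from collections import Counter

def bridge_daily_report(client_ips, geoip_lookup):
    # Each bridge maps the addresses it saw to country codes and
    # reports only the counts, discarding the addresses themselves.
    return Counter(geoip_lookup(ip) for ip in client_ips)

def authority_aggregate(reports):
    # The bridge authority sums the per-bridge counters into
    # network-wide per-country usage statistics.
    total = Counter()
    for report in reports:
        total.update(report)
    return total

# Toy geoip table for illustration.
geoip = {"1.2.3.4": "cn", "5.6.7.8": "ir", "9.9.9.9": "cn"}.get
r1 = bridge_daily_report(["1.2.3.4", "9.9.9.9"], geoip)
r2 = bridge_daily_report(["5.6.7.8"], geoip)
assert authority_aggregate([r1, r2]) == Counter({"cn": 2, "ir": 1})
```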
+
+bridge relays have buddies
+they ask a user to test the reachability of their buddy.
+leaks O(1) bridges, but not O(n).
+
+we should not be blockable by ordinary cisco censorship features.
+that is, if they want to block our new design, they will need to
+add a feature to block exactly this.
+strategically speaking, this may come in handy.
+
+Bridges come in clumps of 4 or 8 or whatever. If you know one bridge
+in a clump, the authority will tell you the rest. Now bridges can
+ask users to test reachability of their buddies.
+
+Giving out clumps helps with dynamic IP addresses too. Whether it
+should be 4 or 8 depends on our churn.
+
+the account server. let's call it a database; it doesn't have to
+be a thing that a human interacts with.
+
+so how do we reward people for being good?
+
+\subsubsection{Public Bridges with Coordinated Discovery}
+
+****Pretty much this whole subsubsection will probably need to be
+deferred until ``later'' and moved to after end document, but I'm leaving
+it here for now in case useful.******
+
+Rather than be entirely centralized, we can have a coordinated
+collection of bridge authorities, analogous to how Tor network
+directory authorities now work.
+
+Key components:
+``Authorities'' will distribute caches of what they know to overlapping
+collections of nodes so that no one node is owned by one authority.
+Also so that it is impossible to DoS info maintained by one authority
+simply by making requests to it.
+
+Where a bridge gets assigned is not predictable by the bridge?
+
+If authorities don't know the IP addresses of the bridges they
+are responsible for, they can't abuse that info (or be attacked for
+having it). But they also can't, e.g., prevent being sent massive
+lists of nodes that were never good. This raises another question.
+We generally decry use of IP addresses for location, etc., but we
+need to do that here to limit the introduction of functional but useless
+IP addresses because, e.g., they are in China and the adversary
+owns massive chunks of the IP space there.
+
+We don't want an arbitrary someone to be able to contact the
+authorities and say an IP address is bad because it would be easy
+for an adversary to take down all the suspicious bridges
+even if they provide good cover websites, etc. Only the bridge
+itself and/or the directory authority can declare a bridge blocked
+from somewhere.
+
+
+9. Bridge directories must not simply be a handful of nodes that
+provide the list of bridges. They must flood or otherwise distribute
+information out to other Tor nodes as mirrors. That way it becomes
+difficult for censors to flood the bridge directory authorities with
+requests, effectively denying access for others. But, there's lots of
+churn and a much larger size than Tor directories. We are forced to
+handle the directory scaling problem here much sooner than for the
+network in general. Authorities can pass their bridge directories
+(and policy info) to some moderate number of unidentified Tor nodes.
+Anyone contacting one of those nodes can get bridge info. The nodes
+must remain somewhat synched to prevent the adversary from abusing,
+e.g., a timed release policy, or the distribution to those nodes must
+be resilient even if they are not coordinating.
+
+I think some kind of DHT like scheme would work here. A Tor node is
+assigned a chunk of the directory. Lookups in the directory should be
+via hashes of keys (fingerprints) and that should determine the Tor
+nodes responsible. Ordinary directories can publish lists of Tor nodes
+responsible for fingerprint ranges. Clients looking to update info on
+some bridge will make a Tor connection to one of the nodes responsible
+for that address. Instead of shutting down a circuit after getting
+info on one address, extend it to another that is responsible for that
+address (the node from which you are extending knows you are doing so
+anyway). Keep going. This way you can amortize the Tor connection.
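The DHT-style assignment sketched above can be illustrated with a consistent-hashing lookup. This is a sketch under assumed names (`responsible_node`, a ring of mirror points), not a proposal for exact parameters: a bridge fingerprint hashes onto a ring, and the first mirror at or past that point owns the corresponding directory chunk.

```python
import bisect
import hashlib

def responsible_node(fingerprint: str, ring: list[tuple[int, str]]) -> str:
    # `ring` is a sorted list of (point, mirror) pairs. The fingerprint
    # hashes to a point on the ring; the first mirror at or past that
    # point is responsible, wrapping around at the end of the ring.
    h = int.from_bytes(hashlib.sha256(fingerprint.encode()).digest()[:8], "big")
    points = [p for p, _ in ring]
    i = bisect.bisect_left(points, h) % len(ring)
    return ring[i][1]

# Three hypothetical mirrors at fixed ring points.
ring = sorted([(2**61, "mirror1"), (2**62, "mirror2"), (2**63, "mirror3")])
owner = responsible_node("ABCD1234EF567890", ring)
assert owner == responsible_node("ABCD1234EF567890", ring)  # stable lookup
assert owner in {"mirror1", "mirror2", "mirror3"}
```

Because lookups are keyed by fingerprint hash, ordinary directories only need to publish which mirrors cover which hash ranges, as the note suggests.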
+
+10. We need some way to give new identity keys out to those who need
+them without letting those get immediately blocked by authorities. One
+way is to give a fingerprint that gets you more fingerprints, as
+already described. These are meted out/updated periodically but allow
+us to keep track of which sources are compromised: if a distribution
+fingerprint repeatedly leads to quickly blocked bridges, it should be
+suspect, dropped, etc. Since we're using hashes, there shouldn't be a
+correlation with bridge directory mirrors, bridges, portions of the
+network observed, etc. It should just be that the authorities know
+about that key that leads to new addresses.
+
+This last point is very much like the issues in the valet nodes paper,
+which is essentially about blocking resistance wrt exiting the Tor network,
+while this paper is concerned with blocking resistance wrt entering the Tor network.
+In fact the tickets used to connect to the IPo (Introduction Point),
+could serve as an example, except that instead of authorizing
+a connection to the Hidden Service, it's authorizing the downloading
+of more fingerprints.
+
+Also, the fingerprints can follow the hash(q + '1' + cookie) scheme of
+that paper (where q = hash(PK + salt) gave the q.onion address). This
+allows us to control and track which fingerprint was causing problems.
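The hash(q + '1' + cookie) construction can be sketched directly. This is a minimal illustration of the derivation described above, with SHA-256 standing in for whatever hash the valet-nodes paper assumes; the key names and inputs are placeholders.

```python
import hashlib

def derive_q(pk: bytes, salt: bytes) -> bytes:
    # q = hash(PK + salt), as in the valet-nodes construction.
    return hashlib.sha256(pk + salt).digest()

def distribution_fingerprint(q: bytes, cookie: bytes) -> bytes:
    # hash(q + '1' + cookie): each distribution channel gets its own
    # cookie, so a blocked fingerprint identifies the leaky channel
    # without correlating with other channels' fingerprints.
    return hashlib.sha256(q + b"1" + cookie).digest()

q = derive_q(b"bridge-public-key", b"salt")
fp_a = distribution_fingerprint(q, b"channel-a")
fp_b = distribution_fingerprint(q, b"channel-b")
assert fp_a != fp_b  # distinct channels yield distinct fingerprints
```

If fingerprints derived from one cookie keep leading to quickly blocked bridges, that cookie's distribution channel is the suspect, exactly as point 10 argues.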
+
+Note that, unlike many settings, the reputation problem should not be
+hard here. If a bridge says it is blocked, then it might as well be.
+If an adversary can say that the bridge is blocked wrt
+$\mathit{censor}_i$, then it might as well be, since
+$\mathit{censor}_i$ can presumably then block that bridge if it so
+chooses.
+
+11. How much damage can the adversary do by running nodes in the Tor
+network and watching for bridge nodes connecting to it? (This is
+analogous to an Introduction Point watching for Valet Nodes connecting
+to it.) What percentage of the network do you need to own to do how
+much damage? Here the entry-guard design comes in helpfully. So we
+need to have bridges use entry-guards, but (cf. 3 above) not use
+bridges as entry-guards. Here's a serious tradeoff (again akin to the
+ratio of valets to IPos): the more bridges per client, the worse the
+anonymity of that client; the fewer bridges per client, the worse the
+blocking resistance of that client.
+
+
+
diff --git a/2006/blocking/tor-design.bib b/2006/blocking/tor-design.bib
new file mode 100644
index 0000000..981761e
--- /dev/null
+++ b/2006/blocking/tor-design.bib
@@ -0,0 +1,1493 @@
+% hs-attack
+@inproceedings{hs-attack,
+ title = {Locating Hidden Servers},
+ author = {Lasse {\O}verlier and Paul Syverson},
+ booktitle = {Proceedings of the 2006 IEEE Symposium on Security and Privacy},
+ year = {2006},
+ month = {May},
+ publisher = {IEEE CS},
+}
+
+
+@TechReport{bauer:tr2007,
+ author = {Kevin Bauer and Damon McCoy and Dirk Grunwald and Tadayoshi Kohno and Douglas Sicker},
+ title = {Low-Resource Routing Attacks Against Anonymous Systems},
+ institution = {University of Colorado at Boulder},
+ year = 2007,
+ number = {CU-CS-1025-07}
+}
+
+@inproceedings{bauer:wpes2007,
+ title = {Low-Resource Routing Attacks Against Tor},
+ author = {Kevin Bauer and Damon McCoy and Dirk Grunwald and Tadayoshi Kohno and Douglas Sicker},
+ booktitle = {{Proceedings of the Workshop on Privacy in the Electronic Society (WPES 2007)}},
+ year = {2007},
+ month = {October},
+ address = {Washington, DC, USA},
+}
+
+@book{tannenbaum96,
+  author = "Andrew S. Tanenbaum",
+  title = "Computer Networks",
+  year = "1996",
+  edition = "3rd",
+  publisher = "Prentice Hall",
+}
+
+@article{ meadows96,
+ author = "Catherine Meadows",
+ title = "The {NRL} Protocol Analyzer: An Overview",
+ journal = "Journal of Logic Programming",
+ volume = "26",
+ number = "2",
+ pages = "113--131",
+ year = "1996",
+}
+@inproceedings{kesdogan:pet2002,
+ title = {Unobservable Surfing on the World Wide Web: Is Private Information Retrieval an
+ alternative to the MIX based Approach?},
+ author = {Dogan Kesdogan and Mark Borning and Michael Schmeink},
+ booktitle = {Privacy Enhancing Technologies (PET 2002)},
+ year = {2002},
+ month = {April},
+ editor = {Roger Dingledine and Paul Syverson},
+ publisher = {Springer-Verlag, LNCS 2482},
+}
+
+@inproceedings{statistical-disclosure,
+ title = {Statistical Disclosure Attacks},
+ author = {George Danezis},
+ booktitle = {Security and Privacy in the Age of Uncertainty ({SEC2003})},
+ organization = {{IFIP TC11}},
+ year = {2003},
+ month = {May},
+ address = {Athens},
+ pages = {421--426},
+ publisher = {Kluwer},
+}
+
+@inproceedings{limits-open,
+ title = {Limits of Anonymity in Open Environments},
+ author = {Dogan Kesdogan and Dakshi Agrawal and Stefan Penz},
+ booktitle = {Information Hiding Workshop (IH 2002)},
+ year = {2002},
+ month = {October},
+ editor = {Fabien Petitcolas},
+ publisher = {Springer-Verlag, LNCS 2578},
+}
+
+@inproceedings{isdn-mixes,
+ title = {{ISDN-mixes: Untraceable communication with very small bandwidth overhead}},
+ author = {Andreas Pfitzmann and Birgit Pfitzmann and Michael Waidner},
+ booktitle = {GI/ITG Conference on Communication in Distributed Systems},
+ year = {1991},
+ month = {February},
+ pages = {451-463},
+}
+
+
+@Article{jerichow-jsac98,
+ author = {Anja Jerichow and Jan M\"{u}ller and Andreas
+ Pfitzmann and Birgit Pfitzmann and Michael Waidner},
+ title = {Real-Time Mixes: A Bandwidth-Efficient Anonymity Protocol},
+ journal = {IEEE Journal on Selected Areas in Communications},
+ year = 1998,
+ volume = 16,
+ number = 4,
+ pages = {495--509},
+ month = {May}
+}
+
+@inproceedings{tarzan:ccs02,
+ title = {Tarzan: A Peer-to-Peer Anonymizing Network Layer},
+ author = {Michael J. Freedman and Robert Morris},
+ booktitle = {9th {ACM} {C}onference on {C}omputer and {C}ommunications
+ {S}ecurity ({CCS 2002})},
+ year = {2002},
+ month = {November},
+ address = {Washington, DC},
+}
+
+@inproceedings{cebolla,
+ title = {{Cebolla: Pragmatic IP Anonymity}},
+ author = {Zach Brown},
+ booktitle = {Ottawa Linux Symposium},
+ year = {2002},
+ month = {June},
+}
+
+@inproceedings{eax,
+ author = "M. Bellare and P. Rogaway and D. Wagner",
+ title = {The {EAX} Mode of Operation: A Two-Pass Authenticated-Encryption Scheme Optimized for Simplicity and Efficiency},
+ booktitle = {Fast Software Encryption 2004},
+ month = {February},
+ year = {2004},
+}
+
+@misc{darkside,
+ title = {{The Dark Side of the Web: An Open Proxy's View}},
+ author = {Vivek S. Pai and Limin Wang and KyoungSoo Park and Ruoming Pang and Larry Peterson},
+ note = {\newline \url{http://codeen.cs.princeton.edu/}},
+}
+% note = {Submitted to HotNets-II. \url{http://codeen.cs.princeton.edu/}},
+
+@Misc{anonymizer,
+ key = {anonymizer},
+ title = {The {Anonymizer}},
+ note = {\url{http://anonymizer.com/}}
+}
+
+@Misc{privoxy,
+ key = {privoxy},
+ title = {{Privoxy}},
+ note = {\url{http://www.privoxy.org/}}
+}
+
+@Misc{i2p,
+ key = {i2p},
+ title = {{I2P}},
+ note = {\url{http://www.i2p.net/}}
+}
+
+@Misc{nym,
+ author = {Jason Holt},
+ title = {nym: practical pseudonymity for anonymous networks},
+ note = {Paper and source code at \url{http://www.lunkwill.org/src/nym/}}
+}
+
+@InProceedings{nymble,
+ author = {Peter C. Johnson and Apu Kapadia and Patrick P. Tsang and Sean W. Smith},
+ title = {Nymble: Anonymous {IP}-address Blocking},
+ booktitle = {Privacy Enhancing Technologies (PET 2007)},
+ year = 2007,
+ publisher = {Springer-Verlag, LNCS 4776}
+}
+
+@inproceedings{anonnet,
+ title = {{Analysis of an Anonymity Network for Web Browsing}},
+ author = {Marc Rennhard and Sandro Rafaeli and Laurent Mathy and Bernhard Plattner and
+ David Hutchison},
+ booktitle = {{IEEE 7th Intl. Workshop on Enterprise Security (WET ICE
+ 2002)}},
+ year = {2002},
+ month = {June},
+ address = {Pittsburgh, USA},
+}
+% pages = {49--54},
+
+@inproceedings{econymics,
+ title = {On the Economics of Anonymity},
+ author = {Alessandro Acquisti and Roger Dingledine and Paul Syverson},
+ booktitle = {Financial Cryptography},
+ year = {2003},
+ editor = {Rebecca N. Wright},
+ publisher = {Springer-Verlag, LNCS 2742},
+}
+
+@inproceedings{defensive-dropping,
+ title = {Timing Analysis in Low-Latency Mix-Based Systems},
+ author = {Brian N. Levine and Michael K. Reiter and Chenxi Wang and Matthew Wright},
+ booktitle = {Financial Cryptography},
+ year = {2004},
+ editor = {Ari Juels},
+ publisher = {Springer-Verlag, LNCS (forthcoming)},
+}
+
+@inproceedings{morphmix:fc04,
+ title = {Practical Anonymity for the Masses with MorphMix},
+ author = {Marc Rennhard and Bernhard Plattner},
+ booktitle = {Financial Cryptography},
+ year = {2004},
+ editor = {Ari Juels},
+ publisher = {Springer-Verlag, LNCS (forthcoming)},
+}
+
+@inproceedings{eternity,
+ title = {The Eternity Service},
+ author = {Ross Anderson},
+ booktitle = {Pragocrypt '96},
+ year = {1996},
+}
+ %note = {\url{http://www.cl.cam.ac.uk/users/rja14/eternity/eternity.html}},
+
+
+@inproceedings{minion-design,
+ title = {Mixminion: Design of a Type {III} Anonymous Remailer Protocol},
+ author = {George Danezis and Roger Dingledine and Nick Mathewson},
+ booktitle = {2003 IEEE Symposium on Security and Privacy},
+ year = {2003},
+ month = {May},
+ publisher = {IEEE CS},
+ pages = {2--15},
+}
+ %note = {\url{http://mixminion.net/minion-design.pdf}},
+
+@inproceedings{ rao-pseudonymity,
+ author = "Josyula R. Rao and Pankaj Rohatgi",
+ title = "Can Pseudonymity Really Guarantee Privacy?",
+ booktitle = "Proceedings of the Ninth USENIX Security Symposium",
+ year = {2000},
+ month = Aug,
+ publisher = {USENIX},
+ pages = "85--96",
+}
+ %note = {\url{http://www.usenix.org/publications/library/proceedings/sec2000/
+%full_papers/rao/rao.pdf}},
+
+@InProceedings{pfitzmann90how,
+ author = "Birgit Pfitzmann and Andreas Pfitzmann",
+ title = "How to Break the Direct {RSA}-Implementation of {MIXes}",
+ booktitle = {Eurocrypt 89},
+ publisher = {Springer-Verlag, LNCS 434},
+ year = {1990},
+ note = {\url{http://citeseer.nj.nec.com/pfitzmann90how.html}},
+}
+
+@Misc{tor-spec,
+ author = {Roger Dingledine and Nick Mathewson},
+ title = {Tor Protocol Specifications},
+ note = {\url{https://www.torproject.org/svn/trunk/doc/tor-spec.txt}},
+}
+
+@Misc{incentives-txt,
+ author = {Roger Dingledine and Nick Mathewson},
+ title = {Tor Incentives Design Brainstorms},
+ note = {\url{https://www.torproject.org/svn/trunk/doc/incentives.txt}},
+}
+
+@InProceedings{BM:mixencrypt,
+ author = {M{\"o}ller, Bodo},
+ title = {Provably Secure Public-Key Encryption for Length-Preserving Chaumian Mixes},
+ booktitle = {{CT-RSA} 2003},
+ publisher = {Springer-Verlag, LNCS 2612},
+ year = 2003,
+}
+
+@InProceedings{back01,
+ author = {Adam Back and Ulf M\"oller and Anton Stiglic},
+ title = {Traffic Analysis Attacks and Trade-Offs in Anonymity Providing Systems},
+ booktitle = {Information Hiding (IH 2001)},
+ pages = {245--257},
+ year = 2001,
+ editor = {Ira S. Moskowitz},
+ publisher = {Springer-Verlag, LNCS 2137},
+}
+ %note = {\newline \url{http://www.cypherspace.org/adam/pubs/traffic.pdf}},
+
+@InProceedings{rackoff93cryptographic,
+ author = {Charles Rackoff and Daniel R. Simon},
+ title = {Cryptographic Defense Against Traffic Analysis},
+ booktitle = {{ACM} Symposium on Theory of Computing},
+ pages = {672--681},
+ year = {1993},
+}
+ %note = {\url{http://research.microsoft.com/crypto/dansimon/me.htm}},
+
+@InProceedings{freehaven-berk,
+ author = {Roger Dingledine and Michael J. Freedman and David Molnar},
+ title = {The Free Haven Project: Distributed Anonymous Storage Service},
+ booktitle = {Designing Privacy Enhancing Technologies: Workshop
+ on Design Issues in Anonymity and Unobservability},
+ year = 2000,
+ month = {July},
+ editor = {H. Federrath},
+ publisher = {Springer-Verlag, LNCS 2009},
+}
+
+@InProceedings{move-ndss05,
+ author = {Angelos Stavrou and Angelos D. Keromytis and Jason Nieh and Vishal Misra and Dan Rubenstein},
+ title = {MOVE: An End-to-End Solution To Network Denial of Service},
+ booktitle = {{ISOC Network and Distributed System Security Symposium (NDSS05)}},
+ year = 2005,
+ month = {February},
+ publisher = {Internet Society}
+}
+
+%note = {\url{http://freehaven.net/papers.html}},
+
+@InProceedings{raymond00,
+ author = {J. F. Raymond},
+ title = {{Traffic Analysis: Protocols, Attacks, Design Issues,
+ and Open Problems}},
+ booktitle = {Designing Privacy Enhancing Technologies: Workshop
+ on Design Issues in Anonymity and Unobservability},
+ year = 2000,
+ month = {July},
+ pages = {10--29},
+ editor = {H. Federrath},
+ publisher = {Springer-Verlag, LNCS 2009},
+}
+
+@InProceedings{sybil,
+ author = {John Douceur},
+ title = {{The Sybil Attack}},
+ booktitle = {Proceedings of the 1st International Peer To Peer Systems Workshop (IPTPS)},
+ month = {March},
+ year = 2002,
+}
+
+
+@InCollection{price-privacy,
+ author = {Paul Syverson and Adam Shostack},
+ editor = {L. Jean Camp and Stephen Lewis},
+ title = {What Price Privacy? (and why identity theft is about neither identity nor theft)},
+ booktitle = {Economics of Information Security},
+ chapter = 10,
+ publisher = {Kluwer},
+ year = 2004,
+ pages = {129--142}
+}
+
+
+@InProceedings{trickle02,
+ author = {Andrei Serjantov and Roger Dingledine and Paul Syverson},
+ title = {From a Trickle to a Flood: Active Attacks on Several
+ Mix Types},
+ booktitle = {Information Hiding (IH 2002)},
+ year = {2002},
+ editor = {Fabien Petitcolas},
+ publisher = {Springer-Verlag, LNCS 2578},
+}
+
+@InProceedings{langos02,
+ author = {Oliver Berthold and Heinrich Langos},
+ title = {Dummy Traffic Against Long Term Intersection Attacks},
+ booktitle = {Privacy Enhancing Technologies (PET 2002)},
+ year = {2002},
+ editor = {Roger Dingledine and Paul Syverson},
+ publisher = {Springer-Verlag, LNCS 2482}
+}
+
+
+@InProceedings{hintz-pet02,
+ author = {Andrew Hintz},
+ title = {Fingerprinting Websites Using Traffic Analysis},
+ booktitle = {Privacy Enhancing Technologies (PET 2002)},
+ pages = {171--178},
+ year = 2002,
+ editor = {Roger Dingledine and Paul Syverson},
+ publisher = {Springer-Verlag, LNCS 2482}
+}
+
+@InProceedings{or-discex00,
+ author = {Paul Syverson and Michael Reed and David Goldschlag},
+ title = {{O}nion {R}outing Access Configurations},
+ booktitle = {DARPA Information Survivability Conference and
+ Exposition (DISCEX 2000)},
+ year = {2000},
+ publisher = {IEEE CS Press},
+ pages = {34--40},
+ volume = {1},
+}
+ %note = {\newline \url{http://www.onion-router.net/Publications.html}},
+
+@Inproceedings{or-pet00,
+ title = {{Towards an Analysis of Onion Routing Security}},
+ author = {Paul Syverson and Gene Tsudik and Michael Reed and
+ Carl Landwehr},
+ booktitle = {Designing Privacy Enhancing Technologies: Workshop
+ on Design Issues in Anonymity and Unobservability},
+ year = 2000,
+ month = {July},
+ pages = {96--114},
+ editor = {H. Federrath},
+ publisher = {Springer-Verlag, LNCS 2009},
+}
+ %note = {\url{http://www.onion-router.net/Publications/WDIAU-2000.ps.gz}},
+
+@Inproceedings{freenet-pets00,
+ title = {Freenet: A Distributed Anonymous Information Storage
+ and Retrieval System},
+ author = {Ian Clarke and Oskar Sandberg and Brandon Wiley and
+ Theodore W. Hong},
+ booktitle = {Designing Privacy Enhancing Technologies: Workshop
+ on Design Issues in Anonymity and Unobservability},
+ year = 2000,
+ month = {July},
+ pages = {46--66},
+ editor = {H. Federrath},
+ publisher = {Springer-Verlag, LNCS 2009},
+}
+ %note = {\url{http://citeseer.nj.nec.com/clarke00freenet.html}},
+
+@InProceedings{or-ih96,
+ author = {David M. Goldschlag and Michael G. Reed and Paul
+ F. Syverson},
+ title = {Hiding Routing Information},
+ booktitle = {Information Hiding, First International Workshop},
+ pages = {137--150},
+ year = 1996,
+ editor = {R. Anderson},
+ month = {May},
+ publisher = {Springer-Verlag, LNCS 1174},
+}
+
+@InProceedings{federrath-ih96,
+ author = {Hannes Federrath and Anja Jerichow and Andreas Pfitzmann},
+ title = {{MIXes} in Mobile Communication Systems: Location
+ Management with Privacy},
+ booktitle = {Information Hiding, First International Workshop},
+ pages = {121--135},
+ year = 1996,
+ editor = {R. Anderson},
+ month = {May},
+ publisher = {Springer-Verlag, LNCS 1174},
+}
+
+
+@InProceedings{reed-protocols97,
+ author = {Michael G. Reed and Paul F. Syverson and David
+ M. Goldschlag},
+ title = {Protocols Using Anonymous Connections: Mobile Applications},
+ booktitle = {Security Protocols: 5th International Workshop},
+ pages = {13--23},
+ year = 1997,
+ editor = {Bruce Christianson and Bruno Crispo and Mark Lomas
+ and Michael Roe},
+ month = {April},
+ publisher = {Springer-Verlag, LNCS 1361}
+}
+
+
+
+@Article{or-jsac98,
+ author = {Michael G. Reed and Paul F. Syverson and David
+ M. Goldschlag},
+ title = {Anonymous Connections and Onion Routing},
+ journal = {IEEE Journal on Selected Areas in Communications},
+ year = 1998,
+ volume = 16,
+ number = 4,
+ pages = {482--494},
+ month = {May},
+}
+ %note = {\url{http://www.onion-router.net/Publications/JSAC-1998.ps.gz}}
+
+@Misc{TLS,
+ author = {T. Dierks and C. Allen},
+ title = {The {TLS} {P}rotocol --- {V}ersion 1.0},
+ howpublished = {IETF RFC 2246},
+ month = {January},
+ year = {1999},
+}
+%note = {\url{http://www.rfc-editor.org/rfc/rfc2246.txt}},
+
+@Misc{SMTP,
+ author = {J. Postel},
+ title = {Simple {M}ail {T}ransfer {P}rotocol},
+ howpublished = {IETF RFC 2821 (also STD0010)},
+ month = {April},
+ year = {2001},
+ note = {\url{http://www.rfc-editor.org/rfc/rfc2821.txt}},
+}
+
+@Misc{IMAP,
+ author = {M. Crispin},
+ title = {Internet {M}essage {A}ccess {P}rotocol --- {V}ersion 4rev1},
+ howpublished = {IETF RFC 2060},
+ month = {December},
+ year = {1996},
+ note = {\url{http://www.rfc-editor.org/rfc/rfc2060.txt}},
+}
+
+@misc{pipenet,
+ title = {PipeNet 1.1},
+ author = {Wei Dai},
+ year = 1996,
+ month = {August},
+ howpublished = {Usenet post},
+ note = {\url{http://www.eskimo.com/~weidai/pipenet.txt} First mentioned
+ in a post to the cypherpunks list, Feb.\ 1995.},
+}
+
+
+@Misc{POP3,
+ author = {J. Myers and M. Rose},
+ title = {Post {O}ffice {P}rotocol --- {V}ersion 3},
+ howpublished = {IETF RFC 1939 (also STD0053)},
+ month = {May},
+ year = {1996},
+ note = {\url{http://www.rfc-editor.org/rfc/rfc1939.txt}},
+}
+
+
+@InProceedings{shuffle,
+ author = {C. Andrew Neff},
+ title = {A Verifiable Secret Shuffle and its Application to E-Voting},
+ booktitle = {8th ACM Conference on Computer and Communications
+ Security (CCS-8)},
+ pages = {116--125},
+ year = 2001,
+ editor = {P. Samarati},
+ month = {November},
+ publisher = {ACM Press},
+}
+ %note = {\url{http://www.votehere.net/ada_compliant/ourtechnology/
+ % technicaldocs/shuffle.pdf}},
+
+@InProceedings{dolev91,
+ author = {Danny Dolev and Cynthia Dwork and Moni Naor},
+ title = {Non-Malleable Cryptography},
+ booktitle = {23rd ACM Symposium on the Theory of Computing (STOC)},
+ pages = {542--552},
+ year = 1991,
+ note = {Updated version at
+ \url{http://citeseer.nj.nec.com/dolev00nonmalleable.html}},
+}
+
+@TechReport{rsw96,
+ author = {Ronald L. Rivest and Adi Shamir and David A. Wagner},
+ title = {Time-lock puzzles and timed-release Crypto},
+ year = 1996,
+ type = {MIT LCS technical memo},
+ number = {MIT/LCS/TR-684},
+ month = {February},
+ note = {\newline \url{http://citeseer.nj.nec.com/rivest96timelock.html}},
+}
+
+@InProceedings{web-mix,
+ author = {Oliver Berthold and Hannes Federrath and Stefan K\"opsell},
+ title = {Web {MIX}es: A system for anonymous and unobservable
+ {I}nternet access},
+ booktitle = {Designing Privacy Enhancing Technologies: Workshop
+ on Design Issues in Anonymity and Unobservability},
+ editor = {H. Federrath},
+ publisher = {Springer-Verlag, LNCS 2009},
+ year = {2000},
+}
+% pages = {115--129},
+
+@InProceedings{disad-free-routes,
+ author = {Oliver Berthold and Andreas Pfitzmann and Ronny Standtke},
+ title = {The disadvantages of free {MIX} routes and how to overcome
+ them},
+ booktitle = {Designing Privacy Enhancing Technologies: Workshop
+ on Design Issues in Anonymity and Unobservability},
+ pages = {30--45},
+ year = 2000,
+ editor = {H. Federrath},
+ publisher = {Springer-Verlag, LNCS 2009},
+}
+ %note = {\url{http://www.tik.ee.ethz.ch/~weiler/lehre/netsec/Unterlagen/anon/
+ % disadvantages_berthold.pdf}},
+
+@InProceedings{boneh00,
+ author = {Dan Boneh and Moni Naor},
+ title = {Timed Commitments},
+ booktitle = {Advances in Cryptology -- {CRYPTO} 2000},
+ pages = {236--254},
+ year = 2000,
+ publisher = {Springer-Verlag, LNCS 1880},
+ note = {\newline \url{http://crypto.stanford.edu/~dabo/abstracts/timedcommit.html}},
+}
+
+@InProceedings{goldschlag98,
+ author = {David M. Goldschlag and Stuart G. Stubblebine},
+ title = {Publicly Verifiable Lotteries: Applications of
+ Delaying Functions},
+ booktitle = {Financial Cryptography},
+ pages = {214--226},
+ year = 1998,
+ publisher = {Springer-Verlag, LNCS 1465},
+ note = {\newline \url{http://citeseer.nj.nec.com/goldschlag98publicly.html}},
+}
+
+@InProceedings{syverson98,
+ author = {Paul Syverson},
+ title = {Weakly Secret Bit Commitment: Applications to
+ Lotteries and Fair Exchange},
+ booktitle = {Computer Security Foundations Workshop (CSFW11)},
+ pages = {2--13},
+ year = 1998,
+ address = {Rockport Massachusetts},
+ month = {June},
+ publisher = {IEEE CS Press},
+ note = {\newline \url{http://chacs.nrl.navy.mil/publications/CHACS/1998/}},
+}
+
+@Misc{shoup-iso,
+ author = {Victor Shoup},
+ title = {A Proposal for an {ISO} {S}tandard for Public Key Encryption (version 2.1)},
+ note = {Revised December 20, 2001. \url{http://www.shoup.net/papers/}},
+}
+
+@Misc{shoup-oaep,
+ author = {Victor Shoup},
+ title = {{OAEP} Reconsidered},
+ howpublished = {{IACR} e-print 2000/060},
+ note = {\newline \url{http://eprint.iacr.org/2000/060/}},
+}
+
+@Misc{oaep-still-alive,
+ author = {E. Fujisaki and D. Pointcheval and T. Okamoto and J. Stern},
+ title = {{RSA}-{OAEP} is Still Alive!},
+ howpublished = {{IACR} e-print 2000/061},
+ note = {\newline \url{http://eprint.iacr.org/2000/061/}},
+}
+
+@misc{echolot,
+ author = {Peter Palfrader},
+ title = {Echolot: a pinger for anonymous remailers},
+ note = {\url{http://www.palfrader.org/echolot/}},
+}
+
+@Misc{mixmaster-attacks,
+ author = {Lance Cottrell},
+ title = {Mixmaster and Remailer Attacks},
+ note = {\url{http://www.obscura.com/~loki/remailer/remailer-essay.html}},
+}
+
+@Misc{mixmaster-spec,
+ author = {Ulf M{\"o}ller and Lance Cottrell and Peter
+ Palfrader and Len Sassaman},
+ title = {Mixmaster {P}rotocol --- {V}ersion 2},
+ year = {2003},
+ month = {July},
+ howpublished = {Draft},
+ note = {\url{http://www.abditum.com/mixmaster-spec.txt}},
+}
+
+@InProceedings{puzzles-tls,
+ author = {Drew Dean and Adam Stubblefield},
+ title = {{Using Client Puzzles to Protect TLS}},
+ booktitle = {Proceedings of the 10th USENIX Security Symposium},
+ year = {2001},
+ month = {August},
+ publisher = {USENIX},
+}
+
+@InProceedings{breadpudding,
+ author = {Markus Jakobsson and Ari Juels},
+ title = {Proofs of Work and Bread Pudding Protocols},
+ booktitle = {Proceedings of the IFIP TC6 and TC11 Joint Working
+ Conference on Communications and Multimedia Security
+ (CMS '99)},
+ year = 1999,
+ month = {September},
+ publisher = {Kluwer}
+}
+
+@Misc{hashcash,
+ author = {Adam Back},
+ title = {Hash cash},
+ note = {\newline \url{http://www.cypherspace.org/~adam/hashcash/}},
+}
+
+@InProceedings{oreilly-acc,
+ author = {Roger Dingledine and Michael J. Freedman and David Molnar},
+ title = {Accountability},
+ booktitle = {Peer-to-peer: Harnessing the Benefits of a Disruptive
+ Technology},
+ year = {2001},
+ publisher = {O'Reilly and Associates},
+}
+
+
+@InProceedings{han,
+ author = {Yongfei Han},
+ title = {Investigation of non-repudiation protocols},
+ booktitle = {ACISP '96},
+ year = 1996,
+ publisher = {Springer-Verlag},
+}
+
+
+@Misc{socks5,
+ key = {socks5},
+ title = {{SOCKS} {P}rotocol {V}ersion 5},
+ howpublished= {IETF RFC 1928},
+ month = {March},
+ year = 1996,
+ note = {\url{http://www.ietf.org/rfc/rfc1928.txt}}
+}
+
+@InProceedings{abe,
+ author = {Masayuki Abe},
+ title = {Universally Verifiable {MIX} With Verification Work Independent of
+ The Number of {MIX} Servers},
+ booktitle = {{EUROCRYPT} 1998},
+ year = {1998},
+ publisher = {Springer-Verlag, LNCS 1403},
+}
+
+@InProceedings{desmedt,
+ author = {Yvo Desmedt and Kaoru Kurosawa},
+ title = {How To Break a Practical {MIX} and Design a New One},
+ booktitle = {{EUROCRYPT} 2000},
+ year = {2000},
+ publisher = {Springer-Verlag, LNCS 1803},
+ note = {\url{http://citeseer.nj.nec.com/447709.html}},
+}
+
+@InProceedings{mitkuro,
+ author = {M. Mitomo and K. Kurosawa},
+ title = {{Attack for Flash MIX}},
+ booktitle = {{ASIACRYPT} 2000},
+ year = {2000},
+ publisher = {Springer-Verlag, LNCS 1976},
+ note = {\newline \url{http://citeseer.nj.nec.com/450148.html}},
+}
+
+@InProceedings{hybrid-mix,
+ author = {M. Ohkubo and M. Abe},
+ title = {A {L}ength-{I}nvariant {H}ybrid {MIX}},
+ booktitle = {Advances in Cryptology - {ASIACRYPT} 2000},
+ year = {2000},
+ publisher = {Springer-Verlag, LNCS 1976},
+}
+
+@InProceedings{PShuffle,
+ author = {Jun Furukawa and Kazue Sako},
+ title = {An Efficient Scheme for Proving a Shuffle},
+ editor = {Joe Kilian},
+ booktitle = {CRYPTO 2001},
+ year = {2001},
+ publisher = {Springer-Verlag, LNCS 2139},
+}
+
+
+@InProceedings{jakobsson-optimally,
+ author = {Markus Jakobsson and Ari Juels},
+ title = {An Optimally Robust Hybrid Mix Network (Extended Abstract)},
+ booktitle = {Principles of Distributed Computing - {PODC} '01},
+ year = {2001},
+ publisher = {ACM Press},
+ note = {\url{http://citeseer.nj.nec.com/492015.html}},
+}
+
+@InProceedings{kesdogan,
+ author = {D. Kesdogan and M. Egner and T. B\"uschkes},
+ title = {Stop-and-Go {MIX}es Providing Probabilistic Anonymity in an Open
+ System},
+ booktitle = {Information Hiding (IH 1998)},
+ year = {1998},
+ publisher = {Springer-Verlag, LNCS 1525},
+}
+ %note = {\url{http://www.cl.cam.ac.uk/~fapp2/ihw98/ihw98-sgmix.pdf}},
+
+@InProceedings{socks4,
+ author = {David Koblas and Michelle R. Koblas},
+ title = {{SOCKS}},
+ booktitle = {UNIX Security III Symposium (1992 USENIX Security
+ Symposium)},
+ pages = {77--83},
+ year = 1992,
+ publisher = {USENIX},
+}
+
+@InProceedings{flash-mix,
+ author = {Markus Jakobsson},
+ title = {Flash {M}ixing},
+ booktitle = {Principles of Distributed Computing - {PODC} '99},
+ year = {1999},
+ publisher = {ACM Press},
+ note = {\newline \url{http://citeseer.nj.nec.com/jakobsson99flash.html}},
+}
+
+@InProceedings{SK,
+ author = {Joe Kilian and Kazue Sako},
+ title = {Receipt-Free {MIX}-Type Voting Scheme - A Practical Solution to
+ the Implementation of a Voting Booth},
+ booktitle = {EUROCRYPT '95},
+ year = {1995},
+ publisher = {Springer-Verlag},
+}
+
+@InProceedings{OAEP,
+ author = {M. Bellare and P. Rogaway},
+ year = {1994},
+ booktitle = {EUROCRYPT '94},
+ title = {Optimal {A}symmetric {E}ncryption {P}adding : How To Encrypt With
+ {RSA}},
+ publisher = {Springer-Verlag},
+ note = {\newline \url{http://www-cse.ucsd.edu/users/mihir/papers/oaep.html}},
+}
+@inproceedings{babel,
+ title = {Mixing {E}-mail With {B}abel},
+ author = {Ceki G\"ulc\"u and Gene Tsudik},
+ booktitle = {{Network and Distributed Security Symposium (NDSS 96)}},
+ year = 1996,
+ month = {February},
+ pages = {2--16},
+ publisher = {IEEE},
+}
+ %note = {\url{http://citeseer.nj.nec.com/2254.html}},
+
+@Misc{rprocess,
+ author = {RProcess},
+ title = {Selective Denial of Service Attacks},
+ note = {\newline \url{http://www.eff.org/pub/Privacy/Anonymity/1999\_09\_DoS\_remail\_vuln.html}},
+}
+
+@Article{remailer-history,
+ author = {Sameer Parekh},
+ title = {Prospects for Remailers},
+ journal = {First Monday},
+ volume = {1},
+ number = {2},
+ month = {August},
+ year = {1996},
+ note = {\url{http://www.firstmonday.dk/issues/issue2/remailers/}},
+}
+
+@Article{chaum-mix,
+ author = {David Chaum},
+ title = {Untraceable electronic mail, return addresses, and digital pseudonyms},
+ journal = {Communications of the ACM},
+ year = {1981},
+ volume = {24},
+ number = {2},
+ month = {February},
+}
+ %note = {\url{http://www.eskimo.com/~weidai/mix-net.txt}},
+
+@InProceedings{nym-alias-net,
+ author = {David Mazi\`{e}res and M. Frans Kaashoek},
+ title = {{The Design, Implementation and Operation of an Email
+ Pseudonym Server}},
+ booktitle = {$5^{th}$ ACM Conference on Computer and
+ Communications Security (CCS'98)},
+ year = 1998,
+ publisher = {ACM Press},
+}
+ %note = {\newline \url{http://www.scs.cs.nyu.edu/~dm/}},
+
+@InProceedings{tangler,
+ author = {Marc Waldman and David Mazi\`{e}res},
+ title = {Tangler: A Censorship-Resistant Publishing System
+ Based on Document Entanglements},
+ booktitle = {$8^{th}$ ACM Conference on Computer and
+ Communications Security (CCS-8)},
+ pages = {126--135},
+ year = 2001,
+ publisher = {ACM Press},
+}
+ %note = {\url{http://www.scs.cs.nyu.edu/~dm/}}
+
+@misc{neochaum,
+ author = {Tim May},
+ title = {Payment mixes for anonymity},
+ howpublished = {E-mail archived at
+ \url{http://\newline www.inet-one.com/cypherpunks/dir.2000.02.28-2000.03.05/msg00334.html}},
+}
+
+@misc{helsingius,
+ author = {J. Helsingius},
+ title = {{\tt anon.penet.fi} press release},
+ note = {\newline \url{http://www.penet.fi/press-english.html}},
+}
+
+@InProceedings{garay97secure,
+ author = {J. Garay and R. Gennaro and C. Jutla and T. Rabin},
+ title = {Secure distributed storage and retrieval},
+ booktitle = {11th International Workshop, WDAG '97},
+ pages = {275--289},
+ year = {1997},
+ publisher = {Springer-Verlag, LNCS 1320},
+ note = {\newline \url{http://citeseer.nj.nec.com/garay97secure.html}},
+}
+
+@InProceedings{PIK,
+ author = {C. Park and K. Itoh and K. Kurosawa},
+ title = {Efficient anonymous channel and all/nothing election scheme},
+ booktitle = {Advances in Cryptology -- {EUROCRYPT} '93},
+ pages = {248--259},
+ year = {1993},
+ publisher = {Springer-Verlag, LNCS 765},
+}
+
+@Misc{pgpfaq,
+ key = {PGP},
+ title = {{PGP} {FAQ}},
+ note = {\newline \url{http://www.faqs.org/faqs/pgp-faq/}},
+}
+
+@InProceedings{riordan-schneier,
+ author = {James Riordan and Bruce Schneier},
+ title = {A Certified E-mail Protocol with No Trusted Third Party},
+ booktitle = {13th Annual Computer Security Applications Conference},
+ month = {December},
+ year = {1998},
+ note = {\newline \url{http://www.counterpane.com/certified-email.html}},
+}
+
+
+@Article{crowds-tissec,
+ author = {Michael K. Reiter and Aviel D. Rubin},
+ title = {Crowds: Anonymity for Web Transactions},
+ journal = {ACM TISSEC},
+ year = 1998,
+ volume = 1,
+ number = 1,
+ pages = {66--92},
+ month = {June},
+}
+ %note = {\url{http://citeseer.nj.nec.com/284739.html}}
+
+@Article{crowds-dimacs,
+ author = {Michael K. Reiter and Aviel D. Rubin},
+ title = {Crowds: Anonymity for Web Transactions},
+ journal = {{DIMACS} Technical Report (Revised)},
+ volume = {97},
+ number = {15},
+ month = {August},
+ year = {1997},
+}
+
+@Misc{advogato,
+ author = {Raph Levien},
+ title = {Advogato's Trust Metric},
+ note = {\newline \url{http://www.advogato.org/trust-metric.html}},
+}
+
+@InProceedings{publius,
+ author = {Marc Waldman and Aviel Rubin and Lorrie Cranor},
+ title = {Publius: {A} robust, tamper-evident, censorship-resistant and
+ source-anonymous web publishing system},
+ booktitle = {Proc. 9th USENIX Security Symposium},
+ pages = {59--72},
+ year = {2000},
+ month = {August},
+}
+ %note = {\newline \url{http://citeseer.nj.nec.com/waldman00publius.html}},
+
+@Misc{freedom-nyms,
+ author = {Russell Samuels},
+ title = {Untraceable Nym Creation on the {F}reedom {N}etwork},
+ year = {1999},
+ month = {November},
+ day = {21},
+ note = {\newline \url{http://www.freedom.net/products/whitepapers/white11.html}},
+}
+
+@techreport{freedom2-arch,
+ title = {Freedom Systems 2.0 Architecture},
+ author = {Philippe Boucher and Adam Shostack and Ian Goldberg},
+ institution = {Zero Knowledge Systems, {Inc.}},
+ year = {2000},
+ month = {December},
+ type = {White Paper},
+ day = {18},
+}
+
+@techreport{freedom21-security,
+ title = {Freedom Systems 2.1 Security Issues and Analysis},
+ author = {Adam Back and Ian Goldberg and Adam Shostack},
+ institution = {Zero Knowledge Systems, {Inc.}},
+ year = {2001},
+ month = {May},
+ type = {White Paper},
+}
+
+@inproceedings{cfs:sosp01,
+ title = {Wide-area cooperative storage with {CFS}},
+ author = {Frank Dabek and M. Frans Kaashoek and David Karger and Robert Morris and Ion Stoica},
+ booktitle = {18th {ACM} {S}ymposium on {O}perating {S}ystems {P}rinciples ({SOSP} '01)},
+ year = {2001},
+ month = {October},
+ address = {Chateau Lake Louise, Banff, Canada},
+}
+
+@inproceedings{SS03,
+ title = {Passive Attack Analysis for Connection-Based Anonymity Systems},
+ author = {Andrei Serjantov and Peter Sewell},
+ booktitle = {Computer Security -- ESORICS 2003},
+ publisher = {Springer-Verlag, LNCS 2808},
+ year = {2003},
+ month = {October},
+}
+ %note = {\url{http://www.cl.cam.ac.uk/users/aas23/papers_aas/conn_sys.ps}},
+
+@Misc{pk-relations,
+ author = {M. Bellare and A. Desai and D. Pointcheval and P. Rogaway},
+ title = {Relations Among Notions of Security for Public-Key Encryption
+ Schemes},
+ howpublished = {
+ Extended abstract in {\em Advances in Cryptology - CRYPTO '98}, LNCS Vol. 1462.
+ Springer-Verlag, 1998.
+ Full version available from \newline \url{http://www-cse.ucsd.edu/users/mihir/}},
+}
+
+
+@InProceedings{mix-acc,
+ author = {Roger Dingledine and Michael J. Freedman and David
+ Hopwood and David Molnar},
+ title = {{A Reputation System to Increase MIX-net
+ Reliability}},
+ booktitle = {Information Hiding (IH 2001)},
+ pages = {126--141},
+ year = 2001,
+ editor = {Ira S. Moskowitz},
+ publisher = {Springer-Verlag, LNCS 2137},
+}
+ %note = {\url{http://www.freehaven.net/papers.html}},
+
+@InProceedings{casc-rep,
+ author = {Roger Dingledine and Paul Syverson},
+ title = {{Reliable MIX Cascade Networks through Reputation}},
+ booktitle = {Financial Cryptography},
+ year = 2002,
+ editor = {Matt Blaze},
+ publisher = {Springer-Verlag, LNCS 2357},
+}
+ %note = {\newline \url{http://www.freehaven.net/papers.html}},
+
+@InProceedings{zhou96certified,
+ author = {Jianying Zhou and Dieter Gollmann},
+ title = {Certified Electronic Mail},
+ booktitle = {{ESORICS: European Symposium on Research in Computer
+ Security}},
+ publisher = {Springer-Verlag, LNCS 1146},
+ year = {1996},
+ note = {\newline \url{http://citeseer.nj.nec.com/zhou96certified.html}},
+}
+
+@Misc{realtime-mix,
+ author = {Anja Jerichow and Jan M\"uller and Andreas Pfitzmann and
+ Birgit Pfitzmann and Michael Waidner},
+ title = {{Real-Time MIXes: A Bandwidth-Efficient Anonymity Protocol}},
+ howpublished = {IEEE Journal on Selected Areas in Communications, 1998.},
+ note = {\url{http://www.zurich.ibm.com/security/publications/1998.html}},
+}
+
+@InProceedings{danezis:pet2003,
+ author = {George Danezis},
+ title = {Mix-networks with Restricted Routes},
+ booktitle = {Privacy Enhancing Technologies (PET 2003)},
+ year = 2003,
+ editor = {Roger Dingledine},
+ publisher = {Springer-Verlag LNCS 2760}
+}
+
+@InProceedings{gap-pets03,
+ author = {Krista Bennett and Christian Grothoff},
+ title = {{GAP} -- practical anonymous networking},
+ booktitle = {Privacy Enhancing Technologies (PET 2003)},
+ year = 2003,
+ editor = {Roger Dingledine},
+ publisher = {Springer-Verlag LNCS 2760}
+}
+
+@Article{hordes-jcs,
+ author = {Brian Neil Levine and Clay Shields},
+ title = {Hordes: A Multicast-Based Protocol for Anonymity},
+ journal = {Journal of Computer Security},
+ year = 2002,
+ volume = 10,
+ number = 3,
+ pages = {213--240}
+}
+
+@TechReport{herbivore,
+ author = {Sharad Goel and Mark Robson and Milo Polte and Emin G\"{u}n Sirer},
+ title = {Herbivore: A Scalable and Efficient Protocol for Anonymous Communication},
+ institution = {Cornell University Computing and Information Science},
+ year = 2003,
+ type = {Technical Report},
+ number = {TR2003-1890},
+ month = {February}
+}
+
+@InProceedings{p5,
+ author = {Rob Sherwood and Bobby Bhattacharjee and Aravind Srinivasan},
+ title = {$P^5$: A Protocol for Scalable Anonymous Communication},
+ booktitle = {IEEE Symposium on Security and Privacy},
+ pages = {58--70},
+ year = 2002,
+ publisher = {IEEE CS}
+}
+
+@phdthesis{ian-thesis,
+ title = {A Pseudonymous Communications Infrastructure for the Internet},
+ author = {Ian Goldberg},
+ school = {UC Berkeley},
+ year = {2000},
+ month = {December},
+}
+
+@Article{taz,
+ author = {Ian Goldberg and David Wagner},
+ title = {TAZ Servers and the Rewebber Network: Enabling
+ Anonymous Publishing on the World Wide Web},
+ journal = {First Monday},
+ year = 1998,
+ volume = 3,
+ number = 4,
+ month = {August},
+ note = {\url{http://www.firstmonday.dk/issues/issue3_4/goldberg/}}
+}
+
+@Misc{tcp-over-tcp-is-bad,
+ key = {tcp-over-tcp-is-bad},
+ title = {Why {TCP} Over {TCP} Is A Bad Idea},
+ author = {Olaf Titz},
+ note = {\url{http://sites.inka.de/sites/bigred/devel/tcp-tcp.html}}
+}
+
+@inproceedings{wright02,
+ title = {An Analysis of the Degradation of Anonymous Protocols},
+ author = {Matthew Wright and Micah Adler and Brian Neil Levine and Clay Shields},
+ booktitle = {{Network and Distributed Security Symposium (NDSS 02)}},
+ year = {2002},
+ month = {February},
+ publisher = {IEEE},
+}
+
+@inproceedings{wright03,
+ title = {Defending Anonymous Communication Against Passive Logging Attacks},
+ author = {Matthew Wright and Micah Adler and Brian Neil Levine and Clay Shields},
+ booktitle = {IEEE Symposium on Security and Privacy},
+ pages= {28--41},
+ year = {2003},
+ month = {May},
+ publisher = {IEEE CS},
+}
+
+
+@InProceedings{attack-tor-oak05,
+ author = {Steven J. Murdoch and George Danezis},
+ title = {Low-cost Traffic Analysis of {T}or},
+ booktitle = {IEEE Symposium on Security and Privacy},
+ year = 2005,
+ month = {May},
+ publisher = {IEEE CS}
+}
+
+@Misc{jap-backdoor,
+ author={{The AN.ON Project}},
+ howpublished={Press release},
+ year={2003},
+ month={September},
+ title={German Police proceeds against anonymity service},
+ note={\url{http://www.datenschutzzentrum.de/material/themen/presse/anon-bka_e.htm}}
+}
+
+@article{shsm03,
+ title = {Using Caching for Browsing Anonymity},
+ author = {Anna Shubina and Sean Smith},
+ journal = {ACM SIGEcom Exchanges},
+ volume = {4},
+ number = {2},
+ year = {2003},
+ month = {September},
+ note = {\url{http://www.acm.org/sigs/sigecom/exchanges/volume_4_(03)/4.2-Shubina.pdf}},
+}
+
+@inproceedings{tor-design,
+ title = {Tor: The Second-Generation Onion Router},
+ author = {Roger Dingledine and Nick Mathewson and Paul Syverson},
+ booktitle = {Proceedings of the 13th USENIX Security Symposium},
+ year = {2004},
+ month = {August},
+ note = {\url{https://www.torproject.org/tor-design.pdf}}
+}
+
+@inproceedings{flow-correlation04,
+ title = {On Flow Correlation Attacks and Countermeasures in Mix Networks},
+ author = {Ye Zhu and Xinwen Fu and Bryan Graham and Riccardo Bettati and Wei Zhao},
+ booktitle = {Proceedings of Privacy Enhancing Technologies workshop (PET 2004)},
+ year = {2004},
+ month = {May},
+ series = {LNCS},
+ note = {\url{http://students.cs.tamu.edu/xinwenfu/paper/PET04.pdf}},
+}
+
+@InProceedings{danezis:pet2004,
+ author = {George Danezis},
+ title = {The Traffic Analysis of Continuous-Time Mixes},
+ booktitle= {Privacy Enhancing Technologies (PET 2004)},
+ editor = {David Martin and Andrei Serjantov},
+ month = {May},
+ year = {2004},
+ series = {LNCS},
+ note = {\url{http://www.cl.cam.ac.uk/users/gd216/cmm2.pdf}},
+}
+
+@inproceedings{feamster:wpes2004,
+ title = {Location Diversity in Anonymity Networks},
+ author = {Nick Feamster and Roger Dingledine},
+ booktitle = {{Proceedings of the Workshop on Privacy in the Electronic Society (WPES 2004)}},
+ year = {2004},
+ month = {October},
+ address = {Washington, DC, USA},
+ note = {\url{http://freehaven.net/doc/routing-zones/routing-zones.ps}},
+}
+
+@inproceedings{koepsell:wpes2004,
+ title = {How to Achieve Blocking Resistance for Existing Systems Enabling Anonymous Web Surfing},
+ author = {Stefan K\"opsell and Ulf Hilling},
+ booktitle = {{Proceedings of the Workshop on Privacy in the Electronic Society (WPES 2004)}},
+ year = {2004},
+ month = {October},
+ address = {Washington, DC, USA},
+ note = {\url{http://freehaven.net/anonbib/papers/p103-koepsell.pdf}},
+}
+
+@inproceedings{sync-batching,
+ title = {Synchronous Batching: From Cascades to Free Routes},
+ author = {Roger Dingledine and Vitaly Shmatikov and Paul Syverson},
+ booktitle = {Proceedings of Privacy Enhancing Technologies workshop (PET 2004)},
+ editor = {David Martin and Andrei Serjantov},
+ year = {2004},
+ month = {May},
+ series = {LNCS},
+ note = {\url{http://freehaven.net/doc/sync-batching/sync-batching.pdf}},
+}
+
+@InProceedings{e2e-traffic,
+ author = {Nick Mathewson and Roger Dingledine},
+ title = {Practical Traffic Analysis: Extending and Resisting Statistical Disclosure},
+ booktitle= {Privacy Enhancing Technologies (PET 2004)},
+ editor = {David Martin and Andrei Serjantov},
+ month = {May},
+ year = {2004},
+ series = {LNCS},
+ note = {\url{http://freehaven.net/doc/e2e-traffic/e2e-traffic.pdf}},
+}
+
+@Misc{dtls,
+ author = {E. Rescorla and N. Modadugu},
+ title = {{Datagram Transport Layer Security}},
+ howpublished = {IETF Draft},
+ month = {December},
+ year = {2003},
+ note = {\url{http://www.ietf.org/internet-drafts/draft-rescorla-dtls-02.txt}},
+}
+
+@InProceedings{usability-network-effect,
+ author={Roger Dingledine and Nick Mathewson},
+ title={Anonymity Loves Company: Usability and the Network Effect},
+ booktitle = {Designing Security Systems That People Can Use},
+ year = {2005},
+ publisher = {O'Reilly Media},
+}
+
+@inproceedings{usability:weis2006,
+ title = {Anonymity Loves Company: Usability and the Network Effect},
+ author = {Roger Dingledine and Nick Mathewson},
+ booktitle = {Proceedings of the Fifth Workshop on the Economics of Information Security
+ (WEIS 2006)},
+ year = {2006},
+ month = {June},
+ address = {Cambridge, UK},
+ bookurl = {http://weis2006.econinfosec.org/},
+ note = {\url{http://freehaven.net/doc/wupss04/usability.pdf}},
+}
+
+@Misc{six-four,
+ key = {six-four},
+ title = {{The Six/Four System}},
+ note = {\url{http://sourceforge.net/projects/sixfour/}}
+}
+
+@inproceedings{clayton:pet2006,
+ title = {Ignoring the Great Firewall of China},
+ author = {Richard Clayton and Steven J. Murdoch and Robert N. M. Watson},
+ booktitle = {Proceedings of the Sixth Workshop on Privacy Enhancing Technologies (PET 2006)},
+ year = {2006},
+ month = {June},
+ address = {Cambridge, UK},
+ publisher = {Springer},
+ bookurl = {http://petworkshop.org/2006/},
+ note = {\url{http://www.cl.cam.ac.uk/~rnc1/ignoring.pdf}},
+}
+
+@Misc{zuckerman-threatmodels,
+ key = {zuckerman-threatmodels},
+ title = {We've got to adjust some of our threat models},
+ author = {Ethan Zuckerman},
+ note = {\url{http://www.ethanzuckerman.com/blog/?p=1019}}
+}
+
+@Misc{cgiproxy,
+ key = {cgiproxy},
+ title = {{CGIProxy: HTTP/FTP Proxy in a CGI Script}},
+ author = {James Marshall},
+ note = {\url{http://www.jmarshall.com/tools/cgiproxy/}}
+}
+
+@Misc{circumventor,
+ key = {circumventor},
+ title = {{How to install the Circumventor program}},
+ author = {Bennett Haselton},
+ note = {\url{http://www.peacefire.org/circumventor/simple-circumventor-instructions.html}}
+}
+
+@Misc{psiphon,
+ key = {psiphon},
+ title = {Psiphon},
+ author = {Ronald Deibert and others},
+ note = {\url{http://psiphon.civisec.org/}}
+}
+
+@InProceedings{tcpstego,
+ author = {Steven J. Murdoch and Stephen Lewis},
+ title = {Embedding Covert Channels into {TCP/IP}},
+ booktitle = {Information Hiding: 7th International Workshop},
+ pages = {247--261},
+ year = {2005},
+ editor = {Mauro Barni and Jordi Herrera-Joancomart\'{\i} and
+Stefan Katzenbeisser and Fernando P\'{e}rez-Gonz\'{a}lez},
+ volume = {3727},
+ series = {LNCS},
+ address = {Barcelona, Catalonia (Spain)},
+ month = {June},
+ publisher = {Springer-Verlag},
+ url = {http://www.cl.cam.ac.uk/~sjm217/papers/ih05coverttcp.pdf}
+}
+
+@phdthesis{blossom-thesis,
+ title = {Perspective Access Networks},
+ author = {Geoffrey Goodell},
+ school = {Harvard University},
+ year = {2006},
+ month = {July},
+ note = {\url{http://afs.eecs.harvard.edu/~goodell/thesis.pdf}},
+}
+
+@inproceedings{tap:pet2006,
+ title = {On the Security of the Tor Authentication Protocol},
+ author = {Ian Goldberg},
+ booktitle = {Proceedings of the Sixth Workshop on Privacy Enhancing Technologies (PET 2006)},
+ year = {2006},
+ month = {June},
+ address = {Cambridge, UK},
+ publisher = {Springer},
+ bookurl = {http://petworkshop.org/2006/},
+ note = {\url{http://www.cypherpunks.ca/~iang/pubs/torsec.pdf}},
+}
+
+@inproceedings{rep-anon,
+ title = {{Reputation in P2P Anonymity Systems}},
+ author = {Roger Dingledine and Nick Mathewson and Paul Syverson},
+ booktitle = {Proceedings of Workshop on Economics of Peer-to-Peer Systems},
+ year = {2003},
+ month = {June},
+ note = {\url{http://freehaven.net/doc/econp2p03/econp2p03.pdf}},
+}
+
+@misc{tor-challenges,
+ author = {Roger Dingledine and Nick Mathewson and Paul Syverson},
+ title = {Challenges in deploying low-latency anonymity},
+ year = {2005},
+ note = {Manuscript}
+}
+
+@InProceedings{chaum-blind,
+ author = {David Chaum},
+ title = {Blind Signatures for Untraceable Payments},
+ booktitle = {Advances in Cryptology: Proceedings of Crypto 82},
+ pages = {199--203},
+ year = 1983,
+ editor = {D. Chaum and R.L. Rivest and A.T. Sherman},
+ publisher = {Plenum Press}
+}
+
+@Article{netauth,
+ author = {Geoffrey Goodell and Paul Syverson},
+ title = {The Right Place at the Right Time: Examining the use of network location in authentication and abuse prevention},
+ journal = {Communications of the ACM},
+ year = 2007,
+ volume = 50,
+ number = 5,
+ pages = {113--117},
+ month = {May}
+}
+
+@misc{ip-to-country,
+ key = {ip-to-country},
+ title = {IP-to-country database},
+ note = {\url{http://ip-to-country.webhosting.info/}},
+}
+
+@misc{mackinnon-personal,
+ author = {Rebecca MacKinnon},
+ title = {Private communication},
+ year = {2006},
+}
+
+@inproceedings{pet05-bissias,
+ title = {Privacy Vulnerabilities in Encrypted HTTP Streams},
+ author = {George Dean Bissias and Marc Liberatore and Brian Neil Levine},
+ booktitle = {Proceedings of Privacy Enhancing Technologies workshop (PET 2005)},
+ year = {2005},
+ month = {May},
+ note = {\url{http://prisms.cs.umass.edu/brian/pubs/bissias.liberatore.pet.2005.pdf}},
+}
+
+@InProceedings{infranet,
+ author = {Nick Feamster and Magdalena Balazinska and Greg Harfst and Hari Balakrishnan and David Karger},
+ title = {Infranet: Circumventing Web Censorship and Surveillance},
+ booktitle = {Proceedings of the 11th USENIX Security Symposium},
+ year = {2002},
+ month = {August},
+ note = {\url{http://nms.lcs.mit.edu/~feamster/papers/usenixsec2002.pdf}},
+}
+
+@techreport{ptacek98insertion,
+ author = {Thomas H. Ptacek and Timothy N. Newsham},
+ title = {Insertion, Evasion, and Denial of Service: Eluding Network Intrusion Detection},
+ institution = {Secure Networks, Inc.},
+ address = {Suite 330, 1201 5th Street S.W, Calgary, Alberta, Canada, T2R-0Y6},
+ year = {1998},
+ url = {citeseer.ist.psu.edu/ptacek98insertion.html},
+}
+
+@inproceedings{active-wardens,
+ author = {Gina Fisk and Mike Fisk and Christos Papadopoulos and Joshua Neil},
+ title = {Eliminating Steganography in Internet Traffic with Active Wardens},
+ booktitle = {Information Hiding Workshop (IH 2002)},
+ year = {2002},
+ month = {October},
+ editor = {Fabien Petitcolas},
+ series = {LNCS},
+ volume = {2578},
+ publisher = {Springer-Verlag},
+}
+
+@inproceedings{clog-the-queue,
+ title = {Don't Clog the Queue: Circuit Clogging and Mitigation in {P2P} anonymity schemes},
+ author = {Jon McLachlan and Nicholas Hopper},
+ booktitle = {Proceedings of Financial Cryptography (FC '08)},
+ year = {2008},
+ month = {January},
+}
+
+@inproceedings{snader08,
+ title = {A Tune-up for {Tor}: Improving Security and Performance in the {Tor} Network},
+ author = {Robin Snader and Nikita Borisov},
+ booktitle = {Proceedings of the Network and Distributed Security Symposium - {NDSS} '08},
+ year = {2008},
+ month = {February},
+ publisher = {Internet Society},
+}
+
+@inproceedings{murdoch-pet2008,
+ title = {Metrics for Security and Performance in Low-Latency Anonymity Networks},
+ author = {Steven J. Murdoch and Robert N. M. Watson},
+ booktitle = {Proceedings of the Eighth International Symposium on Privacy Enhancing Technologies (PETS 2008)},
+ year = {2008},
+ month = {July},
+ address = {Leuven, Belgium},
+ pages = {115--132},
+ editor = {Nikita Borisov and Ian Goldberg},
+ publisher = {Springer},
+ bookurl = {http://petsymposium.org/2008/},
+}
+
+@inproceedings{danezis-pet2008,
+ title = {Bridging and Fingerprinting: Epistemic Attacks on Route Selection},
+ author = {George Danezis and Paul Syverson},
+ booktitle = {Proceedings of the Eighth International Symposium on Privacy Enhancing Technologies (PETS 2008)},
+ year = {2008},
+ month = {July},
+ address = {Leuven, Belgium},
+ pages = {133--150},
+ editor = {Nikita Borisov and Ian Goldberg},
+ publisher = {Springer},
+ bookurl = {http://petsymposium.org/2008/},
+}
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: "tor-design"
+%%% End:
diff --git a/2006/blocking/usenixsubmit.cls b/2006/blocking/usenixsubmit.cls
new file mode 100644
index 0000000..743ffcf
--- /dev/null
+++ b/2006/blocking/usenixsubmit.cls
@@ -0,0 +1,7 @@
+% Created by Anil Somayaji
+
+\ProvidesClass{usenixsubmit}
+\LoadClass[11pt,letterpaper]{article}
+\usepackage{times}
+\usepackage[margin=1in]{geometry}
+

31 Aug '12
commit 51e3396a95fc38a1b07998811a65d18926570b15
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date: Thu Aug 30 11:13:27 2012 +0200
Add overhead-directory-info blog post.
---
2009/overhead-directory-info/.gitignore | 3 +
.../overhead-directory-info.bib | 11 ++
.../overhead-directory-info.tex | 178 ++++++++++++++++++++
2009/overhead-directory-info/tortechrep.cls | 1 +
4 files changed, 193 insertions(+), 0 deletions(-)
diff --git a/2009/overhead-directory-info/.gitignore b/2009/overhead-directory-info/.gitignore
new file mode 100644
index 0000000..b91e2a9
--- /dev/null
+++ b/2009/overhead-directory-info/.gitignore
@@ -0,0 +1,3 @@
+overhead-directory-info.pdf
+overhead-directory-info-2009-02-16.pdf
+
diff --git a/2009/overhead-directory-info/overhead-directory-info.bib b/2009/overhead-directory-info/overhead-directory-info.bib
new file mode 100644
index 0000000..20bf985
--- /dev/null
+++ b/2009/overhead-directory-info/overhead-directory-info.bib
@@ -0,0 +1,11 @@
+@techreport{loesing2009dirarch,
+ title = {Measuring the Tor Network, Evaluation of Relays from Public
+ Directory Data},
+ author = {Karsten Loesing},
+ institution = {The Tor Project},
+ year = {2009},
+ month = {June},
+ number = {2009-06-001},
+ note = {\url{https://research.torproject.org/techreports/dirarch-2009-06-22.pdf}},
+}
+
diff --git a/2009/overhead-directory-info/overhead-directory-info.tex b/2009/overhead-directory-info/overhead-directory-info.tex
new file mode 100644
index 0000000..f0e53f1
--- /dev/null
+++ b/2009/overhead-directory-info/overhead-directory-info.tex
@@ -0,0 +1,178 @@
+\documentclass{tortechrep}
+\begin{document}
+
+\author{Roger Dingledine}
+\contact{arma@torproject.org}
+\reportid{2009-02-001}
+\date{February 16, 2009}
+\title{Overhead from directory info: past, present, future}
+\maketitle
+
+\section{Introduction}
+
+A growing number of people want to use Tor in low-bandwidth contexts
+(e.g.\ modems or shared Internet cafes in the Middle East) and mobile
+contexts (start up a Tor client, use it for a short time, and then stop it
+again).
+Currently Tor is nearly unusable in these situations, because it spends
+too many bytes fetching directory info.
+This report summarizes the steps we've taken so far to reduce directory
+overhead, and explains the steps that are coming next.
+First, what do I mean by ``directory info''?
+Part of the Tor design is the \emph{discovery} component: how clients
+learn about the available Tor relays, along with their keys, locations,
+exit policies, and so on.
+Tor's solution so far uses a few trusted directory authorities that sign
+and distribute official lists of the relays that make up the Tor network.
+
+\section{History of v1, v2, v3 dir protocols}
+
+Over the years we've had several different ``directory protocols'', each
+more bandwidth-friendly than the last, and often providing stronger
+security properties as well.
+In Tor's first directory design%
+\footnote{\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/attic/dir-spec-v1.txt}}
+(Sept 2002), each authority created its own list of every relay
+descriptor, as one flat text file.
+A short summary of relay status at the top of the file told clients which
+relays were reachable.
+Every Tor client fetched a copy from an authority every 10 minutes.
+
+Tor 0.0.8 (Aug 2004) introduced ``directory caches'', where normal relays
+would fetch a copy of the directory and serve it to others, to take some
+burden off the authorities.
+Tor 0.0.9 (Dec 2004) let clients download ``running routers'' status
+summaries separately from the main directory, so they could keep more
+up-to-date on reachability without needing to refetch all the descriptors.
+It also added zlib-style compression during transfer.
+At this point everybody fetched the whole directory every hour and the
+running-routers document every 15 minutes.
+
+There were two big flaws with the v1 directory scheme: a security problem
+and an overhead problem.
+The security problem was that even though there were three authorities,
+you just went with the most recent opinion you could find, so a single
+evil authority could screw everybody.
+The overhead problem was that clients were fetching a new directory even
+when very little of it had changed.
+In Dec 2004 there were 57 relays and the uncompressed directory was 172KB;
+but by May 2006 we were up to 749 relays and the full directory was almost
+1MB compressed.
+Even though we'd lowered the period for fetching new copies to 2~hours
+(20~minutes for caches), this was not good.
+
+We introduced the v2 directory design%
+\footnote{\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec-v2.txt}}
+in Tor 0.1.1.20 in May 2006.
+Each authority now produced its own ``network status'' document, which
+listed brief summaries of each relay along with a hash of the current
+relay descriptor.
+Clients fetched all the status documents (there were 5 authorities by now)
+and ignored relays listed by less than half of them.
+Clients only fetched the relay descriptors they were missing.
+Once bootstrapped, clients fetched one new status document (round robin)
+per hour.
+Peter Palfrader produced a graph of bandwidth needed to bootstrap and then
+keep up-to-date over a day.%
+\footnote{As of August 30, 2012, this graph file cannot be found anymore.}
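+The v2 majority rule described above can be sketched as follows
+(an illustrative Python model for this report, not Tor's actual C code):
+
+```python
+# Sketch of the v2 rule: a client only uses a relay if at least
+# half of the authorities' status documents list it as running.
+def usable_relays(status_documents):
+    """status_documents: one set of relay fingerprints per authority."""
+    threshold = len(status_documents) / 2.0
+    counts = {}
+    for listed in status_documents:
+        for fingerprint in listed:
+            counts[fingerprint] = counts.get(fingerprint, 0) + 1
+    return {fp for fp, n in counts.items() if n >= threshold}
+
+# With 5 authorities, a relay must appear in at least 3 status
+# documents, so no single evil authority can inject or drop a relay.
+```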
+
+All of the improvements so far were oriented toward saving bandwidth at
+the server side: we figured that clients had plenty of bandwidth, and we
+wanted to avoid overloading the authorities and caches.
+But if we wanted to add more directory authorities (a majority of 5 is
+still an uncomfortably small number), bootstrapping clients would have to
+fetch one more network status for every new authority.
+By early 2008, each status document listed 2500 relay summaries and came
+in around 175KB compressed, meaning you needed 875KB of status docs when
+starting up, and then another megabyte of descriptors after that.
+And we couldn't add more authorities without making the problem even
+worse.
+
+The v3 directory design%
+\footnote{\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt}}
+in Tor 0.2.0.30 (Jul 2008) solved this last problem by having the
+authorities coordinate and produce an hourly ``consensus'' network status
+document, signed by a majority of the six v3 authorities.
+Now clients only have to fetch one status document.
+
+\section{Next fixes}
+
+Based in part on sponsorship by NLnet%
+\footnote{\url{http://www.nlnet.nl/}}
+to reduce Tor's directory overhead, there are two directions we're
+exploring now: reducing relay download overhead and reducing consensus
+networkstatus download overhead.
+
+We've cut relay descriptor size by 60\% by moving some of the descriptor
+info to separate ``extra-info'' descriptors that clients don't need to
+fetch,%
+\footnote{\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/proposals/104-short-descriptors.txt}}
+and we've cut the consensus size by 40\% by leaving out non-Running
+relays%
+\footnote{\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/proposals/138-remove-down-routers-from-consensus.txt}}
+(since clients won't fetch descriptors for them anyway).
+
+We spent the second half of 2008 working on a much more radical design
+change%
+\footnote{\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/proposals/141-jit-sd-downloads.txt}}
+where we move all the descriptor components that are used for path
+selection into the consensus, and then clients would download each relay
+descriptor ``just in time'' as part of circuit extension.
+This design would have two huge benefits: a) clients download zero
+descriptors at startup, greatly speeding bootstrapping, and b) the total
+number of relay descriptor downloads would be based on the number of
+circuits built, regardless of how many relays are in the network.
+
+Alas, we have backed off on this proposal: Karsten Loesing's work on
+hidden service performance concluded that the circuit extension is the
+main performance killer, and our ``just in time'' download proposal would
+add more round-trips and complexity into exactly that step.
+So we have a new plan: we're still going to move all the path-selection
+information into the consensus, but then we're going to put the remaining
+pieces into a new microdescriptor%
+\footnote{\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/proposals/158-microdescriptors.txt}}
+that will hopefully change at most weekly.
+That means the initial bootstrap costs will still be there (though the
+microdescriptor is maybe a third the size of the normal descriptor), but
+so long as you can keep a disk cache, descriptor maintenance will be
+reduced to roughly zero.
+We're aiming to roll this change out in Tor 0.2.3.x, in late 2010.
+
+We also have plans to further reduce the consensus download overhead.
+Since the consensus doesn't actually change that much from one hour to the
+next, clients should fetch consensus diffs%
+\footnote{\url{https://gitweb.torproject.org/torspec.git/blob/HEAD:/proposals/140-consensus-diffs.txt}}
+rather than fetching a whole new consensus.
+We could expect another 80\% reduction in size here.
+We hope to roll this step out in mid to late 2009.
+Alas, this goal is stymied by the fact that we haven't found any small
+portable BSD-licensed C diff libraries.%
+\footnote{\url{https://lists.torproject.org/pipermail/tor-dev/2008-June/001533.html}}
+Anybody know one?
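+To illustrate the idea (this Python sketch uses difflib and the unified
+format for clarity; the actual proposal specifies an ed-style diff, and
+the relay names below are made up):
+
+```python
+import difflib
+
+def consensus_diff(old_consensus, new_consensus):
+    """Produce a zero-context unified diff between two consensus
+    documents, carrying only the lines that actually changed."""
+    return "".join(difflib.unified_diff(
+        old_consensus.splitlines(keepends=True),
+        new_consensus.splitlines(keepends=True),
+        fromfile="consensus-old", tofile="consensus-new", n=0))
+
+old = "r relayA 10.0.0.1\nr relayB 10.0.0.2\nr relayC 10.0.0.3\n"
+new = "r relayA 10.0.0.9\nr relayB 10.0.0.2\nr relayC 10.0.0.3\n"
+diff = consensus_diff(old, new)
+# Only relayA's changed entry appears in the diff; for a real
+# consensus with thousands of entries, that is the 80% savings.
+```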
+
+So these two changes together mean an initial bootstrap cost of maybe
+100KB+300KB, and then a maintenance cost of maybe 20KB/hour.
+But actually, once we've gotten the maintenance level so low, we should
+think about updating the consensus more often than once an hour.
+The goal would be to get relays that change IP addresses back into action
+as soon as possible---currently it takes 2 to 4 hours before a new relay
+(or a relay with a new location) gets noticed by clients.
+Since one-third of Tor relays run on dynamic IP
+addresses~\cite{loesing2009dirarch}, bringing that level down to 30 to
+60~minutes could mean a lot more network capacity.
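+As a back-of-the-envelope check on those numbers (all figures are the
+approximate kilobyte values quoted above):
+
+```python
+# Compare early-2008 v2 bootstrap cost against the planned scheme.
+v2_bootstrap_kb = 875 + 1000       # five status docs + full descriptors
+planned_bootstrap_kb = 100 + 300   # consensus + microdescriptors
+maintenance_kb_per_hour = 20
+
+day_kb = 24 * maintenance_kb_per_hour            # 480 KB/day steady state
+savings = 1 - planned_bootstrap_kb / float(v2_bootstrap_kb)
+# Bootstrapping drops from ~1875 KB to ~400 KB, roughly a 79% cut,
+# which is what makes more frequent consensus updates affordable.
+```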
+
+Down the road, there are even more radical design changes to consider.
+One day there will be too many relays for every client to know about all
+of them, so we will want to partition the network (see Section~4.4 of
+Tor's development roadmap%
+\footnote{\url{https://svn.torproject.org/svn/projects/roadmaps/2008-12-19-roadmap-full.pdf}}).
+When we do that, we can bound the amount of directory info that each
+client has to maintain.
+Another promising idea is to figure out ways to let clients build paths
+through the network while requiring less information about each relay.
+There's definitely a tradeoff here between centralized coordination (which
+is easier to design, and can more easily provide good anonymity
+properties) and scaling to many tens of thousands of relays.
+But that, as they say, is a story for another time.
+
+\bibliography{overhead-directory-info}
+
+\end{document}
+
diff --git a/2009/overhead-directory-info/tortechrep.cls b/2009/overhead-directory-info/tortechrep.cls
new file mode 120000
index 0000000..4c24db2
--- /dev/null
+++ b/2009/overhead-directory-info/tortechrep.cls
@@ -0,0 +1 @@
+../../tortechrep.cls
\ No newline at end of file