[tor-commits] [bridgedb/master] Add t.i.process.protocols for handling Tor exit-list download.

isis at torproject.org isis at torproject.org
Sat Mar 21 02:02:56 UTC 2015


commit 59c0772f275debce5df690a3bac919484b4ae1e2
Author: Isis Lovecruft <isis at torproject.org>
Date:   Tue Nov 5 01:32:58 2013 +0000

    Add t.i.process.protocols for handling Tor exit-list download.
    
     * ADD bridgedb.proxy module for handling open proxy lists.
    
     * ADD bridgedb.proxy.ExitListProtocol class for asynchronously processing the
       results read from an HTTPS connection to a remote TorBulkExitList.py.
    
     * ADD bridgedb.proxy.ProxySet for storing a MutableSet of proxy addresses,
       which functions better than bridgedb.Main.ProxyCategory because it will not
       allow duplicates to be stored. It also enables us to add its methods as
       callbacks on deferreds, meaning that the ExitListProtocol can callback its
       protocol's deferred with ProxySet.addExitRelays(), which automatically
       parses the IP:port pairs as they are received from the wire and adds them
       to the ProxySet's MutableSet if they do not already exist.
    
    The ProxySet class can also store whether an IP:port pair is a known Tor exit
    relay (with the ``ProxySet.addExitRelays`` method mentioned above); this is
    *most* of the fix for #4771.
    
     * FIXES #4771: bridgedb's list of tor exit relays is down since bulk exit
       list is down.
    
     * ADD several IP address parsers/validators to bridgedb/parse/__init__.py,
       because I wasn't sure yet if they should be their own module. These checks
       are improved versions of the old IP address checks in bridgedb.Bridges,
       because they do not use regexes, and also check that IPv6 addresses are not
       site-local.
---
 lib/bridgedb/proxy.py           |  466 ++++++++++++++++++++++++++++++++
 lib/bridgedb/test/test_proxy.py |  560 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 1026 insertions(+)

diff --git a/lib/bridgedb/proxy.py b/lib/bridgedb/proxy.py
new file mode 100644
index 0000000..d954213
--- /dev/null
+++ b/lib/bridgedb/proxy.py
@@ -0,0 +1,466 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of BridgeDB, a Tor bridge distribution system.
+#
+# :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis at torproject.org>
+#           please also see AUTHORS file
+# :copyright: (c) 2013-2015 Isis Lovecruft
+#             (c) 2007-2015, The Tor Project, Inc.
+#             (c) 2007-2015, all entities within the AUTHORS file
+# :license: 3-clause BSD, see included LICENSE for information
+
+"""Classes for finding and managing lists of open proxies."""
+
+from __future__ import print_function
+from collections import MutableSet
+from functools import update_wrapper
+from functools import wraps
+
+import ipaddr
+import logging
+import os
+import time
+
+from twisted.internet import defer
+from twisted.internet import protocol
+from twisted.internet import reactor
+from twisted.internet import utils as txutils
+from bridgedb.runner import find
+from bridgedb.parse.addr import isIPAddress
+
+
+def downloadTorExits(proxyList, ipaddress, port=443, protocol=None):
+    """Run a script which downloads a list of Tor exit relays which allow their
+    clients to exit to the given **ipaddress** and **port**.
+
+    :param proxyList: The :class:`ProxySet` instance from :mod:`bridgedb.Main`.
+    :param str ipaddress: The IP address that each Tor exit relay should be
+        capable of connecting to for clients, as specified by its ExitPolicy.
+    :param int port: The port corresponding to the above **ipaddress** that
+        each Tor exit relay should allow clients to exit to. (See
+        https://check.torproject.org/cgi-bin/TorBulkExitList.py.)
+    :type protocol: :api:`twisted.internet.protocol.Protocol`
+    :param protocol: A :class:`~bridgedb.proxy.ExitListProtocol`, or any other
+        :api:`~twisted.internet.protocol.Protocol` implementation for
+        processing the results of a process which downloads a list of Tor exit
+        relays. This parameter is mainly meant for use in testing, and should
+        not be changed.
+    :rtype: :class:`~twisted.internet.defer.Deferred`
+    :returns: A deferred which will callback with a list, each item in the
+        list is a string containing an IP of a Tor exit relay.
+    """
+    proto = ExitListProtocol() if protocol is None else protocol()
+    args = [proto.script, '--stdout', '-a', ipaddress, '-p', str(port)]
+    proto.deferred.addCallback(proxyList.addExitRelays)
+    proto.deferred.addErrback(logging.exception)
+    transport = reactor.spawnProcess(proto, proto.script, args=args, env={})
+    return proto.deferred
+
+def loadProxiesFromFile(filename, proxySet=None, removeStale=False):
+    """Load proxy IP addresses from a file.
+
+    :param str filename: A filename whose path can be either absolute or
+        relative to the current working directory. The file should contain the
+        IP addresses of various open proxies, one per line, for example::
+
+            11.11.11.11
+            22.22.22.22
+            123.45.67.89
+
+    :type proxySet: None or :class:`~bridgedb.proxy.ProxySet`.
+    :param proxySet: If given, load the addresses read from the file into
+        this ``ProxySet``.
+    :param bool removeStale: If ``True``, remove proxies from the **proxySet**
+        which were not listed in **filename**.
+        (default: ``False``)
+    :returns: A list of all the proxies listed in **filename** (regardless of
+        whether they were added or removed).
+    """
+    logging.info("Reloading proxy lists...")
+
+    addresses = []
+    if proxySet:
+        oldProxySet = proxySet.copy()
+
+    with open(filename, 'r') as proxyFile:
+        for line in proxyFile.readlines():
+            line = line.strip()
+            if proxySet:
+                # ProxySet.add() will validate the IP address
+                if proxySet.add(line, tag=filename):
+                    logging.info("Added %s to the proxy list." % line)
+                    addresses.append(line)
+            else:
+                ip = isIPAddress(line)
+                if ip:
+                    addresses.append(ip)
+
+    if proxySet:
+        stale = list(oldProxySet.difference(addresses))
+
+        if removeStale:
+            for ip in stale:
+                if proxySet.getTag(ip) == filename:
+                    logging.info("Removing stale IP %s from proxy list." % ip)
+                    proxySet.remove(ip)
+                else:
+                    logging.info("Tag %s didn't match %s"
+                                 % (proxySet.getTag(ip), filename))
+
+    return addresses
+
+
+class ProxySet(MutableSet):
+    """A :class:`collections.MutableSet` for storing validated IP addresses."""
+
+    #: A tag to apply to IP addresses within this ``ProxySet`` which are known
+    #: Tor exit relays.
+    _exitTag = 'exit_relay'
+
+    def __init__(self, proxies=dict()):
+        """Initialise a ``ProxySet``.
+
+        :type proxies: A tuple, list, dict, or set.
+        :param proxies: Optionally, initialise with an iterable, ``proxies``.
+            For each ``item`` in that iterable, ``item`` must either:
+                1. be a string or int representing an IP address, or,
+                2. be another iterable, whose first item satisfies #1.
+        """
+        super(ProxySet, self).__init__()
+        self._proxydict = dict()
+        self._proxies = set()
+        self.addProxies(proxies)
+
+    @property
+    def proxies(self):
+        """All proxies in this set, regardless of tags."""
+        return list(self._proxies)
+
+    @property
+    def exitRelays(self):
+        """Get all proxies in this ``ProxySet`` tagged as Tor exit relays.
+
+        :rtype: set
+        :returns: A set of all known Tor exit relays which are contained
+            within this :class:`~bridgedb.proxy.ProxySet`.
+        """
+        return self.getAllWithTag(self._exitTag)
+
+    def __add__(self, ip=None, tag=None):
+        """Add an **ip** to this set, with an optional **tag**.
+
+        This has no effect if the **ip** is already present.  The **ip** is
+        only added if it passes the checks in
+        :func:`~bridgedb.parse.addr.isIPAddress`.
+
+        :type ip: basestring or int
+        :param ip: The IP address to add.
+        :param tag: An optional value to link to **ip**. If not given, it will
+            be a timestamp (seconds since epoch, as a float) for when **ip**
+            was first added to this set.
+        :rtype: bool
+        :returns: ``True`` if **ip** is in this set; ``False`` otherwise.
+        """
+        ip = isIPAddress(ip)
+        if ip:
+            if self._proxies.isdisjoint(set(ip)):
+                self._proxies.add(ip)
+                self._proxydict[ip] = tag if tag else time.time()
+                return True
+        return False
+
+    def __radd__(self, *args, **kwargs): self.__add__(*args, **kwargs)
+
+    def __contains__(self, ip):
+        """x.__contains__(y) <==> y in x.
+
+        :type ip: basestring or int
+        :param ip: The IP address to check.
+        :rtype: boolean
+        :returns: True if ``ip`` is in this set; False otherwise.
+        """
+        ipset = [isIPAddress(ip),]
+        if ipset and len(self._proxies.intersection(ipset)) == len(ipset):
+            return True
+        return False
+
+    def __sub__(self, ip):
+        """Entirely remove **ip** from this set.
+
+        :type ip: basestring or int
+        :param ip: The IP address to remove.
+        """
+        try:
+            self._proxydict.pop(ip)
+            self._proxies.discard(ip)
+        except KeyError:
+            pass
+
+    def __rsub__(self, *args, **kwargs): raise NotImplemented
+
+    def _getErrorMessage(self, x=None, y=None):
+        """Make an error message describing how this class works."""
+        message = """\nParameter 'proxies' must be one of:
+    - a {1} of {0}
+    - a {2} of {0}
+    - a {3}, whose keys are {0} (the values can be anything)
+    - a {4} of {1}s, whose first object in each {1} must be a {0}
+    - a {4} of {0}
+        """.format(type(''), type(()), type([]), type({}), type(set(())))
+        end = "You gave: a {0}".format(type(y))
+        end += " of {0}".format(type(x))
+        return os.linesep.join((message, end))
+
+    def addProxies(self, proxies, tag=None):
+        """Add proxies to this set.
+
+        This calls :func:`add` for each item in the iterable **proxies**.
+        Each proxy, if added, will be tagged with a current timestamp.
+
+        :type proxies: A tuple, list, dict, or set.
+        :param proxies: An iterable.  For each ``item`` in that iterable,
+            ``item`` must either:
+                1. be a string or int representing an IP address, or,
+                2. be another iterable, whose first item satisfies #1.
+        :keyword tag: An optional value to link to all untagged
+            **proxies**. If ``None``, it will be a timestamp (seconds since
+            epoch, as a float) for when the proxy was first added to this set.
+        """
+        if isinstance(proxies, dict):
+            [self.add(ip, value) for (ip, value) in proxies.items()]
+        else:
+            try:
+                for x in proxies:
+                    if isinstance(x, (tuple, list, set)):
+                        if len(x) == 2:   self.add(x[0], x[1])
+                        elif len(x) == 1: self.add(x, tag)
+                        else: raise ValueError(self._getErrorMessage(x, proxies))
+                    elif isinstance(x, (basestring, int)):
+                        self.add(x, tag)
+                    else:
+                        raise ValueError(self._getErrorMessage(x, proxies))
+            except TypeError:
+                raise ValueError(self._getErrorMessage(proxies, None))
+
+    @wraps(addProxies)
+    def addExitRelays(self, relays):
+        logging.info("Loading exit relays into proxy list...")
+        [self.add(x, self._exitTag) for x in relays]
+
+    def getTag(self, ip):
+        """Get the tag for an **ip** in this ``ProxySet``, if available.
+
+        :type ip: basestring or int
+        :param ip: The IP address to obtain the tag for.
+        :rtype: ``None`` or basestring or int
+        :returns: The tag for that **ip**, iff **ip** exists in this
+            ``ProxySet`` and it has a tag.
+        """
+        return self._proxydict.get(ip)
+
+    def getAllWithTag(self, tag):
+        """Get all proxies in this ``ProxySet`` with a given tag.
+
+        :param basestring tag: A tag to search for.
+        :rtype: set
+        :returns: A set of all proxies which are contained within this
+            :class:`~bridgedb.proxy.ProxySet` which are also tagged with
+            **tag**.
+        """
+        return set([key for key, value in filter(lambda x: x[1] == tag,
+                                                 self._proxydict.items())])
+
+    def firstSeen(self, ip):
+        """Get the timestamp when **ip** was first seen, if available.
+
+        :type ip: basestring or int
+        :param ip: The IP address to obtain a timestamp for.
+        :rtype: float or None
+        :returns: The timestamp (in seconds since epoch) if available.
+            Otherwise, returns None.
+        """
+        when = self.getTag(ip)
+        if isinstance(when, float):
+            return when
+
+    def isExitRelay(self, ip):
+        """Check if ``ip`` is a known Tor exit relay.
+
+        :type ip: basestring or int
+        :param ip: The IP address to check.
+        :rtype: boolean
+        :returns: True if ``ip`` is a known Tor exit relay; False otherwise.
+        """
+        if self.getTag(ip) == self._exitTag:
+            return True
+        return False
+
+    def replaceProxyList(self, proxies, tag=None):
+        """Clear everything and add all ``proxies``.
+
+        :type proxies: A tuple, list, dict, or set.
+        :param proxies: An iterable.  For each ``item`` in that iterable,
+            ``item`` must either:
+                1. be a string or int representing an IP address, or,
+                2. be another iterable, whose first item satisfies #1.
+        """
+        try:
+            self.clear()
+            self.addProxies(proxies, tag=tag)
+        except Exception as error:
+            logging.error(str(error))
+
+    _assigned = ('__name__', '__doc__')
+
+    @wraps(MutableSet._hash)
+    def __hash__(self):      return self._hash()
+    def __iter__(self):      return self._proxies.__iter__()
+    def __len__(self):       return len(self._proxydict.items())
+    def __repr__(self):      return type('')(self.proxies)
+    def __str__(self):       return os.linesep.join(self.proxies)
+    update_wrapper(__iter__, set.__iter__, _assigned)
+    update_wrapper(__len__,  len, _assigned)
+    update_wrapper(__repr__, repr, _assigned)
+    update_wrapper(__str__,  str, _assigned)
+
+    def add(self, ip, tag=None): return self.__add__(ip, tag)
+    def copy(self):              return self.__class__(self._proxydict.copy())
+    def contains(self, ip):      return self.__contains__(ip)
+    def discard(self, ip):       return self.__sub__(ip)
+    def remove(self, other):     return self.__sub__(other)
+    update_wrapper(add,          __add__)
+    update_wrapper(copy,         __init__)
+    update_wrapper(contains,     __contains__)
+    update_wrapper(discard,      __sub__)
+    update_wrapper(remove,       __sub__)
+
+    def difference(self, other):           return self._proxies.difference(other)
+    def issubset(self, other):             return self._proxies.issubset(other)
+    def issuperset(self, other):           return self._proxies.issuperset(other)
+    def intersection(self, other):         return self._proxies.intersection(other)
+    def symmetric_difference(self, other): return self._proxies.symmetric_difference(other)
+    def union(self, other):                return self._proxies.union(other)
+    update_wrapper(difference,             set.difference, _assigned)
+    update_wrapper(issubset,               set.issubset, _assigned)
+    update_wrapper(issuperset,             set.issuperset, _assigned)
+    update_wrapper(intersection,           set.intersection, _assigned)
+    update_wrapper(symmetric_difference,   set.symmetric_difference, _assigned)
+    update_wrapper(union,                  set.union, _assigned)
+
+
+class ExitListProtocol(protocol.ProcessProtocol):
+    """A :class:`~twisted.internet.protocol.Protocol` for ``get-tor-exits``.
+
+    :attr boolean connected: True if our ``transport`` is connected.
+
+    :type transport: An implementer of
+        :interface:`twisted.internet.interface.IProcessTransport`.
+    :attr transport: If :func:`twisted.internet.reactor.spawnProcess` is
+        called with an instance of this class as its ``protocol``, then
+        :func:`~twisted.internet.reactor.spawnProcess` will return this
+        ``transport``.
+    """
+
+    def __init__(self):
+        """Create a protocol for downloading a list of current Tor exit relays.
+
+        :type exitlist: :class:`ProxySet`
+        :ivar exitlist: A :class:`~collections.MutableSet` containing the IP
+            addresses of known Tor exit relays which can reach our public IP
+            address.
+        :ivar list data: A list containing a ``bytes`` object for each chunk
+            of data received from the ``transport``.
+        :ivar deferred: A deferred which will callback with the ``exitlist``
+            when the process has ended.
+
+        :param string script: The full pathname of the script to run.
+        """
+        self.data = []
+        self.script = find('get-tor-exits')
+        self.exitlist = ProxySet()
+        self.deferred = defer.Deferred()
+
+    def childConnectionLost(self, childFD):
+        """See :func:`t.i.protocol.ProcessProtocol.childConnectionLost`."""
+        protocol.ProcessProtocol.childConnectionLost(self, childFD)
+
+    def connectionMade(self):
+        """Called when a connection is made.
+
+        This may be considered the initializer of the protocol, because it is
+        called when the connection is completed.  For clients, this is called
+        once the connection to the server has been established; for servers,
+        this is called after an accept() call stops blocking and a socket has
+        been received.  If you need to send any greeting or initial message,
+        do it here.
+        """
+        logging.debug("ExitListProtocol: Connection made with remote server")
+        self.transport.closeStdin()
+
+    def errReceived(self, data):
+        """Some data was received from stderr."""
+        # The get-tor-exits script uses twisted.python.log to log to stderr:
+        logging.debug(data)  # pragma: no cover
+
+    def outReceived(self, data):
+        """Some data was received from stdout."""
+        self.data.append(data)
+
+    def outConnectionLost(self):
+        """This will be called when stdout is closed."""
+        logging.debug("Finished downloading list of Tor exit relays.")
+        self.transport.loseConnection()
+        self.parseData()
+
+    def parseData(self):
+        """Parse all data received so far into our
+        :class:`exitlist <bridgedb.proxy.ProxySet>`.
+        """
+        unparseable = []
+
+        data = ''.join(self.data).split('\n')
+
+        for line in data:
+            line.strip()
+            if not line: continue
+            # If it reached an errorpage, then we grabbed raw HTML that starts
+            # with an HTML tag:
+            if line.startswith('<'): break
+            if line.startswith('#'): continue
+            ip = isIPAddress(line)
+            if ip:
+                logging.info("Discovered Tor exit relay: %s" % ip)
+                self.exitlist.add(ip)
+            else:
+                logging.debug("Got exitlist line that wasn't an IP: %s" % line)
+                unparseable.append(line)
+
+        if unparseable:
+            logging.warn(("There were unparseable lines in the downloaded "
+                          "list of Tor exit relays: %r") % unparseable)
+
+    def processEnded(self, reason):
+        """Called when the child process exits and all file descriptors
+        associated with it have been closed.
+
+        :type reason: :class:`twisted.python.failure.Failure`
+        """
+        self.transport.loseConnection()
+        if reason.value.exitCode != 0:  # pragma: no cover
+            logging.debug(reason.getTraceback())
+            logging.error("There was an error downloading Tor exit list: %s"
+                          % reason.value)
+        else:
+            logging.info("Finished processing list of Tor exit relays.")
+        logging.debug("Transferring exit list to storage...")
+        # Avoid triggering the deferred twice, e.g. on processExited():
+        if not self.deferred.called:
+            self.deferred.callback(list(self.exitlist.proxies))
+
+    def processExited(self, reason):
+        """This will be called when the subprocess exits.
+
+        :type reason: :class:`twisted.python.failure.Failure`
+        """
+        logging.debug("%s exited with status code %d"
+                      % (self.script, reason.value.exitCode))
diff --git a/lib/bridgedb/test/test_proxy.py b/lib/bridgedb/test/test_proxy.py
new file mode 100644
index 0000000..78e24ff
--- /dev/null
+++ b/lib/bridgedb/test/test_proxy.py
@@ -0,0 +1,560 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of BridgeDB, a Tor bridge distribution system.
+#
+# :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis at torproject.org>
+#           please also see AUTHORS file
+# :copyright: (c) 2013-2015 Isis Lovecruft
+#             (c) 2007-2015, The Tor Project, Inc.
+#             (c) 2007-2015, all entities within the AUTHORS file
+# :license: 3-clause BSD, see included LICENSE for information
+
+"""Tests for :mod:`bridgedb.proxy`."""
+
+import sure
+
+from twisted.internet import defer
+from twisted.trial import unittest
+
+from bridgedb import proxy
+
+
+EXIT_LIST_0 = """\
+11.11.11.11
+22.22.22.22
+123.45.67.89"""
+
+EXIT_LIST_1 = """\
+33.33.33.33
+44.44.44.44
+55.55.55.55
+66.66.66.66
+77.77.77.77"""
+
+EXIT_LIST_BAD = """\
+foo
+bar
+baz"""
+
+
+class MockExitListProtocol(proxy.ExitListProtocol):
+    """A mocked version of :class:`~bridgedb.proxy.ExitListProtocol`."""
+
+    def __init__(self):
+        proxy.ExitListProtocol.__init__(self)
+        self._data = EXIT_LIST_0
+        self.script = '/bin/echo'
+        print()
+
+    def _log(self, msg):
+        print("%s: %s" % (self.__class__.__name__, msg))
+
+    def childConnectionLost(self, childFD):
+        self._log("childConnectionLost() called with childFD=%s" % childFD)
+        proxy.ExitListProtocol.childConnectionLost(self, childFD)
+
+    def connectionMade(self):
+        self._log("connectionMade() called")
+        proxy.ExitListProtocol.connectionMade(self)
+
+    def errReceived(self, data):
+        self._log("errReceived() called with %s" % data)
+        proxy.ExitListProtocol.errReceived(self, data)
+
+    def outReceivedData(self, data):
+        self._log("outReceivedData() called with %s" % data)
+        proxy.ExitListProtocol.outReceivedData(self, data)
+
+    def outConnectionLost(self):
+        self._log("outConnectionLost() called")
+        proxy.ExitListProtocol.outConnectionLost(self)
+
+    def parseData(self):
+        data = self._data.split('\n')
+        for line in data:
+            line = line.strip()
+            if not line: continue
+            if line.startswith('<'): break
+            if line.startswith('#'): continue
+            ip = proxy.isIPAddress(line)
+            if ip:
+                self._log("adding IP %s to exitlist..." % ip)
+                self.exitlist.add(ip)
+
+    def processEnded(self, reason):
+        self._log("processEnded() called with %s" % reason)
+        proxy.ExitListProtocol.processEnded(self, reason)
+
+    def processExited(self, reason):
+        self._log("processExited() called with %s" % reason)
+        proxy.ExitListProtocol.processExited(self, reason)
+
+
+class ProxySetImplementationTest(unittest.TestCase):
+
+    def setUp(self):
+        # We have to put something in it, otherwise self.ps.should.be.ok won't
+        # think it's truthy:
+        self.ps = proxy.ProxySet(['1.1.1.1'])
+
+    def test_instantiation(self):
+        self.ps.should.be.ok
+        self.ps.should.have.property('__contains__').being.callable
+        self.ps.should.have.property('__hash__').being.callable
+        self.ps.should.have.property('__iter__').being.callable
+        self.ps.should.have.property('__len__').being.callable
+        self.ps.should.have.property('add').being.callable
+        self.ps.should.have.property('copy').being.callable
+        self.ps.should.have.property('contains').being.callable
+        self.ps.should.have.property('discard').being.callable
+        self.ps.should.have.property('remove').being.callable
+
+        self.ps.should.have.property('difference').being.callable
+        self.ps.should.have.property('issubset').being.callable
+        self.ps.should.have.property('issuperset').being.callable
+        self.ps.should.have.property('intersection').being.callable
+        self.ps.should.have.property('symmetric_difference').being.callable
+        self.ps.should.have.property('union').being.callable
+
+    def test_attributes(self):
+        self.ps.should.have.property('proxies').being.a(list)
+        self.ps.should.have.property('exitRelays').being.a(set)
+        self.ps.should.have.property('_proxies').being.a(set)
+        self.ps.should.have.property('_proxydict').being.a(dict)
+
+
+class LoadProxiesFromFileIntegrationTests(unittest.TestCase):
+    """Unittests for :class:`~bridgedb.proxy.loadProxiesFromFile()`."""
+
+    def setUp(self):
+        self.fn0 = '%s-0' % self.__class__.__name__
+        self.fn1 = '%s-1' % self.__class__.__name__
+        self.badfile = '%s-badfile' % self.__class__.__name__
+        self.writeFiles()
+
+    def writeFiles(self):
+        with open(self.fn0, 'w') as fh:
+            fh.write(EXIT_LIST_0)
+            fh.flush()
+        with open(self.fn1, 'w') as fh:
+            fh.write(EXIT_LIST_1)
+            fh.flush()
+        with open(self.badfile, 'w') as fh:
+            fh.write(EXIT_LIST_BAD)
+            fh.flush()
+
+    def emptyFile(self, filename):
+        """We have to do this in a separate method, otherwise Twisted doesn't
+        actually do it.
+        """
+        fh = open(filename, 'w')
+        fh.truncate()
+        fh.close()
+
+    def test_proxy_loadProxiesFromFile_1_file(self):
+        """Test loading proxies from one file."""
+        proxies = proxy.loadProxiesFromFile(self.fn0)
+        self.assertEqual(len(proxies), 3)
+
+    def test_proxy_loadProxiesFromFile_1_file_and_proxyset(self):
+        """Test loading proxies from one file."""
+        proxyList = proxy.ProxySet(['1.1.1.1'])
+        proxies = proxy.loadProxiesFromFile(self.fn0, proxySet=proxyList)
+        self.assertEqual(len(proxies), 3)
+        self.assertEqual(len(proxyList), 4)
+
+    def test_proxy_loadProxiesFromFile_2_files_and_proxyset(self):
+        """Test loading proxies from two files."""
+        proxyList = proxy.ProxySet(['1.1.1.1'])
+        proxy.loadProxiesFromFile(self.fn0, proxySet=proxyList)
+        proxies = proxy.loadProxiesFromFile(self.fn1, proxySet=proxyList)
+        self.assertEqual(len(proxies), 5)
+        self.assertEqual(len(proxyList), 9)
+
+    def test_proxy_loadProxiesFromFile_removeStale(self):
+        """Test loading proxies from two files and removing the stale ones."""
+        proxyList = proxy.ProxySet(['1.1.1.1'])
+        self.assertEqual(len(proxyList), 1)
+        proxies = proxy.loadProxiesFromFile(self.fn0, proxySet=proxyList)
+        self.assertEqual(len(proxies), 3)
+        self.assertEqual(len(proxyList), 4)
+        proxies = proxy.loadProxiesFromFile(self.fn1, proxySet=proxyList)
+        self.assertEqual(len(proxies), 5)
+        self.assertEqual(len(proxyList), 9)
+
+        self.emptyFile(self.fn0)
+        proxies = proxy.loadProxiesFromFile(self.fn0, proxySet=proxyList,
+                                            removeStale=True)
+        self.assertEqual(len(proxies), 0)
+        self.assertEqual(len(proxyList), 6)
+
+    def test_proxy_loadProxiesFromFile_duplicates(self):
+        """Loading proxies from the same file twice shouldn't store
+        duplicates.
+        """
+        proxyList = proxy.ProxySet(['1.1.1.1'])
+        proxy.loadProxiesFromFile(self.fn1, proxySet=proxyList)
+        self.assertEqual(len(proxyList), 6)
+        proxy.loadProxiesFromFile(self.fn1, proxySet=proxyList)
+        self.assertEqual(len(proxyList), 6)
+
+    def test_proxy_loadProxiesFromFile_bad_file(self):
+        """Loading proxies from a file with invalid IPs in it should do
+        nothing.
+        """
+        proxyList = proxy.ProxySet()
+        proxy.loadProxiesFromFile(self.badfile, proxySet=proxyList)
+        self.assertEqual(len(proxyList), 0)
+
+
+class DownloadTorExitsTests(unittest.TestCase):
+    """Tests for `~bridgedb.proxy.downloadTorExits()`."""
+
+    def test_proxy_downloadTorExits(self):
+        proxyList = proxy.ProxySet()
+        d = proxy.downloadTorExits(proxyList,
+                                   'OurIPWouldGoHere',
+                                   protocol=MockExitListProtocol)
+        self.assertIsInstance(d, defer.Deferred)
+
+
+class ProxySetUnittests(unittest.TestCase):
+    """Unittests for :class:`~bridgedb.proxy.ProxySet`."""
+
+    def setUp(self):
+        self.proxies = EXIT_LIST_1.split('\n')
+        self.moarProxies = EXIT_LIST_0.split('\n')
+
+        self.proxyList = proxy.ProxySet()
+        for p in self.proxies:
+            self.proxyList.add(p)
+
+    def test_ProxySet_init(self):
+        """When initialised (after setUp() has run), the ProxySet should
+        contain a number of proxies equal to the number we added in the setUp()
+        method.
+        """
+        self.assertEquals(len(self.proxyList), len(self.proxies))
+
+    def test_ProxySet_proxies_getter(self):
+        """ProxySet.proxies should list all proxies."""
+        self.assertItemsEqual(self.proxyList.proxies, set(self.proxies))
+
+    def test_ProxySet_proxies_setter(self):
+        """``ProxySet.proxies = ['foo']`` should raise an ``AttributeError``."""
+        self.assertRaises(AttributeError, self.proxyList.__setattr__, 'proxies', ['foo'])
+
+    def test_ProxySet_proxies_deleter(self):
+        """``del(ProxySet.proxies)`` should raise an AttributeError."""
+        self.assertRaises(AttributeError, self.proxyList.__delattr__, 'proxies')
+
+    def test_ProxySet_exitRelays_issubset_proxies(self):
+        """ProxySet.exitRelays should always be a subset of ProxySet.proxies."""
+        self.assertTrue(self.proxyList.exitRelays.issubset(self.proxyList.proxies))
+        self.proxyList.addExitRelays(self.moarProxies)
+        self.assertTrue(self.proxyList.exitRelays.issubset(self.proxyList.proxies))
+
+    def test_ProxySet_exitRelays_getter(self):
+        """ProxySet.exitRelays should list all exit relays."""
+        self.proxyList.addExitRelays(self.moarProxies)
+        self.assertItemsEqual(self.proxyList.exitRelays, set(self.moarProxies))
+
+    def test_ProxySet_exitRelays_setter(self):
+        """``ProxySet.exitRelays = ['foo']`` should raise an ``AttributeError``."""
+        self.assertRaises(AttributeError, self.proxyList.__setattr__, 'exitRelays', ['foo'])
+
+    def test_ProxySet_exitRelays_deleter(self):
+        """Deleting ``ProxySet.exitRelays`` should raise an ``AttributeError``."""
+        self.assertRaises(AttributeError, delattr, self.proxyList, 'exitRelays')
+
+    def test_ProxySet_add_new(self):
+        """Adding an unseen proxy with ProxySet.add() should grow the set by one."""
+        self.proxyList.add('110.110.110.110')
+        self.assertIn('110.110.110.110', self.proxyList)
+        self.assertEquals(len(self.proxyList), len(self.proxies) + 1)
+
+    def test_ProxySet_add_None(self):
+        """ProxySet.add(None) should return a false value and add nothing."""
+        self.assertFalse(self.proxyList.add(None))
+        self.assertEquals(len(self.proxyList), len(self.proxies))
+
+    def test_ProxySet_add_duplicate(self):
+        """Re-adding a proxy already in the ProxySet should not store it twice."""
+        self.proxyList.add(self.proxies[0])
+        self.assertIn(self.proxies[0], self.proxyList)
+        self.assertEquals(len(self.proxyList), len(self.proxies))
+
+    def test_ProxySet_addExitRelays(self):
+        """After ProxySet.addExitRelays(), a newly added proxy should be in the set."""
+        self.proxyList.addExitRelays(self.moarProxies)
+        self.assertIn(self.moarProxies[0], self.proxyList)
+
+    def test_ProxySet_radd_new(self):
+        """ProxySet.__radd__() called with an unseen proxy should add it."""
+        self.proxyList.__radd__('110.110.110.110')
+        self.assertIn('110.110.110.110', self.proxyList)
+        self.assertEquals(len(self.proxyList), len(self.proxies) + 1)
+
+    def test_ProxySet_addExitRelays_tagged(self):
+        """Proxies added via ProxySet.addExitRelays() should be reported as Tor
+        exit relays and carry the exit relay tag.
+        """
+        self.proxyList.addExitRelays(self.moarProxies)
+        relay = self.moarProxies[0]
+        self.assertTrue(self.proxyList.isExitRelay(relay))
+        self.assertEquals(self.proxyList.getTag(relay), self.proxyList._exitTag)
+
+    def test_ProxySet_addExitRelays_length(self):
+        """After ProxySet.addExitRelays(), the size of the set should equal the
+        number of proxies it held before plus the number of exit relays added.
+        """
+        expected = len(self.proxies) + len(self.moarProxies)
+        self.proxyList.addExitRelays(self.moarProxies)
+        self.assertEquals(len(self.proxyList), expected)
+
+    def test_ProxySet_addExitRelays_previous_proxies_kept(self):
+        """ProxySet.addExitRelays() should add the new proxies without dropping
+        the ones which were already in the ProxySet.
+        """
+        self.proxyList.addExitRelays(self.moarProxies)
+        self.assertIn(self.proxies[0], self.proxyList)
+
+    def test_ProxySet_addExitRelays_previous_proxies_not_tagged(self):
+        """ProxySet.addExitRelays() should tag only the proxies it adds; a
+        proxy which was already in the ProxySet as a non-exit must not be
+        reported as, or tagged as, a Tor exit relay.
+        """
+        self.proxyList.addExitRelays(self.moarProxies)
+        older = self.proxies[0]
+        self.assertFalse(self.proxyList.isExitRelay(older))
+        self.assertNotEquals(self.proxyList.getTag(older), self.proxyList._exitTag)
+
+    def test_ProxySet_addProxies_tuple_individual_tags(self):
+        """ProxySet.addProxies() given (proxy, tag) tuples should add each
+        proxy and store it under its own individual tag.
+        """
+        tags = ['foo', 'bar', 'baz']
+        extraProxies = zip(self.moarProxies, tags)
+        self.proxyList.addProxies(extraProxies)
+        self.assertEquals(len(self.proxyList), len(self.proxies) + len(extraProxies))
+        self.assertIn(extraProxies[0][0], self.proxyList)
+        for index in range(3):
+            address, tag = extraProxies[index]
+            self.assertEquals(self.proxyList._proxydict[address], tag)
+
+    def test_ProxySet_addProxies_tuple_too_many_items(self):
+        """``ProxySet.addProxies()`` given tuples which hold more than two
+        items should raise a ValueError.
+        """
+        # Build (proxy, tag, tag) triples, i.e. one item too many per tuple.
+        count = len(self.moarProxies)
+        triples = zip(self.moarProxies, ['sometag'] * count, ['othertag'] * count)
+        self.assertRaises(ValueError, self.proxyList.addProxies, triples)
+
+    def test_ProxySet_addProxies_list(self):
+        """``ProxySet.addProxies(..., tag='sometag')`` given a list should add
+        every new proxy under that one tag; older proxies shouldn't carry it.
+        """
+        self.proxyList.addProxies(self.moarProxies, tag='sometag')
+        self.assertEquals(len(self.proxyList), len(self.proxies) + len(self.moarProxies))
+        self.assertIn(self.moarProxies[0], self.proxyList)
+        for added in self.moarProxies:
+            self.assertEquals(self.proxyList.getTag(added), 'sometag')
+        for older in self.proxies:
+            self.assertNotEqual(self.proxyList.getTag(older), 'sometag')
+
+    def test_ProxySet_addProxies_set(self):
+        """``ProxySet.addProxies()`` called with a set and no tag should add
+        the new proxies and give each of them a float tag.
+        """
+        self.proxyList.addProxies(set(self.moarProxies))
+        self.assertEquals(len(self.proxyList), len(self.proxies) + len(self.moarProxies))
+        self.assertIn(self.moarProxies[0], self.proxyList)
+        for added in self.moarProxies:
+            self.assertIsInstance(self.proxyList.getTag(added), float)
+        for older in self.proxies:
+            self.assertNotEqual(self.proxyList.getTag(older), 'sometag')
+
+    def test_ProxySet_addProxies_bad_type(self):
+        """``ProxySet.addProxies()`` called with something which is neither an
+        iterable, nor a basestring, nor an int should raise a ValueError.
+        """
+        self.assertRaises(ValueError, self.proxyList.addProxies, object)
+
+    def test_ProxySet_addProxies_list_of_bad_types(self):
+        """``ProxySet.addProxies()`` called with a list whose items are of an
+        unsupported type (here, the ``object`` class) should raise a ValueError.
+        """
+        self.assertRaises(ValueError, self.proxyList.addProxies, [object, object, object])
+
+    def test_ProxySet_getTag(self):
+        """ProxySet.getTag() should return the tag which a proxy was added with."""
+        self.proxyList.add('1.1.1.1', 'bestproxyevar')
+        self.assertEquals(self.proxyList.getTag('1.1.1.1'), 'bestproxyevar')
+
+    def test_ProxySet_getTag_nonexistent(self):
+        """ProxySet.getTag() should return None for a proxy not in the set."""
+        self.assertIsNone(self.proxyList.getTag('1.1.1.1'))
+
+    def test_ProxySet_clear(self):
+        """ProxySet.clear() should empty the set and its backing containers."""
+        self.proxyList.clear()
+        self.assertEquals(len(self.proxyList), 0)
+        for name in ('proxies', '_proxies'):
+            self.assertEquals(len(getattr(self.proxyList, name)), 0)
+        self.assertEquals(len(self.proxyList._proxydict.items()), 0)
+
+    def test_ProxySet_contains_list(self):
+        """``ProxySet.contains()`` called with a list should return False."""
+        self.assertFalse(self.proxyList.contains([self.proxies[0]]))
+
+    def test_ProxySet_contains_nonexistent(self):
+        """``ProxySet().contains()`` called with a proxy which was never added
+        to the set should return False.
+        """
+        self.assertFalse(self.proxyList.contains(self.moarProxies[0]))
+
+    def test_ProxySet_contains_existent(self):
+        """``ProxySet().contains()`` with a proxy in the set should
+        return True.
+        """
+        self.assertTrue(self.proxyList.contains(self.proxies[0]))
+
+    def test_ProxySet_copy(self):
+        """ProxySet.copy() should return a copy which equals the original."""
+        newProxyList = self.proxyList.copy()
+        self.assertEquals(newProxyList, self.proxyList)
+
+    def test_ProxySet_difference(self):
+        """ProxySet.difference() should list the proxies which are in the set
+        it is called on but not in the set passed to it.
+        """
+        ours = self.proxyList
+        theirs = proxy.ProxySet(self.moarProxies)
+        onlyOurs = ours.difference(theirs)
+        self.assertItemsEqual(onlyOurs, set(self.proxies))
+        onlyTheirs = theirs.difference(ours)
+        self.assertItemsEqual(onlyTheirs, set(self.moarProxies))
+
+    def test_ProxySet_firstSeen_returns_timestamp(self):
+        """ProxySet.firstSeen() should not return None for a proxy which was
+        added without an explicit tag.
+        """
+        self.proxyList.add(self.moarProxies[0])
+        self.assertIsNotNone(self.proxyList.firstSeen(self.moarProxies[0]))
+
+    def test_ProxySet_firstSeen_returns_float(self):
+        """ProxySet.firstSeen() should return a float for a proxy which was
+        added without an explicit tag.
+        """
+        self.proxyList.add(self.moarProxies[1])
+        self.assertIsInstance(self.proxyList.firstSeen(self.moarProxies[1]), float)
+
+    def test_ProxySet_firstSeen_other_tags(self):
+        """ProxySet.firstSeen() should return None when a proxy's tag isn't a
+        timestamp (here, the string 'sometag').
+        """
+        self.proxyList.add(self.moarProxies[2], 'sometag')
+        self.assertIsNone(self.proxyList.firstSeen(self.moarProxies[2]))
+
+    def test_ProxySet_issubset(self):
+        """ProxySet.issubset() on a superset should return True."""
+        self.assertTrue(self.proxyList.issubset(set(self.proxies + self.moarProxies[:1])))
+
+    def test_ProxySet_issuperset(self):
+        """ProxySet.issuperset() on a subset should return True."""
+        self.assertTrue(self.proxyList.issuperset(set(self.proxies[:1])))
+
+    def test_ProxySet_remove(self):
+        """ProxySet.remove() should take a previously added proxy out of the
+        set, shrinking the set by one.
+        """
+        self.proxyList.remove(self.proxies[0])
+        self.assertNotIn(self.proxies[0], self.proxyList)
+        self.assertEquals(len(self.proxyList), len(self.proxies) - 1)
+
+    def test_ProxySet_remove_nonexistent(self):
+        """ProxySet.remove() called with a proxy which isn't in the set should
+        leave the size of the set unchanged.
+        """
+        self.proxyList.remove('110.110.110.110')
+        self.assertNotIn('110.110.110.110', self.proxyList)
+        self.assertEquals(len(self.proxyList), len(self.proxies))
+
+    def test_ProxySet_replaceProxyList(self):
+        """ProxySet.replaceProxyList() should drop every proxy currently in the
+        set and store the replacement proxies under the given tag.
+        """
+        self.proxyList.replaceProxyList(self.moarProxies, 'seven proxies')
+        for added in self.moarProxies:
+            self.assertIn(added, self.proxyList)
+            self.assertEqual(self.proxyList.getTag(added), 'seven proxies')
+        for dropped in self.proxies:
+            self.assertNotIn(dropped, self.proxyList)
+
+    def test_ProxySet_replaceProxyList_bad_type(self):
+        """ProxySet.replaceProxyList() should remove all the current proxies;
+        since the replacements given here are of a bad type, the set stays empty.
+        """
+        self.proxyList.replaceProxyList([object, object, object])
+        self.assertEqual(len(self.proxyList), 0)
+
+    def test_ProxySet_hash(self):
+        """ProxySets built from the same proxies should be equal and hash alike."""
+        first = proxy.ProxySet(self.proxies)
+        second = proxy.ProxySet(self.proxies)
+        self.assertEqual(first, second)
+        self.assertItemsEqual(first, second)
+        self.assertEqual(hash(first), hash(second))
+
+
+class ExitListProtocolTests(unittest.TestCase):
+    """Unittests for :class:`~bridgedb.proxy.ExitListProtocol`."""
+
+    def setUp(self):
+        self.proto = proxy.ExitListProtocol()
+
+    def test_ExitListProtocol_parseData_error_page(self):
+        """An HTML page containing no IP addresses should yield an empty exitlist."""
+        self.proto.data = """\
+<!doctype html>
+<html lang="en">
+<body>
+  <div class="content">
+  <img src="/torcheck/img/tor-on.png" class="onion" />
+  <h4>Welcome to the Tor Bulk Exit List exporting tool.</h4>
+  </div>
+</body>
+</html>"""
+        self.proto.parseData()
+        self.assertEqual(len(self.proto.exitlist), 0)
+
+    def test_ExitListProtocol_parseData_page_with_3_ips_with_comments(self):
+        """A page with 3 '#' comment lines and 3 IPs should yield 3 exits."""
+        self.proto.data = """\
+# This is a list of all Tor exit nodes from the past 16 hours that can contact 1.1.1.1 on port 443 #
+# You can update this list by visiting https://check.torproject.org/cgi-bin/TorBulkExitList.py?ip=1.1.1.1&port=443 #
+# This file was generated on Fri Feb  6 02:04:27 UTC 2015 #
+101.99.64.150
+103.10.197.50
+103.240.91.7"""
+        self.proto.parseData()
+        self.assertEqual(len(self.proto.exitlist), 3)
+
+    def test_ExitListProtocol_parseData_page_with_3_ips(self):
+        """A page with a leading blank line and 3 IPs should yield 3 exits."""
+        self.proto.data = """
+101.99.64.150
+103.10.197.50
+103.240.91.7"""
+        self.proto.parseData()
+        self.assertEqual(len(self.proto.exitlist), 3)
+
+    def test_ExitListProtocol_parseData_page_with_bad_ip(self):
+        """Of 3 IPs including 192.168.0.1 and 127.0.0.1, only 1 should be kept."""
+        self.proto.data = """
+192.168.0.1
+127.0.0.1
+103.240.91.7"""
+        self.proto.parseData()
+        self.assertEqual(len(self.proto.exitlist), 1)





More information about the tor-commits mailing list