[tor-commits] [ooni-probe/master] Finish adding support for using txsocksx in HTTP template

art at torproject.org art at torproject.org
Wed Nov 6 09:12:44 UTC 2013


commit 8682869d638393364f69c075db06b3814b2a1481
Author: Arturo Filastò <art at fuffa.org>
Date:   Mon Sep 16 12:13:53 2013 +0200

    Finish adding support for using txsocksx in HTTP template
    
    * Make sure we are using TrueHeaders in both SOCKSAgent and Agent
---
 ooni/templates/httpt.py        |    9 +-
 ooni/utils/trueheaders.py      |  159 +++++++++++++++++++++++++++++
 ooni/utils/txagentwithsocks.py |  215 ----------------------------------------
 3 files changed, 163 insertions(+), 220 deletions(-)

diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py
index 79ac475..9a44c18 100644
--- a/ooni/templates/httpt.py
+++ b/ooni/templates/httpt.py
@@ -11,7 +11,7 @@ from twisted.internet.error import ConnectionRefusedError, DNSLookupError, TCPTi
 from twisted.internet.endpoints import TCP4ClientEndpoint
 from twisted.web._newclient import Request, Response, ResponseNeverReceived
 from twisted.web.client import Agent
-from txsocksx.http import SOCKS5Agent
+from ooni.utils.trueheaders import TrueHeadersAgent, TrueHeadersSOCKS5Agent
 
 from ooni.nettest import NetTestCase
 from ooni.utils import log
@@ -64,7 +64,7 @@ class HTTPTest(NetTestCase):
             log.err("Warning! pyOpenSSL is not installed. https websites will "
                      "not work")
 
-        self.control_agent = SOCKS5Agent(reactor,
+        self.control_agent = TrueHeadersSOCKS5Agent(reactor,
                 proxyEndpoint=TCP4ClientEndpoint(reactor, '127.0.0.1',
                     config.tor.socks_port))
 
@@ -77,12 +77,11 @@ class HTTPTest(NetTestCase):
             except ValueError:
                 raise InvalidSocksProxyOption
             socksport = int(socksport)
-            self.agent = SOCKS5Agent(reactor,
+            self.agent = TrueHeadersSOCKS5Agent(reactor,
                 proxyEndpoint=TCP4ClientEndpoint(reactor, sockshost,
                     socksport))
         else:
-            #XXX: pool?
-            self.agent = Agent(reactor)
+            self.agent = TrueHeadersAgent(reactor)
 
         self.report['agent'] = 'agent'
 
diff --git a/ooni/utils/trueheaders.py b/ooni/utils/trueheaders.py
new file mode 100644
index 0000000..06e5c02
--- /dev/null
+++ b/ooni/utils/trueheaders.py
@@ -0,0 +1,159 @@
+# :authors: Giovanni Pellerano
+# :licence: see LICENSE
+#
+# Here we make sure that the HTTP Headers sent and received are True. By this
+# we mean that they are not normalized and that the ordering is maintained.
+
+import struct
+import itertools
+from copy import copy
+
+from zope.interface import implements
+from twisted.web import client, _newclient, http_headers
+from twisted.web._newclient import Request, RequestNotSent, RequestGenerationFailed, TransportProxyProducer, STATUS
+from twisted.internet import protocol, reactor
+from twisted.internet.protocol import ClientFactory, Protocol
+from twisted.internet.endpoints import TCP4ClientEndpoint, SSL4ClientEndpoint
+from twisted.internet import interfaces, defer
+from twisted.internet.defer import Deferred, succeed, fail, maybeDeferred
+
+from txsocksx.http import SOCKS5Agent
+from txsocksx.client import SOCKS5ClientFactory
+SOCKS5ClientFactory.noisy = False
+
+from ooni.utils import log
+
+class TrueHeaders(http_headers.Headers):
+    def __init__(self, rawHeaders=None):
+        self._rawHeaders = dict()
+        if rawHeaders is not None:
+            for name, values in rawHeaders.iteritems():
+                if type(values) is list:
+                  self.setRawHeaders(name, values[:])
+                elif type(values) is dict:
+                  self._rawHeaders[name.lower()] = values
+
+    def setRawHeaders(self, name, values):
+        if name.lower() not in self._rawHeaders:
+          self._rawHeaders[name.lower()] = dict()
+        self._rawHeaders[name.lower()]['name'] = name
+        self._rawHeaders[name.lower()]['values'] = values
+
+    def getDiff(self, headers, ignore=[]):
+        """
+
+        Args:
+
+            headers: a TrueHeaders object
+
+            ignore: specify a list of header fields to ignore
+
+        Returns:
+
+            a set containing the header names that are not present in
+            header_dict or not present in self.
+        """
+        diff = set()
+        field_names = []
+
+        headers_a = copy(self)
+        headers_b = copy(headers)
+        for name in ignore:
+            try:
+                del headers_a._rawHeaders[name.lower()]
+            except KeyError:
+                pass
+            try:
+                del headers_b._rawHeaders[name.lower()]
+            except KeyError:
+                pass
+
+        for k, v in itertools.chain(headers_a.getAllRawHeaders(), \
+                headers_b.getAllRawHeaders()):
+            field_names.append(k)
+
+        for name in field_names:
+            if self.getRawHeaders(name) and headers.getRawHeaders(name):
+                pass
+            else:
+                diff.add(name)
+        return diff
+
+    def getAllRawHeaders(self):
+        for k, v in self._rawHeaders.iteritems():
+            yield v['name'], v['values']
+
+    def getRawHeaders(self, name, default=None):
+        if name.lower() in self._rawHeaders:
+            return self._rawHeaders[name.lower()]['values']
+        return default
+
+class HTTPClientParser(_newclient.HTTPClientParser):
+    def logPrefix(self):
+        return 'HTTPClientParser'
+
+    def connectionMade(self):
+        self.headers = TrueHeaders()
+        self.connHeaders = TrueHeaders()
+        self.state = STATUS
+        self._partialHeader = None
+
+    def headerReceived(self, name, value):
+        if self.isConnectionControlHeader(name):
+            headers = self.connHeaders
+        else:
+            headers = self.headers
+        headers.addRawHeader(name, value)
+
+class HTTP11ClientProtocol(_newclient.HTTP11ClientProtocol):
+    def request(self, request):
+        if self._state != 'QUIESCENT':
+            return fail(RequestNotSent())
+
+        self._state = 'TRANSMITTING'
+        _requestDeferred = maybeDeferred(request.writeTo, self.transport)
+        self._finishedRequest = Deferred()
+
+        self._currentRequest = request
+
+        self._transportProxy = TransportProxyProducer(self.transport)
+        self._parser = HTTPClientParser(request, self._finishResponse)
+        self._parser.makeConnection(self._transportProxy)
+        self._responseDeferred = self._parser._responseDeferred
+
+        def cbRequestWrotten(ignored):
+            if self._state == 'TRANSMITTING':
+                self._state = 'WAITING'
+                self._responseDeferred.chainDeferred(self._finishedRequest)
+
+        def ebRequestWriting(err):
+            if self._state == 'TRANSMITTING':
+                self._state = 'GENERATION_FAILED'
+                self.transport.loseConnection()
+                self._finishedRequest.errback(
+                    Failure(RequestGenerationFailed([err])))
+            else:
+                log.err(err, 'Error writing request, but not in valid state '
+                             'to finalize request: %s' % self._state)
+
+        _requestDeferred.addCallbacks(cbRequestWrotten, ebRequestWriting)
+
+        return self._finishedRequest
+
+class _HTTP11ClientFactory(client._HTTP11ClientFactory):
+    noisy = False
+    def buildProtocol(self, addr):
+        return HTTP11ClientProtocol(self._quiescentCallback)
+
+class HTTPConnectionPool(client.HTTPConnectionPool):
+    _factory = _HTTP11ClientFactory
+
+class TrueHeadersAgent(client.Agent):
+    def __init__(self, *args, **kw):
+        super(TrueHeadersAgent, self).__init__(*args, **kw)
+        self._pool = HTTPConnectionPool(reactor, False)
+
+class TrueHeadersSOCKS5Agent(SOCKS5Agent):
+    def __init__(self, *args, **kw):
+        super(TrueHeadersSOCKS5Agent, self).__init__(*args, **kw)
+        self._pool = HTTPConnectionPool(reactor, False)
diff --git a/ooni/utils/txagentwithsocks.py b/ooni/utils/txagentwithsocks.py
deleted file mode 100644
index a6fa276..0000000
--- a/ooni/utils/txagentwithsocks.py
+++ /dev/null
@@ -1,215 +0,0 @@
-# -*- encoding: utf-8 -*-
-#
-# :authors: Giovanni Pellerano
-# :licence: see LICENSE
-
-import struct
-import itertools
-from copy import copy
-
-from zope.interface import implements
-from twisted.web import client, _newclient, http_headers
-from twisted.web._newclient import Request, RequestNotSent, RequestGenerationFailed, TransportProxyProducer, STATUS
-from twisted.internet import protocol
-from twisted.internet.protocol import ClientFactory, Protocol
-from twisted.internet.endpoints import TCP4ClientEndpoint, SSL4ClientEndpoint
-from twisted.internet import interfaces, defer
-from twisted.internet.defer import Deferred, succeed, fail, maybeDeferred
-
-from txsocksx.client import SOCKS5ClientEndpoint
-from txsocksx.client import SOCKS5ClientFactory
-SOCKS5ClientFactory.noisy = False
-
-from ooni.utils import log
-
-class TrueHeaders(http_headers.Headers):
-    def __init__(self, rawHeaders=None):
-        self._rawHeaders = dict()
-        if rawHeaders is not None:
-            for name, values in rawHeaders.iteritems():
-                if type(values) is list:
-                  self.setRawHeaders(name, values[:])
-                elif type(values) is dict:
-                  self._rawHeaders[name.lower()] = values
-
-    def setRawHeaders(self, name, values):
-        if name.lower() not in self._rawHeaders:
-          self._rawHeaders[name.lower()] = dict()
-        self._rawHeaders[name.lower()]['name'] = name
-        self._rawHeaders[name.lower()]['values'] = values
-
-    def getDiff(self, headers, ignore=[]):
-        """
-
-        Args:
-
-            headers: a TrueHeaders object
-
-            ignore: specify a list of header fields to ignore
-
-        Returns:
-
-            a set containing the header names that are not present in
-            header_dict or not present in self.
-        """
-        diff = set()
-        field_names = []
-
-        headers_a = copy(self)
-        headers_b = copy(headers)
-        for name in ignore:
-            try:
-                del headers_a._rawHeaders[name.lower()]
-            except KeyError:
-                pass
-            try:
-                del headers_b._rawHeaders[name.lower()]
-            except KeyError:
-                pass
-
-        for k, v in itertools.chain(headers_a.getAllRawHeaders(), \
-                headers_b.getAllRawHeaders()):
-            field_names.append(k)
-
-        for name in field_names:
-            if self.getRawHeaders(name) and headers.getRawHeaders(name):
-                pass
-            else:
-                diff.add(name)
-        return diff
-
-    def getAllRawHeaders(self):
-        for k, v in self._rawHeaders.iteritems():
-            yield v['name'], v['values']
-
-    def getRawHeaders(self, name, default=None):
-        if name.lower() in self._rawHeaders:
-            return self._rawHeaders[name.lower()]['values']
-        return default
-
-class HTTPClientParser(_newclient.HTTPClientParser):
-    def logPrefix(self):
-        return 'HTTPClientParser'
-
-    def connectionMade(self):
-        self.headers = TrueHeaders()
-        self.connHeaders = TrueHeaders()
-        self.state = STATUS
-        self._partialHeader = None
-
-    def headerReceived(self, name, value):
-        if self.isConnectionControlHeader(name):
-            headers = self.connHeaders
-        else:
-            headers = self.headers
-        headers.addRawHeader(name, value)
-
-class HTTP11ClientProtocol(_newclient.HTTP11ClientProtocol):
-    def request(self, request):
-        if self._state != 'QUIESCENT':
-            return fail(RequestNotSent())
-
-        self._state = 'TRANSMITTING'
-        _requestDeferred = maybeDeferred(request.writeTo, self.transport)
-        self._finishedRequest = Deferred()
-
-        self._currentRequest = request
-
-        self._transportProxy = TransportProxyProducer(self.transport)
-        self._parser = HTTPClientParser(request, self._finishResponse)
-        self._parser.makeConnection(self._transportProxy)
-        self._responseDeferred = self._parser._responseDeferred
-
-        def cbRequestWrotten(ignored):
-            if self._state == 'TRANSMITTING':
-                self._state = 'WAITING'
-                self._responseDeferred.chainDeferred(self._finishedRequest)
-
-        def ebRequestWriting(err):
-            if self._state == 'TRANSMITTING':
-                self._state = 'GENERATION_FAILED'
-                self.transport.loseConnection()
-                self._finishedRequest.errback(
-                    Failure(RequestGenerationFailed([err])))
-            else:
-                log.err(err, 'Error writing request, but not in valid state '
-                             'to finalize request: %s' % self._state)
-
-        _requestDeferred.addCallbacks(cbRequestWrotten, ebRequestWriting)
-
-        return self._finishedRequest
-
-class _HTTP11ClientFactory(client._HTTP11ClientFactory):
-    noisy = False
-    def buildProtocol(self, addr):
-        return HTTP11ClientProtocol(self._quiescentCallback)
-
-try:
-    class HTTPConnectionPool(client.HTTPConnectionPool):
-        _factory = _HTTP11ClientFactory
-except AttributeError:
-    log.err("Your version of Twisted is outdated and we will not support HTTPConnectionPool")
-    HTTPConnectionPool = None
-
-class UnsupportedTwistedVersion(Exception):
-    pass
-
-class Agent(client.Agent):
-    def __init__(self, reactor,
-                 contextFactory=client.WebClientContextFactory(),
-                 connectTimeout=None, bindAddress=None,
-                 pool=None, sockshost=None, socksport=None):
-        if pool is None and HTTPConnectionPool:
-            pool = HTTPConnectionPool(reactor, False)
-        self._reactor = reactor
-        self._pool = pool
-        self._contextFactory = contextFactory
-        self._connectTimeout = connectTimeout
-        self._bindAddress = bindAddress
-        self._sockshost = sockshost
-        self._socksport = socksport
-
-    def logPrefix(self):
-        return 'SOCKSAgent'
-
-    def request(self, method, uri, headers=None, bodyProducer=None):
-        if (uri.startswith('shttp') or uri.startswith('httpo')) and not HTTPConnectionPool:
-            log.err("Requests over SOCKS are supported only with versions of Twisted >= 12.1.0")
-            raise UnsupportedTwistedVersion
-        return client.Agent.request(self, method, uri, headers, bodyProducer)
-
-    def _getEndpoint(self, scheme, host, port):
-        kwargs = {}
-        if self._connectTimeout is not None:
-            kwargs['timeout'] = self._connectTimeout
-        kwargs['bindAddress'] = self._bindAddress
-        if scheme == 'http':
-            return TCP4ClientEndpoint(self._reactor, host, port, **kwargs)
-        elif scheme == 'shttp' or scheme == 'httpo':
-            socksProxy = TCP4ClientEndpoint(self._reactor, self._sockshost,
-                    self._socksport)
-            return SOCKS5ClientEndpoint(host, port, socksProxy)
-        elif scheme == 'https':
-            return SSL4ClientEndpoint(self._reactor, host, port,
-                    self._wrapContextFactory(host, port), **kwargs)
-        else:
-            raise SchemeNotSupported("Unsupported scheme: %r" % (scheme,))
-
-    def _requestWithEndpoint(self, key, endpoint, method, parsedURI,
-                             headers, bodyProducer, requestPath):
-        if headers is None:
-            headers = TrueHeaders()
-        if not headers.hasHeader('host'):
-            headers = headers.copy()
-            headers.addRawHeader(
-                'host', self._computeHostValue(parsedURI.scheme,
-                    parsedURI.host, parsedURI.port))
-
-        d = self._pool.getConnection(key, endpoint)
-        def cbConnected(proto):
-            return proto.request(
-                Request(method, requestPath, headers, bodyProducer,
-                        persistent=self._pool.persistent))
-        d.addCallback(cbConnected)
-        return d
-





More information about the tor-commits mailing list