commit 8682869d638393364f69c075db06b3814b2a1481
Author: Arturo Filastò <art(a)fuffa.org>
Date: Mon Sep 16 12:13:53 2013 +0200
Finish adding support for using txsocksx in HTTP templat in HTTP templat in HTTP templat in HTTP template
* Make sure we are using the TrueHeaders and in SOCKSAgent and Agent
---
ooni/templates/httpt.py | 9 +-
ooni/utils/trueheaders.py | 159 +++++++++++++++++++++++++++++
ooni/utils/txagentwithsocks.py | 215 ----------------------------------------
3 files changed, 163 insertions(+), 220 deletions(-)
diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py
index 79ac475..9a44c18 100644
--- a/ooni/templates/httpt.py
+++ b/ooni/templates/httpt.py
@@ -11,7 +11,7 @@ from twisted.internet.error import ConnectionRefusedError, DNSLookupError, TCPTi
from twisted.internet.endpoints import TCP4ClientEndpoint
from twisted.web._newclient import Request, Response, ResponseNeverReceived
from twisted.web.client import Agent
-from txsocksx.http import SOCKS5Agent
+from ooni.utils.trueheaders import TrueHeadersAgent, TrueHeadersSOCKS5Agent
from ooni.nettest import NetTestCase
from ooni.utils import log
@@ -64,7 +64,7 @@ class HTTPTest(NetTestCase):
log.err("Warning! pyOpenSSL is not installed. https websites will "
"not work")
- self.control_agent = SOCKS5Agent(reactor,
+ self.control_agent = TrueHeadersSOCKS5Agent(reactor,
proxyEndpoint=TCP4ClientEndpoint(reactor, '127.0.0.1',
config.tor.socks_port))
@@ -77,12 +77,11 @@ class HTTPTest(NetTestCase):
except ValueError:
raise InvalidSocksProxyOption
socksport = int(socksport)
- self.agent = SOCKS5Agent(reactor,
+ self.agent = TrueHeadersSOCKS5Agent(reactor,
proxyEndpoint=TCP4ClientEndpoint(reactor, sockshost,
socksport))
else:
- #XXX: pool?
- self.agent = Agent(reactor)
+ self.agent = TrueHeadersAgent(reactor)
self.report['agent'] = 'agent'
diff --git a/ooni/utils/trueheaders.py b/ooni/utils/trueheaders.py
new file mode 100644
index 0000000..06e5c02
--- /dev/null
+++ b/ooni/utils/trueheaders.py
@@ -0,0 +1,159 @@
+# :authors: Giovanni Pellerano
+# :licence: see LICENSE
+#
+# Here we make sure that the HTTP Headers sent and received are True. By this
+# we mean that they are not normalized and that the ordering is maintained.
+
+import struct
+import itertools
+from copy import copy
+
+from zope.interface import implements
+from twisted.web import client, _newclient, http_headers
+from twisted.web._newclient import Request, RequestNotSent, RequestGenerationFailed, TransportProxyProducer, STATUS
+from twisted.internet import protocol, reactor
+from twisted.internet.protocol import ClientFactory, Protocol
+from twisted.internet.endpoints import TCP4ClientEndpoint, SSL4ClientEndpoint
+from twisted.internet import interfaces, defer
+from twisted.internet.defer import Deferred, succeed, fail, maybeDeferred
+
+from txsocksx.http import SOCKS5Agent
+from txsocksx.client import SOCKS5ClientFactory
+SOCKS5ClientFactory.noisy = False
+
+from ooni.utils import log
+
+class TrueHeaders(http_headers.Headers):
+ def __init__(self, rawHeaders=None):
+ self._rawHeaders = dict()
+ if rawHeaders is not None:
+ for name, values in rawHeaders.iteritems():
+ if type(values) is list:
+ self.setRawHeaders(name, values[:])
+ elif type(values) is dict:
+ self._rawHeaders[name.lower()] = values
+
+ def setRawHeaders(self, name, values):
+ if name.lower() not in self._rawHeaders:
+ self._rawHeaders[name.lower()] = dict()
+ self._rawHeaders[name.lower()]['name'] = name
+ self._rawHeaders[name.lower()]['values'] = values
+
+ def getDiff(self, headers, ignore=[]):
+ """
+
+ Args:
+
+ headers: a TrueHeaders object
+
+ ignore: specify a list of header fields to ignore
+
+ Returns:
+
+ a set containing the header names that are not present in
+ header_dict or not present in self.
+ """
+ diff = set()
+ field_names = []
+
+ headers_a = copy(self)
+ headers_b = copy(headers)
+ for name in ignore:
+ try:
+ del headers_a._rawHeaders[name.lower()]
+ except KeyError:
+ pass
+ try:
+ del headers_b._rawHeaders[name.lower()]
+ except KeyError:
+ pass
+
+ for k, v in itertools.chain(headers_a.getAllRawHeaders(), \
+ headers_b.getAllRawHeaders()):
+ field_names.append(k)
+
+ for name in field_names:
+ if self.getRawHeaders(name) and headers.getRawHeaders(name):
+ pass
+ else:
+ diff.add(name)
+ return diff
+
+ def getAllRawHeaders(self):
+ for k, v in self._rawHeaders.iteritems():
+ yield v['name'], v['values']
+
+ def getRawHeaders(self, name, default=None):
+ if name.lower() in self._rawHeaders:
+ return self._rawHeaders[name.lower()]['values']
+ return default
+
+class HTTPClientParser(_newclient.HTTPClientParser):
+ def logPrefix(self):
+ return 'HTTPClientParser'
+
+ def connectionMade(self):
+ self.headers = TrueHeaders()
+ self.connHeaders = TrueHeaders()
+ self.state = STATUS
+ self._partialHeader = None
+
+ def headerReceived(self, name, value):
+ if self.isConnectionControlHeader(name):
+ headers = self.connHeaders
+ else:
+ headers = self.headers
+ headers.addRawHeader(name, value)
+
+class HTTP11ClientProtocol(_newclient.HTTP11ClientProtocol):
+ def request(self, request):
+ if self._state != 'QUIESCENT':
+ return fail(RequestNotSent())
+
+ self._state = 'TRANSMITTING'
+ _requestDeferred = maybeDeferred(request.writeTo, self.transport)
+ self._finishedRequest = Deferred()
+
+ self._currentRequest = request
+
+ self._transportProxy = TransportProxyProducer(self.transport)
+ self._parser = HTTPClientParser(request, self._finishResponse)
+ self._parser.makeConnection(self._transportProxy)
+ self._responseDeferred = self._parser._responseDeferred
+
+ def cbRequestWrotten(ignored):
+ if self._state == 'TRANSMITTING':
+ self._state = 'WAITING'
+ self._responseDeferred.chainDeferred(self._finishedRequest)
+
+ def ebRequestWriting(err):
+ if self._state == 'TRANSMITTING':
+ self._state = 'GENERATION_FAILED'
+ self.transport.loseConnection()
+ self._finishedRequest.errback(
+ Failure(RequestGenerationFailed([err])))
+ else:
+ log.err(err, 'Error writing request, but not in valid state '
+ 'to finalize request: %s' % self._state)
+
+ _requestDeferred.addCallbacks(cbRequestWrotten, ebRequestWriting)
+
+ return self._finishedRequest
+
+class _HTTP11ClientFactory(client._HTTP11ClientFactory):
+ noisy = False
+ def buildProtocol(self, addr):
+ return HTTP11ClientProtocol(self._quiescentCallback)
+
+class HTTPConnectionPool(client.HTTPConnectionPool):
+ _factory = _HTTP11ClientFactory
+
+class TrueHeadersAgent(client.Agent):
+ def __init__(self, *args, **kw):
+ super(TrueHeadersAgent, self).__init__(*args, **kw)
+ self._pool = HTTPConnectionPool(reactor, False)
+
+class TrueHeadersSOCKS5Agent(SOCKS5Agent):
+ def __init__(self, *args, **kw):
+ super(TrueHeadersSOCKS5Agent, self).__init__(*args, **kw)
+ self._pool = HTTPConnectionPool(reactor, False)
diff --git a/ooni/utils/txagentwithsocks.py b/ooni/utils/txagentwithsocks.py
deleted file mode 100644
index a6fa276..0000000
--- a/ooni/utils/txagentwithsocks.py
+++ /dev/null
@@ -1,215 +0,0 @@
-# -*- encoding: utf-8 -*-
-#
-# :authors: Giovanni Pellerano
-# :licence: see LICENSE
-
-import struct
-import itertools
-from copy import copy
-
-from zope.interface import implements
-from twisted.web import client, _newclient, http_headers
-from twisted.web._newclient import Request, RequestNotSent, RequestGenerationFailed, TransportProxyProducer, STATUS
-from twisted.internet import protocol
-from twisted.internet.protocol import ClientFactory, Protocol
-from twisted.internet.endpoints import TCP4ClientEndpoint, SSL4ClientEndpoint
-from twisted.internet import interfaces, defer
-from twisted.internet.defer import Deferred, succeed, fail, maybeDeferred
-
-from txsocksx.client import SOCKS5ClientEndpoint
-from txsocksx.client import SOCKS5ClientFactory
-SOCKS5ClientFactory.noisy = False
-
-from ooni.utils import log
-
-class TrueHeaders(http_headers.Headers):
- def __init__(self, rawHeaders=None):
- self._rawHeaders = dict()
- if rawHeaders is not None:
- for name, values in rawHeaders.iteritems():
- if type(values) is list:
- self.setRawHeaders(name, values[:])
- elif type(values) is dict:
- self._rawHeaders[name.lower()] = values
-
- def setRawHeaders(self, name, values):
- if name.lower() not in self._rawHeaders:
- self._rawHeaders[name.lower()] = dict()
- self._rawHeaders[name.lower()]['name'] = name
- self._rawHeaders[name.lower()]['values'] = values
-
- def getDiff(self, headers, ignore=[]):
- """
-
- Args:
-
- headers: a TrueHeaders object
-
- ignore: specify a list of header fields to ignore
-
- Returns:
-
- a set containing the header names that are not present in
- header_dict or not present in self.
- """
- diff = set()
- field_names = []
-
- headers_a = copy(self)
- headers_b = copy(headers)
- for name in ignore:
- try:
- del headers_a._rawHeaders[name.lower()]
- except KeyError:
- pass
- try:
- del headers_b._rawHeaders[name.lower()]
- except KeyError:
- pass
-
- for k, v in itertools.chain(headers_a.getAllRawHeaders(), \
- headers_b.getAllRawHeaders()):
- field_names.append(k)
-
- for name in field_names:
- if self.getRawHeaders(name) and headers.getRawHeaders(name):
- pass
- else:
- diff.add(name)
- return diff
-
- def getAllRawHeaders(self):
- for k, v in self._rawHeaders.iteritems():
- yield v['name'], v['values']
-
- def getRawHeaders(self, name, default=None):
- if name.lower() in self._rawHeaders:
- return self._rawHeaders[name.lower()]['values']
- return default
-
-class HTTPClientParser(_newclient.HTTPClientParser):
- def logPrefix(self):
- return 'HTTPClientParser'
-
- def connectionMade(self):
- self.headers = TrueHeaders()
- self.connHeaders = TrueHeaders()
- self.state = STATUS
- self._partialHeader = None
-
- def headerReceived(self, name, value):
- if self.isConnectionControlHeader(name):
- headers = self.connHeaders
- else:
- headers = self.headers
- headers.addRawHeader(name, value)
-
-class HTTP11ClientProtocol(_newclient.HTTP11ClientProtocol):
- def request(self, request):
- if self._state != 'QUIESCENT':
- return fail(RequestNotSent())
-
- self._state = 'TRANSMITTING'
- _requestDeferred = maybeDeferred(request.writeTo, self.transport)
- self._finishedRequest = Deferred()
-
- self._currentRequest = request
-
- self._transportProxy = TransportProxyProducer(self.transport)
- self._parser = HTTPClientParser(request, self._finishResponse)
- self._parser.makeConnection(self._transportProxy)
- self._responseDeferred = self._parser._responseDeferred
-
- def cbRequestWrotten(ignored):
- if self._state == 'TRANSMITTING':
- self._state = 'WAITING'
- self._responseDeferred.chainDeferred(self._finishedRequest)
-
- def ebRequestWriting(err):
- if self._state == 'TRANSMITTING':
- self._state = 'GENERATION_FAILED'
- self.transport.loseConnection()
- self._finishedRequest.errback(
- Failure(RequestGenerationFailed([err])))
- else:
- log.err(err, 'Error writing request, but not in valid state '
- 'to finalize request: %s' % self._state)
-
- _requestDeferred.addCallbacks(cbRequestWrotten, ebRequestWriting)
-
- return self._finishedRequest
-
-class _HTTP11ClientFactory(client._HTTP11ClientFactory):
- noisy = False
- def buildProtocol(self, addr):
- return HTTP11ClientProtocol(self._quiescentCallback)
-
-try:
- class HTTPConnectionPool(client.HTTPConnectionPool):
- _factory = _HTTP11ClientFactory
-except AttributeError:
- log.err("Your version of Twisted is outdated and we will not support HTTPConnectionPool")
- HTTPConnectionPool = None
-
-class UnsupportedTwistedVersion(Exception):
- pass
-
-class Agent(client.Agent):
- def __init__(self, reactor,
- contextFactory=client.WebClientContextFactory(),
- connectTimeout=None, bindAddress=None,
- pool=None, sockshost=None, socksport=None):
- if pool is None and HTTPConnectionPool:
- pool = HTTPConnectionPool(reactor, False)
- self._reactor = reactor
- self._pool = pool
- self._contextFactory = contextFactory
- self._connectTimeout = connectTimeout
- self._bindAddress = bindAddress
- self._sockshost = sockshost
- self._socksport = socksport
-
- def logPrefix(self):
- return 'SOCKSAgent'
-
- def request(self, method, uri, headers=None, bodyProducer=None):
- if (uri.startswith('shttp') or uri.startswith('httpo')) and not HTTPConnectionPool:
- log.err("Requests over SOCKS are supported only with versions of Twisted >= 12.1.0")
- raise UnsupportedTwistedVersion
- return client.Agent.request(self, method, uri, headers, bodyProducer)
-
- def _getEndpoint(self, scheme, host, port):
- kwargs = {}
- if self._connectTimeout is not None:
- kwargs['timeout'] = self._connectTimeout
- kwargs['bindAddress'] = self._bindAddress
- if scheme == 'http':
- return TCP4ClientEndpoint(self._reactor, host, port, **kwargs)
- elif scheme == 'shttp' or scheme == 'httpo':
- socksProxy = TCP4ClientEndpoint(self._reactor, self._sockshost,
- self._socksport)
- return SOCKS5ClientEndpoint(host, port, socksProxy)
- elif scheme == 'https':
- return SSL4ClientEndpoint(self._reactor, host, port,
- self._wrapContextFactory(host, port), **kwargs)
- else:
- raise SchemeNotSupported("Unsupported scheme: %r" % (scheme,))
-
- def _requestWithEndpoint(self, key, endpoint, method, parsedURI,
- headers, bodyProducer, requestPath):
- if headers is None:
- headers = TrueHeaders()
- if not headers.hasHeader('host'):
- headers = headers.copy()
- headers.addRawHeader(
- 'host', self._computeHostValue(parsedURI.scheme,
- parsedURI.host, parsedURI.port))
-
- d = self._pool.getConnection(key, endpoint)
- def cbConnected(proto):
- return proto.request(
- Request(method, requestPath, headers, bodyProducer,
- persistent=self._pool.persistent))
- d.addCallback(cbConnected)
- return d
-