commit 2e0da24fb07ac2b6b8669926b8c14161b5406733
Merge: 21ae4fb 3f9035f
Author: Arturo Filastò <art@fuffa.org>
Date:   Thu Jun 6 17:49:40 2013 +0200
Merge branch 'master' into feature/ui
* master:
  Only process the input when we are dealing with URLs
  Remove really old Makefile which seems to no longer point to valid directories.
  Remove oonib report archival script.
  Remove line to ignore oonib/oonibackend.conf from .gitignore.
  Remove log factory for oonib from ooni/utils/log.py.
  Remove old bin/oonib script.
  Stop reactor when we can't start the sniffer.
  Strip superfluous data from ASN number
  Fix import error for pushFilenameStack
  Add Dominic Hamon's nettest for running tests written with other interpreters.
  Parsley 1.1 is now in pypi so we don't have to mirror it.
  Remove unused geodata module that depended on pygeoip
  Explicit versions of scapy, parsley and dnspython
  Remove oonib from ooni-probe repo.
  Update requirements.txt to what twisted wants.
  Include by default the details of the probe
  Add extra commands to step 1
  Add URL inputProcessor to tcpconnect
  Add inputProcessor to http_host
  Set pcap name for each NetTest, and use consistent timestamps.
Conflicts:
	ooni/config.py
	ooni/director.py
	ooni/nettest.py
	ooni/utils/geodata.py
	requirements.txt
 .gitignore                              |   1 -
 Makefile                                | 202 -------------------------------
 README.md                               |   2 +
 data/nettests/blocking/tcpconnect.py    |  24 ++++
 data/nettests/manipulation/http_host.py |  10 ++
 data/ooniprobe.conf.sample              |   8 +-
 nettests/experimental/script.py         |  90 ++++++++++++++
 ooni/director.py                        |  21 +++-
 ooni/geoip.py                           |   2 +-
 ooni/nettest.py                         |   4 +-
 ooni/otime.py                           |   2 +
 ooni/reporter.py                        |   2 +-
 ooni/utils/geodata.py                   |  40 ------
 ooni/utils/log.py                       |  15 ---
 requirements.txt                        |  23 ++--
 scripts/archive_reports.py              | 174 --------------------------
 16 files changed, 162 insertions(+), 458 deletions(-)
diff --cc data/nettests/blocking/tcpconnect.py
index 3b22427,0000000..5b432e0
mode 100644,000000..100644
--- a/data/nettests/blocking/tcpconnect.py
+++ b/data/nettests/blocking/tcpconnect.py
@@@ -1,45 -1,0 +1,69 @@@
 +# -*- encoding: utf-8 -*-
 +from twisted.internet.protocol import Factory, Protocol
 +from twisted.internet.endpoints import TCP4ClientEndpoint
 +
 +from twisted.internet.error import ConnectionRefusedError
 +from twisted.internet.error import TCPTimedOutError, TimeoutError
 +
 +from ooni import nettest
 +from ooni.errors import handleAllFailures
 +from ooni.utils import log
 +
 +class TCPFactory(Factory):
 +    def buildProtocol(self, addr):
 +        return Protocol()
 +
 +class TCPConnectTest(nettest.NetTestCase):
 +    name = "TCP Connect"
 +    author = "Arturo Filastò"
 +    version = "0.1"
 +    inputFile = ['file', 'f', None,
 +                 'File containing the IP:PORT combinations to be tested, one per line']
 +
 +    requiredOptions = ['file']
 +    def test_connect(self):
 +        """
 +        This test performs a TCP connection to the remote host on the
 +        specified port. The report will contain the string 'success' if
 +        the test has succeeded, or the reason for the failure if it has
 +        failed.
 +        """
 +        host, port = self.input.split(":")
 +        def connectionSuccess(protocol):
 +            protocol.transport.loseConnection()
 +            log.debug("Got a connection to %s" % self.input)
 +            self.report["connection"] = 'success'
 +
 +        def connectionFailed(failure):
 +            self.report['connection'] = handleAllFailures(failure)
 +
 +        from twisted.internet import reactor
 +        point = TCP4ClientEndpoint(reactor, host, int(port))
 +        d = point.connect(TCPFactory())
 +        d.addCallback(connectionSuccess)
 +        d.addErrback(connectionFailed)
 +        return d
 +
++    def inputProcessor(self, filename=None):
++        """
++        This inputProcessor extracts host:port pairs from URLs.
++        XXX: Does not support unusual port numbers.
++        """
++        def strip_url(address):
++            proto, path = address.strip().split('://')
++            proto = proto.lower()
++            host = path.split('/')[0]
++            if proto == 'http':
++                return "%s:80" % host
++            if proto == 'https':
++                return "%s:443" % host
++
++        if filename:
++            fp = open(filename)
++            for x in fp.readlines():
++                if x.startswith("http"):
++                    yield strip_url(x)
++                else:
++                    yield x.strip()
++            fp.close()
++        else:
++            pass
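For reference, the transformation this inputProcessor applies to each input line can be sketched as a self-contained function; strip_to_endpoint and the sample values are illustrative, not part of the ooni codebase:

    # Maps a URL to a host:port pair the way strip_url above does;
    # non-URL lines (already IP:PORT) pass through unchanged.
    def strip_to_endpoint(line):
        line = line.strip()
        if not line.startswith("http"):
            return line
        proto, _, rest = line.partition("://")
        host = rest.split("/")[0]
        # Only the default ports are handled (see the XXX note above).
        return "%s:443" % host if proto.lower() == "https" else "%s:80" % host

    assert strip_to_endpoint("https://example.com/path") == "example.com:443"
    assert strip_to_endpoint("1.2.3.4:8080") == "1.2.3.4:8080"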
diff --cc data/nettests/manipulation/http_host.py
index d95d836,0000000..3f1e0c6
mode 100644,000000..100644
--- a/data/nettests/manipulation/http_host.py
+++ b/data/nettests/manipulation/http_host.py
@@@ -1,141 -1,0 +1,151 @@@
 +# -*- encoding: utf-8 -*-
 +#
 +# HTTP Host Test
 +# **************
 +#
 +# :authors: Arturo Filastò
 +# :licence: see LICENSE
 +
 +import json
 +from twisted.python import usage
 +
 +from ooni.utils import randomStr, randomSTR
 +
 +from ooni.utils import log
 +from ooni.templates import httpt
 +
 +class UsageOptions(usage.Options):
 +    optParameters = [['backend', 'b', 'http://127.0.0.1:57001',
 +                      'URL of the test backend to use. Should be \
 +                      listening on port 80 and be a \
 +                      HTTPReturnJSONHeadersHelper'],
 +                     ['content', 'c', None, 'The file to read \
 +                      from containing the content of a block page']]
 +
 +class HTTPHost(httpt.HTTPTest):
 +    """
 +    This test is aimed at detecting the presence of a transparent HTTP proxy
 +    and enumerating the sites that are being censored by it.
 +
 +    It places the hostname of the site to be tested for censorship inside
 +    the Host header field, then determines whether the probe is behind a
 +    transparent HTTP proxy (because the response from the backend server
 +    does not match) and whether the site is censored, by checking if the
 +    page it got back matches the input block page.
 +    """
 +    name = "HTTP Host"
 +    author = "Arturo Filastò"
 +    version = "0.2.3"
 +
 +    randomizeUA = False
 +    usageOptions = UsageOptions
 +
 +    inputFile = ['file', 'f', None,
 +                 'List of hostnames to test for censorship']
 +
 +    requiredOptions = ['backend']
 +
 +    def test_filtering_prepend_newline_to_method(self):
 +        headers = {}
 +        headers["Host"] = [self.input]
 +        return self.doRequest(self.localOptions['backend'], method="\nGET",
 +                              headers=headers)
 +
 +    def test_filtering_add_tab_to_host(self):
 +        headers = {}
 +        headers["Host"] = [self.input + '\t']
 +        return self.doRequest(self.localOptions['backend'],
 +                              headers=headers)
 +
 +    def test_filtering_of_subdomain(self):
 +        headers = {}
 +        headers["Host"] = [randomStr(10) + '.' + self.input]
 +        return self.doRequest(self.localOptions['backend'],
 +                              headers=headers)
 +
 +    def test_filtering_via_fuzzy_matching(self):
 +        headers = {}
 +        headers["Host"] = [randomStr(10) + self.input + randomStr(10)]
 +        return self.doRequest(self.localOptions['backend'],
 +                              headers=headers)
 +
 +    def test_send_host_header(self):
 +        """
 +        Stuffs the HTTP Host header field with the site to be tested for
 +        censorship and does an HTTP request of this kind to our backend.
 +
 +        We randomize the HTTP User Agent headers.
 +        """
 +        headers = {}
 +        headers["Host"] = [self.input]
 +        return self.doRequest(self.localOptions['backend'],
 +                              headers=headers)
 +
 +    def check_for_censorship(self, body):
 +        """
 +        If we have specified what a censorship page looks like, we check
 +        whether the page we are looking at matches it.
 +
 +        XXX this is not tested, though it is basically what was used to
 +        detect censorship in the Palestine case.
 +        """
 +        if self.localOptions['content']:
 +            self.report['censored'] = True
 +            censorship_page = open(self.localOptions['content'])
 +            response_page = iter(body.split("\n"))
 +
 +            for censorship_line in censorship_page.xreadlines():
 +                response_line = response_page.next()
 +                if response_line != censorship_line:
 +                    self.report['censored'] = False
 +                    break
 +
 +            censorship_page.close()
 +        else:
 +            self.report['censored'] = None
 +
 +    def processResponseBody(self, body):
 +        """
 +        XXX this is to be filled in with either a domclass based classifier
 +        or with a rule that will allow us to detect that the body of the
 +        result is that of a censored site.
+ """ + # If we don't see a json array we know that something is wrong for + # sure + if not body.startswith("{"): + log.msg("This does not appear to be JSON") + self.report['transparent_http_proxy'] = True + self.check_for_censorship(body) + return + try: + content = json.loads(body) + except: + log.msg("The json does not parse, this is not what we expected") + self.report['transparent_http_proxy'] = True + self.check_for_censorship(body) + return + + # We base the determination of the presence of a transparent HTTP + # proxy on the basis of the response containing the json that is to be + # returned by a HTTP Request Test Helper + if 'request_headers' in content and \ + 'request_line' in content and \ + 'headers_dict' in content: + log.msg("Found the keys I expected in %s" % content) + self.report['transparent_http_proxy'] = False + self.report['censored'] = False + else: + log.msg("Did not find the keys I expected in %s" % content) + self.report['transparent_http_proxy'] = True + self.check_for_censorship(body) + ++ def inputProcessor(self, filename=None): ++ """ ++ This inputProcessor extracts domain names from urls ++ """ ++ if filename: ++ fp = open(filename) ++ for x in fp.readlines(): ++ yield x.strip().split('//')[-1].split('/')[0] ++ fp.close() ++ else: pass diff --cc data/ooniprobe.conf.sample index a12b526,0000000..ca94c86 mode 100644,000000..100644 --- a/data/ooniprobe.conf.sample +++ b/data/ooniprobe.conf.sample @@@ -1,55 -1,0 +1,55 @@@ +# This is the configuration file for OONIProbe +# This file follows the YAML markup format: http://yaml.org/spec/1.2/spec.html +# Keep in mind that indentation matters. + +basic: + # Where OONIProbe should be writing it's log file + logfile: /var/log/ooniprobe.log +privacy: + # Should we include the IP address of the probe in the report? - includeip: false ++ includeip: true + # Should we include the ASN of the probe in the report? - includeasn: false ++ includeasn: true + # Should we include the country as reported by GeoIP in the report? - includecountry: false ++ includecountry: true + # Should we include the city as reported by GeoIP in the report? - includecity: false ++ includecity: true + # Should we collect a full packet capture on the client? + includepcap: false +reports: + # This is a packet capture file (.pcap) to load as a test: + pcap: null + collector: 'httpo://nkvphnp3p6agi5qq.onion' +advanced: + geoip_data_dir: /usr/share/ooni/ + debug: true + # enable if auto detection fails + #tor_binary: /usr/sbin/tor + # For auto detection + interface: auto + # Of specify a specific interface + #interface: wlan0 + # If you do not specify start_tor, you will have to have Tor running and + # explicitly set the control port and SOCKS port + start_tor: false + # After how many seconds we should give up on a particular measurement + measurement_timeout: 30 + # After how many retries we should give up on a measurement + measurement_retries: 2 + # How many measurments to perform concurrently + measurement_concurrency: 100 + # After how may seconds we should give up reporting + reporting_timeout: 30 + # After how many retries to give up on reporting + reporting_retries: 3 + # How many reports to perform concurrently + reporting_concurrency: 20 +tor: + socks_port: 9050 + #control_port: 9051 + # Specify the absolute path to the Tor bridges to use for testing + #bridges: bridges.list + # Specify path of the tor datadirectory. + # This should be set to something to avoid having Tor download each time + # the descriptors and consensus data. 
diff --cc data/ooniprobe.conf.sample
index a12b526,0000000..ca94c86
mode 100644,000000..100644
--- a/data/ooniprobe.conf.sample
+++ b/data/ooniprobe.conf.sample
@@@ -1,55 -1,0 +1,55 @@@
 +# This is the configuration file for OONIProbe
 +# This file follows the YAML markup format: http://yaml.org/spec/1.2/spec.html
 +# Keep in mind that indentation matters.
 +
 +basic:
 +    # Where OONIProbe should be writing its log file
 +    logfile: /var/log/ooniprobe.log
 +privacy:
 +    # Should we include the IP address of the probe in the report?
-     includeip: false
++    includeip: true
 +    # Should we include the ASN of the probe in the report?
-     includeasn: false
++    includeasn: true
 +    # Should we include the country as reported by GeoIP in the report?
-     includecountry: false
++    includecountry: true
 +    # Should we include the city as reported by GeoIP in the report?
-     includecity: false
++    includecity: true
 +    # Should we collect a full packet capture on the client?
 +    includepcap: false
 +reports:
 +    # This is a packet capture file (.pcap) to load as a test:
 +    pcap: null
 +    collector: 'httpo://nkvphnp3p6agi5qq.onion'
 +advanced:
 +    geoip_data_dir: /usr/share/ooni/
 +    debug: true
 +    # enable if auto detection fails
 +    #tor_binary: /usr/sbin/tor
 +    # For auto detection
 +    interface: auto
 +    # Or specify a specific interface
 +    #interface: wlan0
 +    # If you do not specify start_tor, you will have to have Tor running and
 +    # explicitly set the control port and SOCKS port
 +    start_tor: false
 +    # After how many seconds we should give up on a particular measurement
 +    measurement_timeout: 30
 +    # After how many retries we should give up on a measurement
 +    measurement_retries: 2
 +    # How many measurements to perform concurrently
 +    measurement_concurrency: 100
 +    # After how many seconds we should give up reporting
 +    reporting_timeout: 30
 +    # After how many retries to give up on reporting
 +    reporting_retries: 3
 +    # How many reports to perform concurrently
 +    reporting_concurrency: 20
 +tor:
 +    socks_port: 9050
 +    #control_port: 9051
 +    # Specify the absolute path to the Tor bridges to use for testing
 +    #bridges: bridges.list
 +    # Specify the path of the Tor data directory.
 +    # This should be set to something, to avoid having Tor download the
 +    # descriptors and consensus data each time.
 +    #data_dir: ~/.tor/
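Since the sample configuration is plain YAML, the defaults changed above can be checked with any YAML parser. A minimal sketch, assuming PyYAML is installed and the file is read directly (the probe itself loads it through ooni.settings):

    import yaml

    # Load the sample config and inspect the privacy defaults changed above.
    with open("data/ooniprobe.conf.sample") as f:
        conf = yaml.safe_load(f)

    assert conf["privacy"]["includeip"] is True
    assert conf["privacy"]["includepcap"] is False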
diff --cc ooni/director.py
index bb02201,809eae4..30a68f9
--- a/ooni/director.py
+++ b/ooni/director.py
@@@ -6,10 -6,10 +6,10 @@@ import r
  from ooni import geoip
  from ooni.managers import ReportEntryManager, MeasurementManager
  from ooni.reporter import Report
- from ooni.utils import log, checkForRoot
+ from ooni.utils import log, checkForRoot, pushFilenameStack
  from ooni.utils.net import randomFreePort
 -from ooni.nettest import NetTest
 +from ooni.nettest import NetTest, getNetTestInformation
 +from ooni.settings import config
  from ooni import errors
  from txtorcon import TorConfig
@@@ -87,39 -86,10 +87,40 @@@ class Director(object)
          # This deferred is fired once all the measurements and their reporting
          # tasks are completed.
          self.allTestsDone = defer.Deferred()
 +        self.sniffer = None
 +    def getNetTests(self):
 +        nettests = {}
 +        def is_nettest(filename):
 +            return not filename == '__init__.py' \
 +                and filename.endswith('.py')
 +
 +        for category in self.categories:
 +            dirname = os.path.join(config.nettest_directory, category)
 +            # Walk every file in this category's directory.
 +            for filename in os.listdir(dirname):
 +                if is_nettest(filename):
 +                    net_test_file = os.path.join(dirname, filename)
 +                    nettest = getNetTestInformation(net_test_file)
 +
 +                    if nettest['id'] in nettests:
 +                        log.err("Found two tests with the same name %s, %s" %
 +                                (net_test_file, nettests[nettest['id']]['path']))
 +                    else:
 +                        category = dirname.replace(config.nettest_directory, '')
 +                        nettests[nettest['id']] = nettest
 +
 +        return nettests
 +
      @defer.inlineCallbacks
      def start(self):
 +        self.netTests = self.getNetTests()
 +
 +        if config.privacy.includepcap:
 +            log.msg("Starting")
 +            if not config.reports.pcap:
 +                config.generate_pcap_filename()
 +            self.startSniffing()
          if config.advanced.start_tor:
              log.msg("Starting Tor...")
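The discovery logic in getNetTests above can be exercised in isolation. The sketch below mirrors its directory walk, under the assumption that a test's id is derived from its filename; list_nettests and the category names are illustrative, not the ooni API:

    import os

    def list_nettests(nettest_directory,
                      categories=("blocking", "manipulation", "experimental")):
        # Walk each category directory and collect candidate test modules,
        # skipping package files, exactly as is_nettest does above.
        found = {}
        for category in categories:
            dirname = os.path.join(nettest_directory, category)
            for filename in os.listdir(dirname):
                if filename == '__init__.py' or not filename.endswith('.py'):
                    continue
                path = os.path.join(dirname, filename)
                test_id = filename[:-3]
                if test_id in found:
                    print("Found two tests with the same name: %s, %s"
                          % (path, found[test_id]))
                else:
                    found[test_id] = path
        return found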
diff --cc ooni/reporter.py
index 621eaeb,85df36e..251e824
--- a/ooni/reporter.py
+++ b/ooni/reporter.py
@@@ -29,10 -29,10 +29,10 @@@ except ImportError
  from ooni import errors
  from ooni import otime
- from ooni.utils import geodata, pushFilenameStack
+ from ooni.utils import pushFilenameStack
  from ooni.utils.net import BodyReceiver, StringProducer, userAgents
 -from ooni import config
 +from ooni.settings import config
  from ooni.tasks import ReportEntry, TaskTimedOut, ReportTracker
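Taken together, these hunks complete the migration from the removed ooni.config module to the shared settings object. A minimal sketch of the resulting access pattern, mirroring the start() logic shown in the director.py hunk above:

    from ooni.settings import config

    # Attribute-style access to the YAML sections shown in the sample config.
    if config.privacy.includepcap and not config.reports.pcap:
        config.generate_pcap_filename()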