commit 02aaceb9070e341e89e9852b3bf3ff8b24b9f865
Author: Arturo Filastò <arturo@filasto.net>
Date:   Wed Nov 30 19:55:14 2016 +0000
Add support for downloading bridge lists from ooni-resources (#682)
* Add support for downloading bridge lists from ooni-resources
This implements what is described in: #652
* Move the inputProcessor logic in tcp_connect into the setUp method
* Use a single with statement to open multiple context managers on the same line
* Use a variable in ooni/utils/onion.py (pt_names) to store the names of the pluggable transports
* Fix sorting to respect desc and asc keys
* Enable some basic anomaly detection for tcp_connect tests
* Fix handling of stripped input lines in tcp_connect
---
 data/decks/tor.yaml                   |  7 +++
 ooni/deck/store.py                    | 87 ++++++++++++++++++++++++++---------
 ooni/measurements.py                  | 16 ++++++-
 ooni/nettests/blocking/tcp_connect.py | 74 ++++++++++++++---------------
 ooni/utils/onion.py                   |  2 +
 5 files changed, 123 insertions(+), 63 deletions(-)
diff --git a/data/decks/tor.yaml b/data/decks/tor.yaml index 9d0634f..ea6e1a3 100644 --- a/data/decks/tor.yaml +++ b/data/decks/tor.yaml @@ -10,3 +10,10 @@ tasks: - name: Runs the Meek fronted request test ooni: test_name: meek_fronted_requests + +- name: Does a tcp_connect test on the tor bridges + ooni: + annotations: + test_class: tor_bridge_reachability + test_name: tcp_connect + file: "$tor_bridge_lines" diff --git a/ooni/deck/store.py b/ooni/deck/store.py index 956ea37..6e3e6de 100644 --- a/ooni/deck/store.py +++ b/ooni/deck/store.py @@ -21,6 +21,25 @@ class InputNotFound(Exception): class DeckNotFound(Exception): pass
+ +def write_txt_from_csv(in_file, out_file, func, skip_header=True): + with in_file.open('r') as in_fh, out_file.open('w') as out_fh: + csvreader = csv.reader(in_fh) + if skip_header: + csvreader.next() + for row in csvreader: + out_fh.write(func(row)) + +def write_descriptor(out_file, name, desc_id, filepath, file_type): + with out_file.open('w') as out_fh: + json.dump({ + "name": name, + "filepath": filepath, + "last_updated": timestampNowISO8601UTC(), + "id": desc_id, + "type": file_type + }, out_fh) + class InputStore(object): def __init__(self): self.path = FilePath(config.inputs_directory) @@ -51,30 +70,55 @@ class InputStore(object): desc_fname = "citizenlab-test-lists_{0}.desc".format(cc)
out_file = self.path.child("data").child(data_fname) - out_fh = out_file.open('w') - with in_file.open('r') as in_fh: - csvreader = csv.reader(in_fh) - csvreader.next() - for row in csvreader: - out_fh.write("%s\n" % row[0]) - out_fh.close() + write_txt_from_csv(in_file, out_file, + lambda row: "{}\n".format(row[0]) + )
desc_file = self.path.child("descriptors").child(desc_fname) - with desc_file.open('w') as out_fh: - if cc == "global": - name = "List of globally accessed websites" - else: - # XXX resolve this to a human readable country name - country_name = cc - name = "List of websites for {0}".format(country_name) - json.dump({ - "name": name, - "filepath": out_file.path, - "last_updated": timestampNowISO8601UTC(), - "id": "citizenlab_{0}_urls".format(cc), - "type": "file/url" - }, out_fh) + if cc == "global": + name = "List of globally accessed websites" + else: + # XXX resolve this to a human readable country name + country_name = cc + name = "List of websites for {0}".format(country_name) + write_descriptor(desc_file, name, + "citizenlab_{0}_urls".format(cc), + out_file.path, + "file/url") + self._cache_stale = True + yield defer.succeed(None) + + @defer.inlineCallbacks + def update_tor_bridge_lines(self, country_code): + from ooni.utils import onion + in_file = self.resources.child("tor-bridges").child( + "tor-bridges-ip-port.csv" + ) + if not in_file.exists(): + yield check_for_update(country_code) + + data_fname = "tor-bridge-lines.txt" + desc_fname = "tor-bridge-lines.desc" + out_file = self.path.child("data").child(data_fname) + + def format_row(row): + host, port, nickname, protocol = row + if protocol.lower() not in onion.pt_names: + return "{}:{}\n".format(host, port) + return "{} {}:{}\n".format(protocol, host, port) + + write_txt_from_csv(in_file, out_file, format_row) + desc_file = self.path.child("descriptors").child(desc_fname) + write_descriptor( + desc_file, "Tor bridge lines", + "tor_bridge_lines", out_file.path, + "file/ip-port" + ) + self._cache_stale = True + + # Do an empty defer to fit inside of a event loop clock + yield defer.succeed(None)
@defer.inlineCallbacks def create(self, country_code=None): @@ -88,6 +132,7 @@ class InputStore(object): mkdir_p(self.path.child("data").path)
yield self.update_url_lists(country_code) + yield self.update_tor_bridge_lines(country_code)
@defer.inlineCallbacks def update(self, country_code=None): diff --git a/ooni/measurements.py b/ooni/measurements.py index d79fffb..b3b3e0f 100644 --- a/ooni/measurements.py +++ b/ooni/measurements.py @@ -14,7 +14,8 @@ class MeasurementInProgress(Exception): class Process(): supported_tests = [ "web_connectivity", - "http_requests" + "http_requests", + "tcp_connect" ] @staticmethod def web_connectivity(entry): @@ -26,6 +27,15 @@ class Process(): return result
@staticmethod + def tcp_connect(entry): + result = {} + result['anomaly'] = False + if entry['test_keys']['connection'] != "success": + result['anomaly'] = True + result['url'] = entry['input'] + return result + + @staticmethod def http_requests(entry): result = {} test_keys = entry['test_keys'] @@ -50,6 +60,8 @@ def generate_summary(input_file, output_file): if entry['test_name'] in Process.supported_tests: result = getattr(Process, entry['test_name'])(entry) result['idx'] = idx + if not result.get('url', None): + result['url'] = entry['input'] results['test_name'] = entry['test_name'] results['test_start_time'] = entry['test_start_time'] results['country_code'] = entry['probe_cc'] @@ -145,7 +157,7 @@ def list_measurements(compute_size=False, order=None): return measurements
if order.lower() in ['asc', 'desc']: - reverse = {'asc': True, 'desc': False}[order.lower()] + reverse = {'asc': False, 'desc': True}[order.lower()] measurements.sort(key=operator.itemgetter('test_start_time'), reverse=reverse) return measurements diff --git a/ooni/nettests/blocking/tcp_connect.py b/ooni/nettests/blocking/tcp_connect.py index f2712ea..da92129 100644 --- a/ooni/nettests/blocking/tcp_connect.py +++ b/ooni/nettests/blocking/tcp_connect.py @@ -18,7 +18,7 @@ class TCPConnectTest(nettest.NetTestCase): description = "Performs a TCP connect scan of all the " \ "host port combinations given as input." author = "Arturo Filastò" - version = "0.1" + version = "0.2.0" inputFile = [ 'file', 'f', @@ -29,6 +29,38 @@ class TCPConnectTest(nettest.NetTestCase): requiresRoot = False requiredOptions = ['file']
+ def setUp(self): + def strip_url(address): + proto, path = address.strip().split('://') + proto = proto.lower() + host = path.split('/')[0] + if proto == 'http': + return host, 80 + if proto == 'https': + return host, 443 + + pluggable_transports = ( + "obfs3", "obfs2", "fte", "scramblesuit", + "obfs4" + ) + def is_bridge_line(line): + first = line.split(" ")[0] + return first.lower() in pluggable_transports + ("bridge",) + def strip_bridge(line): + if line.lower().startswith("bridge"): + return line.split(" ")[2].split(":") + return line.split(" ")[1].split(":") + + if self.input.startswith("http"): + host, port = strip_url(self.input) + elif is_bridge_line(self.input): + host, port = strip_bridge(self.input) + else: + host, port = self.input.split(" ")[0].split(":") + + self.host = host + self.port = port + def test_connect(self): """ This test performs a TCP connection to the remote host on the @@ -36,8 +68,6 @@ class TCPConnectTest(nettest.NetTestCase): The report will contains the string 'success' if the test has succeeded, or the reason for the failure if it has failed. """ - host, port = self.input.split(":") - def connectionSuccess(protocol): protocol.transport.loseConnection() log.debug("Got a connection to %s" % self.input) @@ -47,44 +77,8 @@ class TCPConnectTest(nettest.NetTestCase): self.report['connection'] = handleAllFailures(failure)
from twisted.internet import reactor - point = TCP4ClientEndpoint(reactor, host, int(port)) + point = TCP4ClientEndpoint(reactor, self.host, int(self.port)) d = point.connect(TCPFactory()) d.addCallback(connectionSuccess) d.addErrback(connectionFailed) return d - - def inputProcessor(self, filename=None): - """ - This inputProcessor extracts name:port pairs from urls - XXX: Does not support unusual port numbers - """ - def strip_url(address): - proto, path = x.strip().split('://') - proto = proto.lower() - host = path.split('/')[0] - if proto == 'http': - return "%s:80" % host - if proto == 'https': - return "%s:443" % host - - pluggable_transports = ("obfs3", "obfs2", "fte", "scramblesuit") - def is_bridge_line(line): - first = line.split(" ")[0] - return first.lower() in pluggable_transports + ("bridge",) - def strip_bridge(line): - if line.lower().startswith("Bridge"): - return line.split(" ")[2] - return line.split(" ")[1] - - if filename: - fp = open(filename) - for x in fp.readlines(): - if x.startswith("http"): - yield strip_url(x) - elif is_bridge_line(x): - yield strip_bridge(x) - else: - yield x.split(" ")[0] - fp.close() - else: - pass diff --git a/ooni/utils/onion.py b/ooni/utils/onion.py index 7414e2f..9d41b29 100644 --- a/ooni/utils/onion.py +++ b/ooni/utils/onion.py @@ -136,6 +136,8 @@ _transport_line_templates = {
}
+pt_names = _transport_line_templates.keys() + class UnrecognizedTransport(Exception): pass class UninstalledTransport(Exception):