commit 691db20224e4311444de85b592add60a54b9d38c Author: aagbsn aagbsn@extc.org Date: Tue Dec 6 13:30:49 2011 -0800
4297 - parse or-addresses from bridge descriptors
Updates parseDescFile to read or-address lines from bridge descriptors, and adds a container class, PortList.
parseDescFile calls a new helper, parseORAddressLine, which parses or-address lines. Each valid address:portspec pair is added to the Bridge object's or_addresses dictionary, which maps each or-address to a PortList.
class PortList is a container class that represents the or-address PORTSPEC field, which is a list of ports and port ranges.
The implementation contains an efficient representation of port-ranges, implements __contains__ for single ports and port-ranges, and will also squash or merge port ranges that intersect.
The getPort() function returns a single valid port (as defined by the PORTSPEC line). Each subsequent call to getPort() returns the next valid port, starting over once all ports have been handed out, so that BridgeDB may return a single 'bridge' line for inclusion in torrc and uniformly distribute the available ports.
getConfigLine may be passed 'selectFromORAddresses=True', in which case it returns a valid config line chosen from the set of or-addresses and ports. Filtering for IPv4 or IPv6 addresses is supported; if no or-address matches, BridgeDB returns the primary ip:orport instead.
Also added is a generator, 'getAllConfigLines()', which yields every valid address:port combination a bridge supports, one per line. Port ranges are expanded into individual ports, so this may produce a lot of output. --- lib/bridgedb/Bridges.py | 279 ++++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 262 insertions(+), 17 deletions(-)
diff --git a/lib/bridgedb/Bridges.py b/lib/bridgedb/Bridges.py index 358e362..306b757 100644 --- a/lib/bridgedb/Bridges.py +++ b/lib/bridgedb/Bridges.py @@ -1,5 +1,3 @@ -# BridgeDB by Nick Mathewson. -# Copyright (c) 2007-2009, The Tor Project, Inc. # See LICENSE for licensing information
""" @@ -15,6 +13,8 @@ import re import sha import socket import time +import ipaddr +import random
import bridgedb.Storage import bridgedb.Bucket @@ -44,15 +44,13 @@ def is_valid_ip(ip): False """
- if not re.match(r'(\d+).(\d+).(\d+).(\d+)', ip): - # inet_aton likes "1.2" as a synonym for "0.0.1.2". We don't. - return False + # ipaddr does not treat "1.2" as a synonym for "0.0.1.2" try: - socket.inet_aton(ip) - except socket.error: + ipaddr.IPAddress(ip) + except ValueError: + # not a valid IPv4 or IPv6 address return False - else: - return True + return True
def is_valid_fingerprint(fp): """Return true iff fp in the right format to be a hex fingerprint @@ -67,6 +65,17 @@ def is_valid_fingerprint(fp): else: return True
+def is_valid_or_address(or_address): + """Return true iff or_address is in the right format + (ip,frozenset(port)) or (ip, frozenset(port_low,port_high)) for ranges + """ + if len(or_address) != 2: return False + ip,port = or_address + if not is_valid_ip(ip): return False + if type(port) is not int: return False + if not (1 <= port <= 65535): return False + return True + toHex = binascii.b2a_hex fromHex = binascii.a2b_hex
@@ -105,12 +114,15 @@ class Bridge: ## no spaces. ## running,stable -- DOCDOC ## blockingCountries -- list of country codes blocking this bridge - def __init__(self, nickname, ip, orport, fingerprint=None, id_digest=None): + def __init__(self, nickname, ip, orport, fingerprint=None, id_digest=None, + or_addresses=None): """Create a new Bridge. One of fingerprint and id_digest must be set.""" self.nickname = nickname self.ip = ip self.orport = orport + if not or_addresses: or_addresses = {} + self.or_addresses = or_addresses self.running = self.stable = None self.blockingCountries = None if id_digest is not None: @@ -132,15 +144,63 @@ class Bridge:
def __repr__(self): """Return a piece of python that evaluates to this bridge.""" + if self.or_addresses: + return "Bridge(%r,%r,%d,%r,or_addresses=%s)"%( + self.nickname, self.ip, self.orport, self.fingerprint, + self.or_addresses) return "Bridge(%r,%r,%d,%r)"%( self.nickname, self.ip, self.orport, self.fingerprint)
- def getConfigLine(self,includeFingerprint=False): + def getConfigLine(self,includeFingerprint=False, + selectFromORAddresses=False, + needIPv4=True, needIPv6=False): """Return a line describing this bridge for inclusion in a torrc.""" + + # select an address:port from or-addresses + if selectFromORAddresses and self.or_addresses: + filtered_addresses = None + # bridges may have both classes. we only return one. + if needIPv4: + f = lambda x: type(x[0]) is ipaddr.IPv4Address + filtered_addresses = filter(f, self.or_addresses.items()) + elif needIPv6: + f = lambda x: type(x[0]) is ipaddr.IPv6Address + filtered_addresses = filter(f, self.or_addresses.items()) + + #XXX: we could instead have two lists of or-addresses + if filtered_addresses: + address,portlist = random.choice(filtered_addresses) + if type(address) is ipaddr.IPv6Address: + ip = "[%s]"%address + else: + ip = "%s"%address + orport = portlist.getPort() #magic + + # default to ip,orport ; ex. when logging + else: + ip = self.ip + orport = self.orport + if includeFingerprint: - return "bridge %s:%d %s" % (self.ip, self.orport, self.fingerprint) + return "bridge %s:%d %s" % (ip, orport, self.fingerprint) else: - return "bridge %s:%d" % (self.ip, self.orport) + return "bridge %s:%d" % (ip, orport) + + def getAllConfigLines(self,includeFingerprint=False): + """Generator. Iterate over all valid config lines for this bridge.""" + # warning: a bridge with large port ranges may generate thousands + # of lines of output + for address,portlist in self.or_addresses.items(): + if type(address) is ipaddr.IPv6Address: + ip = "[%s]" % address + else: + ip = "%s" % address + + for orport in portlist: + if includeFingerprint: + yield "bridge %s:%d %s" % (ip,orport,self.fingerprint) + else: + yield "bridge %s:%d" % (ip,orport)
def assertOK(self): assert is_valid_ip(self.ip) @@ -164,10 +224,37 @@ class Bridge: return False
def parseDescFile(f, bridge_purpose='bridge'): - """Generator. Parses a cached-descriptors file 'f', and yields a Bridge - object for every entry whose purpose matches bridge_purpose. + """Generator. Parses a cached-descriptors file 'f' and yeilds a Bridge object + for every entry whose purpose matches bridge_purpose. + This Generator understands the new descriptor format described in + 186-multiple-orports.txt + + The new specification provides for specifying multiple ORports as well + as supporting new address format for IPv6 addresses. + + The router descriptor "or-address" may occur zero, one, or multiple times. + parseDescFile adds each ADDRESS:PORTSPEC to the Bridge.or_addresses list. + + The "or-address" should not duplicate the address:port pair from the "router" + description. (Should we try to catch this case?) + + A node may not list more than 8 or-address lines. + (should we try to enforce this too?) + + Here is the new format: + + or-address SP ADDRESS ":" PORTLIST NL + ADDRESS = IP6ADDR | IP4ADDR + IPV6ADDR = an ipv6 address, surrounded by square brackets. + IPV4ADDR = an ipv4 address, represented as a dotted quad. + PORTLIST = PORTSPEC | PORTSPEC "," PORTLIST + PORTSPEC = PORT | PORT "-" PORT + PORT = a number between 1 and 65535 inclusive. """ + nickname = ip = orport = fingerprint = purpose = None + num_or_address_lines = 0 + or_addresses = {}
for line in f: line = line.strip() @@ -185,14 +272,172 @@ def parseDescFile(f, bridge_purpose='bridge'): orport = int(items[3]) elif line.startswith("fingerprint "): fingerprint = line[12:].replace(" ", "") + elif line.startswith("or-address "): + if num_or_address_lines < 8: + line = line[11:] + address,portlist = parseORAddressLine(line) + try: + or_addresses[address].add(portlist) + except KeyError: + or_addresses[address] = portlist + else: + logging.warn("Skipping extra or-address line "\ + "from Bridge with ID %r" % id) + num_or_address_lines += 1 elif line.startswith("router-signature"): purposeMatches = (purpose == bridge_purpose or bridge_purpose is None) if purposeMatches and nickname and ip and orport and fingerprint: - b = Bridge(nickname, ip, orport, fingerprint) + b = Bridge(nickname, ip, orport, fingerprint, + or_addresses=or_addresses) b.assertOK() yield b - nickname = ip = orport = fingerprint = purpose = None + nickname = ip = orport = fingerprint = purpose = None + num_or_address_lines = 0 + or_addresses = {} + +class PortList: + """ container class for port ranges + """ + + def __init__(self, *args, **kwargs): + self.ports = set() + self.ranges = [] + self.portdispenser = None + if len(args) == 1: + if type(args[0]) is str: + ports = [p.split('-') for p in args[0].split(',')] + # truncate per spec + ports = ports[:16] + for ps in ports: + try: ps = [int(x) for x in ps] + except ValueError: break + if len(ps) == 1: self.add(ps[0]) + elif len(ps) == 2: self.add(ps[0],ps[1]) + else: + self.add(args[0]) + elif len(args) == 2: + l,h = args + self.add(l,h) + + def _sanitycheck(self, val): + #XXX: if debug=False this is disabled. bad! 
+ assert type(val) is int + assert(val > 0) + assert(val <= 65535) + + def __contains__(self, val1, val2=None): + self._sanitycheck(val1) + if val2: self.sanitycheck(val2) + + # check a single port + if not val2 and val1: + if val1 in self.ports: return True + for start,end in self.ranges: + f = lambda x: start <= x <= end + if f(val1): return True + return False + + if val2 and val1: + for start,end in self.ranges: + f = lambda x: start <= x <= end + if f(val1) and f(val2): return True + + for start,end in self.ranges: + f = lambda x: start <= x <= end + if f(val): return True + + def add(self, val1, val2=None): + self._sanitycheck(val1) + + # add as a single port instead + if val2 == val1: val2 = None + if val2: + self._sanitycheck(val2) + start = min(val1,val2) + end = max(val1,val2) + self.ranges.append((start,end)) + # reduce to largest continuous ranges + self._squash() + else: + if val1 in self: return + self.ports.add(val1) + + # reset port dispenser + if self.portdispenser: + self.portdispenser = None + + def getPort(self): + # returns a single valid port + if not self.portdispenser: + self.portdispenser = self.__iter__() + try: + return self.portdispenser.next() + except StopIteration, AttributeError: + self.portdispenser = self.__iter__() + return self.portdispenser.next() + + def _squash(self): + # merge intersecting ranges + if len(self.ranges) > 1: + self.ranges.sort(key=lambda x: x[0]) + squashed = [self.ranges.pop(0)] + for r in self.ranges: + if (squashed[-1][0] <= r[0] <= squashed[-1][1]): + #intersection, extend r1, drop r2 + if r[1] > squashed[-1][1]: + squashed[-1] = (squashed[-1][0],r[1]) + # drop r + else: + # keep r + squashed.append(r) + + self.ranges = squashed + + # drop enclosed ports + ports = self.ports.copy() + for p in self.ports: + for s,e in self.ranges: + if s <= p <= e: + ports.remove(p) + self.ports = ports + + def __iter__(self): + for p in self.ports: + yield p + for l,h in self.ranges: + # +1 for inclusive range + for rr in 
xrange(l,h+1): + yield rr + + def __str__(self): + s = "" + for p in self.ports: + s += "".join(", %s"%p) + for l,h in self.ranges: + s += ", %s-%s" % (l,h) + return s.lstrip(", ") + + def __repr__(self): + return "PortList('%s')" % self.__str__() + +def parseORAddressLine(line): + #XXX should these go somewhere else? + re_ipv6 = re.compile("[([a-fA-F0-9:]+)]:(.*$)") + re_ipv4 = re.compile("((?:\d{1,3}.?){4}):(.*$)") + + address = None + portlist = None + # try regexp to discover ip version + for regex in [re_ipv4, re_ipv6]: + m = regex.match(line) + if m: + try: + address = ipaddr.IPAddress(m.group(1)) + portstring = m.group(2) + except IndexError, ValueError: break + portlist = PortList(portstring) + return address,portlist
def parseStatusFile(f): """DOCDOC"""
tor-commits@lists.torproject.org