[or-cvs] r12771: Initial svn import of bridge disbursal code (/ bridgedb bridgedb/trunk bridgedb/trunk/lib bridgedb/trunk/lib/bridgedb)

nickm at seul.org nickm at seul.org
Tue Dec 11 23:21:20 UTC 2007


Author: nickm
Date: 2007-12-11 18:21:20 -0500 (Tue, 11 Dec 2007)
New Revision: 12771

Added:
   bridgedb/
   bridgedb/branches/
   bridgedb/tags/
   bridgedb/trunk/
   bridgedb/trunk/TODO
   bridgedb/trunk/lib/
   bridgedb/trunk/lib/bridgedb/
   bridgedb/trunk/lib/bridgedb/Bridges.py
   bridgedb/trunk/lib/bridgedb/Dist.py
   bridgedb/trunk/lib/bridgedb/Main.py
Log:
Initial svn import of bridge disbursal code

Added: bridgedb/trunk/TODO
===================================================================
--- bridgedb/trunk/TODO	                        (rev 0)
+++ bridgedb/trunk/TODO	2007-12-11 23:21:20 UTC (rev 12771)
@@ -0,0 +1,16 @@
+
+For dec:
+- better name
+- https frontend
+- email frontend
+- normalize email correctly
+- figure out which python version I need; document that.
+- break time into periods
+
+Later:
+- better area division logic
+- make all proxies get stuck in their own area.
+- implement hop
+- implement slightly nicer logging
+
+

Added: bridgedb/trunk/lib/bridgedb/Bridges.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Bridges.py	                        (rev 0)
+++ bridgedb/trunk/lib/bridgedb/Bridges.py	2007-12-11 23:21:20 UTC (rev 12771)
@@ -0,0 +1,304 @@
+#!/usr/bin/python
+
+import binascii
+import bisect
+import hashlib
+import hmac
+import socket
+import struct
+import time
+
+# Length of a bridge fingerprint as a hex string, and of the same identity
+# as raw bytes.
+HEX_FP_LEN = 40
+ID_LEN = 20
+
+# Length of an HMAC-SHA256 digest (used as a ring position) as a hex string
+# and as raw bytes.
+HEX_DIGEST_LEN = 64
+DIGEST_LEN = 32
+
+def is_valid_ip(ip):
+    """Return True iff `ip` is a string that socket.inet_aton() accepts."""
+    try:
+        socket.inet_aton(ip)
+    except socket.error:
+        # This was misspelled "socekt.error", so a bad address raised
+        # NameError instead of being reported as invalid.
+        return False
+    else:
+        return True
+
+def is_valid_fingerprint(fp):
+    """Return True iff `fp` is HEX_FP_LEN characters of valid hex."""
+    if len(fp) != HEX_FP_LEN:
+        return False
+    try:
+        # Decode rather than encode: toHex() accepts any string, so it could
+        # never reject a fingerprint containing non-hex characters.
+        fromHex(fp)
+    except TypeError:
+        return False
+    else:
+        return True
+
+# Short aliases: toHex encodes raw bytes as a hex string; fromHex decodes a
+# hex string back into raw bytes.
+toHex = binascii.b2a_hex
+fromHex = binascii.a2b_hex
+
+def get_hmac(k,v):
+    """Return the raw HMAC-SHA256 digest of message `v` under key `k`."""
+    return hmac.new(k, v, hashlib.sha256).digest()
+
+def get_hmac_fn(k, hex=True):
+    """Return a one-argument function computing HMAC-SHA256 under key `k`.
+
+    The returned function yields a hex string when `hex` is true and the
+    raw digest otherwise.  The keyed state is built once here and copied
+    for every message.
+    """
+    keyed = hmac.new(k, digestmod=hashlib.sha256)
+    def hmac_fn(v):
+        mac = keyed.copy()
+        mac.update(v)
+        if not hex:
+            return mac.digest()
+        return mac.hexdigest()
+    return hmac_fn
+
+def chopString(s, size):
+    """Yield consecutive `size`-long slices of `s`; the last may be shorter."""
+    for start in xrange(0, len(s), size):
+        end = start + size
+        yield s[start:end]
+
+class Bridge:
+    """A single bridge: nickname, IP address, OR port and identity.
+
+    Exactly one of `fingerprint` (a hex string of HEX_FP_LEN characters) or
+    `id_digest` (a raw string of ID_LEN bytes) must be supplied.  Either way
+    the identity is kept as a lowercase hex string in self.fingerprint.
+
+    Raises TypeError if no identity is given or the one given is malformed.
+    """
+    def __init__(self, nickname, ip, orport, fingerprint=None, id_digest=None):
+        self.nickname = nickname
+        self.ip = ip
+        self.orport = orport
+        if id_digest is not None:
+            assert fingerprint is None
+            # A raw identity is ID_LEN bytes (it hex-encodes to HEX_FP_LEN
+            # characters).  This used to be compared against DIGEST_LEN, the
+            # size of an HMAC digest, which no valid identity can match.
+            if len(id_digest) != ID_LEN:
+                raise TypeError("Bridge with invalid ID")
+            self.fingerprint = toHex(id_digest)
+        elif fingerprint is not None:
+            # Validate the hex string itself.  Validating fromHex(fingerprint)
+            # handed 20 raw bytes to a check that expects 40 hex characters,
+            # so every fingerprint was rejected.
+            if not is_valid_fingerprint(fingerprint):
+                raise TypeError("Bridge with invalid fingerprint")
+            self.fingerprint = fingerprint.lower()
+        else:
+            raise TypeError("Bridge with no ID")
+
+    def getID(self):
+        """Return the bridge identity as raw bytes."""
+        return fromHex(self.fingerprint)
+
+    def __repr__(self):
+        return "Bridge(%r,%r,%d,%r)"%(
+            self.nickname, self.ip, self.orport, self.fingerprint)
+
+    def getConfigLine(self):
+        """Return the "bridge IP:PORT FINGERPRINT" line for this bridge."""
+        return "bridge %s:%d %s" % (self.ip, self.orport, self.fingerprint)
+
+    def assertOK(self):
+        """Assert that the IP, fingerprint and port are all well-formed."""
+        assert is_valid_ip(self.ip)
+        assert is_valid_fingerprint(self.fingerprint)
+        assert 1 <= self.orport <= 65535
+
+def parseDescFile(f, bridge_purpose='bridge'):
+    """Yield a Bridge for each complete descriptor read from `f`.
+
+    `f` is any iterable of lines.  A descriptor is emitted when its
+    "router-signature" line is reached, provided a nickname, IP, OR port and
+    fingerprint were all seen and its "@purpose" matches `bridge_purpose`
+    (pass None to accept any purpose).  A leading "opt " is ignored.
+    """
+    name = addr = port = fp = purpose = None
+
+    for raw in f:
+        line = raw.strip()
+        if line.startswith("opt "):
+            line = line[4:]
+
+        if line.startswith("@purpose "):
+            purpose = line.split()[1]
+        elif line.startswith("router "):
+            fields = line.split()
+            if len(fields) >= 4:
+                name, addr = fields[1], fields[2]
+                port = int(fields[3])
+        elif line.startswith("fingerprint "):
+            # The fingerprint is printed in space-separated groups.
+            fp = line[12:].replace(" ", "")
+        elif line.startswith("router-signature"):
+            wanted = (bridge_purpose is None or purpose == bridge_purpose)
+            if wanted and name and addr and port and fp:
+                bridge = Bridge(name, addr, port, fp)
+                bridge.assertOK()
+                yield bridge
+            # Start afresh for the next descriptor in the file.
+            name = addr = port = fp = purpose = None
+
+class BridgeHolder:
+    """Base class for anything that bridges can be inserted into."""
+    def insert(self, bridge):
+        """Accept `bridge`.  Subclasses must override this."""
+        # NotImplemented is a comparison sentinel, not an exception;
+        # NotImplementedError is the exception for abstract methods.
+        raise NotImplementedError
+
+    def assignmentsArePersistent(self):
+        """Return True if a bridge assigned to this holder should stay here."""
+        return True
+
+class BridgeRing(BridgeHolder):
+    """Bridges placed on a ring at position HMAC(key, bridge ID)."""
+    def __init__(self, key):
+        # Ring position (raw HMAC digest) -> Bridge.
+        self.bridges = {}
+        # Raw bridge ID -> Bridge.
+        self.bridgesByID = {}
+        self.hmac = get_hmac_fn(key, hex=False)
+        # sortedKeys holds every ring position; it is only guaranteed to be
+        # in order while isSorted is true.
+        self.isSorted = False
+        self.sortedKeys = []
+
+    def insert(self, bridge):
+        """Add `bridge`, replacing any bridge already at its position."""
+        bridgeID = bridge.getID()
+        pos = self.hmac(bridgeID)
+        if pos not in self.bridges:
+            self.sortedKeys.append(pos)
+            self.isSorted = False
+        self.bridges[pos] = bridge
+        self.bridgesByID[bridgeID] = bridge
+
+    def sort(self):
+        """Put sortedKeys in order if it is not already."""
+        if not self.isSorted:
+            self.sortedKeys.sort()
+            self.isSorted = True
+
+    def _getBridgeKeysAt(self, pos, N=1):
+        """Return a new list of up to N positions starting at `pos`.
+
+        `pos` must be a raw digest of DIGEST_LEN bytes.  The walk wraps
+        around the end of the ring.  If the ring holds N positions or
+        fewer, all of them are returned.
+        """
+        assert len(pos) == DIGEST_LEN
+        if N >= len(self.sortedKeys):
+            # Return a copy: callers sort the result in place, and must not
+            # be handed the ring's own list.
+            return self.sortedKeys[:]
+        self.sort()
+        idx = bisect.bisect_left(self.sortedKeys, pos)
+        r = self.sortedKeys[idx:idx+N]
+        if len(r) < N:
+            # wrap around as needed.
+            r.extend(self.sortedKeys[:N - len(r)])
+        assert len(r) == N
+        return r
+
+    def getBridges(self, pos, N=1):
+        """Return up to N bridges at or after `pos`, ordered by position."""
+        keys = self._getBridgeKeysAt(pos, N)
+        keys.sort()
+        return [ self.bridges[k] for k in keys ]
+
+    def getBridgeByID(self, fp):
+        """Return the bridge whose raw ID is `fp`, or None if unknown."""
+        return self.bridgesByID.get(fp)
+
+    def __len__(self):
+        return len(self.bridges)
+
+
+class LogDB:
+    """Wrapper around a mapping that logs every modification to a file.
+
+    `kwd` is the tag written at the start of each log line, `db` is the
+    wrapped mapping and `logfile` is any object with a write() method.
+    Reads are passed straight through and are not logged.
+    """
+    def __init__(self, kwd, db, logfile):
+        self._kwd = kwd
+        self._db = db
+        self._logfile = logfile
+    def __delitem__(self, k):
+        self._logfile.write("%s: del[%r]\n"%(self._kwd, k))
+        del self._db[k]
+    def __setitem__(self, k, v):
+        self._logfile.write("%s: [%r] = [%r]\n"%(self._kwd, k, v))
+        self._db[k] = v
+    def setdefault(self, k, v):
+        # Only the branch that actually stores a value is logged.
+        try:
+            return self._db[k]
+        except KeyError:
+            self._logfile.write("%s: [%r] = [%r]\n"%(self._kwd, k, v))
+            self._db[k] = v
+            return v
+    def __len__(self):
+        return len(self._db)
+    def __getitem__(self, k):
+        return self._db[k]
+    def has_key(self, k):
+        return self._db.has_key(k)
+    def get(self, k, v=None):
+        return self._db.get(k, v)
+    def keys(self):
+        return self._db.keys()
+    # TODO: a rotate() method was started here as a bare "def rotate" with
+    # no signature or body.  That fragment was a syntax error that stopped
+    # the module from being imported, so it is left as a note.
+
+class PrefixStore:
+    """View of a mapping in which every key carries a fixed prefix.
+
+    Callers use bare keys; the prefix is added on the way in and removed
+    from the keys that keys() returns.
+    """
+    def __init__(self, store, prefix):
+        self._d = store
+        self._p = prefix
+    def _full(self, k):
+        # The key as it appears in the underlying store.
+        return self._p+k
+    def __setitem__(self, k, v):
+        self._d[self._full(k)] = v
+    def __delitem__(self, k):
+        del self._d[self._full(k)]
+    def __getitem__(self, k):
+        return self._d[self._full(k)]
+    def has_key(self, k):
+        return self._d.has_key(self._full(k))
+    def get(self, k, v=None):
+        return self._d.get(self._full(k), v)
+    def setdefault(self, k, v):
+        return self._d.setdefault(self._full(k), v)
+    def keys(self):
+        skip = len(self._p)
+        found = []
+        for full in self._d.keys():
+            if full.startswith(self._p):
+                found.append(full[skip:])
+        return found
+
+class FixedBridgeSplitter(BridgeHolder):
+    """Splits bridges deterministically among a fixed list of holders.
+
+    `rings` is a list of BridgeHolder instances.  Each bridge goes to the
+    holder selected by HMAC(key, bridge ID) modulo the number of holders.
+    """
+    # This was declared with "def", which made it a function containing two
+    # unused nested functions instead of a class.
+    def __init__(self, key, rings):
+        # Was get_mac_fn, which does not exist.
+        self.hmac = get_hmac_fn(key, hex=True)
+        self.rings = rings[:]
+        for r in self.rings:
+            assert(isinstance(r, BridgeHolder))
+
+    def insert(self, bridge):
+        """Insert `bridge` into the holder its ID hashes to."""
+        # Grab the first 4 bytes (8 hex digits)
+        digest = self.hmac(bridge.getID())
+        pos = long( digest[:8], 16 )
+        which = pos % len(self.rings)
+        # Was self.ring, an attribute that is never set.
+        self.rings[which].insert(bridge)
+
+class UnallocatedHolder(BridgeHolder):
+    """Holder that accepts bridges and discards them."""
+    def insert(self, bridge):
+        """Do nothing with `bridge`."""
+
+    def assignmentsArePersistent(self):
+        """Report that assignments to this holder are not persistent."""
+        return False
+
+class BridgeTracker:
+    """Records when each bridge was first and most recently inserted.
+
+    Both stores are mappings keyed by raw bridge ID; the values are UTC
+    timestamps formatted as "YYYY-MM-DD HH:MM".
+    """
+    def __init__(self, firstSeenStore, lastSeenStore):
+        self.lastSeenStore = lastSeenStore
+        self.firstSeenStore = firstSeenStore
+
+    def insert(self, bridge):
+        """Note that `bridge` has been seen just now."""
+        #XXXX is this really sane?  Should we track minutes? hours?
+        key = bridge.getID()
+        stamp = time.strftime("%Y-%m-%d %H:%M", time.gmtime())
+        # Last-seen is refreshed every time; first-seen is written only if
+        # the bridge has no entry yet.
+        self.lastSeenStore[key] = stamp
+        self.firstSeenStore.setdefault(key, stamp)
+
+class BridgeSplitter(BridgeHolder):
+    """Divides bridges among named holders in proportion to their shares.
+
+    `store` is a mapping from raw bridge ID to ring name.  A bridge that
+    already has an entry naming a known ring goes back to that ring.
+    Otherwise a ring is chosen from HMAC(key, bridge ID), weighted by the
+    `p` values given to addRing().  The choice is written to `store` only
+    if the chosen ring reports its assignments as persistent.
+    """
+    # This was declared with "def", which made it a function containing
+    # unused nested functions instead of a class.
+    def __init__(self, key, store):
+        # insert() calls self.hmac(bridgeID) and parses the result as hex,
+        # so this must be a hex-returning function.  It used to be a bare
+        # hmac object, which cannot be called.
+        self.hmac = get_hmac_fn(key, hex=True)
+        self.store = store
+        self.ringsByName = {}
+        self.totalP = 0
+        # pValues[i] is the cumulative share at which rings[i] begins.
+        self.pValues = []
+        self.rings = []
+        self.statsHolders = []
+
+    def addRing(self, ring, ringname, p=1):
+        """Register holder `ring` under `ringname` with share `p`."""
+        assert isinstance(ring, BridgeHolder)
+        self.ringsByName[ringname] = ring
+        self.pValues.append(self.totalP)
+        self.rings.append(ringname)
+        self.totalP += p
+
+    def addTracker(self, t):
+        """Register `t`; its insert() is called for every inserted bridge."""
+        self.statsHolders.append(t)
+
+    def insert(self, bridge):
+        """Notify all trackers, then place `bridge` in its ring."""
+        assert self.rings
+        for s in self.statsHolders:
+            s.insert(bridge)
+        bridgeID = bridge.getID()
+        ringname = self.store.get(bridgeID, "")
+        ring = self.ringsByName.get(ringname)
+        if ring is not None:
+            ring.insert(bridge)
+        else:
+            digest = self.hmac(bridgeID)
+            n = int(digest[:8], 16) % self.totalP
+            # Find the ring whose share interval contains n.  This used to
+            # look up an undefined name "p" instead of n.
+            pos = bisect.bisect_right(self.pValues, n) - 1
+            assert 0 <= pos < len(self.rings)
+            ringname = self.rings[pos]
+            ring = self.ringsByName.get(ringname)
+            if ring.assignmentsArePersistent():
+                self.store[bridgeID] = ringname
+            ring.insert(bridge)
+
+if __name__ == '__main__':
+    # Smoke test: parse each descriptor file named on the command line and
+    # insert its bridges into a ring keyed with a fixed test key.
+    import sys
+    br = BridgeRing("hello")
+    for fname in sys.argv[1:]:
+        f = open(fname)
+        try:
+            for bridge in parseDescFile(f):
+                br.insert(bridge)
+        finally:
+            # Close each file even if parsing fails part-way through.
+            f.close()
+

Added: bridgedb/trunk/lib/bridgedb/Dist.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Dist.py	                        (rev 0)
+++ bridgedb/trunk/lib/bridgedb/Dist.py	2007-12-11 23:21:20 UTC (rev 12771)
@@ -0,0 +1,80 @@
+
+import Bridges
+import socket
+
+def uniformMap(ip):
+    """Map a dotted-quad IP string to an 'area': its first three bytes."""
+    # inet_aton packs the address into 4 bytes; dropping the last one puts
+    # addresses that share the first three bytes in the same area.
+    return socket.inet_aton(ip)[:3]
+
+
+class IPBasedDistributor(Bridges.BridgeHolder):
+    """Hands out bridges according to the requester's IP address.
+
+    Bridges are split among `nClusters` rings.  `areaMapper` turns an IP
+    into an area string; each area is tied to one ring, and its position
+    within that ring depends on the area and the epoch.  Every HMAC key
+    used here is derived from `key`.
+    """
+    def __init__(self, areaMapper, nClusters, key):
+        self.areaMapper = areaMapper
+
+        self.rings = []
+        for n in xrange(nClusters):
+            key1 = Bridges.get_hmac(key, "Order-Bridges-In-Ring-%d"%n)
+            self.rings.append( Bridges.BridgeRing(key1) )
+
+        key2 = Bridges.get_hmac(key, "Assign-Bridges-To-Rings")
+        self.splitter = Bridges.FixedBridgeSplitter(key2, self.rings)
+
+        # Ring positions are raw digests: BridgeRing asserts that a position
+        # is DIGEST_LEN bytes long, so this must not return hex.
+        key3 = Bridges.get_hmac(key, "Order-Areas-In-Rings")
+        self.areaOrderHmac = Bridges.get_hmac_fn(key3, hex=False)
+
+        # Was get_hmac_fun, which does not exist.  Hex is right here because
+        # the result is parsed with int(..., 16) below.
+        key4 = Bridges.get_hmac(key, "Assign-Areas-To-Rings")
+        self.areaClusterHmac = Bridges.get_hmac_fn(key4, hex=True)
+
+    def insert(self, bridge):
+        """Add `bridge` to whichever ring the splitter assigns it to."""
+        self.splitter.insert(bridge)
+
+    def getBridgesForIP(self, ip, epoch, N=1):
+        """Return up to N bridges for a request from `ip` during `epoch`."""
+        area = self.areaMapper(ip)
+
+        # Which bridge cluster should we look at?
+        h = int( self.areaClusterHmac(area)[:8], 16 )
+        clusterNum = h % len(self.rings)
+        ring = self.rings[clusterNum]
+
+        # Now get the bridge.
+        pos = self.areaOrderHmac("<%s>%s" % (epoch, area))
+        return ring.getBridges(pos, N)
+
+
+def normalizeEmail(addr):
+    """Return `addr` without surrounding whitespace, in lowercase."""
+    #XXXX make this better.
+    trimmed = addr.strip()
+    return trimmed.lower()
+
+class EmailBasedDistributor(Bridges.BridgeHolder):
+    """Hands out bridges according to the requester's email address.
+
+    `store` maps a normalized address to the concatenated raw IDs of the
+    bridges already given to it, so an address that asks again gets the
+    same bridges (minus any that are no longer in the ring).
+    """
+    def __init__(self, key, store):
+
+        # Ring positions are raw digests: BridgeRing asserts that a position
+        # is DIGEST_LEN bytes long, so this must not return hex.
+        key1 = Bridges.get_hmac(key, "Map-Addresses-To-Ring")
+        self.emailHmac = Bridges.get_hmac_fn(key1, hex=False)
+
+        key2 = Bridges.get_hmac(key, "Order-Bridges-In-Ring")
+        self.ring = Bridges.BridgeRing(key2)
+        self.store = store
+
+    def insert(self, bridge):
+        """Add `bridge` to the ring."""
+        self.ring.insert(bridge)
+
+    def getBridgesForEmail(self, emailaddress, epoch, N=1):
+        """Return the bridges for `emailaddress`, choosing up to N if new."""
+        emailaddress = normalizeEmail(emailaddress)
+        if self.store.has_key(emailaddress):
+            # Seen before: replay the remembered bridges, skipping any that
+            # the ring no longer knows about.
+            result = []
+            ids = self.store[emailaddress]
+            for bridgeID in Bridges.chopString(ids, Bridges.ID_LEN):
+                b = self.ring.getBridgeByID(bridgeID)
+                if b is not None:
+                    result.append(b)
+            return result
+
+        pos = self.emailHmac("<%s>%s" % (epoch, emailaddress))
+        result = self.ring.getBridges(pos, N)
+        memo = "".join(b.getID() for b in result)
+        self.store[emailaddress] = memo
+        return result
+

Added: bridgedb/trunk/lib/bridgedb/Main.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Main.py	                        (rev 0)
+++ bridgedb/trunk/lib/bridgedb/Main.py	2007-12-11 23:21:20 UTC (rev 12771)
@@ -0,0 +1,80 @@
+
+import anydbm
+
+import Bridges
+import Dist
+import os
+import sys
+
+# Default configuration values.
+# NOTE(review): load() and startup() read their `cfg` argument by attribute
+# (cfg.DB_FILE), which a plain dict does not support; confirm how CONFIG is
+# meant to be passed to them.
+CONFIG = dict(
+    BRIDGE_FILES = [ "./cached-descriptors", "./cached-descriptors.new" ],
+    BRIDGE_PURPOSE = "bridge",
+    # The next two, and MASTER_KEY_FILE, are single paths: they are handed
+    # directly to anydbm.open() and open().  They used to be one-element
+    # lists.
+    DB_FILE = "./bridgedist",
+    DB_LOG_FILE = "./bridgedist.log",
+    HTTPS_DIST = True,
+    EMAIL_DIST = True,
+    N_IP_CLUSTERS = 8,
+    MASTER_KEY_FILE = "./secret_key",
+    HTTPS_SHARE=10,
+    EMAIL_SHARE=10,
+    RESERVED_SHARE=2
+  )
+
+def getKey(fname):
+    """Return the secret key stored in `fname`, creating it if needed.
+
+    If `fname` cannot be opened for reading, 32 random bytes are generated,
+    written to `fname` with mode 0400, and returned.
+    """
+    try:
+        # Binary mode: the key is arbitrary bytes, not text.
+        f = open(fname, 'rb')
+    except IOError:
+        k = os.urandom(32)
+        # O_CREAT was missing, so creating the file failed whenever it did
+        # not already exist.  The binary flag is named O_BINARY (it exists
+        # only on some platforms); "O_BIN" never matched anything.
+        flags = (os.O_WRONLY|os.O_CREAT|os.O_TRUNC|
+                 getattr(os, "O_BINARY", 0))
+        fd = os.open(fname, flags, 0400)
+        try:
+            os.write(fd, k)
+        finally:
+            os.close(fd)
+    else:
+        try:
+            k = f.read()
+        finally:
+            f.close()
+
+    return k
+
+def load(cfg, splitter):
+    """Insert every bridge found in cfg.BRIDGE_FILES into `splitter`.
+
+    Only descriptors whose purpose matches cfg.BRIDGE_PURPOSE are used.
+    """
+    for fname in cfg.BRIDGE_FILES:
+        f = open(fname, 'r')
+        try:
+            for bridge in Bridges.parseDescFile(f, cfg.BRIDGE_PURPOSE):
+                splitter.insert(bridge)
+        finally:
+            # Close the file even if parsing or insertion raises.
+            f.close()
+
+def startup(cfg):
+    """Build the splitter and distributors described by `cfg`, then load.
+
+    `cfg` is read by attribute: MASTER_KEY_FILE, DB_FILE, DB_LOG_FILE,
+    HTTPS_DIST, HTTPS_SHARE, N_IP_CLUSTERS, EMAIL_DIST, EMAIL_SHARE and
+    RESERVED_SHARE, plus whatever load() reads.
+    """
+    # These used to be read as bare globals (MASTER_KEY_FILE, DB_LOG_FILE)
+    # that do not exist; they are settings on cfg.
+    key = getKey(cfg.MASTER_KEY_FILE)
+
+    store = anydbm.open(cfg.DB_FILE, "c", 0600)
+    if cfg.DB_LOG_FILE:
+        dblogfile = open(cfg.DB_LOG_FILE, "a+", 0)
+        # LogDB is defined in the Bridges module.
+        store = Bridges.LogDB("db", store, dblogfile)
+
+    splitter = Bridges.BridgeSplitter(Bridges.get_hmac(key, "Splitter-Key"),
+                                      Bridges.PrefixStore(store, "sp|"))
+
+    if cfg.HTTPS_DIST and cfg.HTTPS_SHARE:
+        # Fixed here: the class is IPBasedDistributor, the cluster count is
+        # a cfg setting rather than a Dist attribute, and the result was
+        # stored under a misspelled name ("ipDistrbutor").
+        ipDistributor = Dist.IPBasedDistributor(Dist.uniformMap,
+                                 cfg.N_IP_CLUSTERS,
+                                 Bridges.get_hmac(key, "HTTPS-IP-Dist-Key"))
+        splitter.addRing(ipDistributor, "https", cfg.HTTPS_SHARE)
+
+    if cfg.EMAIL_DIST and cfg.EMAIL_SHARE:
+        # The class is EmailBasedDistributor (capital E).
+        emailDistributor = Dist.EmailBasedDistributor(
+            Bridges.get_hmac(key, "Email-Dist-Key"),
+            Bridges.PrefixStore(store, "em|"))
+        splitter.addRing(emailDistributor, "email", cfg.EMAIL_SHARE)
+
+    if cfg.RESERVED_SHARE:
+        splitter.addRing(Bridges.UnallocatedHolder(),
+                         "unallocated",
+                         cfg.RESERVED_SHARE)
+
+    stats = Bridges.BridgeTracker(Bridges.PrefixStore(store, "fs"),
+                                  Bridges.PrefixStore(store, "ls"))
+    splitter.addTracker(stats)
+
+    load(cfg, splitter)
+
+    # XXXX create twisted listeners.
+



More information about the tor-commits mailing list