[or-cvs] r12783: Add python packaging and build stuff, time interval calculat (in bridgedb/trunk: . lib/bridgedb)

nickm at seul.org nickm at seul.org
Wed Dec 12 17:55:52 UTC 2007


Author: nickm
Date: 2007-12-12 12:55:52 -0500 (Wed, 12 Dec 2007)
New Revision: 12783

Added:
   bridgedb/trunk/LICENSE
   bridgedb/trunk/MANIFEST.in
   bridgedb/trunk/README
   bridgedb/trunk/lib/bridgedb/Time.py
   bridgedb/trunk/lib/bridgedb/__init__.py
   bridgedb/trunk/setup.py
Modified:
   bridgedb/trunk/
   bridgedb/trunk/TODO
   bridgedb/trunk/lib/bridgedb/Bridges.py
   bridgedb/trunk/lib/bridgedb/Dist.py
   bridgedb/trunk/lib/bridgedb/Main.py
Log:
Add python packaging and build stuff, time interval calculation, and email address normalizer.  Switch hash to SHA1, since python 2.4 does not seem to have sha256, but make it resettable via a DIGESTMOD variable in Main.py


Property changes on: bridgedb/trunk
___________________________________________________________________
Name: svn:ignore
   + build
dist
MANIFEST

Added: bridgedb/trunk/LICENSE
===================================================================
--- bridgedb/trunk/LICENSE	                        (rev 0)
+++ bridgedb/trunk/LICENSE	2007-12-12 17:55:52 UTC (rev 12783)
@@ -0,0 +1,31 @@
+BridgeDB is distributed under this license:
+
+Copyright (c) 2007, The Tor Project, Inc.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+    * Neither the names of the copyright owners nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Added: bridgedb/trunk/MANIFEST.in
===================================================================
--- bridgedb/trunk/MANIFEST.in	                        (rev 0)
+++ bridgedb/trunk/MANIFEST.in	2007-12-12 17:55:52 UTC (rev 12783)
@@ -0,0 +1 @@
+include README TODO

Added: bridgedb/trunk/README
===================================================================
--- bridgedb/trunk/README	                        (rev 0)
+++ bridgedb/trunk/README	2007-12-12 17:55:52 UTC (rev 12783)
@@ -0,0 +1,7 @@
+
+Empty readme file.  Write this.
+
+ - You'll need Python 2.4 or later.
+
+ - Send your questions to nickm.
+

Modified: bridgedb/trunk/TODO
===================================================================
--- bridgedb/trunk/TODO	2007-12-12 17:08:54 UTC (rev 12782)
+++ bridgedb/trunk/TODO	2007-12-12 17:55:52 UTC (rev 12783)
@@ -1,13 +1,11 @@
 
 For dec:
-- better name
 - https frontend
 - email frontend
-- normalize email correctly
-- figure out which python version I need; document that.
-- break time into periods
+- write a README
 
 Later:
+- document stuff better
 - better area division logic
 - make all proxies get stuck in their own area.
 - implement hop

Modified: bridgedb/trunk/lib/bridgedb/Bridges.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Bridges.py	2007-12-12 17:08:54 UTC (rev 12782)
+++ bridgedb/trunk/lib/bridgedb/Bridges.py	2007-12-12 17:55:52 UTC (rev 12783)
@@ -1,9 +1,11 @@
-#!/usr/bin/python
+# BridgeDB by Nick Mathewson.
+# Copyright (c) 2007, The Tor Project, Inc.
+# See LICENSE for licensing information
 
 import binascii
 import bisect
-import hashlib
 import hmac
+import sha
 import socket
 import struct
 import time
@@ -11,8 +13,9 @@
 HEX_FP_LEN = 40
 ID_LEN = 20
 
-HEX_DIGEST_LEN = 64
-DIGEST_LEN = 32
+DIGESTMOD = sha
+HEX_DIGEST_LEN = 40
+DIGEST_LEN = 20
 
 def is_valid_ip(ip):
     try:
@@ -36,11 +39,11 @@
 fromHex = binascii.a2b_hex
 
 def get_hmac(k,v):
-    h = hmac.new(k, v, hashlib.sha256)
+    h = hmac.new(k, v, digestmod=DIGESTMOD)
     return h.digest()
 
 def get_hmac_fn(k, hex=True):
-    h = hmac.new(k, digestmod=hashlib.sha256)
+    h = hmac.new(k, digestmod=DIGESTMOD)
     def hmac_fn(v):
         h_tmp = h.copy()
         h_tmp.update(v)
@@ -197,8 +200,8 @@
         return self._db.get(k, v)
     def keys(self):
         return self._db.keys()
-    def rotate
 
+
 class PrefixStore:
     def __init__(self, store, prefix):
         self._d = store
@@ -256,7 +259,7 @@
 
 def BridgeSplitter(BridgeHolder):
     def __init__(self, key, store):
-        self.hmac = hmac.new(key, digestmod=hashlib.sha256)
+        self.hmac = hmac.new(key, digestmod=DIGESTMOD)
         self.store = store
         self.ringsByName = {}
         self.totalP = 0

Modified: bridgedb/trunk/lib/bridgedb/Dist.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Dist.py	2007-12-12 17:08:54 UTC (rev 12782)
+++ bridgedb/trunk/lib/bridgedb/Dist.py	2007-12-12 17:55:52 UTC (rev 12783)
@@ -1,5 +1,10 @@
+# BridgeDB by Nick Mathewson.
+# Copyright (c) 2007, The Tor Project, Inc.
+# See LICENSE for licensing information
 
-import Bridges
+import bridgedb.Bridges
+
+import re
 import socket
 
 def uniformMap(ip):
@@ -10,23 +15,23 @@
     return s[:3]
 
 
-class IPBasedDistributor(Bridges.BridgeHolder):
+class IPBasedDistributor(bridgedb.Bridges.BridgeHolder):
     def __init__(self, areaMapper, nClusters, key):
         self.areaMapper = areaMapper
 
         self.rings = []
         for n in xrange(nClusters):
-            key1 = Bridges.get_hmac(key, "Order-Bridges-In-Ring-%d"%n)
-            self.rings.append( Bridges.BridgeRing(key1) )
+            key1 = bridgedb.Bridges.get_hmac(key, "Order-Bridges-In-Ring-%d"%n)
+            self.rings.append( bridgedb.Bridges.BridgeRing(key1) )
 
-        key2 = Bridges.get_hmac(key, "Assign-Bridges-To-Rings")
-        self.splitter = Bridges.FixedBridgeSplitter(key2, self.rings)
+        key2 = bridgedb.Bridges.get_hmac(key, "Assign-Bridges-To-Rings")
+        self.splitter = bridgedb.Bridges.FixedBridgeSplitter(key2, self.rings)
 
-        key3 = Bridges.get_hmac(key, "Order-Areas-In-Rings")
-        self.areaOrderHmac = Bridges.get_hmac_fn(key3, hex=True)
+        key3 = bridgedb.Bridges.get_hmac(key, "Order-Areas-In-Rings")
+        self.areaOrderHmac = bridgedb.Bridges.get_hmac_fn(key3, hex=True)
 
-        key4 = Bridges.get_hmac(key, "Assign-Areas-To-Rings")
-        self.areaClusterHmac = Bridges.get_hmac_fun(key4, hex=True)
+        key4 = bridgedb.Bridges.get_hmac(key, "Assign-Areas-To-Rings")
+        self.areaClusterHmac = bridgedb.Bridges.get_hmac_fun(key4, hex=True)
 
     def insert(self, bridge):
         self.splitter.insert(bridge)
@@ -44,29 +49,99 @@
         return ring.getBridges(pos, N)
 
 
-def normalizeEmail(addr):
-    #XXXX make this better.
-    return addr.strip().lower()
 
-class EmailBasedDistributor(Bridges.BridgeHolder):
-    def __init__(self, key, store):
 
-        key1 = Bridges.get_hmac(key, "Map-Addresses-To-Ring")
-        self.emailHmac = Bridges.get_hmac_fn(key1, hex=1)
+# These characters are the ones that RFC2822 allows.
+#ASPECIAL = '!#$%&*+-/=?^_`{|}~'
+#ASPECIAL += "\\\'"
 
-        key2 = Bridges.get_hmac(key, "Order-Bridges-In-Ring")
-        self.ring = Bridges.BrigeRing(key2)
+# These are the ones we're pretty sure we can handle right.
+ASPECIAL = '-_+/=_~'
+ACHAR = r'[\w%s]' % "".join("\\%s"%c for c in ASPECIAL)
+DOTATOM = r'%s+(?:\.%s+)*'%(ACHAR,ACHAR)
+DOMAIN = r'\w+(?:\.\w+)*'
+ADDRSPEC = r'(%s)\@(%s)'%(DOTATOM, DOMAIN)
+
+SPACE_PAT = re.compile(r'\s+')
+ADDRSPEC_PAT = re.compile(ADDRSPEC)
+
+class BadEmail(Exception):
+    def __init__(self, msg, email):
+        Exception.__init__(self, msg)
+        self.email = email
+
+class UnsupportedDomain(BadEmail):
+    pass
+
+def extractAddrSpec(addr):
+    orig_addr = addr
+    addr = SPACE_PAT.sub(' ', addr)
+    addr = addr.strip()
+    # Only works on usual-form addresses; raises BadEmail on weird
+    # address form.  That's okay, since we'll only get those when
+    # people are trying to fool us.
+    if '<' in addr:
+        # Take the _last_ index of <, so that we don't need to bother
+        # with quoting tricks.
+        idx = addr.rindex('<')
+        addr = addr[idx:]
+        m = re.search(r'<([^>]*)>', addr)
+        if m is None:
+            raise BadEmail("Couldn't extract address spec", orig_addr)
+        addr = m.group(1)
+
+    # At this point, addr holds a putative addr-spec.  We only allow the
+    # following form:
+    #   addr-spec = local-part "@" domain
+    #   local-part = dot-atom
+    #   domain = dot-atom
+    #
+    # In particular, we are disallowing: obs-local-part, obs-domain,
+    # comment, and obs-FWS.
+    #
+    # Other forms exist, but none of the incoming services we recognize
+    # support them.
+    addr = addr.replace(" ", "")
+    m = ADDRSPEC_PAT.match(addr)
+    if not m:
+        raise BadEmail("Bad address spec format", orig_addr)
+    localpart, domain = m.groups()
+    return localpart, domain
+
+def normalizeEmail(addr, domainmap):
+    addr = addr.lower()
+    localpart, domain = extractAddrSpec(addr)
+    if domainmap is not None:
+        domain = domainmap.get(domain, None)
+        if domain is None:
+            raise UnsupportedDomain("Domain not supported", addr)
+    idx = localpart.find('+')
+    if idx >= 0:
+        localpart = localpart[:idx]
+    return "%s@%s"%(localpart, domain)
+
+class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder):
+    def __init__(self, key, store, domainmap):
+
+        key1 = bridgedb.Bridges.get_hmac(key, "Map-Addresses-To-Ring")
+        self.emailHmac = bridgedb.Bridges.get_hmac_fn(key1, hex=1)
+
+        key2 = bridgedb.Bridges.get_hmac(key, "Order-Bridges-In-Ring")
+        self.ring = bridgedb.Bridges.BrigeRing(key2)
         self.store = store
+        self.domainmap = domainmap
 
     def insert(self, bridge):
-        self.ring.insert(bridge):
+        self.ring.insert(bridge)
 
     def getBridgesForEmail(self, emailaddress, epoch, N=1):
         emailaddress = normalizeEmail(emailaddress)
+        if emailAddress is None:
+            return [] #XXXX raise an exception.
         if store.has_key(emailaddress):
             result = []
-            ids = store[emailaddress])
-            for id in Bridges.chopString(ids, Bridges.ID_LEN)
+            ids = store[emailaddress]
+            for id in bridgedb.Bridges.chopString(ids, bridgedb.Bridges.ID_LEN):
                 b = self.ring.getBridgeByID(id)
                 if b != None:
                     result.append(b)
@@ -78,3 +153,15 @@
         self.store[emailaddress] = memo
         return result
 
+if __name__ == '__main__':
+    import sys
+    for line in sys.stdin:
+        line = line.strip()
+        if line.startswith("From: "):
+            line = line[6:]
+        try:
+            normal = normalizeEmail(line, None)
+            print normal
+        except BadEmail, e:
+            print line, e
+        

Modified: bridgedb/trunk/lib/bridgedb/Main.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Main.py	2007-12-12 17:08:54 UTC (rev 12782)
+++ bridgedb/trunk/lib/bridgedb/Main.py	2007-12-12 17:55:52 UTC (rev 12783)
@@ -1,12 +1,20 @@
+# BridgeDB by Nick Mathewson.
+# Copyright (c) 2007, The Tor Project, Inc.
+# See LICENSE for licensing information
 
 import anydbm
 
-import Bridges
-import Dist
 import os
 import sys
 
-CONFIG = dict(
+import bridgedb.Bridges as Bridges
+import bridgedb.Dist as Dist
+
+class Conf:
+    def __init__(self, **attrs):
+        self.__dict__.update(attrs)
+
+CONFIG = Conf(
     BRIDGE_FILES = [ "./cached-descriptors", "./cached-descriptors.new" ],
     BRIDGE_PURPOSE = "bridge",
     DB_FILE = [ "./bridgedist" ],
@@ -14,10 +22,13 @@
     HTTPS_DIST = True,
     EMAIL_DIST = True,
     N_IP_CLUSTERS = 8,
-    MASTER_KEY_FILE = [ "./secret_key" ]
+    MASTER_KEY_FILE = [ "./secret_key" ],
     HTTPS_SHARE=10,
     EMAIL_SHARE=10,
-    RESERVED_SHARE=2
+    EMAIL_DOMAINS = [ "gmail.com", "yahoo.com" ],
+    EMAIL_DOMAIN_MAP = { "mail.google.com" : "gmail.com",
+                         "googlemail.com" : "gmail.com", },
+    RESERVED_SHARE=2,
   )
 
 def getKey(fname):
@@ -60,9 +71,12 @@
         splitter.addRing(ipDistributor, "https", cfg.HTTPS_SHARE)
 
     if cfg.EMAIL_DIST and cfg.EMAIL_SHARE:
+        for d in cfg.EMAIL_DOMAINS:
+            cfg.EMAIL_DOMAIN_MAP[d] = d
         emailDistributor = Dist.emailBasedDistributor(
             Bridges.get_hmac(key, "Email-Dist-Key"),
-            Bridges.PrefixStore(store, "em|"))
+            Bridges.PrefixStore(store, "em|"),
+            cfg.EMAIL_DOMAIN_MAP.copy())
         splitter.addRing(emailDistributor, "email", cfg.EMAIL_SHARE)
 
     if cfg.RESERVED_SHARE:

Added: bridgedb/trunk/lib/bridgedb/Time.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Time.py	                        (rev 0)
+++ bridgedb/trunk/lib/bridgedb/Time.py	2007-12-12 17:55:52 UTC (rev 12783)
@@ -0,0 +1,74 @@
+# BridgeDB by Nick Mathewson.
+# Copyright (c) 2007, The Tor Project, Inc.
+# See LICENSE for licensing information
+
+import calendar
+import time
+
+KNOWN_INTERVALS = [ "hour", "day", "week", "month" ]
+N_ELEMENTS = { 'month' : 2,
+               'day' : 3,
+               'hour' : 4 }
+
+class IntervalSchedule:
+    def __init__(self, intervaltype, count):
+        it = intervaltype.lower()
+        if it.endswith("s"): it = it[:-1]
+        if it not in KNOWN_INTERVALS:
+            raise TypeError("What's a %s?"%it)
+        assert count > 0
+        if it == 'week':
+            it = 'day'
+            count *= 7
+        self.itype = it
+        self.count = count
+        self.n_elements = N_ELEMENTS[it]
+
+    def _intervalStart(self, when):
+        if self.itype == 'month':
+            tm = time.gmtime(when)
+            n = tm.tm_year * 12 + tm.tm_mon - 1
+            n -= (n % self.count)
+            month = n%12 + 1
+            return calendar.timegm((n//12, month, 1, 0, 0, 0))
+        elif self.itype == 'day':
+            when -= when % (86400 * self.count)
+            return when
+        elif self.itype == 'hour':
+            when -= when % (3600 * self.count)
+            return when
+        else:
+            assert False
+
+    def getInterval(self, when):
+        if self.itype == 'month':
+            tm = time.gmtime(when)
+            n = tm.tm_year * 12 + tm.tm_mon - 1
+            n -= (n % self.count)
+            month = n%12 + 1
+            return "%04d-%02d" % (n // 12, month)
+        elif self.itype == 'day':
+            when = self._intervalStart(when) + 7200 #slop
+            tm = time.gmtime(when)
+            return "%04d-%02d-%02d" % (tm.tm_year, tm.tm_mon, tm.tm_mday)
+        elif self.itype == 'hour':
+            when = self._intervalStart(when) + 120 #slop
+            tm = time.gmtime(when)
+            return "%04d-%02d-%02 %02d" % (tm.tm_year, tm.tm_mon, tm.tm_mday,
+                                           tm.tm_hour)
+        else:
+            assert False
+
+    def nextIntervalStarts(self, when):
+        if self.itype == 'month':
+            tm = time.gmtime(when)
+            n = tm.tm_year * 12 + tm.tm_mon - 1
+            n -= (n % self.count)
+            month = n%12 + 1
+            tm = (n // 12, month+self.count, 1, 0,0,0)
+            return calendar.timegm(tm)
+        elif self.itype == 'day':
+            return self._intervalStart(when) + 86400 * self.coont
+        elif self.itype == 'hour':
+            return self._intervalStart(when) + 3600 * self.coont
+

Added: bridgedb/trunk/lib/bridgedb/__init__.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/__init__.py	                        (rev 0)
+++ bridgedb/trunk/lib/bridgedb/__init__.py	2007-12-12 17:55:52 UTC (rev 12783)
@@ -0,0 +1,2 @@
+
+# This file tells Python that this is an honest to goodness package.

Added: bridgedb/trunk/setup.py
===================================================================
--- bridgedb/trunk/setup.py	                        (rev 0)
+++ bridgedb/trunk/setup.py	2007-12-12 17:55:52 UTC (rev 12783)
@@ -0,0 +1,17 @@
+# BridgeDB by Nick Mathewson.
+# Copyright (c) 2007, The Tor Project, Inc.
+# See LICENSE for licensing information
+
+import distutils
+from distutils.core import setup
+
+setup(name='BridgeDB',
+      version='0.1',
+      description='Bridge disbursal tool for use with Tor anonymity network',
+      author='Nick Mathewson',
+      author_email='nickm at torproject dot org',
+      url='https://www.torproject.org',
+      package_dir= {'' : 'lib'},
+      packages=['bridgedb'])
+
+



More information about the tor-commits mailing list