[or-cvs] [thandy/master 2/2] More unit tests and documentation for the Thandy code

nickm at torproject.org
Mon Jul 26 15:16:59 UTC 2010


Author: Nick Mathewson <nickm at torproject.org>
Date: Sun, 25 Jul 2010 15:13:04 -0400
Subject: More unit tests and documentation for the Thandy code
Commit: bbb6cb5fe6a61aa0975dcd9df11f9f648f6abba2

---
 lib/thandy/ServerCLI.py   |    6 ++
 lib/thandy/checkJson.py   |   73 ++++++++++++++++++----
 lib/thandy/download.py    |   16 +++--
 lib/thandy/encodeToXML.py |    7 ++
 lib/thandy/formats.py     |  153 ++++++++++++++++++++++++++++++++++++++++-----
 lib/thandy/keys.py        |  136 +++++++++++++++++++++++++++++++++-------
 lib/thandy/master_keys.py |    4 +
 lib/thandy/repository.py  |   14 ++++-
 lib/thandy/tests.py       |    2 +
 lib/thandy/util.py        |    6 ++
 specs/thandy-spec.txt     |    1 -
 11 files changed, 359 insertions(+), 59 deletions(-)

diff --git a/lib/thandy/ServerCLI.py b/lib/thandy/ServerCLI.py
index 9212846..4b6c1a6 100644
--- a/lib/thandy/ServerCLI.py
+++ b/lib/thandy/ServerCLI.py
@@ -15,6 +15,9 @@ def tstamp():
     return time.strftime("%Y%m%d_%H%M%S", time.localtime())
 
 def snarf(fname):
+    """Return a string containing the binary contents of the file named
+       "fname."
+    """
     f = open(fname, 'rb')
     try:
         return f.read()
@@ -22,6 +25,9 @@ def snarf(fname):
         f.close()
 
 def snarfObj(fname):
+    """Return a 2-tuple of (object, len), where the object stored in json
+       format in the file 'fname', and len is that file's length.
+    """
     f = open(fname, 'r')
     try:
         length = os.fstat(f.fileno()).st_size
diff --git a/lib/thandy/checkJson.py b/lib/thandy/checkJson.py
index e63a1fc..ab33d00 100644
--- a/lib/thandy/checkJson.py
+++ b/lib/thandy/checkJson.py
@@ -1,13 +1,18 @@
 # Copyright 2008 The Tor Project, Inc.  See LICENSE for licensing information.
 
+"""This file defines an object oriented pattern matching system used to
+   check decoded xJSON objects.
+"""
+
 import re
 import sys
 
 import thandy
 
 class Schema:
-    """A schema matches a set of possible Python objects, of types
-       that are encodable in JSON."""
+    """A Schema matches a set of possible Python objects, of types
+       that are encodable in JSON.  This is an abstract base type;
+       see implementations below."""
     def matches(self, obj):
         """Return True if 'obj' matches this schema, False if it doesn't."""
         try:
@@ -73,7 +78,7 @@ class RE(Schema):
 
 class Str(Schema):
     """
-       Matches a particular string.
+       Matches a particular string, and no other.
 
        >>> s = Str("Hi")
        >>> s.matches("Hi")
@@ -113,7 +118,7 @@ class AnyStr(Schema):
 
 class OneOf(Schema):
     """
-       Matches an object that matches any one of several schemas.
+       Matches an object that matches any one of several sub-schemas.
 
        >>> s = OneOf([ListOf(Int()), Str("Hello"), Str("bye")])
        >>> s.matches(3)
@@ -139,11 +144,19 @@ class OneOf(Schema):
 
 class AllOf(Schema):
     """Matches the intersection of a list of schemas.
+
+       >>> s = AllOf([RE(r'.*end'), RE(r'begin.*')])
+       >>> s.matches("an end")
+       False
+       >>> s.matches("begin well")
+       False
+       >>> s.matches("begin and end")
+       True
     """
     def __init__(self, required):
         self._subschemas = required[:]
 
-    def checkMatche(self, obj):
+    def checkMatch(self, obj):
         for s in self._subschemas:
             s.checkMatch(obj)
 
@@ -174,6 +187,10 @@ class ListOf(Schema):
        False
     """
     def __init__(self, schema, minCount=0, maxCount=sys.maxint,listName="list"):
+        """Create a new ListOf schema to match anywhere from minCount to
+           maxCount objects conforming to 'schema'.  When generating errors,
+           we will call this type 'listName'.
+        """
         self._schema = schema
         self._minCount = minCount
         self._maxCount = maxCount
@@ -240,6 +257,13 @@ class Struct(Schema):
     """
     def __init__(self, subschemas, optschemas=[], allowMore=False,
                  structName="list"):
+        """Create a new Struct schema to match lists that begin with
+           each item in 'subschemas' in order.  If there are more elements
+           than items in subschemas, additional elements must match
+           the items in optschemas (if any).  If there are more elements
+           than items in subschemas and optschemas put together, then
+           the object is only matched when allowMore is true.
+        """
         self._subschemas = subschemas + optschemas
         self._min = len(subschemas)
         self._allowMore = allowMore
@@ -276,6 +300,8 @@ class DictOf(Schema):
        False
     """
     def __init__(self, keySchema, valSchema):
+        """Return a new DictSchema to match objects all of whose keys match
+           keySchema, and all of whose values match valSchema."""
         self._keySchema = keySchema
         self._valSchema = valSchema
     def checkMatch(self, obj):
@@ -306,8 +332,9 @@ class Opt:
 
 class Obj(Schema):
     """
-       Matches a dict from specified keys to key-specific types.  Unrecognized
-       keys are allowed.
+       Matches a dict from specified keys to key-specific types.  All
+       keys are required unless explicitly marked with Opt.
+       Unrecognized keys are always allowed.
 
        >>> s = Obj(a=AnyStr(), bc=Struct([Int(), Int()]))
        >>> s.matches({'a':"ZYYY", 'bc':[5,9]})
@@ -324,7 +351,6 @@ class Obj(Schema):
         self._objname = _objname
         self._required = d.items()
 
-
     def checkMatch(self, obj):
         if not isinstance(obj, dict):
             raise thandy.FormatException("Wanted a %s; did not get a dict"%
@@ -372,6 +398,7 @@ class TaggedObj(Schema):
     """
     def __init__(self, tagName, tagIsOptional=False, ignoreUnrecognized=True,
                  **tagvals):
+        #DOCDOC
         self._tagName = tagName
         self._tagOpt = tagIsOptional
         self._ignoreOthers = ignoreUnrecognized
@@ -421,17 +448,26 @@ class Int(Schema):
        >>> Int(lo=10, hi=30).matches(5)
        False
     """
-    def __init__(self, lo=-sys.maxint, hi=sys.maxint):
+    def __init__(self, lo=None, hi=None):
+        """Return a new Int schema to match items between lo and hi inclusive.
+        """
+        if lo is not None and hi is not None:
+            assert lo <= hi
         self._lo = lo
         self._hi = hi
+        plo,phi=lo,hi
+        if plo is None: plo = "..."
+        if phi is None: phi = "..."
+        self._range = "[%s,%s]"%(plo,phi)
     def checkMatch(self, obj):
         if isinstance(obj, bool) or not isinstance(obj, (int, long)):
             # We need to check for bool as a special case, since bool
             # is for historical reasons a subtype of int.
             raise thandy.FormatException("Got %r instead of an integer"%obj)
-        elif not (self._lo <= obj <= self._hi):
-            raise thandy.FormatException("%r not in range [%r,%r]"
-                                         %(obj, self._lo, self._hi))
+        elif (self._lo is not None and self._lo > obj) or (
+            self._hi is not None and self._hi < obj):
+            raise thandy.FormatException("%r not in range %s"
+                                         %(obj, self._range))
 
 class Bool(Schema):
     """
@@ -450,7 +486,20 @@ class Bool(Schema):
             raise thandy.FormatException("Got %r instead of a boolean"%obj)
 
 class Func(Schema):
+    """
+       Matches an object based on the value of some boolean function.
+
+       >>> even = lambda x: (x%2)==0
+       >>> s = Func(even, baseSchema=Int())
+       >>> s.matches(99)
+       False
+       >>> s.matches(98)
+       True
+       >>> s.matches("ninety-eight")
+       False
+    """
     def __init__(self, fn, baseSchema=None):
+        #DOCDOC
         self._fn = fn
         self._base = baseSchema
     def checkMatch(self, obj):
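
The schema classes above compose into larger validators.  Here is a minimal
usage sketch, assuming the Obj/Opt behavior described in the docstrings; the
PERSON_SCHEMA name and its fields are invented for illustration and are not
part of the patch:

    import thandy
    import thandy.checkJson as S

    # Hypothetical schema: 'name' and 'age' are required, 'tags' is optional.
    PERSON_SCHEMA = S.Obj(
        name=S.AnyStr(),
        age=S.Int(lo=0),
        tags=S.Opt(S.ListOf(S.AnyStr())))

    print PERSON_SCHEMA.matches({'name': "Alice", 'age': 30})   # True
    print PERSON_SCHEMA.matches({'name': "Alice", 'age': -1})   # False: below lo
    try:
        PERSON_SCHEMA.checkMatch({'age': 30})                   # missing 'name'
    except thandy.FormatException, e:
        print "rejected:", e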
diff --git a/lib/thandy/download.py b/lib/thandy/download.py
index 8e774ec..52c6fe4 100644
--- a/lib/thandy/download.py
+++ b/lib/thandy/download.py
@@ -31,7 +31,7 @@ class DownloadManager:
         # Work queue of DownloadJobs that we intend to process once a thread
         # is free.
         self.downloadQueue = Queue.Queue()
-        # DOCDOC
+        # Work queue of functions that need to be run in the main thread.
         self.resultQueue = Queue.Queue()
 
         # List of worker threads.
@@ -43,10 +43,10 @@ class DownloadManager:
         for t in self.threads:
             t.setDaemon(True)
 
-        # DOCDOC
+        # Used to remember the status of downloads to avoid too much retrying
         self.statusLog = DownloadStatusLog()
 
-        #DOCDOC
+        # Used to tell the main thread to raise an exception.
         self._raiseMe = None
 
     def start(self):
@@ -85,13 +85,17 @@ class DownloadManager:
     def wait(self):
         """Pause until we have no active or pending jobs."""
         while not self.finished():
+            # Wait till somebody tells us to do something.
             self.done.acquire()
             self.done.wait()
             self.done.release()
 
+            # Did something go wrong?
             if self._raiseMe:
                 raise self._raiseMe
 
+            # Suck functions out of resultQueue and run them until
+            # resultQueue is empty.
             try:
                 while True:
                     item = self.resultQueue.get(block=False)
@@ -130,7 +134,7 @@ class DownloadManager:
     def _thread(self, idx):
         # Run in the background per thread.  idx is the number of the thread.
         while True:
-            job = self.downloadQueue.get() # Grab job from queue.
+            job = self.downloadQueue.get() # Grab a job from queue.
             rp = job.getRelativePath()
             success = False
             try:
@@ -215,7 +219,6 @@ _STATUS_LOG_SCHEMA = S.Obj(
     networkFailures=_FAIL_SCHEMA)
 del S
 
-
 class DownloadStatusLog:
     """Tracks when we can retry downloading from various mirrors.
 
@@ -708,7 +711,8 @@ def getConnection(url, useTor, have_length=None):
 
 
 if __name__ == '__main__':
-    # Trivial CLI to test out downloading.
+    # Trivial CLI to test out downloading: fetches files with urllib2
+    # as specified on the command line.  Launches them all in parallel.
 
     import getopt
     options, args = getopt.getopt(sys.argv[1:], "",
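
The comments above describe the two queues that DownloadManager relies on:
downloadQueue holds jobs waiting for a worker thread, and resultQueue holds
functions that must run in the main thread.  A stripped-down sketch of that
pattern follows; it is illustrative only (the job callables and the handle()
helper are placeholders, not Thandy code):

    import Queue

    def handle(result):
        # Placeholder for whatever the main thread does with a finished job.
        print "finished:", result

    downloadQueue = Queue.Queue()   # jobs waiting for a free worker thread
    resultQueue = Queue.Queue()     # functions to run in the main thread

    def worker():
        while True:
            job = downloadQueue.get()       # grab a job from the queue
            result = job()                  # do the (possibly slow) work
            resultQueue.put(lambda r=result: handle(r))

    def drainResults():
        # Main thread: suck functions out of resultQueue and run them
        # until the queue is empty, as DownloadManager.wait() does above.
        try:
            while True:
                fn = resultQueue.get(block=False)
                fn()
        except Queue.Empty:
            pass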
diff --git a/lib/thandy/encodeToXML.py b/lib/thandy/encodeToXML.py
index c563d77..b2fe351 100644
--- a/lib/thandy/encodeToXML.py
+++ b/lib/thandy/encodeToXML.py
@@ -1,5 +1,12 @@
 # Copyright 2008 The Tor Project, Inc.  See LICENSE for licensing information.
 
+"""This module converts JSon to XML.  It is allegedly for people who have
+   libraries that can read XML, but nothing that can read JSon.
+
+   Nick questions whether it's actually useful.  Certainly, it's quite
+   unlikely that you'll be able to check any keys once you've converted it.
+"""
+
 import re
 import thandy
 
diff --git a/lib/thandy/formats.py b/lib/thandy/formats.py
index d6f646a..a59f696 100644
--- a/lib/thandy/formats.py
+++ b/lib/thandy/formats.py
@@ -15,9 +15,16 @@ import Crypto.Hash.SHA256
 
 class KeyDB:
     """A KeyDB holds public keys, indexed by their key IDs."""
+    ## Fields:
+    #   _keys: a map from keyid to public key.
     def __init__(self):
+        """Create a new empty KeyDB."""
         self._keys = {}
     def addKey(self, k):
+        """Insert a thandy.keys.PublicKey object, 'k', into this KeyDB.  If
+           we already had this key, retain the old one, but add any roles in
+           the new key 'k'.
+        """
         keyid = k.getKeyID()
         try:
             oldkey = self._keys[keyid]
@@ -28,8 +35,12 @@ class KeyDB:
             pass
         self._keys[k.getKeyID()] = k
     def getKey(self, keyid):
+        """Return the key whose key ID is 'keyid'.  If there is no such key,
+           raise KeyError."""
         return self._keys[keyid]
     def getKeysByRole(self, role, path):
+        """Return a list of all keys that have the role 'role' set for files
+           in 'path'."""
         results = []
         for key in self._keys.itervalues():
             for r,p in key.getRoles():
@@ -39,14 +50,17 @@ class KeyDB:
         return results
 
     def getKeysFuzzy(self, keyid):
+        """Return a list of all keys whose key IDs begin with 'keyid'."""
         r = []
         for k,v in self._keys.iteritems():
             if k.startswith(keyid):
                 r.append(v)
         return r
     def iterkeys(self):
+        """Return a new iterator of all the keys in this KeyDB."""
         return self._keys.itervalues()
 
+# Internal cache that maps role paths to regex objects that parse them.
 _rolePathCache = {}
 def rolePathMatches(rolePath, path):
     """Return true iff the relative path in the filesystem 'path' conforms
@@ -109,7 +123,12 @@ class SignatureStatus:
 
 def checkSignatures(signed, keyDB, role=None, path=None):
     """Given an object conformant to SIGNED_SCHEMA and a set of public keys
-       in keyDB, verify the signed object in 'signed'."""
+       in keyDB, verify the signed object is signed.  If 'role' and 'path'
+       are provided, verify that the signing key has the correct role to
+       sign this document as stored in 'path'.
+
+       Returns a SignatureStatus.
+    """
 
     SIGNED_SCHEMA.checkMatch(signed)
 
@@ -156,7 +175,10 @@ def checkSignatures(signed, keyDB, role=None, path=None):
 
     return SignatureStatus(goodSigs, badSigs, unknownSigs, tangentialSigs)
 
-def canonical_str_encoder(s):
+def _canonical_str_encoder(s):
+    """Helper for encodeCanonical: encodes a string as the byte sequence
+       expected for canonical JSON format.
+    """
     s = '"%s"' % re.sub(r'(["\\])', r'\\\1', s)
     if isinstance(s, unicode):
         return s.encode("utf-8")
@@ -168,7 +190,7 @@ def _encodeCanonical(obj, outf):
     # even let us replace the separators.
 
     if isinstance(obj, basestring):
-        outf(canonical_str_encoder(obj))
+        outf(_canonical_str_encoder(obj))
     elif obj is True:
         outf("true")
     elif obj is False:
@@ -191,12 +213,12 @@ def _encodeCanonical(obj, outf):
             items = obj.items()
             items.sort()
             for k,v in items[:-1]:
-                outf(canonical_str_encoder(k))
+                outf(_canonical_str_encoder(k))
                 outf(":")
                 _encodeCanonical(v, outf)
                 outf(",")
             k, v = items[-1]
-            outf(canonical_str_encoder(k))
+            outf(_canonical_str_encoder(k))
             outf(":")
             _encodeCanonical(v, outf)
         outf("}")
@@ -206,11 +228,11 @@ def _encodeCanonical(obj, outf):
 def encodeCanonical(obj, outf=None):
     """Encode the object obj in canoncial JSon form, as specified at
        http://wiki.laptop.org/go/Canonical_JSON .  It's a restricted
-       dialect of json in which keys are always lexically sorted,
+       dialect of JSON in which keys are always lexically sorted,
        there is no whitespace, floats aren't allowed, and only quote
-       and backslash get escaped.  The result is encoded in UTF-8,
-       and the resulting bits are passed to outf (if provided), or joined
-       into a string and returned.
+       and backslash get escaped.  The result is encoded in UTF-8, and
+       the resulting bytes are passed to outf (if provided) in several
+       calls, or joined into a string and returned.
 
        >>> encodeCanonical("")
        '""'
@@ -222,6 +244,14 @@ def encodeCanonical(obj, outf=None):
        '{"A":[99]}'
        >>> encodeCanonical({"x" : 3, "y" : 2})
        '{"x":3,"y":2}'
+       >>> total = 0
+       >>> def increment(s):
+       ...   global total
+       ...   total += len(s)
+       ...
+       >>> encodeCanonical({"x" : 3, "y" : 2, 'z' : [99,3]}, outf=increment)
+       >>> total
+       24
     """
 
     result = None
@@ -236,10 +266,9 @@ def encodeCanonical(obj, outf=None):
 
 def getDigest(obj, digestObj=None):
     """Update 'digestObj' (typically a SHA256 object) with the digest of
-       the canonical json encoding of obj.  If digestObj is none,
-       compute the SHA256 hash and return it.
-
-       DOCDOC string equivalence.
+       obj, first encoding it in canonical form if it's a JSON object,
+       and taking its UTF-8 encoding if it's in unicode.  If digestObj
+       is none, just compute and return the SHA256 hash.
     """
     useTempDigestObj = (digestObj == None)
     if useTempDigestObj:
@@ -291,6 +320,9 @@ def getFileDigest(f, digestObj=None):
         return digestObj.digest()
 
 def makeSignable(obj):
+    """Return a new JSON object of type 'signed' wrapping 'obj', and containing
+       no signatures.
+    """
     return { 'signed' : obj, 'signatures' : [] }
 
 def sign(signed, key):
@@ -349,13 +381,23 @@ def parseBase64(s):
         raise thandy.FormatException("Invalid base64 encoding")
 
 def parseHash(s):
+    """Parse a base64-encoded digest.
+
+       (This is just like parseBase64, but it checks the size.)
+    """
     h = parseBase64(s)
     if len(h) != Crypto.Hash.SHA256.digest_size:
         raise thandy.FormatException("Bad hash length")
     return h
 
+# Abbreviate the thandy.checkJson module, since we're going to be
+# using its members a lot below.
 S = thandy.checkJson
 
+#########
+## These schemas describe, in OO constraint-checking form, all the Thandy
+## data formats.
+
 # A date, in YYYY-MM-DD HH:MM:SS format.
 TIME_SCHEMA = S.RE(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
 # A hash, base64-encoded
@@ -370,6 +412,15 @@ RSAKEY_SCHEMA = S.Obj(
     _keytype=S.Str("rsa"),
     e=BASE64_SCHEMA,
     n=BASE64_SCHEMA)
+# An RSA key with private-key information: subtype of RSAKEY_SCHEMA.
+RSAKEY_PRIVATE_SCHEMA = S.Obj(
+    _keytype=S.Str("rsa"),
+    e=BASE64_SCHEMA,
+    n=BASE64_SCHEMA,
+    d=BASE64_SCHEMA,
+    p=BASE64_SCHEMA,
+    q=BASE64_SCHEMA,
+    u=BASE64_SCHEMA)
 # Any public key.
 PUBKEY_SCHEMA = S.Obj(
     _keytype=S.AnyStr())
@@ -393,6 +444,7 @@ SIGNED_SCHEMA = S.Obj(
     signed=S.Any(),
     signatures=S.ListOf(SIGNATURE_SCHEMA))
 
+# The name of a role
 ROLENAME_SCHEMA = S.AnyStr()
 
 # A role: indicates that a key is allowed to certify a kind of
@@ -459,6 +511,7 @@ def checkWinRegistryKeyname(keyname):
     elif not key or not value:
         raise thandy.FormatException("Bad registry entry.")
 
+# A string holding the name of a windows registry key
 REGISTRY_KEY_SCHEMA = S.Func(checkWinRegistryKeyname)
 
 CHECK_ITEM_SCHEMA = S.TaggedObj(
@@ -516,6 +569,10 @@ PACKAGE_SCHEMA = S.Func(checkPackageFormatConsistency, PACKAGE_SCHEMA)
 ALL_ROLES = ('timestamp', 'mirrors', 'bundle', 'package', 'master')
 
 class Keylist(KeyDB):
+    """A list of keys, as extracted from a Thandy keys.txt JSon file.
+
+       This class extends KeyDB, so you can access keys more easily.
+    """
     def __init__(self):
         KeyDB.__init__(self)
 
@@ -542,6 +599,16 @@ class Keylist(KeyDB):
             self.addKey(key)
 
 class StampedInfo:
+    """This class holds a single entry in a timestamp file.  Each
+       StampedInfo says when a file was last modified, and what its
+       hash was.  It may also provide useful info about where to find it,
+       its version, its length, and so on.
+    """
+    ## _ts -- the time when the file was last modified
+    ## _hash -- the hash of the most recent version of the file
+    ## _version -- version of the most recent file. May be None
+    ## _relpath -- where to find this file in the repository
+    ## _length -- the length of the file
     def __init__(self, ts, hash, version=None, relpath=None, length=None):
         self._ts = ts
         self._hash = hash
@@ -565,6 +632,11 @@ class StampedInfo:
         return self._length
 
 class TimestampFile:
+    """This class holds all the fields parsed from a thandy timestamp file."""
+    ## _time -- the time when this file was generated
+    ## _mirrorListInfo -- a StampedInfo for the mirrorlist.
+    ## _keyListInfo -- a StampedInfo for the keylist.
+    ## _bundleInfo -- map from bundle name to StampedInfo
     def __init__(self, at, mirrorlistinfo, keylistinfo, bundleinfo):
         self._time = at
         self._mirrorListInfo = mirrorlistinfo
@@ -575,6 +647,9 @@ class TimestampFile:
     def fromJSon(obj):
         # must be validated.
         at = parseTime(obj['at'])
+        # We slice these lists because we want to support old thandys
+        # that didn't include the length on these, and new ones that
+        # might include more fields
         m = StampedInfo.fromJSonFields(*obj['m'][:3])
         k = StampedInfo.fromJSonFields(*obj['k'][:3])
         b = {}
@@ -606,6 +681,13 @@ class TimestampFile:
         return self._bundleInfo
 
 def readConfigFile(fname, needKeys=(), optKeys=(), preload={}):
+    """Read a configuration file from 'fname'.  A configuration file is a
+       python script that runs in a temporary namespace prepopulated
+       with the contents of 'preload'.  A thandy.FormatException is raised
+       if the file finishes execution without setting every variable
+       listed in 'needKeys'.  These settings, plus any variables whose names
+       are listed in 'optKeys', are returned in a new dict.
+    """
     parsed = preload.copy()
     result = {}
     execfile(fname, parsed)
@@ -625,6 +707,10 @@ def readConfigFile(fname, needKeys=(), optKeys=(), preload={}):
     return result
 
 def makePackageObj(config_fname, package_fname):
+    """Given a description of a thandy package in config_fname, and the
+       name of the one file (only one is supported now!) in package_fname,
+       return a new unsigned package object.
+    """
     preload = {}
     shortDescs = {}
     longDescs = {}
@@ -702,6 +788,12 @@ def makePackageObj(config_fname, package_fname):
     return result
 
 def makeBundleObj(config_fname, getPackage, getPackageLength):
+    """Given a description of a thandy  bundle in config_fname,
+       return a new unsigned bundle object.  getPackage must be a function
+       returning a package object for every package the bundle requires
+       when given the package's name as input.  getPackageLength
+       must be a function returning the length of the package file.
+    """
     packages = []
     def ShortGloss(lang, val): packages[-1]['gloss'][lang] = val
     def LongGloss(lang, val): packages[-1]['longgloss'][lang] = val
@@ -749,20 +841,39 @@ def makeBundleObj(config_fname, getPackage, getPackageLength):
     return result
 
 def versionIsNewer(v1, v2):
+    """Return true if version v1 is newer than v2.  Both versions are
+       given as lists of version components.
+       >>> versionIsNewer([1,2,3], [1,2,3,4])
+       False
+       >>> versionIsNewer([1,2,3,5], [1,2,3,4])
+       True
+       >>> versionIsNewer([1,3,3,5], [1,2,3,5])
+       True
+    """
     return v1 > v2
 
 def getBundleKey(bundlePath):
     """
+       Return all parts of a bundle's "key" as used in a timestamp file,
+       given its full filename.
+
        >>> getBundleKey("/bundleinfo/tor-browser/win32/some-file-name.txt")
        '/bundleinfo/tor-browser/win32/'
     """
-    # No, we can't use "os.path.directory."  That isn't os-independent.
+    # No, we can't use "os.path.directory" or "os.path.split".  Those are
+    # OD-dependent, and all of our paths are in Unix format.
     idx = bundlePath.rindex("/")
     return bundlePath[:idx+1]
 
 def makeTimestampObj(mirrorlist_obj, mirrorlist_len,
                      keylist_obj, keylist_len,
                      bundle_objs):
+    """Return a new unsigned timestamp object for a given set of inputs,
+       where mirrorlist_obj and mirrorlist_len are a (signed, unencoded)
+       mirror list, and its length on disk; keylist_obj and keylist_len
+       are the same for the key list, and bundle_objs is a list of
+       (object, length) tuples for all the bundles.
+    """
     result = { '_type' : 'Timestamp',
                'at' : formatTime(time.time()) }
     result['m'] = [ mirrorlist_obj['ts'],
@@ -784,6 +895,8 @@ def makeTimestampObj(mirrorlist_obj, mirrorlist_len,
     return result
 
 class MirrorInfo:
+    """A MirrorInfo holds the parsed value of a thandy mirror list's entry
+       for a single mirror."""
     def __init__(self, name, urlbase, contents, weight):
         self._name = name
         self._urlbase = urlbase
@@ -809,6 +922,9 @@ class MirrorInfo:
                  'weight' : self._weight }
 
 def makeMirrorListObj(mirror_fname):
+    """Return a new unsigned mirrorlist object for the mirrors described in
+       'mirror_fname'.
+    """
     mirrors = []
     def Mirror(*a, **kw): mirrors.append(MirrorInfo(*a, **kw))
     preload = {'Mirror' : Mirror}
@@ -821,6 +937,9 @@ def makeMirrorListObj(mirror_fname):
     return result
 
 def makeKeylistObj(keylist_fname, includePrivate=False):
+    """Return a new unsigned keylist object for the keys described in
+       'keylist_fname'.
+    """
     keys = []
     def Key(obj): keys.append(obj)
     preload = {'Key': Key}
@@ -851,7 +970,11 @@ SCHEMAS_BY_TYPE = {
     }
 
 def checkSignedObj(obj, keydb=None):
-    # Returns signaturestatus, role, path on sucess.
+    """Given a signed object, check whether it is well-formed and correctly
+       signed with some key in keydb having the appropriate role.  On
+       success, returns a SignatureStatus, the role used to sign it,
+       and the object's path in the repository.
+    """
 
     SIGNED_SCHEMA.checkMatch(obj)
     try:
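
Taken together, makeSignable(), sign(), and checkSignatures() give the basic
sign-and-verify flow for the formats defined above.  A rough sketch, assuming
the function interfaces shown in this patch; the role, path, and object
contents are invented for illustration:

    import thandy.formats
    import thandy.keys

    key = thandy.keys.RSAKey.generate(bits=512)
    key.addRole('master', '**')                  # example role and path pattern

    obj = {'example': "document"}                # stand-in for a real Thandy object
    signable = thandy.formats.makeSignable(obj)  # {'signed': obj, 'signatures': []}
    thandy.formats.sign(signable, key)           # attach a signature made with 'key'

    keydb = thandy.formats.KeyDB()
    keydb.addKey(key)
    status = thandy.formats.checkSignatures(signable, keydb,
                                            role='master', path='/meta/keys.txt')
    # 'status' is a SignatureStatus listing good, bad, unknown, and
    # role-mismatched signatures.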
diff --git a/lib/thandy/keys.py b/lib/thandy/keys.py
index 3385ddb..9eb98d0 100644
--- a/lib/thandy/keys.py
+++ b/lib/thandy/keys.py
@@ -1,6 +1,12 @@
 # Copyright 2008 The Tor Project, Inc.  See LICENSE for licensing information.
 
-# These require PyCrypto.
+"""thandy.keys --
+
+   This module defines functionality for public keys, and for a simple
+   encrypted keystore.
+"""
+
+# These imports require PyCrypto.
 import Crypto.PublicKey.RSA
 import Crypto.Hash.SHA256
 import Crypto.Cipher.AES
@@ -18,28 +24,46 @@ import thandy.util
 json = thandy.util.importJSON()
 
 class PublicKey:
-    """Abstract base class for public keys."""
+    """An abstract base class for public keys.  A public key object
+       always implements some kind of public key, and may also contain
+       the corresponding private key data."""
+    ## Fields:
+    # _roles: a list of (rolename, path) tuples that indicates which
+    #     roles we consider this public key to have.
     def __init__(self):
+        """Constructor: Initialize a public key."""
         self._roles = []
     def format(self):
+        """Return this public key converted into a JSon object"""
         raise NotImplemented()
     def sign(self, obj=None, digest=None):
         """Sign either a JSon object provided in 'obj', or a digest provided
            in 'digest'.  Return a list of (method name, base64-encoded
-           signature) tuple.
+           signature) tuples.
 
            Requires that this is a private key."""
         raise NotImplemented()
-    def checkSignature(self, method, data, signature):
+    def checkSignature(self, method, signature, obj=None, digest=None):
+        """Check the base64-encoded signature in 'signature', which was
+           generated using the method named 'method', to see
+           if it is a correct signature made with this key for either
+           a JSon object provided in 'obj' or a digest provided in 'digest'.
+
+           Returns True if the signature is valid, False if it's invalid, and
+           raises UnknownMethod if we don't recognize 'method'.
+        """
         # returns True, False, or raises UnknownMethod.
         raise NotImplemented()
     def getKeyID(self):
+        """Return a base-64-encoded key ID for this key.  No two distinct
+           keys may share the same key ID.
+        """
         raise NotImplemented()
     def getRoles(self):
         """Return a list of all roles supported by this key.  A role is
            a doctype,pathPattern tuple.
         """
-        return self._roles
+        return self._roles[:]
     def addRole(self, role, path):
         """Add a role to the list of roles supported by this key.
            A role is a permission to sign a given kind of document
@@ -62,8 +86,14 @@ class PublicKey:
         return False
 
 if hex(1L).upper() == "0X1L":
+    # It looks like integers and longs aren't unified in this version
+    # of Python: converting a long to a hex string means there's a trailing
+    # 'L' we need to remove.
     def intToBinary(number):
         """Convert an int or long into a big-endian series of bytes.
+
+           >>> intToBinary(92807287956601L)
+           'Thandy'
         """
         # This "convert-to-hex, then use binascii" approach may look silly,
         # but it's over 10x faster than the Crypto.Util.number approach.
@@ -73,6 +103,8 @@ if hex(1L).upper() == "0X1L":
             h = "0"+h
         return binascii.a2b_hex(h)
 elif hex(1L).upper() == "0X1":
+    # It looks like integers and longs _are_ unified.  Maybe this is Python 3?
+    # In any case, we don't need to remove the trailing L.
     def intToBinary(number):
         "Variant for future versions of pythons that don't append 'L'."
         h = hex(long(number))
@@ -87,18 +119,31 @@ else:
 
 def binaryToInt(binary):
    """Convert a big-endian series of bytes into a long.
+
+      >>> binaryToInt('Hi')
+      18537L
    """
    return long(binascii.b2a_hex(binary), 16)
 
 def intToBase64(number):
-    """Convert an int or long to a big-endian base64-encoded value."""
+    """Convert an int or long to a big-endian base64-encoded value.
+
+        >>> intToBase64(0x4e16a777)
+        'Thandw'
+    """
     return thandy.formats.formatBase64(intToBinary(number))
 
 def base64ToInt(number):
-    """Convert a big-endian base64-encoded value to a long."""
+    """Convert a big-endian base64-encoded value to a long.
+
+        >>> base64ToInt('Thandy')
+        1310107511L
+    """
     return binaryToInt(thandy.formats.parseBase64(number))
 
 def _pkcs1_padding(m, size):
+    """Add PKCS padding to the message 'm', so that it's appropriate for
+       use with a public key of 'size' bytes."""
     # I'd rather use OAEP+, but apparently PyCrypto barely supports
     # signature verification, and doesn't seem to support signature
     # verification with nondeterministic padding.  "argh."
@@ -108,13 +153,16 @@ def _pkcs1_padding(m, size):
     return r
 
 def _xor(a,b):
-    if a:
-        return not b
-    else:
-        return b
+    """Return true iff exactly one of and b are true.  Used to check
+       some conditions."""
+    return bool(a) ^ bool(b)
 
 class RSAKey(PublicKey):
     """
+    An RSAKey is an implementation of the abstract class 'PublicKey' that
+    can sign documents and check signatures using RSA keys of arbitrary
+    size.
+
     >>> k = RSAKey.generate(bits=512)
     >>> obj = k.format()
     >>> obj['_keytype']
@@ -136,27 +184,33 @@ class RSAKey(PublicKey):
     >>> k.checkSignature(method, sig, obj=s2)
     False
     """
+    ## Fields: (beyond those inherited from PublicKey)
+    # key -- a PyCrypto RSA key, public or private.  See Crypto.PublicKey.RSA.
+    # keyid -- the base64 key ID for this key, or 'None' if we haven't
+    #   generated one yet.
     def __init__(self, key):
+        """Constructure: Initialize a new RSAKey from a PyCrypto RSA key."""
         PublicKey.__init__(self)
         self.key = key
         self.keyid = None
 
     @staticmethod
     def generate(bits=2048):
-        """Generate a new RSA key, with modulus length 'bits'."""
+        """Generate and return a new RSA key, with modulus length 'bits'."""
         key = Crypto.PublicKey.RSA.generate(bits=bits, randfunc=os.urandom)
         return RSAKey(key)
 
     @staticmethod
     def fromJSon(obj):
-        """Construct an RSA key from the output of the format() method.
+        """Construct and return a RSAKey from the JSon object output of the
+           RSAKey.format() method.  May raise thandy.FormatException.
         """
         # obj must match RSAKEY_SCHEMA
 
         thandy.formats.RSAKEY_SCHEMA.checkMatch(obj)
         n = base64ToInt(obj['n'])
         e = base64ToInt(obj['e'])
-        if obj.has_key('d'):
+        if thandy.formats.RSAKEY_PRIVATE_SCHEMA.matches(obj):
             d = base64ToInt(obj['d'])
             p = base64ToInt(obj['p'])
             q = base64ToInt(obj['q'])
@@ -173,11 +227,11 @@ class RSAKey(PublicKey):
         return result
 
     def isPrivateKey(self):
-        """Return true iff this key has private-key components"""
+        """Return True iff this key has private-key components"""
         return hasattr(self.key, 'd')
 
     def format(self, private=False, includeRoles=False):
-        """Return a new object to represent this key in json format.
+        """Return a new json object to represent this key in json format.
            If 'private', include private-key data.  If 'includeRoles',
            include role information.
         """
@@ -205,6 +259,7 @@ class RSAKey(PublicKey):
         return self.keyid
 
     def sign(self, obj=None, digest=None):
+        # See PublicKey.sign for documentation
         assert _xor(obj == None, digest == None)
         method = "sha256-pkcs1"
         if digest == None:
@@ -214,6 +269,7 @@ class RSAKey(PublicKey):
         return [ (method, sig) ]
 
     def checkSignature(self, method, sig, obj=None, digest=None):
+        # See PublicKey.checkSignature for documentation
         assert _xor(obj == None, digest == None)
         if method != "sha256-pkcs1":
             raise thandy.UnknownMethod(method)
@@ -223,6 +279,7 @@ class RSAKey(PublicKey):
         m = _pkcs1_padding(digest, (self.key.size()+1) // 8)
         return bool(self.key.verify(m, (sig,)))
 
+# Length of the salt for secretToKey, not counting its trailing difficulty byte.
 SALTLEN=16
 
 def secretToKey(salt, secret):
@@ -233,14 +290,15 @@ def secretToKey(salt, secret):
 
        (The goal is to make offline password-guessing attacks harder by
        increasing the time required to convert a password to a key, and to
-       make precomputed password tables impossible to generate by )
+       make precomputed password tables impossible to generate by using
+       a really huge salt.)
     """
     assert len(salt) == SALTLEN+1
 
     # The algorithm is basically, 'call the last byte of the salt the
     # "difficulty", and all other bytes of the salt S.  Now make
     # an infinite stream of S|secret|S|secret|..., and hash the
-    # first N bytes of that, where N is determined by the difficulty.
+    # first N bytes of that, where N is determined by the difficulty.'
     #
     # Obviously, this wants a hash algorithm that's tricky to
     # parallelize.
@@ -270,8 +328,10 @@ def secretToKey(salt, secret):
     return d.digest()
 
 def encryptSecret(secret, password, difficulty=0x80):
-    """Encrypt the secret 'secret' using the password 'password',
-       and return the encrypted result."""
+    """Encrypt the secret 'secret' using the password 'password', and
+       return the encrypted result.  The 'difficulty' parameter is a
+       one-byte value determining how hard to make the password-to-key
+       derivation"""
     # The encrypted format is:
     #    "GKEY1"  -- 5 octets, fixed, denotes data format.
     #    SALT     -- 17 bytes, used to hash password
@@ -354,25 +414,44 @@ def decryptSecret(encrypted, password):
     return secret
 
 class KeyStore(thandy.formats.KeyDB):
-    """Helper to store private keys in an encrypted file."""
+    """Helper class used to store private keys in a (usually) encrypted file.
+
+       It implements thandy.formats.KeyDB, so you can add keys to it
+       and get keys from it in a useful indexed way.
+    """
+    ## Fields:
+    # _loaded -- boolean: Have we loaded the keys from disk yet?
+    # _fname -- The filename that we use to store the keys.
+    # _passwd -- The password used to encrypt the keystore, or None if we
+    #    haven't asked for it yet.
+    # _encrypted -- boolean: Should we treat this as an encrypted keystore?
+    #
+    # File format:
+    #   A JSon object containing a single field, "keys", which in turn
+    #   contains a list of json-encoded keys.  This object is stored encrypted
+    #   with encryptSecret.
     def __init__(self, fname, encrypted=True):
         thandy.formats.KeyDB.__init__(self)
 
-        self._loaded = None
+        self._loaded = False
         self._fname = fname
         self._passwd = None
         self._encrypted = encrypted
 
     def getpass(self, reprompt=False):
+        """If we have already asked for the passphrase, return it.
+           If 'reprompt' is true, ask for the passphrase twice to make sure
+           the user didn't mistype.  Otherwise, only ask once.
+        """
         if self._passwd != None:
             return self._passwd
         while 1:
-            sys.stderr.write("Password: ")
+            sys.stderr.write("Passphrase: ")
             pwd = getpass.getpass("")
             if not reprompt:
                 return pwd
 
-            sys.stderr.write("Confirm: ")
+            sys.stderr.write("   Confirm: ")
             pwd2 = getpass.getpass("")
             if pwd == pwd2:
                 return pwd
@@ -380,6 +459,10 @@ class KeyStore(thandy.formats.KeyDB):
                 print "Mismatch; try again."
 
     def load(self, password=None):
+        """Load the keyring into memory, decrypting as needed.
+
+           May raise various exceptions on failure.
+        """
         logging.info("Loading private keys from %r...", self._fname)
         if not os.path.exists(self._fname):
             logging.info("...no such file.")
@@ -405,12 +488,17 @@ class KeyStore(thandy.formats.KeyDB):
         self._loaded = True
 
     def setPassword(self, passwd):
+        """Set the cached password to 'passwd'."""
         self._passwd = passwd
 
     def clearPassword(self):
+        """Clear the cached password."""
         self._passwd = None
 
     def save(self, password=None):
+        """Save the keyring to disk.  Note that you must call this method,
+           or changes will not be persistent.
+        """
         if not self._loaded and self._encrypted:
             self.load(password)
 
diff --git a/lib/thandy/master_keys.py b/lib/thandy/master_keys.py
index 4d014a7..4d0bdac 100644
--- a/lib/thandy/master_keys.py
+++ b/lib/thandy/master_keys.py
@@ -1,5 +1,9 @@
 # Copyright 2008 The Tor Project, Inc.  See LICENSE for licensing information.
 
+""" This file is where a master keylist (and a default initial mirror list)
+    can currently ship with the Thandy code.
+"""
+
 MASTER_KEYS = [
     {
         "_keytype": "rsa",
diff --git a/lib/thandy/repository.py b/lib/thandy/repository.py
index f5a4de2..bc4d73b 100644
--- a/lib/thandy/repository.py
+++ b/lib/thandy/repository.py
@@ -52,10 +52,13 @@ class RepositoryFile:
         self._mtime = None
 
     def clear(self):
-        """DOCDOC"""
+        """Clear all cached fields loaded from disk; the next time we need one,
+           we'll reload the file entirely.
+        """
         self._main_obj = self._signed_obj = None
         self._sigStatus = None
         self._mtime = None
+        self._length = None
 
     def getRelativePath(self):
         """Return the filename for this item relative to the top of the
@@ -118,6 +121,7 @@ class RepositoryFile:
         return signed_obj, main_obj
 
     def checkFile(self, fname, needhash=None):
+        """DOCDOC"""
         f = open(fname, 'r')
         try:
             s = f.read()
@@ -167,7 +171,9 @@ class RepositoryFile:
         return self._sigStatus
 
 class PkgFile:
+    """DOCDOC"""
     def __init__(self, repository, relativePath, needHash):
+        """DOCDOC"""
         self._repository = repository
         self._relativePath = relativePath
         self._needHash = needHash
@@ -175,22 +181,28 @@ class PkgFile:
         self._mtime = None
 
     def clear(self):
+        """DOCDOC"""
         self._mtime = None
 
     def load(self):
+        """DOCDOC"""
         pass
 
     def getRelativePath(self):
+        """DOCDOC"""
         return self._relativePath
 
     def getPath(self):
+        """DOCDOC"""
         fname = self._repository.getFilename(self._relativePath)
         return os.path.normpath(fname)
 
     def getExpectedHash(self):
+        """DOCDOC"""
         return self._needHash
 
     def checkFile(self, fname, needHash=None):
+        """DOCDOC"""
         if needHash:
             if thandy.formats.getFileDigest(fname) != needHash:
                 raise thandy.FormatException("Digest for %s not as expected.")
diff --git a/lib/thandy/tests.py b/lib/thandy/tests.py
index a46258d..e55c07d 100644
--- a/lib/thandy/tests.py
+++ b/lib/thandy/tests.py
@@ -68,6 +68,7 @@ class CryptoTests(unittest.TestCase):
         ks = thandy.keys.KeyStore(fname)
         key1 = thandy.keys.RSAKey.generate(512)
         key2 = thandy.keys.RSAKey.generate(512)
+        key1.addRole('master', '**')
         ks.addKey(key1)
         ks.addKey(key2)
         ks.save(passwd)
@@ -75,6 +76,7 @@ class CryptoTests(unittest.TestCase):
         ks2 = thandy.keys.KeyStore(fname)
         ks2.load(passwd)
         self.assertEquals(key1.key.n, ks2.getKey(key1.getKeyID()).key.n)
+        self.assertEquals(key1.getRoles(), [("master", "**")])
 
 class UtilTests(unittest.TestCase):
     def setUp(self):
diff --git a/lib/thandy/util.py b/lib/thandy/util.py
index 90f251f..29c7b87 100644
--- a/lib/thandy/util.py
+++ b/lib/thandy/util.py
@@ -19,6 +19,11 @@ import thandy.master_keys
 _jsonModule = None
 
 def importJSON():
+    """Load and return the json module.
+
+       When everybody has Python 2.6 or later, we can just replace this with
+       'import json; return json'
+    """
     global _jsonModule
     if _jsonModule is not None:
         return _jsonModule
@@ -200,6 +205,7 @@ def getRegistryValue(keyname):
 _controlLog = logging.getLogger("thandy-ctrl")
 
 def formatLogString(s):
+    """DOCDOC"""
     s = '"%s"' % re.sub(r'(["\\])', r'\\\1', s)
     s = s.replace("\n", "\\n")
     return s
diff --git a/specs/thandy-spec.txt b/specs/thandy-spec.txt
index eae908b..0db9df1 100644
--- a/specs/thandy-spec.txt
+++ b/specs/thandy-spec.txt
@@ -216,7 +216,6 @@
     /meta/keys.txt
 
          Signed by the root keys; indicates keys and roles.
-         [???? I'm using the txt extension here.  Is that smart?]
 
     /meta/mirrors.txt
 
-- 
1.7.1


