[or-cvs] r16692: {} Initial auto-updater commit: s-expression libray and format (in updater: . trunk trunk/lib trunk/lib/sexp trunk/specs)

nickm at seul.org nickm at seul.org
Sat Aug 30 16:33:47 UTC 2008


Author: nickm
Date: 2008-08-30 12:33:47 -0400 (Sat, 30 Aug 2008)
New Revision: 16692

Added:
   updater/branches/
   updater/tags/
   updater/trunk/
   updater/trunk/glider/
   updater/trunk/lib/
   updater/trunk/lib/sexp/
   updater/trunk/lib/sexp/__init__.py
   updater/trunk/lib/sexp/access.py
   updater/trunk/lib/sexp/encode.py
   updater/trunk/lib/sexp/parse.py
   updater/trunk/lib/sexp/tests.py
   updater/trunk/specs/
   updater/trunk/specs/U1-automatic-updates.txt
   updater/trunk/specs/U2-formats.txt
Log:
Initial auto-updater commit: s-expression libray and format spec.

Added: updater/trunk/lib/sexp/__init__.py
===================================================================
--- updater/trunk/lib/sexp/__init__.py	                        (rev 0)
+++ updater/trunk/lib/sexp/__init__.py	2008-08-30 16:33:47 UTC (rev 16692)
@@ -0,0 +1,2 @@
+
+__all__ = [ 'parse', 'encode', 'access', 'tests' ]

Added: updater/trunk/lib/sexp/access.py
===================================================================
--- updater/trunk/lib/sexp/access.py	                        (rev 0)
+++ updater/trunk/lib/sexp/access.py	2008-08-30 16:33:47 UTC (rev 16692)
@@ -0,0 +1,373 @@
+
+import re
+import sys
+
+def s_tag(s):
+    """
+    >>> s_tag("a string") is None
+    True
+    >>> s_tag(["a-tagged", "list"])
+    'a-tagged'
+    >>> s_tag([["untagged"], "list"]) is None
+    True
+    """
+    if len(s) and not isinstance(s, str) and isinstance(s[0],str):
+        return s[0]
+    else:
+        return None
+
+def s_child(s, tag):
+    for child in s:
+        if s_tag(child) == tag:
+            return child
+    return None
+
+def s_children(s, tag):
+    return (ch for ch in s if s_tag(ch) == tag)
+
+def s_descendants(s, tags=()):
+    stack = [ ]
+    push = stack.append
+    pop = stack.pop
+
+    idx = 0
+    while 1:
+        while idx == len(s):
+            try:
+                s, idx = pop()
+            except IndexError:
+                return
+        if isinstance(s[idx], str):
+            idx += 1
+            continue
+        if s_tag(s[idx]) in tags:
+            yield s[idx]
+
+
+class SExpr(list):
+    def __init__(self, stuff=()):
+        list.__init__(self, stuff)
+        self._d = None
+
+    def __getattr__(self, item):
+        if self._d is None: self._buildDict()
+        return self[self._d[item]]
+
+    def __getitem__(self, idx):
+        item = list.__getitem__(self, idx)
+        if type(item) in (list, tuple): #exact match only.
+            item = self[idx] = SExpr(item)
+        return item
+
+    def _buildDict(self):
+        self._d = d = {}
+        for idx in xrange(len(self)):
+            item = list.__getitem__(self, idx)
+            t = s_tag(item)
+            if t is not None:
+                d[t] = idx
+
+def _s_lookup_all(s, path, callback):
+
+    # XXXX: Watch out; ** gets pretty heavy pretty fast.
+
+    if isinstance(path, str):
+        path = path.split(".")
+
+    if len(path) == 0:
+        callback(s)
+        return
+
+    for p_idx in xrange(len(path)):
+        p_item = path[p_idx]
+
+        if p_item == '*':
+            for ch in s:
+                if not isinstance(ch, str):
+                    _s_lookup_all(s, path[p_idx+1:], callback)
+            return
+        elif p_item == '**':
+            for ch in s_descendants(s):
+                if not isinstance(ch, str):
+                    _s_lookup_all(s, path[p_idx+1:], callback)
+            return
+        elif p_item.startswith('**'):
+            for ch in s_descendants(s):
+                if s_tag(ch) == p_item[2:]:
+                    _s_lookup_all(s, path[p_idx+1:], callback)
+        else:
+            s = s_child(s, p_item)
+            if s is None:
+                return
+
+    callback(s)
+
+def s_lookup_all(s, path):
+    result = []
+    _s_lookup_all(s, path, result.append)
+    return result
+
+def s_lookup(s, path):
+    r = s_lookup_all(s, path)
+    if len(r):
+        return r[0]
+    return None
+
+class _Singleton:
+    def isSingleton(self):
+        return True
+    def matches(self, item):
+        raise NotImplemented()
+
+    def clear(self):
+        self._got = False
+    def matchItem(self, item):
+        if not self._got and self.matches(item):
+            self._got = True
+            return True
+        else:
+            return False
+    def isSatisfied(self):
+        return self._got
+
+class _Span:
+    def isSingleton(self):
+        return False
+    def clear(self):
+        pass
+    def matchItem(self, item):
+        raise NotImplemented()
+    def isSatisfied(self):
+        raise NotImplemented()
+
+class _AnyItem(_Singleton):
+    def matches(self,item):
+        return True
+    def rep(self):
+        return "?"
+class _AnyString(_Singleton):
+    def matches(self,item):
+        return isinstance(item, str)
+    def rep(self):
+        return "."
+class _ExactString(_Singleton):
+    def __init__(self, s):
+        self._s = s
+    def matches(self, item):
+        return item == self._s
+    def rep(self):
+        return "=%s"%self._s
+class _ReString(_Singleton):
+    def __init__(self, s, regex=None):
+        if regex is None:
+            regex = re.compile(s)
+        self._re = regex
+        self._s = s
+    def matches(self, item):
+        if not isinstance(item, str):
+            return False
+        m = self._re.match(item)
+        return m and m.end() == len(item)
+    def rep(self):
+        return "%s"%self._s
+class _List(_Singleton):
+    def __init__(self, subpatterns):
+        self._pats = subpatterns
+    def clear(self):
+        _Singleton.clear(self)
+        for p in self._pats:
+            p.clear()
+    def matches(self, item):
+        if isinstance(item, str):
+            return False
+
+        i_idx = 0
+        pat_idx = 0
+        while i_idx < len(item):
+            try:
+                subpat = self._pats[pat_idx]
+            except:
+                return False # Too many items.
+
+            if subpat.isSingleton():
+                if not subpat.matches(item[i_idx]):
+                    return False
+                i_idx += 1
+                pat_idx += 1
+            else:
+                subpat.clear()
+                while i_idx < len(item) and subpat.matchItem(item[i_idx]):
+                    i_idx += 1
+                if not subpat.isSatisfied():
+                    return False
+                pat_idx += 1
+
+        # Out of items, but we have more patterns.  Make sure they all accept
+        # 0 items.
+        if pat_idx < len(self._pats):
+            for subpat in self._pats[pat_idx:]:
+                subpat.clear()
+                if not subpat.isSatisfied():
+                    return False
+        return True
+
+    def rep(self):
+        return [ p.rep() for p in self._pats ]
+
+class _AnyItems(_Span):
+    def matchItem(self, item):
+        return True
+    def isSatisfied(self):
+        return True
+    def rep(self):
+        return "*"
+
+class _NMatches(_Span):
+    def __init__(self, alternatives, lo, hi):
+        self.lo = lo
+        self.hi = hi
+        self.count = 0
+        self.alternatives = alternatives
+        for a in alternatives:
+            if not a.isSingleton():
+                raise SchemaFormatError("Nexted span inside span")
+    def clear(self):
+        self.count = 0
+        for p in self.alternatives:
+            p.clear()
+    def matchItem(self, item):
+        if self.count == self.hi:
+            return False
+        for p in self.alternatives:
+            if p.matches(item):
+                self.count += 1
+                return True
+        return False
+    def isSatisfied(self):
+        return self.lo <= self.count <= self.hi
+
+    def rep(self):
+        name = { (1,1): ":oneof",
+                 (0,1): ":maybe",
+                 (0,sys.maxint): ":anyof",
+                 (1,sys.maxint): ":someof" }.get((self.lo, self.hi))
+        if name is None:
+            name = ":%d-%d"%(self.lo, self.hi)
+        result = [ name ]
+        result.extend(p.rep() for p in self.alternatives)
+        return result
+
+class _Unordered(_Span):
+    def __init__(self, alternatives):
+        self.alternatives = alternatives
+    def clear(self):
+        for p in self.alternatives:
+            p.clear()
+    def matchItem(self, item):
+        for p in self.alternatives:
+            if p.matchItem(item):
+                return True
+        return False
+    def isSatisfied(self):
+        for p in self.alternatives:
+            if not p.isSatisfied():
+                return False
+        return True
+    def rep(self):
+        result = [ ":unordered" ]
+        result.extend(p.rep() for p in self.alternatives)
+        return result
+
+class SchemaFormatError(Exception):
+    pass
+
+_RE_PAT = re.compile(r'/((?:[\\.]|[^\\/]+)*)/([ilmstx]*)', re.I)
+
+def parseSchema(s, table):
+
+    if isinstance(s, str):
+        if not len(s):
+            raise SchemaFormatError("Empty string encountered")
+        if s == '*':
+            return _AnyItems()
+        elif s == '?':
+            return _AnyItem()
+        elif s == '.':
+            return _AnyString()
+        elif s.startswith('='):
+            return _ExactString(s[1:])
+        elif s.startswith('.'):
+            try:
+                return table[s[1:]]
+            except KeyError:
+                raise SchemaFormatError("Unknown reference %s"%s)
+        else:
+            m = _RE_PAT.match(s)
+            if m:
+                flags = 0
+                for char in m.group(2):
+                    flags |= { "i":re.I, "l":re.L, "m":re.M, "s":re.S,
+                               "t":re.T, "x":re.X }[char.lower()]
+                try:
+                    p = re.compile(m.group(1), flags)
+                except re.error, e:
+                    raise SchemaFormatError("Couldn't compile %s"%s)
+
+                return _ReString(s, p)
+
+            raise SchemaFormatError("Confusing entry %s"%s)
+    elif len(s) and isinstance(s[0], str) and s[0].startswith(':'):
+        tag = s[0]
+
+        m = re.match(r'\:(\d*)(\-\d*)?$', tag)
+        if m:
+            g = m.groups()
+            if g[0]:
+                lo = int(g[0], 10)
+            else:
+                lo = 0
+            if g[1]:
+                if len(g[1]) > 1:
+                    hi = int(g[1][1:], 10)
+                else:
+                    hi = sys.maxint
+            else:
+                hi = lo
+        else:
+            try:
+                lo,hi = { ":maybe": (0,1),
+                          ":oneof": (1,1),
+                          ":anyof": (0,sys.maxint),
+                          ":someof":(1,sys.maxint),
+                          ":unordered": (None, None) }[tag]
+            except KeyError:
+                raise SchemaFormatError("Unknown tag %s"%tag)
+
+        subitems = [ parseSchema(i, table) for i in s[1:] ]
+        if lo is not None:
+            return _NMatches(subitems, lo, hi)
+        else:
+            return _Unordered(subitems)
+    else:
+        return _List([ parseSchema(i, table) for i in s ])
+
+# Schema format:
+#   "=string" matches a string itself.
+#   "*" matches any number of items and can occur only at the end of a list.
+#   "?" matches a single item.
+#   "." matches any string.
+#   "/re/" matches a regex.
+#   ".name" matches a named schema
+#
+#   (i1 i2 i3)         matches a list of i1 followed by i2 followed by i3.
+#
+#   (:maybe  i1)       matches zero or one of i1.
+#   (:oneof  i1 i2 i3) matches any one of the items i1, i2, i3.
+#   (:anyof  i1 i2 i3) matches zero or more of the items i1, i2, i3.
+#   (:someof i1 i2 i3) matches one or more of i1, i2, and i3.
+#
+#   (:unordered i1 i2 i3) matches all of i1, i2, and i3, in any order.
+#
+# The matching algorithm is a stupid greedy algorithm.  If you need to
+# check stuff it can't handle, write a new thing.
+

Added: updater/trunk/lib/sexp/encode.py
===================================================================
--- updater/trunk/lib/sexp/encode.py	                        (rev 0)
+++ updater/trunk/lib/sexp/encode.py	2008-08-30 16:33:47 UTC (rev 16692)
@@ -0,0 +1,223 @@
+
+
+import base64
+import binascii
+import re
+import hashlib
+
+def _encodeHex(s):
+    """
+      Encode a string in hex format.
+
+      >>> _encodeHex("Hello world")
+      '#48656c6c6f20776f726c64#'
+      >>> _encodeHex("")
+      '##'
+    """
+    return "#%s#"%binascii.b2a_hex(s)
+
+def _encodeBase64(s):
+    """
+       Encode a string in base64 format, with embedded newlines.
+
+       >>> _encodeBase64("")
+       '||'
+       >>> _encodeBase64("Hello world")
+       '|SGVsbG8gd29ybGQ=|'
+       >>> print _encodeBase64("Hello world")
+       |SGVsbG8gd29ybGQ=|
+       >>> _encodeBase64("Good night, sweet prince! A flock of angels "
+       ...               "sing thee to thy rest")
+       '|R29vZCBuaWdodCwgc3dlZXQgcHJpbmNlISBBIGZsb2NrIG9mIGFuZ2VscyBzaW5nIHRoZWUgdG8g\\ndGh5IHJlc3Q=|'
+
+    """
+    return "|%s|"%base64.encodestring(s).strip()
+
+# Map from a character value to its representation in a quoted-string.
+_QUOTED_MAP = { '\b' : "\\b",
+                '\t' : "\\t",
+                '\v' : "\\v",
+                '\n' : "\\n",
+                '\f' : "\\f",
+                '\r' : "\\r",
+                '"'  : "\"",
+                '\b' : "\\b",
+                '\\' : "\\", }
+for x in xrange(256):
+    if 32 <= x <= 126:
+        _QUOTED_MAP[chr(x)] = chr(x)
+    elif not _QUOTED_MAP.has_key(chr(x)):
+        _QUOTED_MAP[chr(x)] = "\\x%02x"%x
+del x
+
+
+_QUOTED_CHAR_RE = re.compile(r'[^\ -\~]')
+def _replaceQuotedChar(match, _Q=_QUOTED_MAP):
+    """Helper function for replacing ."""
+    return _Q[match.group(0)]
+
+def _encodeQuoted(s, _Q=_QUOTED_MAP):
+    """
+       >>> _encodeQuoted("")
+       '""'
+       >>> _encodeQuoted("Hello world")
+       '"Hello world"'
+       >>> print _encodeQuoted("Hello \xff\b")
+       "Hello \\xff\\b"
+    """
+    # This implementation is a slower for the case where lots of stuff
+    # needs quoting, but faster for the case where only some stuff
+    # needs quoting.  If more than about 1/4 of the characters need
+    # quoting, then the commented-out version below is faster.  Yes,
+    # this is a stupid overoptimization.
+    return '"%s"'%(_QUOTED_CHAR_RE.sub(_replaceQuotedChar, s))
+
+    #return '"%s"'%("".join(map(_QUOTED_MAP.__getitem__, s)))
+
+def _encodeRaw(s):
+    """
+       Encode a string in the "raw" format used for canonical encodings.
+
+       >>> _encodeRaw("")
+       '0:'
+       >>> _encodeRaw(" ")
+       '1: '
+       >>> _encodeRaw(" \\n")
+       '2: \\n'
+    """
+    return "%d:%s"%(len(s),s)
+
+_TOKEN_PAT = r"[a-zA-Z\-\.\/\_\:\*\+\=][a-zA-Z0-9\-\.\/\_\:\*\+\=]*"
+
+_TOKEN_RE = re.compile(_TOKEN_PAT)
+def _writeToken(write,s):
+    """Write a string in the token (unencoded) format.  Only works for strings
+       matching _TOKEN_RE.
+    """
+    assert _TOKEN_RE.match(s)
+    return s
+
+def _encodeCleanest(s, indent=0):
+    """Encode s in whatever format seems most human-readable."""
+
+    if _TOKEN_RE.match(s):
+        return s
+    n = 0
+    for ch in s:
+        if _QUOTED_MAP[ch] != ch:
+            n += 1
+    if n > 3 and n > len(s)//4:
+        if len(s) > 16:
+            return _encodeBase64(s).replace("\n", " "*(indent+1)+"\n")
+        else:
+            return _encodeHex(s)
+    else:
+        return _encodeQuoted(s)
+
+def _encodePrettyPrint(s, write, indent=0, niceWidth=80):
+    if isinstance(s, str):
+        write(_encodeCleanest(s))
+        return
+    elif len(s) == 0:
+        write("()")
+        return
+
+    if isinstance(s[0], str):
+        parts = [ " "*indent, "(", _encodeCleanest(s), "\n" ]
+    else:
+        parts = [ "(" ]
+
+def _encodeCanonical(rep, append):
+    """Given an s-expression in <rep>, encode it in canonical format,
+       passing each part to the function "append" as it is done.
+    """
+    if isinstance(rep, str):
+        append(_encodeRaw(rep))
+        return
+
+    append("(")
+
+    stack = [ ]
+    push = stack.append
+    pop = stack.pop
+    idx = 0
+    while 1:
+        while idx == len(rep):
+            append(")")
+            try:
+                rep,idx = pop()
+            except IndexError:
+                return
+        if isinstance(rep[idx], str):
+            append(_encodeRaw(rep[idx]))
+            idx += 1
+            continue
+        push((rep,idx+1))
+        rep = rep[idx]
+        idx = 0
+        append("(")
+
+def encode_canonical(rep):
+    """Return the canonical encoding of the s-expression <rep>.
+
+       >>> encode_canonical("abc")
+       '3:abc'
+       >>> encode_canonical(["a"])
+       '(1:a)'
+       >>> encode_canonical(["a", "bc"])
+       '(1:a2:bc)'
+       >>> encode_canonical([[["X", "ab c"]], "d"])
+       '(((1:X4:ab c))1:d)'
+    """
+    parts = []
+    _encodeCanonical(rep, parts.append)
+    return "".join(parts)
+
+def hash_canonical(rep, hashobj):
+    """Given a hashlib hash object <hashobj>, adds the canonical
+       encoding of the s-expression <rep> to hashobj.
+
+       >>> import hashlib
+       >>> s = hashlib.sha256()
+       >>> s.update("(3:abc(6:hello 5:world)(1:9))")
+       >>> s.hexdigest()
+       '43f7726155f2700ff0d84240f3aaa9e5a1ee2e2c9e4702f7ac3ebcd45fd2f397'
+       >>> s = hashlib.sha256()
+       >>> hash_canonical(["abc", ["hello ", "world"], ["9"] ], s)
+       >>> s.hexdigest()
+       '43f7726155f2700ff0d84240f3aaa9e5a1ee2e2c9e4702f7ac3ebcd45fd2f397'
+    """
+    _encodeCanonical(rep, hashobj.update)
+
+def _encodePretty(rep, append, indent_step=2, niceWidth=80):
+    stack = []
+    idx = 0
+    indent = 0
+    append("(")
+    pop = stack.pop
+    push = stack.append
+
+    while 1:
+        while idx == len(rep):
+            append(")")
+            indent -= indent_step
+            try:
+                rep,idx = pop()
+            except IndexError:
+                append("\n")
+                return
+            else:
+                append(" ")
+        if isinstance(rep[idx], str):
+            _encodePrettyPrint(rep[idx], append, indent, niceWidth)
+            idx += 1
+            if idx < len(rep):
+                append(" ")
+            continue
+        push((rep,idx+1))
+        rep = rep[idx]
+        idx = 0
+        indent += indent_step
+        append("\n%s("%(" "*indent))
+
+

Added: updater/trunk/lib/sexp/parse.py
===================================================================
--- updater/trunk/lib/sexp/parse.py	                        (rev 0)
+++ updater/trunk/lib/sexp/parse.py	2008-08-30 16:33:47 UTC (rev 16692)
@@ -0,0 +1,210 @@
+
+import re
+import base64
+import binascii
+import re
+
+# Partial implementation of Rivest's proposed S-Expressions standard
+# as documented at
+#      http://people.csail.mit.edu/rivest/Sexp.txt
+#
+# It's slightly optimized.
+#
+# Not implemented:
+#    [display hints]
+#    {basic transport}
+
+__all__ = [ 'FormatError', 'parse' ]
+
+class FormatError(Exception):
+    """Raised when parsing fails."""
+    pass
+
+_TOKEN_PAT = r"[a-zA-Z\-\.\/\_\:\*\+\=][a-zA-Z0-9\-\.\/\_\:\*\+\=]*"
+# Regular expression to match a single lexeme from an encode s-expression.
+_LEXEME_START_RE = re.compile(
+    r""" \s* (?: (%s) |     # Grp 0: A token.
+                 ([0-9]*(?: [\:\|\#\{]  |   # Grp1 : start of string...
+                            \"(?:[^\\\"]+|\\.)*\"))  | # or qstring.
+                 ([\(\)])  # Grp 2: a paren of some kind.
+              )"""
+                              %_TOKEN_PAT,re.X|re.M|re.S)
+
+class _P:
+    """Helper class for parenthesis tokens."""
+    def __init__(self, val):
+        self.val = val
+    def __repr__(self):
+        return "_P(%r)"%self.val
+
+_OPEN_PAREN = _P("(")
+_CLOSE_PAREN = _P(")")
+del _P
+_SPACE_RE = re.compile(r'\s+')
+
+# Matches all characters in a string that we need to unquote.
+_UNQUOTE_CHAR_RE = re.compile(r'''
+     \\  (?: [abtnvfr] | \r \n ? | \n \r ? | [xX] [A-Fa-f0-9]{2} | [0-8]{1,3} )
+     ''')
+
+# Map from quoted representation to unquoted format.
+_UNQUOTE_CHAR_MAP = { 'a': '\a',
+                      'b': '\b',
+                      't': '\t',
+                      'n': '\n',
+                      'v': '\v',
+                      'f': '\f',
+                      'r': '\r' }
+def _unquoteChar(ch, _U=_UNQUOTE_CHAR_MAP):
+    ch = ch[1:]
+    try:
+        return _U[ch]
+    except KeyError:
+        pass
+    if ch[0] in "\n\r":
+        return ""
+    elif ch[0] in 'xX':
+        return chr(int(ch[1:], 16))
+    else:
+        i = int(ch[1:], 8)
+        if i >= 256:
+            raise FormatError("Octal character format out of range.")
+        return chr(i)
+
+def _lexItems(s):
+    """Generator that iterates over the lexical items in an encoded
+       s-expression.  Yields a string for strings, or the special objects
+       _OPEN_PAREN and _CLOSE_PAREN.
+
+       >>> list(_lexItems('(4:a)b   hello) (world 1:a 0: '))
+       [_P('('), 'a)b ', 'hello', _P(')'), _P('('), 'world', 'a', '']
+
+       >>> list(_lexItems('a b-c 1#20#2#2061##686877# |aGVsbG8gd29ybGQ|'))
+       ['a', 'b-c', ' ', ' a', 'hhw', 'hello world']
+
+       >>> list(_lexItems('#2 0# |aGVs\\nbG8 gd29yb   GQ|  '))
+       [' ', 'hello world']
+
+       >>> list(_lexItems('|YWJjZA==| x |YWJjZA| 3|Y   W J  j|'))
+       ['abcd', 'x', 'abcd', 'abc']
+
+       >>> list(_lexItems('("1""234""hello world" 3"abc" 4"    " )'))
+       [_P('('), '1', '234', 'hello world', 'abc', '    ', _P(')')]
+
+    """
+    s = s.strip()
+    while s:
+        m = _LEXEME_START_RE.match(s)
+        if not m:
+            raise FormatError("No pattern match at %r"%s[:30])
+        g = m.groups()
+        if g[2]:
+            if g[2] == "(":
+                yield _OPEN_PAREN
+            else:
+                yield _CLOSE_PAREN
+            s = s[m.end():]
+        elif g[0]:
+            # we have a token.  Go with that.
+            yield g[0]
+            s = s[m.end():]
+        else:
+            assert g[1]
+            lastChar = g[1][-1]
+            if lastChar == '"':
+                qidx = g[1].index('"')
+                quoted = g[1][qidx+1:-1] # All but quotes.
+                data = _UNQUOTE_CHAR_RE.sub(_unquoteChar, quoted)
+                if qidx != 0:
+                    num = int(g[1][:qidx], 10)
+                    if num != len(data):
+                        raise FormatError("Bad length on quoted string")
+                yield data
+                s = s[m.end():]
+                continue
+
+            num = g[1][:-1]
+            if len(num):
+                num = int(num, 10)
+            else:
+                num = None
+
+            if lastChar == ':':
+                if num is None:
+                    raise FormatError()
+                s = s[m.end():]
+                if len(s) < num:
+                    raise FormatError()
+                yield s[:num]
+                s = s[num:]
+            elif lastChar == '#':
+                s = s[m.end():]
+                try:
+                    nextHash = s.index('#')
+                except ValueError:
+                    raise FormatError("Unterminated # string")
+                dataStr = _SPACE_RE.sub("", s[:nextHash])
+                try:
+                    data = binascii.a2b_hex(dataStr)
+                except TypeError:
+                    raise FormatError("Bad hex string")
+                if num is not None and len(data) != num:
+                    raise FormatError("Bad number on hex string")
+                yield data
+                s = s[nextHash+1:]
+            elif lastChar == '|':
+                s = s[m.end():]
+                try:
+                    nextBar = s.index('|')
+                except ValueError:
+                    raise FormatError("Unterminated | string")
+                dataStr = _SPACE_RE.sub("", s[:nextBar])
+                # Re-pad.
+                mod = len(dataStr) % 4
+                if mod:
+                    dataStr += "=" * (4 - mod)
+                try:
+                    data = binascii.a2b_base64(dataStr)
+                except TypeError:
+                    raise FormatError("Bad base64 string")
+                if num is not None and len(data) != num:
+                    raise FormatError("Bad number on base64 string")
+                yield data
+                s = s[nextBar+1:]
+            else:
+                assert None
+
+def parse(s):
+    """
+       >>> parse("()")
+       []
+       >>> parse("(1:X3:abc1:d)")
+       ['X', 'abc', 'd']
+       >>> parse("(1:X((3:abc))1:d)")
+       ['X', [['abc']], 'd']
+       >>> parse("(a b (d\\ne f) (g) #ff00ff# |aGVsbG8gd29ybGQ|)")
+       ['a', 'b', ['d', 'e', 'f'], ['g'], '\\xff\\x00\\xff', 'hello world']
+
+    """
+    outermost = []
+    stack = [ ]
+    push = stack.append
+    pop = stack.pop
+    add = outermost.append
+
+    for item in _lexItems(s):
+        if item is _OPEN_PAREN:
+            next = []
+            add(next)
+            push(add)
+            add = next.append
+        elif item is _CLOSE_PAREN:
+            add = pop()
+        else:
+            # it's a string.
+            add(item)
+
+    if len(outermost) != 1:
+        raise FormatError("No enclosing parenthesis on list")
+    return outermost[0]
+

Added: updater/trunk/lib/sexp/tests.py
===================================================================
--- updater/trunk/lib/sexp/tests.py	                        (rev 0)
+++ updater/trunk/lib/sexp/tests.py	2008-08-30 16:33:47 UTC (rev 16692)
@@ -0,0 +1,31 @@
+
+import unittest
+import doctest
+
+import sexp.parse
+import sexp.access
+import sexp.encode
+
+class EncodingTest(unittest.TestCase):
+    def testQuotedString(self):
+        self.assertEquals(1,1)
+
+
+
+def suite():
+    import sexp.tests
+    suite = unittest.TestSuite()
+
+    suite.addTest(doctest.DocTestSuite(sexp.encode))
+    suite.addTest(doctest.DocTestSuite(sexp.parse))
+    suite.addTest(doctest.DocTestSuite(sexp.access))
+
+    loader = unittest.TestLoader()
+    suite.addTest(loader.loadTestsFromModule(sexp.tests))
+
+    return suite
+
+
+if __name__ == '__main__':
+
+    unittest.TextTestRunner(verbosity=1).run(suite())

Added: updater/trunk/specs/U1-automatic-updates.txt
===================================================================
--- updater/trunk/specs/U1-automatic-updates.txt	                        (rev 0)
+++ updater/trunk/specs/U1-automatic-updates.txt	2008-08-30 16:33:47 UTC (rev 16692)
@@ -0,0 +1,377 @@
+
+Filename: xxx-automatic-updates.txt
+Title: Automatic Software Update Protocol
+Version: $Revision$
+Last-Modified: $Date$
+Author: Matt Edman
+Created: 30-July-2008
+Status: Open
+
+
+Scope
+
+  This proposal specifies the method by which an automatic update client can
+  determine the most recent recommended Tor installation package for the
+  user's platform, download the package, and then verify that the package was
+  downloaded successfully. While this proposal focuses on only the Tor
+  software, the protocol defined is sufficiently extensible such that other
+  components of the Tor bundles, like Vidalia, Polipo, and Torbutton, can be
+  managed and updated by the automatic update client as well.
+
+  The initial target platform for the automatic update framework is Windows,
+  given that's the platform used by a majority of our users and that it lacks
+  a sane package management system that many Linux distributions already have.
+  Our second target platform will be Mac OS X, and so the protocol will be
+  designed with this near-future direction in mind.
+
+  Other client-side aspects of the automatic update process, such as user
+  interaction, the interface presented, and actual package installation
+  procedure, are outside the scope of this proposal.
+
+
+Motivation
+
+  Tor releases new versions frequently, often with important security,
+  anonymity, and stability fixes. Thus, it is important for users to be able
+  to promptly recognize when new versions are available and to easily
+  download, authenticate, and install updated Tor and Tor-related software
+  packages.
+  
+  Tor's control protocol [2] provides a method by which controllers can
+  identify when the user's Tor software is obsolete or otherwise no longer
+  recommended. Currently, however, no mechanism exists for clients to
+  automatically download and install updated Tor and Tor-related software for
+  the user.
+
+
+Design Overview
+
+  The core of the automatic update framework is a well-defined file called a
+  "recommended-packages" file. The recommended-packages file is accessible via
+  HTTP[S] at one or more well-defined URLs. An example recommended-packages
+  URL may be:
+
+    https://updates.torproject.org/recommended-packages
+
+  The recommended-packages document is formatted according to Section 1.2
+  below and specifies the most recent recommended installation package
+  versions for Tor or Tor-related software, as well as URLs at which the
+  packages and their signatures can be downloaded.
+
+  An automatic update client process runs on the Tor user's computer and
+  periodically retrieves the recommended-packages file according to the method
+  described in Section 2.0. As described further in Section 1.2, the
+  recommended-packages file is signed and can be verified by the automatic
+  update client with one or more public keys included in the client software.
+  Since it is signed, the recommended-packages file can be mirrored by
+  multiple hosts (e.g., Tor directory authorities), whose URLs are included in
+  the automatic update client's configuration.
+
+  After retrieving and verifying the recommended-packages file, the automatic
+  update client compares the versions of the recommended software packages
+  listed in the file with those currently installed on the end-user's
+  computer. If one or more of the installed packages is determined to be out
+  of date, an updated package and its signature will be downloaded from one of
+  the package URLs listed in the recommended-packages file as described in
+  Section 2.2.
+
+  The automatic update system uses a multilevel signing key scheme for package
+  signatures. There are a small number of entities we call "packaging
+  authorities" that each have their own signing key. A packaging authority is
+  responsible for signing and publishing the recommended-packages file.
+  Additionally, each individual packager responsible for producing an
+  installation package for one or more platforms has their own signing key.
+  Every packager's signing key must be signed by at least one of the packaging
+  authority keys.
+
+
+Specification
+
+  1. recommended-packages Specification
+
+  In this section we formally specify the format of the published
+  recommended-packages file.
+
+  1.1. Document Meta-format
+  
+  The recommended-packages document follows the lightweight extensible
+  information format defined in Tor's directory protocol specification [1]. In
+  the interest of self-containment, we have reproduced the relevant portions
+  of that format's specification in this Section. (Credits to Nick Mathewson
+  for much of the original format definition language.)
+  
+  The highest level object is a Document, which consists of one or more
+  Items.  Every Item begins with a KeywordLine, followed by zero or more
+  Objects. A KeywordLine begins with a Keyword, optionally followed by
+  whitespace and more non-newline characters, and ends with a newline.  A
+  Keyword is a sequence of one or more characters in the set [A-Za-z0-9-].
+  An Object is a block of encoded data in pseudo-Open-PGP-style
+  armor. (cf. RFC 2440)
+
+  More formally:
+
+    Document     ::= (Item | NL)+
+    Item         ::= KeywordLine Object*
+    KeywordLine  ::= Keyword NL | Keyword WS ArgumentChar+ NL
+    Keyword      ::= KeywordChar+
+    KeywordChar  ::= 'A' ... 'Z' | 'a' ... 'z' | '0' ... '9' | '-'
+    ArgumentChar ::= any printing ASCII character except NL.
+    WS           ::= (SP | TAB)+
+    Object       ::= BeginLine Base-64-encoded-data EndLine
+    BeginLine    ::= "-----BEGIN " Keyword "-----" NL
+    EndLine      ::= "-----END " Keyword "-----" NL
+
+    The BeginLine and EndLine of an Object must use the same keyword.
+  
+  In our Document description below, we also tag Items with a multiplicity in
+  brackets. Possible tags are:
+
+    "At start, exactly once": These items MUST occur in every instance of the
+    document type, and MUST appear exactly once, and MUST be the first item in
+    their documents.
+        
+    "Exactly once": These items MUST occur exactly one time in every
+    instance of the document type.
+
+    "Once or more": These items MUST occur at least once in any instance
+    of the document type, and MAY occur more than once.
+
+    "At end, exactly once": These items MUST occur in every instance of
+    the document type, and MUST appear exactly once, and MUST be the
+    last item in their documents.
+
+  1.2. recommended-packages Document Format
+
+  When interpreting a recommended-packages Document, software MUST ignore
+  any KeywordLine that starts with a keyword it doesn't recognize; future
+  implementations MUST NOT require current automatic update clients to
+  understand any KeywordLine not currently described.
+
+  In lines that take multiple arguments, extra arguments SHOULD be
+  accepted and ignored.
+
+  The currently defined Items contained in a recommended-packages document
+  are:
+
+    "recommended-packages-format" SP number NL
+
+      [Exactly once]
+
+      This Item specifies the version of the recommended-packages format that
+      is contained in the subsequent document. The version defined in this
+      proposal is version "1". Subsequent iterations of this protocol MUST
+      increment this value if they introduce incompatible changes to the
+      document format and MAY increment this value if they only introduce
+      additional Keywords.
+
+    "published" SP YYYY-MM-DD SP HH:MM:SS NL
+
+      [Exactly once]
+
+      The time, in GMT, when this recommended-packages document was generated.
+      Automatic update clients SHOULD ignore Documents over 60 days old.
+
+    "tor-stable-win32-version" SP TorVersion NL
+
+      [Exactly once]
+
+      This keyword specifies the latest recommended release of Tor's "stable"
+      branch for the Windows platform that has an installation package
+      available. Note that this version does not necessarily correspond to the
+      most recently tagged stable Tor version, since that version may not yet
+      have an installer package available, or may have known issues on
+      Windows.
+
+      The TorVersion field is formatted according to Section 2 of Tor's 
+      version specification [3].
+
+    "tor-stable-win32-package" SP Url NL
+
+      [Once or more]
+
+      This Item specifies the location from which the most recent
+      recommended Windows installation package for Tor's stable branch can be
+      downloaded.
+    
+      When this Item appears multiple times within the Document, automatic
+      update clients SHOULD select randomly from the available package
+      mirrors.
+
+    "tor-dev-win32-version" SP TorVersion NL
+
+      [Exactly once]
+    
+      This Item specifies the latest recommended release of Tor's
+      "development" branch for the Windows platform that has an installation
+      package available. The same caveats from the description of
+      "tor-stable-win32-version" also apply to this keyword.
+
+      The TorVersion field is formatted according to Section 2 of Tor's
+      version specification [3].
+
+    "tor-dev-win32-package" SP Url NL
+  
+      [Once or more]
+
+      This Item specifies the location from which the most recent recommended
+      Windows installation package and its signature for Tor's development
+      branch can be downloaded.
+
+      When this Keyword appears multiple times within the Document, automatic
+      update clients SHOULD select randomly from the available package
+      mirrors.
+
+    "signature" NL SIGNATURE NL
+
+      [At end, exactly once]
+
+      The "SIGNATURE" Object contains a PGP signature (using a packaging
+      authority signing key) of the entire document, taken from the beginning
+      of the "recommended-packages-format" keyword, through the newline after
+      the "signature" Keyword.
+
+
+  2. Automatic Update Client Behavior
+
+  The client-side component of the automatic update framework is an
+  application that runs on the end-user's machine. It is responsible for
+  fetching and verifying a recommended-packages document, as well as
+  downloading, verifying, and subsequently installing any necessary updated
+  software packages.
+
+  2.1. Download and verify a recommended-packages document
+
+  The first step in the automatic update process is for the client to download
+  a copy of the recommended-packages file. The automatic update client
+  contains a (hardcoded and/or user-configurable) list of URLs from which it
+  will attempt to retrieve a recommended-packages file.
+
+  Connections to each of the recommended-packages URLs SHOULD be attempted in
+  the following order:
+
+    1) HTTPS over Tor
+    2) HTTP over Tor
+    3) Direct HTTPS
+    4) Direct HTTP
+
+  If the client fails to retrieve a recommended-packages document via any of
+  the above connection methods from any of the configured URLs, the client
+  SHOULD retry its download attempts following an exponential back-off
+  algorithm. After the first failed attempt, the client SHOULD delay one hour
+  before attempting again, up to a maximum of 24 hours delay between retry
+  attempts.
+
+  After successfully downloading a recommended-packages file, the automatic
+  update client will verify the signature using one of the public keys
+  distributed with the client software. If more than one recommended-packages
+  file is downloaded and verified, the file with the most recent "published"
+  date that is verified will be retained and the rest discarded.
+
+  2.2. Download and verify the updated packages
+
+  The automatic update client next compares the latest recommended package
+  version from the recommended-packages document with the currently installed
+  Tor version. If the user currently has installed a Tor version from Tor's
+  "development" branch, then the version specified in "tor-dev-*-version" Item
+  is used for comparison. Similarly, if the user currently has installed a Tor
+  version from Tor's "stable" branch, then the version specified in the
+  "tor-stable-*version" Item is used for comparison. Version comparisons are
+  done according to Tor's version specification [3].
+
+  If the automatic update client determines an installation package newer than
+  the user's currently installed version is available, it will attempt to
+  download a package appropriate for the user's platform and Tor branch from a
+  URL specified by a "tor-[branch]-[platform]-package" Item. If more than one
+  mirror for the selected package is available, a mirror will be chosen at
+  random from all those available.
+
+  The automatic update client must also download a ".asc" signature file for
+  the retrieved package. The URL for the package signature is the same as that
+  for the package itself, except with the extension ".asc" appended to the
+  package URL.
+
+  Connections to download the updated package and its signature SHOULD be
+  attempted in the same order described in Section 2.1.
+
+  After completing the steps described in Sections 2.1 and 2.2, the automatic
+  update client will have downloaded and verified a copy of the latest Tor
+  installation package. It can then take whatever subsequent platform-specific
+  steps are necessary to install the downloaded software updates.
+
+  2.3. Periodic checking for updates
+
+  The automatic update client SHOULD maintain a local state file in which it
+  records (at a minimum) the timestamp at which it last retrieved a
+  recommended-packages file and the timestamp at which the client last
+  successfully downloaded and installed a software update.
+
+  Automatic update clients SHOULD check for an updated recommended-packages
+  document at most once per day but at least once every 30 days.
+
+
+  3. Future Extensions
+
+  There are several possible areas for future extensions of this framework.
+  The extensions below are merely suggestions and should be the subject of
+  their own proposal before being implemented.
+  
+  3.1. Additional Software Updates
+  
+  There are several software packages often included in Tor bundles besides
+  Tor, such as Vidalia, Privoxy or Polipo, and Torbutton. The versions and
+  download locations of updated installation packages for these bundle
+  components can be easily added to the recommended-packages document
+  specification above.
+
+  3.2. Including ChangeLog Information
+
+  It may be useful for automatic update clients to be able to display for
+  users a summary of the changes made in the latest Tor or Tor-related
+  software release, before the user chooses to install the update. In the
+  future, we can add keywords to the specification in Section 1.2 that specify
+  the location of a ChangeLog file for the latest recommended package
+  versions. It may also be desirable to allow localized ChangeLog information,
+  so that the automatic update client can fetch release notes in the
+  end-user's preferred language.
+
+  3.3. Weighted Package Mirror Selection
+
+  We defined in Section 1.2 a method by which automatic update clients can
+  select from multiple available package mirrors. We may want to add a Weight
+  argument to the "*-package" Items that allows the recommended-packages file
+  to suggest to clients the probability with which a package mirror should be
+  chosen. This will allow clients to more appropriately distribute package
+  downloads across available mirrors proportional to their approximate
+  bandwidth.
+
+
+Implementation
+
+  Implementation of this proposal will consist of two separate components.
+
+  The first component is a small "au-publish" tool that takes as input a
+  configuration file specifying the information described in Section 1.2 and a
+  private key. The tool is run by a "packaging authority" (someone responsible
+  for publishing updated installation packages), who will be prompted to enter
+  the passphrase for the private key used to sign the recommended-packages
+  document. The output of the tool is a document formatted according to
+  Section 1.2, with a signature appended at the end. The resulting document
+  can then be published to any of the update mirrors.
+
+  The second component is an "au-client" tool that is run on the end-user's
+  machine. It periodically checks for updated installation packages according
+  to Section 2 and fetches the packages if necessary. The public keys used
+  to sign the recommended-packages file and any of the published packages are
+  included in the "au-client" tool.
+
+
+References
+  
+  [1] Tor directory protocol (version 3),
+  https://tor-svn.freehaven.net/svn/tor/trunk/doc/spec/dir-spec.txt
+  
+  [2] Tor control protocol (version 2),
+  https://tor-svn.freehaven.net/svn/tor/trunk/doc/spec/control-spec.txt
+
+  [3] Tor version specification,
+  https://tor-svn.freehaven.net/svn/tor/trunk/doc/spec/version-spec.txt

Added: updater/trunk/specs/U2-formats.txt
===================================================================
--- updater/trunk/specs/U2-formats.txt	                        (rev 0)
+++ updater/trunk/specs/U2-formats.txt	2008-08-30 16:33:47 UTC (rev 16692)
@@ -0,0 +1,430 @@
+
+
+Scope
+
+   This document descibes a repository and document format for use in
+   distributing Tor bundle updates.  It is meant to be a component of
+   an overall automatic update system.
+
+   Not described in this document is the design the packages or their
+   install process, though some requirements are listed.
+
+Proposed code name
+
+   Since "auto-update" is so generic, I've been thinking about going with
+   with "hapoc" or "glider" or "petaurus", all based on the sugar
+   glider you get when you search for "handy pocket creature".
+
+Metaformat
+
+   All documents use Rivest's SEXP meta-format as documented at
+     http://people.csail.mit.edu/rivest/sexp.html
+   with the restriction that no "display hint" fields are to be used,
+   and the base64 transit encoding isn't used either.
+
+   In descriptions of syntax below, we use regex-style qualifiers, so
+   that in
+        (sofa slipcover? occupant* leg+)
+   the sofa will have an optional slipcover, zero or more occupants,
+   and one or more legs.  This pattern matches (sofa leg) and (sofa
+   slipcover occupant occupant leg leg leg leg) but not (sofa leg
+   slipcover).
+
+   We also use a braces notation to indicate elements that can occur
+   in any order.  For example,
+        (bread {flour+ eggs? yeast})
+   matches a list starting with "bread", and then containing a one or
+   more occurances of flours, zero or one occurances of eggs, and one
+   occurance of yeast, in any order.  This pattern matches (bread eggs
+   yeast flour) but not (bread yeast) or (bread flour eggs yeast
+   macadamias).
+
+
+Goals
+
+   It should be possible to mirror a repository using only rsync and cron.
+
+   Separate keys should be used for different people and different
+   roles.
+
+   Only a minimal set of keys should have to be kept online to keep
+   the system running.
+
+   The system should handle any single computer or system or person
+   being unavailable.
+
+   The system should be pretty future-proof.
+
+   The client-side of the architecture should be really easy to implement.
+
+Non-goals:
+
+   This is not a package format.  Instead, we reuse existing package
+   formats for each platform.
+
+   This is not a general-purpose package manager like yum or apt: it
+   assumes that users will want to have one or more of a set of
+   "bundles", not an arbitary selection of packages dependant on one
+   another.
+
+   This is also not a general-purpose package format.  It assumes the
+   existance of an external package format that can handle install,
+   update, remove, and version query.
+
+Architecture: Repository
+
+   A "repository" is a directory hierarchy containing packages,
+   bundles, and metadata, all signed.
+
+   A "package" is a single independent downloadable, installable
+   binary archive.  It could be an MSI, an RPM, a DMG, or so on.
+   Every package is compiled version of some version of some piece of
+   software (an 'application') for some (os, architecture,
+   version) combinations.  Some packages are "glue" that make other
+   packages work well together or get configured properly.
+
+   A "bundle" is a list of of packages to be installed together.
+   Examples might be "Tor Browser Bundle" or "Tor plus controller".  A
+   bundle is versioned, and every bundle is for a particular (os,
+   architecture) combination.  Bundles specify which order to install
+   or update their components.
+
+   Metadata is used to:
+     - Find mirrors
+     - Validate packages, bundles, and metadata
+     - Make sure information is up-to-date
+     - Determine which packages are in a bundle
+
+   The filesystem layout in the repository is used for two purposes:
+     - To give mirrors an easy way to mirror only some of the repository.
+     - To specify which parts of the repository a given key has the
+       authority to sign.
+
+Architecture: Roles
+
+   Every role in the system are associated with a key.  Replacing
+   anything but a root key is supposed to be relatively easy.
+
+   Root-keys sign other keys, and certify them as belonging to roles.
+   Clients are configured to know the root keys.
+
+   Bundle keys certify the contents of a bundle.
+
+   Package keys certify packages for a given program or set of
+   programs.
+
+   Mirror keys certify a list of mirrors.  We expect this to be an
+   automated process.
+
+   Timestamp keys certify that given versions of other metadata
+   documents are up-to-date.  They are the only keys that absolutely
+   need to be kept online.  (If they are not, timestamps won't be
+   generated.)
+
+Directory layout
+
+   The following files exist in all repositories and mirrors:
+
+    /meta/keys.txt
+
+         Signed by the root keys; indicates keys and roles.
+         [XXXX I'm using the txt extension here.  Is that smart?]
+
+    /meta/mirrors.txt
+
+         Signed by the mirror key; indicates which parts of the repo
+         are mirrored where.
+
+    /meta/timestamp.txt
+
+         Signed by the timestamp key; indicates hashes and timestamps
+         for the latest versions of keys.txt and mirrors.txt.  Also
+         indicates the latest version of each bundle for each os/arch.
+
+    /bundleinfo/bundlename/os-arch/bundlename-os-arch-bundleversion.txt
+
+         Signed by the appropriate bundle key.  Describes what
+         packages make up a bundle, and what order to install,
+         uninstall, and upgrade them in.
+
+    /pkginfo/packagename/os-arch/version/packagename-os-arch-packageversion.txt
+
+         Signed by the appropriate package key.  Tells the name of the
+         file that makes up the bundle, its hash, and what procedure
+         is used to install it.
+
+    /packages/packagename/os-arch/version/(some filename)
+
+         The actual files 
+
+File formats: general principles
+
+   We use tagged lists (lists whose first element is a string) to
+   indicate typed objects.  Tags are generally lower-case, with
+   hyphens used for separation.  Think Lispy.
+
+   We use attrlists [lists of (key value) lists] to indicate a
+   multimap from keys to values.  Clients MUST accept unrecognized
+   keys in these attrlists.  The syntax for an attrlist with two
+   recognized and required keys is typically given as ({(key1 val1)
+   (key2 val2) (ATTR VAL)*}), indicating that the keys can occur in
+   any order, intermixed with other attributes.
+
+   Timestamp files will be downloaded very frequently; all other files
+   will be much smaller in size than package files.  Thus,
+   size-optimization for timestamp files makes sense and most other
+   other space optimizations don't.
+
+   Versions are represented as lists of the form (v I1 I2 I3 I4 ...)
+   where each item is a number or alphanumeric version component.  For
+   example, the version "0.2.1.5-alpha" is represented as (v 0 2 1 5
+   alpha).
+
+   All signed files are of the format:
+
+       (signed
+          X
+          (signature ({(keyid K) (method M) (ATTR VAL)*}) SIG)+
+       )
+
+   where: X is a list whose fist element describes the signed object.
+          K is the identifier of a key signing the document
+          M is the method to be used to make the signature
+          (ATTR VAL) is an arbitrary list whose first element is a
+             string.
+          SIG is a signature of the canonical encoding of X using the
+          identified key.
+
+   We define two signing methods at present:
+       sha256-oaep : A signature of the SHA256 hash of the canonical
+         encoding of X, using OAEP+ padding. [XXXX say more about mgf]
+
+   All times are given as strings of the format "YYYY-MM-DD HH:MM:SS",
+   in UTC.
+
+   All keys are of the format:
+      (pubkey ({(type TYPE) (ATTR VAL)*}) KEYVAL)
+   where TYPE is a string describing the type of the key and how it's
+   used to sign documents.  The type determines the interpretation of
+   KEYVAL.
+
+   The ID of a key is the type field concatenated with the SHA-256
+   hash of the canonical encoding of the KEYVAL field.
+
+   We define one keytype at present: 'rsa'.  The KEYVAL in this case is a
+   2-element list of (e p), with both values given in big-endian
+   binary format.  [This makes keys 45-60% more compact.]
+
+File formats: key list
+
+   (keylist
+     (ts TIME)
+     (keys
+       ((key ({(roles (ROLE PATH)+) (ATTR VAL)*}) KEY)*)
+     ...
+   )
+
+   The "ts" line describes when the keys file was updated.  Clients
+   MUST NOT replace a file with an older one, and SHOULD NOT accept a
+   file too far in the future.
+
+   A ROLE is one of "timestamp" "mirrors" "bundle" or "package"
+
+   PATH is a path relative to the top of the directory hierarchy.  It
+   may contain "*" elements to indicate "any file", and may end with a
+   "/**" element to indicate all files under a given point.
+
+File formats: mirror list
+
+   (mirrorlist
+     (ts TIME)
+     (mirrors
+       ( (mirror ({(name N) (urlbase U) (contents PATH+) (ATTR VAL)})) * )
+     ...
+  )
+
+  Every mirror is a copy of some or all of the directory hierarchy
+  containing at least the /meta, /bundles/, and /pkginfo directories.
+
+  N is a descriptive name for the mirror; U is the URL of the mirror's
+  base (i.e., the parent of the "meta" directory); and the PATH
+  elements are the components describing how much of the packages
+  directory is mirrored.  Their format is as in the keylist file.
+
+File formats: timestamp files
+  (ts
+    ({(at TIME)
+      (m TIME MIRRORLISTHASH)
+      (k TIME KEYLISTHASH)
+      (b NAME VERSION TIME PATH HASH)*})
+  )
+
+  TIME is when the timestamp was signed.  MIRRORLISTHASH is the digest
+  of the mirror-list file; KEYLISTHASH is the digest of the key list
+  file; and the 'b' entries are a list of the latest version of each
+  bundles and their locations and hashes.
+
+File formats: bundle files
+
+  (bundle
+    (at TIME)
+    (os OS)
+    [(arch ARCH)]
+    (version V)
+    (packages
+      (NAME VERSION PATH HASH ({(order INST UPDATE REMOVE)
+                                (optional)?
+                                (gloss LANG TEXT)*
+                                (longloss LANG TEXT)*
+                                 (ATTR VAL)*})? )* )
+  )
+
+  Most elements are self-explanatory; the INST, UPDATE, and REMOVE
+  elements of the order element are numbers defining the order in
+  which the packages are installed, updated, and removed respectively.
+  The "optional" element is present if the package is optional.
+  "Gloss" is an short utf-8 human-readable string explaining what the
+  package provides for the bundle; "longloss" is a longer such
+  utf-8 string.
+
+  (Note that the gloss strings are meant, not to describe the package,
+  but to describe what the package provides for the bundle.  For
+  example, "The Anonymous Email Bundle needs the Python Runtime to run
+  Mixminion.")
+
+File formats: package files
+  (package
+    ({(name NAME)
+     (version VERSION)
+     (format FMT ((ATTR VAL)*)? )
+     (path PATH)
+     (ts TIME)
+     (digest HASH)
+     (shortdesc LANG TEXT)*
+     (longdesc LANG TEXT)*
+     (ATTR VAL)* })
+  )
+
+  Most elements are self-explanatory.  The "FMT" element describes the
+  file format of the package, which should give enough information
+  about how to install it.
+
+  No two package files in the same repository should have the same
+  name and version.  If a package needs to be changed, the version
+  MUST be incremented.
+
+Workflows: The client application
+
+  Periodically, the client updater fetches a timestamp file from a
+  mirror.  If the timestamp in the file is up-to-date, the client
+  first checks to see whether the keys file listed is one that the
+  client has.  If not, the client fetches it, makes sure the hash of
+  the keys file matches the hash in the timestamp file, makes sure its
+  date is more recent than any keys file they have but not too far in
+  the future, and that it is signed by enough root keys that the
+  client recognizes.
+
+       [If the timestamp file is not up-to-date, the client tries a
+       few mirrors until it finds one with a good timestamp.]
+
+       [If the keys file from a mirror does not match the timestamp
+       file, the client tries a new mirror for both.]
+
+       [If the keys file is not signed by enough root keys, the client
+       warns the user and tries another mirror for both the timestamp
+       file and the keys file.]
+
+  Once the client has an up-to-date keys file, the client checks the
+  signature on the timestamp file.  Assuming it checks out, the client
+  refreshes the mirror list as needed, and refreshes any bundle files
+  to which the user is subscribed if the client does not have
+  the latest version of those files.  The client checks signatures on
+  these files, and fetches package metadata for any packages listed in
+  the bundle file that the client does not have, checks signatures on
+  these, and fetches binaries for packages that might need to be
+  installed or updated.  As the packages arrive, clients check their
+  hashes.
+
+  Once the client has gotten enough packages, it informs the user that
+  new packages have arrived, and asks them if they want to update.
+
+  Clients SHOULD cache at least the latest versions they have received
+  of all files.
+
+Workflow: Mirrors
+
+  Periodically, mirrors do an rsync or equivalent to fetch the latest
+  version of whatever parts of the repository have changed since the
+  version they currently hold.  Mirrors SHOULD replace older versions
+  of the repository idempotently, so that clients are less likely to
+  see inconsistant state.  Mirrors SHOULD validate the information
+  they receive, and not serve partial or inconsistant files.
+
+Workflow: Packagers
+
+  When a new binary package is done, the person making the package
+  runs a tool to generate and sign a package file, and sends both the
+  package and the package file to a repository admin.  Typically, the
+  base package file will be generated by inserting a version into a
+  template.
+
+  Packages MAY have as part of their build process a script to
+  generate the appropriately versioned package file.  This script
+  should at a minimum demand a build version, or use a timestamp in
+  place of a build version, to prevent two packages with the same
+  version from being created.
+
+Workflow: bundlers
+
+  When the packages in a bundle are done, the bundler runs a tool on
+  the package files to generate and sign a bundle file.  Typically,
+  this tool uses a template bundle file.
+
+Workflow: repository administrators
+
+  Repository administrators use a tool to validate signed files into the
+  repository.  The repository should not be altered manually.
+
+  This tool acts as follows:
+     - Package files may be added, but never replaced.
+     - Bundle files may be added, but never replaced.
+     - No file may be added unless it is syntactically valid and
+       signed by a key in the keys file authorized to sign files of
+       this type in this file's location location.
+
+     - A package file may not be added unless all of its binary
+       packages match their hashes.
+
+     - A bundle file may not be added unless all of its package files
+       are present and match their hashes.
+
+     - When adding a new keylist, bundle, or mirrors list, the
+       timestamp file must be regenerated immediately.
+
+Timing:
+
+  The timestamp file SHOULD be regenerated every 15 minutes.  Mirrors
+  SHOULD attempt to update every hour.  Clients SHOULD accept a
+  timestamp file up to 6 hours old.
+
+Format versioning and forward-compatibility:
+
+  All of the above formats include the ability to add more
+  attribute-value fields for backwards-compatible format changes.  If
+  we need to make a backwards incompatible format change, we create a
+  new filename for the new format.
+
+Key management and migration:
+
+  Root keys should be kept offline.  All keys except timestamp and
+  mirror keys should be stored encrypted.
+
+  All the formats above allow for multiple keys to sign a single
+  document.  To replace a compromised master key, it suffices to sign
+  keylist documents with both the compromised key and its replacement
+  until all clients have updated to a new version of the autoupdater.
+
+  To replace another key, it suffices to authorize the new key in the
+  keylist.  Note that a new package or bundle key must re-sign and
+  issue new versions of all packages or bundles it has generated.
+



More information about the tor-commits mailing list