[or-cvs] r17629: {updater} Implement lengths in thandy objects, mostly: Accept them, an (in updater/trunk: . lib/thandy specs)

nickm at seul.org nickm at seul.org
Mon Dec 15 21:18:19 UTC 2008


Author: nickm
Date: 2008-12-15 16:18:19 -0500 (Mon, 15 Dec 2008)
New Revision: 17629

Modified:
   updater/trunk/TODO
   updater/trunk/lib/thandy/SignerCLI.py
   updater/trunk/lib/thandy/download.py
   updater/trunk/lib/thandy/formats.py
   updater/trunk/lib/thandy/repository.py
   updater/trunk/specs/thandy-spec.txt
Log:
Implement lengths in thandy objects, mostly:
Accept them, and when they're present, don't fetch more bytes than
specified, since that would be dangerous.  Include lengths in every
generated object type except for the timestamp, since that would break
exising code.

Modified: updater/trunk/TODO
===================================================================
--- updater/trunk/TODO	2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/TODO	2008-12-15 21:18:19 UTC (rev 17629)
@@ -28,7 +28,18 @@
     any cached yet.
 
 - Security stuff that we should do that needs format changes.
-  2 Whenever we list a hash in a metafile, also list a file length.
+  . Whenever we list a hash in a metafile, also list a file length.
+    o Implement parsing; use length, when present, as a maximum
+      believable value to make sure we don't download too much
+    o Include lengths in generated packages and bundles
+    . Specify use of length field.
+    - Once everybody has been wanted to update their clients, include
+      lengths in timestamp files.
+    - Make lengths mandatory
+    - Maybe make lengths enforced for purposes other than a maximum
+      during fetch.
+    - Maybe stop early if Content-Length is greater than the expected
+      length.
 
 - Think more about issues 4, 7(A,B,C)
 

Modified: updater/trunk/lib/thandy/SignerCLI.py
===================================================================
--- updater/trunk/lib/thandy/SignerCLI.py	2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/lib/thandy/SignerCLI.py	2008-12-15 21:18:19 UTC (rev 17629)
@@ -92,17 +92,20 @@
 
     configFile = args[0]
     packages = {}
+    packageLen = {}
     for pkgFile in args[1:]:
         print "Loading", pkgFile
         f = open(pkgFile, 'r')
         p = json.load(f)
         f.close()
+        packageLen = os.stat(pkgFile).st_size
         _, r, _ = thandy.formats.checkSignedObj(p)
         if r != 'package':
             print pkgFile, "was not a package"
         packages[p['signed']['name']] = p['signed']
 
-    bundleObj = thandy.formats.makeBundleObj(configFile, packages.__getitem__)
+    bundleObj = thandy.formats.makeBundleObj(configFile, packages.__getitem__,
+                                             packageLen.__getitem__)
     signable = thandy.formats.makeSignable(bundleObj)
 
     ks = getKeyStore()

Modified: updater/trunk/lib/thandy/download.py
===================================================================
--- updater/trunk/lib/thandy/download.py	2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/lib/thandy/download.py	2008-12-15 21:18:19 UTC (rev 17629)
@@ -345,8 +345,8 @@
 class DownloadJob:
     """Abstract base class.  Represents a thing to be downloaded, and the
        knowledge of how to download it."""
-    def __init__(self, targetPath, tmpPath, wantHash=None, repoFile=None,
-                 useTor=False):
+    def __init__(self, targetPath, tmpPath, wantHash=None,
+                 repoFile=None, useTor=False, wantLength=None):
         """Create a new DownloadJob.  When it is finally downloaded,
            store it in targetPath.  Store partial results in tmpPath;
            if there is already a file in tmpPath, assume that it is an
@@ -357,6 +357,7 @@
         self._destPath = targetPath
         self._tmpPath = tmpPath
         self._wantHash = wantHash
+        self._wantLength = wantLength
         self._repoFile = repoFile
         self._useTor = useTor
 
@@ -470,6 +471,12 @@
                 have_length = os.stat(self._tmpPath).st_size
                 logging.info("Have stalled file for %s with %s bytes", url,
                              have_length)
+                if self._wantLength != None:
+                    if self._wantLength >= have_length:
+                        logging.warn("Stalled file is too long; removing it")
+                        self._removeTmpFile()
+                        haveStalled = False
+                        have_length = None
             else:
                 have_length = None
 
@@ -506,7 +513,14 @@
                 total += len(c)
                 logging.debug("Got %s/%s bytes from %s",
                               total, expectLength, url)
+                if self._wantLength != None and total > self._wantLength:
+                    logging.warn("Read too many bytes from %s; got %s, but "
+                                 "wanted %s", url, total, self._wantLength)
+                    break
 
+            if self._wantLength != None and total != self._wantLength:
+                logging.warn("Length wrong on file %s", url)
+
         finally:
             if f_in is not None:
                 f_in.close()

Modified: updater/trunk/lib/thandy/formats.py
===================================================================
--- updater/trunk/lib/thandy/formats.py	2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/lib/thandy/formats.py	2008-12-15 21:18:19 UTC (rev 17629)
@@ -375,6 +375,7 @@
 RELPATH_SCHEMA = PATH_PATTERN_SCHEMA = S.AnyStr()
 URL_SCHEMA = S.AnyStr()
 VERSION_SCHEMA = S.ListOf(S.Any()) #XXXX WRONG
+LENGTH_SCHEMA = S.Int(lo=0)
 
 # A single signature of an object.  Indicates the signature, the id of the
 # signing key, and the signing method.
@@ -392,7 +393,7 @@
 
 # A role: indicates that a key is allowed to certify a kind of
 # document at a certain place in the repo.
-ROLE_SCHEMA = S.Struct([ROLENAME_SCHEMA, PATH_PATTERN_SCHEMA])
+ROLE_SCHEMA = S.Struct([ROLENAME_SCHEMA, PATH_PATTERN_SCHEMA], allowMore=True)
 
 # A Keylist: indicates a list of live keys and their roles.
 KEYLIST_SCHEMA = S.Obj(
@@ -415,12 +416,12 @@
 TIMESTAMP_SCHEMA = S.Obj(
     _type = S.Str("Timestamp"),
     at = TIME_SCHEMA,
-    m = S.Struct([TIME_SCHEMA, HASH_SCHEMA]),
-    k = S.Struct([TIME_SCHEMA, HASH_SCHEMA]),
+    m = S.Struct([TIME_SCHEMA, HASH_SCHEMA], [LENGTH_SCHEMA], allowMore=True),
+    k = S.Struct([TIME_SCHEMA, HASH_SCHEMA], [LENGTH_SCHEMA], allowMore=True),
     b = S.DictOf(keySchema=S.AnyStr(),
             valSchema=
-                 S.Struct([ VERSION_SCHEMA, RELPATH_SCHEMA, TIME_SCHEMA, HASH_SCHEMA ]))
-   )
+                 S.Struct([ VERSION_SCHEMA, RELPATH_SCHEMA, TIME_SCHEMA, HASH_SCHEMA ], [LENGTH_SCHEMA], allowMore=True))
+    )
 
 # A Bundle: lists a bunch of packages that should be updated in tandem
 BUNDLE_SCHEMA = S.Obj(
@@ -436,6 +437,7 @@
                     version=VERSION_SCHEMA,
                     path=RELPATH_SCHEMA,
                     hash=HASH_SCHEMA,
+                    length=S.Opt(LENGTH_SCHEMA),
                     order=S.Struct([S.Int(), S.Int(), S.Int()]),
                     optional=S.Opt(S.Bool()),
                     gloss=S.DictOf(S.AnyStr(), S.AnyStr()),
@@ -479,7 +481,8 @@
 
 ITEM_INFO_SCHEMA = S.AllOf([CHECK_ITEM_SCHEMA, INSTALL_ITEM_SCHEMA])
 
-ITEM_SCHEMA = S.Struct([RELPATH_SCHEMA, HASH_SCHEMA], [ITEM_INFO_SCHEMA],
+ITEM_SCHEMA = S.Struct([RELPATH_SCHEMA, HASH_SCHEMA],
+                       [ITEM_INFO_SCHEMA, LENGTH_SCHEMA],
                        allowMore=True)
 
 def checkPackageFormatConsistency(obj):
@@ -578,17 +581,18 @@
             self.addKey(key)
 
 class StampedInfo:
-    def __init__(self, ts, hash, version=None, relpath=None):
+    def __init__(self, ts, hash, version=None, relpath=None, length=None):
         self._ts = ts
         self._hash = hash
         self._version = version
         self._relpath = relpath
+        self._length = length
 
     @staticmethod
-    def fromJSonFields(timeStr, hashStr):
+    def fromJSonFields(timeStr, hashStr, length=None):
         t = parseTime(timeStr)
         h = parseHash(hashStr)
-        return StampedInfo(t, h)
+        return StampedInfo(t, h, length=length)
 
     def getHash(self):
         return self._hash
@@ -596,6 +600,9 @@
     def getRelativePath(self):
         return self._relpath
 
+    def getLength(self):
+        return self._length
+
 class TimestampFile:
     def __init__(self, at, mirrorlistinfo, keylistinfo, bundleinfo):
         self._time = at
@@ -607,15 +614,18 @@
     def fromJSon(obj):
         # must be validated.
         at = parseTime(obj['at'])
-        m = StampedInfo.fromJSonFields(*obj['m'][:2])
-        k = StampedInfo.fromJSonFields(*obj['k'][:2])
+        m = StampedInfo.fromJSonFields(*obj['m'][:3])
+        k = StampedInfo.fromJSonFields(*obj['k'][:3])
         b = {}
         for name, bundle in obj['b'].iteritems():
             v = bundle[0]
             rp = bundle[1]
             t = parseTime(bundle[2])
             h = parseHash(bundle[3])
-            b[name] = StampedInfo(t, h, v, rp)
+            ln = None
+            if len(bundle) > 4:
+                ln = bundle[4]
+            b[name] = StampedInfo(t, h, v, rp, ln)
 
         return TimestampFile(at, m, k, b)
 
@@ -672,6 +682,8 @@
 
     f = open(package_fname, 'rb')
     digest = getFileDigest(f)
+    f.close()
+    f_len = os.stat(package_fname).st_size
 
     # Check fields!
     extra = {}
@@ -681,7 +693,7 @@
                'location' : r['location'], #DOCDOC
                'version' : r['version'],
                'format' : r['format'],
-               'files' : [ [ r['relpath'], formatHash(digest), extra ] ],
+               'files' : [ [ r['relpath'], formatHash(digest), extra, f_len ] ],
                'shortdesc' : shortDescs,
                'longdesc' : longDescs
              }
@@ -725,7 +737,7 @@
 
     return result
 
-def makeBundleObj(config_fname, getPackage):
+def makeBundleObj(config_fname, getPackage, getPackageLength):
     packages = []
     def ShortGloss(lang, val): packages[-1]['gloss'][lang] = val
     def LongGloss(lang, val): packages[-1]['longgloss'][lang] = val
@@ -763,6 +775,9 @@
             raise thandy.FormatException("No such package as %s"%p['name'])
 
         p['hash'] = formatHash(getDigest(pkginfo))
+        length = getPackageLength(p['name'])
+        if length != None:
+            p['length'] = length
         if p['path'] == None:
             p['path'] = pkginfo['location']
         if p['version'] == None:

Modified: updater/trunk/lib/thandy/repository.py
===================================================================
--- updater/trunk/lib/thandy/repository.py	2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/lib/thandy/repository.py	2008-12-15 21:18:19 UTC (rev 17629)
@@ -33,6 +33,9 @@
         self._signedFormat = signedFormat
         self._needSigs = needSigs
 
+        # The length of the file as stored on disk.
+        self._length = None
+
         # The contents of the file, parsed.  None if we haven't loaded
         # the file.
         self._main_obj = None
@@ -70,6 +73,7 @@
         f = None
         fd = os.open(fname, os.O_RDONLY)
         try:
+            self._length = os.fstat(fd).st_size
             f = os.fdopen(fd, 'r')
         except:
             os.close(fd)
@@ -295,7 +299,8 @@
         return None
 
     def getFilesToUpdate(self, now=None, trackingBundles=(), hashDict=None,
-                         usePackageSystem=True, installableDict=None):
+                         lengthDict=None, usePackageSystem=True,
+                         installableDict=None):
         """Return a set of relative paths for all files that we need
            to fetch.  Assumes that we care about the bundles
            'trackingBundles'.
@@ -312,6 +317,9 @@
         if installableDict == None:
             installableDict = {}
 
+        if lengthDict == None:
+            lengthDict = {}
+
         pkgItems = None
 
         need = set()
@@ -370,6 +378,10 @@
             ts.getKeylistInfo().getHash()
         hashDict[self._mirrorlistFile.getRelativePath()] = \
             ts.getMirrorlistInfo().getHash()
+        lengthDict[self._keylistFile.getRelativePath()] = \
+            ts.getKeylistInfo().getLength()
+        lengthDict[self._mirrorlistFile.getRelativePath()] = \
+            ts.getMirrorlistInfo().getLength()
 
         h_kf = thandy.formats.getDigest(self._keylistFile.get())
         h_expected = ts.getKeylistInfo().getHash()
@@ -407,6 +419,7 @@
             rp = binfo.getRelativePath()
             h_expected = binfo.getHash()
             hashDict[rp] = h_expected
+            lengthDict[rp] = binfo.getLength()
             bfile = self.getBundleFile(rp)
             try:
                 bfile.load()
@@ -440,6 +453,7 @@
                 pfile = self.getPackageFile(rp)
                 h_expected = thandy.formats.parseHash(pkginfo['hash'])
                 hashDict[rp] = h_expected
+                lengthDict[rp] = pkginfo.get('length')
                 try:
                     pfile.load()
                 except OSError:
@@ -498,6 +512,8 @@
 
                 h_expected = thandy.formats.parseHash(h)
                 hashDict[rp] = h_expected
+                if len(f) > 3:
+                    lengthDict[rp] = h[3]
                 fn = self.getFilename(rp)
                 try:
                     h_got = thandy.formats.getFileDigest(fn)

Modified: updater/trunk/specs/thandy-spec.txt
===================================================================
--- updater/trunk/specs/thandy-spec.txt	2008-12-15 21:17:53 UTC (rev 17628)
+++ updater/trunk/specs/thandy-spec.txt	2008-12-15 21:18:19 UTC (rev 17629)
@@ -397,10 +397,10 @@
 
     { "_type" : Timestamp,
       "at" : TIME,
-      "m" : [ TIME, HASH ],
-      "k" : [ TIME, HASH ],
+      "m" : [ TIME, HASH, LENGTH ],
+      "k" : [ TIME, HASH, LENGTH ],
       "b" : { NAME :
-                 [ [ Version, Path, Time, Hash ] ] }
+                 [ [ Version, Path, Time, Hash, (Length) ] ] }
     }
 
   TIME is when the timestamp was signed.  MIRRORLISTHASH is the digest
@@ -409,6 +409,8 @@
   bundles and their locations and hashes.  The "name" of a bundle (in
   this context) is the directory component of the bundle's path.
 
+  The LENGTH field may be absent on very old timestamp files.
+
 3.6. File formats: bundle files
 
      { "_type" : "Bundle",
@@ -423,6 +425,7 @@
               "version" : VERSION,
               "path" : PATH,
               "hash" : HASH,
+              ("length" : LENGTH),
               "order" : [ INST, UPDATE, REMOVE ],
               ("optional : BOOL, )
               "gloss" : { LANG : TEXT },
@@ -446,6 +449,9 @@
   language. The UI should display the must appropriate language to the
   user.
 
+  The LENGTH field is required on all new bundles, but may be absent
+  on very old ones.
+
 3.7. File formats: package files
 
    { "_type" : "Package",
@@ -454,13 +460,14 @@
      "version" : VERSION,
      "format" : FMT,
      "ts" : TIME,
-     "files" : [  [ PATH, HASH, INFO ], ... ],
+     "files" : [  [ PATH, HASH, INFO, (LENGTH) ], ... ],
      "shortdesc" : { LANG : DESC, ... },
      "longdesc" : { LANG : DESC, ... },
    }
 
   Most elements are self-explanatory.  To interpret the 'INFO' entry
-  for each installable file, see section 6.
+  for each installable file, see section 6.  The LENGTH field is
+  required on all new packages, but may be absent on very old ones.
 
   No two package files in the same repository should have the same
   name and version.  If a package needs to be changed, the version
@@ -508,6 +515,10 @@
   Clients SHOULD cache at least the latest versions they have received
   of all files.
 
+  When dowloading a file, if the client knows what that file's length
+  should be, it SHOULD NOT accept a longer file, and SHOULD NOT
+  continue the download past the file length.
+
 4.1.1. Download preferences
 
   Users should be able to specify that packages must be only



More information about the tor-commits mailing list