[or-cvs] [thandy/master] Add the ability to download files via BitTorrent

Nick Mathewson nickm at seul.org
Tue Aug 25 21:01:45 UTC 2009


Author: Sebastian Hahn <sebastian at torproject.org>
Date: Sun, 16 Aug 2009 22:21:11 +0200
Subject: Add the ability to download files via BitTorrent
Commit: 4c383af36602612be745764463dd76fee17ae205

The Client has learned a new option, --download-method, to specify
whether we're downloading directly or via BitTorrent.

This implementation has a few remaining issues; the biggest one is
that seeding isn't implemented at all (when the download stops,
Thandy stops sharing). Handling a failed download when no peers are
available also doesn't work.
---
 doc/interface.txt        |    6 +++++
 lib/thandy/ClientCLI.py  |   49 ++++++++++++++++++++++++++++++----------
 lib/thandy/bt_compat.py  |   56 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/thandy/download.py   |   47 ++++++++++++++++++++++++++++++++++++++
 lib/thandy/repository.py |   50 ++++++++++++++++++++++++++++++++++------
 5 files changed, 188 insertions(+), 20 deletions(-)

diff --git a/doc/interface.txt b/doc/interface.txt
index 22a35ed..b50442b 100644
--- a/doc/interface.txt
+++ b/doc/interface.txt
@@ -39,6 +39,12 @@ Recognized options for thandy-client update are:
                    Output log messages in a format designed to be easy
                    for controllers to read.  (See below.)
 
+   --download-method=<method>
+                   Choose this download method. Can be used in conjunction
+                   with the socks-port option. Supported methods are:
+                     - direct: downloads the file directly
+                     - bittorrent: use bittorrent for downloading
+
 
 THE CONTROLLER LOG FORMAT:
 
diff --git a/lib/thandy/ClientCLI.py b/lib/thandy/ClientCLI.py
index 73c96df..bcbdfc7 100644
--- a/lib/thandy/ClientCLI.py
+++ b/lib/thandy/ClientCLI.py
@@ -68,7 +68,8 @@ def update(args):
     options, args = getopt.getopt(args, "",
         [ "repo=", "no-download", "loop", "no-packagesys",
           "install", "socks-port=", "debug", "info",
-          "warn", "force-check", "controller-log-format"
+          "warn", "force-check", "controller-log-format",
+          "download-method="
           ])
     download = True
     keep_looping = False
@@ -76,6 +77,7 @@ def update(args):
     install = False
     socksPort = None
     forceCheck = False
+    downloadMethod = "direct"
 
     for o, v in options:
         if o == '--repo':
@@ -92,12 +94,20 @@ def update(args):
             socksPort = int(v)
         elif o == '--force-check':
             forceCheck = True
+        elif o == '--download-method':
+            downloadMethod = v
 
     configureLogs(options)
 
     if socksPort:
         thandy.socksurls.setSocksProxy("127.0.0.1", socksPort)
 
+    if downloadMethod == "bittorrent":
+        thandy.bt_compat.BtCompat.setUseBt(True)
+    elif downloadMethod != "direct":
+        usage()
+        sys.exit()
+
     repo = thandy.repository.LocalRepository(repoRoot)
     downloader = thandy.download.DownloadManager()
     downloader.start()
@@ -109,11 +119,15 @@ def update(args):
         hashes = {}
         lengths = {}
         installable = {}
+        btMetadata = {}
         logging.info("Checking for files to update.")
-        files = repo.getFilesToUpdate(trackingBundles=args, hashDict=hashes,
-                                      lengthDict=lengths,
-                                      usePackageSystem=use_packagesys,
-                                      installableDict=installable)
+        files, downloadingFiles = repo.getFilesToUpdate(
+              trackingBundles=args,
+              hashDict=hashes,
+              lengthDict=lengths,
+              usePackageSystem=use_packagesys,
+              installableDict=installable,
+              btMetadataDict=btMetadata)
 
         if forceCheck:
             files.add("/meta/timestamp.txt")
@@ -183,13 +197,23 @@ def update(args):
                 logging.info("Waiting a while before we fetch %s", f)
                 continue
 
-            dj = thandy.download.ThandyDownloadJob(
-                f, repo.getFilename(f),
-                mirrorlist,
-                wantHash=hashes.get(f),
-                wantLength=lengths.get(f),
-                repoFile=repo.getRequestedFile(f),
-                useTor=(socksPort!=None))
+            dj = None
+            if thandy.bt_compat.BtCompat.shouldUseBt() and downloadingFiles:
+                dj = thandy.download.ThandyBittorrentDownloadJob(
+                    repo.getFilename(btMetadata[f]), f,
+                    repo.getFilename(f),
+                    wantHash=hashes.get(f),
+                    wantLength=lengths.get(f),
+                    repoFile=repo.getRequestedFile(f))
+
+            else:
+                dj = thandy.download.ThandyDownloadJob(
+                    f, repo.getFilename(f),
+                    mirrorlist,
+                    wantHash=hashes.get(f),
+                    wantLength=lengths.get(f),
+                    repoFile=repo.getRequestedFile(f),
+                    useTor=(socksPort!=None))
 
             def successCb(rp=f):
                 rf = repo.getRequestedFile(rp)
@@ -219,6 +243,7 @@ def usage():
     print "         [--no-packagesys] [--install] [--socks-port=port]"
     print "         [--debug|--info|--warn] [--force-check]"
     print "         [--controller-log-format]"
+    print "         [--download-method=direct|bittorrent]"
     print "         bundle1, bundle2, ..."
     print "  json2xml file"
     sys.exit(1)
diff --git a/lib/thandy/bt_compat.py b/lib/thandy/bt_compat.py
index 43cecad..e9298a6 100644
--- a/lib/thandy/bt_compat.py
+++ b/lib/thandy/bt_compat.py
@@ -2,6 +2,7 @@
 
 import os.path
 import time
+import threading
 
 import thandy.master_keys
 
@@ -74,3 +75,58 @@ class BtCompat:
                 'creation date': long(time.time())}
         return BitTorrent.bencode.bencode(data)
 
+    def getFileLength(self, file):
+        """Parse the .torrent metainfo file and return the length of the
+           file it refers to.
+        """
+        f = open(file, 'rb')
+        metainfo = BitTorrent.bencode.bdecode(f.read())['info']
+        f.close()
+        assert(metainfo['length'])
+        return metainfo['length']
+
+    def getFileHash(self, file):
+        """Parse the .torrent metainfo file and return the hash of the
+           file it refers to.
+        """
+        f = open(file, 'rb')
+        metainfo = BitTorrent.bencode.bdecode(f.read())['info']
+        f.close()
+        return sha(BitTorrent.bencode.bencode(metainfo)).hexdigest()
+
+    def download(self, metaFile, saveTo ):
+        """Initiate a download via bittorrent."""
+
+        event = threading.Event()
+
+        params = ['--responsefile', metaFile, '--saveas', saveTo]
+
+        def filefunc(default, size, saveas, dir):
+            return saveas
+
+        def statusfunc(dict):
+            # XXX we should see how fast we upload/download here.
+            # If we don't get a connection for quite a while, or we are
+            # _very_ slow, we should cancel bt, disable it, and start fetching
+            # via http.
+            pass
+
+        def finfunc():
+            # XXX here we can set a timer for how long to seed, or
+            # wait for statusfunc to have shared some data, or something.
+            # Not the real solution, though, because installation will be
+            # delayed by the time we sleep...
+            # time.sleep(60)
+            event.set()
+            pass
+
+        def errorfunc(msg):
+            # XXX Not really sure how to encounter an error here. Our best bet
+            # is to cancel the download, stop bittorrent, and move on.
+            BtCompat.setUseBt(False)
+            event.set()
+
+
+        BitTorrent.download.download(params, filefunc, statusfunc, finfunc,
+                                     errorfunc, event, 80)
+
diff --git a/lib/thandy/download.py b/lib/thandy/download.py
index fb1b9f3..8e774ec 100644
--- a/lib/thandy/download.py
+++ b/lib/thandy/download.py
@@ -634,6 +634,53 @@ class ThandyDownloadJob(DownloadJob):
     def getMirror(self):
         return self._usingMirror
 
+class ThandyBittorrentDownloadJob(DownloadJob):
+    """Thandy's subtype of DownloadJob with BitTorrent support. Makes sure the
+       file downloaded via BitTorrent is the file we wanted, and moves
+       it into the right place.
+    """
+    def __init__(self, metaFile, relPath, destPath, wantHash=None,
+                 supportedURLTypes=None, useTor=None, repoFile=None,
+                 downloadStatusLog=None, wantLength=None):
+
+        DownloadJob.__init__(self, destPath, None, wantHash=wantHash,
+                             wantLength=wantLength,
+                             useTor=useTor, repoFile=repoFile)
+        self._relPath = relPath
+        self._metaFile = metaFile
+
+        tmppath = thandy.util.userFilename("tmp")
+        if relPath.startswith("/"):
+            relPath = relPath[1:]
+        self._tmpPath = os.path.join(tmppath, relPath)
+
+        d = os.path.dirname(self._tmpPath)
+        if not os.path.exists(d):
+            os.makedirs(d, 0700)
+
+        self._downloadStatusLog = downloadStatusLog
+
+    def setDownloadStatusLog(self, log):
+        self._downloadStatusLog = log
+
+    def getRelativePath(self):
+        return self._relPath
+
+    def _download(self):
+
+        btcomp = thandy.bt_compat.BtCompat()
+        btcomp.download(self._metaFile, self._tmpPath)
+
+        try:
+            self._checkTmpFile()
+        except (thandy.FormatException, thandy.DownloadError), err:
+            self._removeTmpFile()
+            if haveStalled:
+                raise BadCompoundData(err)
+            else:
+                raise
+        thandy.util.ensureParentDir(self._destPath)
+        thandy.util.moveFile(self._tmpPath, self._destPath)
 
 _socks_opener = thandy.socksurls.build_socks_opener()
 
diff --git a/lib/thandy/repository.py b/lib/thandy/repository.py
index af20904..f5a4de2 100644
--- a/lib/thandy/repository.py
+++ b/lib/thandy/repository.py
@@ -3,12 +3,14 @@
 import thandy.formats
 import thandy.util
 import thandy.packagesys.PackageSystem
+import thandy.bt_compat
 
 json = thandy.util.importJSON()
 
 import logging
 import os
 import time
+import sys
 
 MAX_TIMESTAMP_AGE = 3*60*60
 
@@ -285,9 +287,10 @@ class LocalRepository:
 
     def getFilesToUpdate(self, now=None, trackingBundles=(), hashDict=None,
                          lengthDict=None, usePackageSystem=True,
-                         installableDict=None):
+                         installableDict=None, btMetadataDict=None):
         """Return a set of relative paths for all files that we need
-           to fetch.  Assumes that we care about the bundles
+           to fetch, and True if we're fetching actual files to install
+           instead of metadata.  Assumes that we care about the bundles
            'trackingBundles'.
            DOCDOC installableDict, hashDict, usePackageSystem
         """
@@ -305,6 +308,9 @@ class LocalRepository:
         if lengthDict == None:
             lengthDict = {}
 
+        if btMetadataDict == None:
+            btMetadataDict = {}
+
         pkgItems = None
 
         need = set()
@@ -341,7 +347,7 @@ class LocalRepository:
                 need.add(self._keylistFile.getRelativePath())
 
         if need:
-            return need
+            return need, False
 
         # Import the keys from the keylist.
         self._keyDB.addFromKeylist(self._keylistFile.get())
@@ -354,7 +360,7 @@ class LocalRepository:
                          "timestamp file and keylist.")
             need.add(self._keylistFile.getRelativePath())
             need.add(self._timestampFile.getRelativePath())
-            return need
+            return need, False
 
         # FINALLY, we know we have an up-to-date, signed timestamp
         # file.  Check whether the keys and mirrors file are as
@@ -375,7 +381,7 @@ class LocalRepository:
             need.add(self._keylistFile.getRelativePath())
 
         if need:
-            return need
+            return need, False
 
         s = self._mirrorlistFile.checkSignatures()
         if not s.isValid():
@@ -389,7 +395,7 @@ class LocalRepository:
             need.add(self._mirrorlistFile.getRelativePath())
 
         if need:
-            return need
+            return need, False
 
         # Okay; that's it for the metadata.  Do we have the right
         # bundles?
@@ -459,11 +465,35 @@ class LocalRepository:
                 s = pfile.checkSignatures()
                 if not s.isValid():
                     logging.warn("Package hash was as expected, but signature "
-                                 "did nto match")
+                                 "did not match")
                     # Can't use it.
                     continue
                 packages[rp] = pfile
 
+        # We have the packages. If we're downloading via bittorrent, we need
+        # the .torrent metafiles, as well.
+        if thandy.bt_compat.BtCompat.shouldUseBt():
+            btcomp = thandy.bt_compat.BtCompat()
+            for pfile in packages.values():
+                package = pfile.get()
+                for f in package['files']:
+                    rp = btcomp.getBtMetadataLocation(pfile.getRelativePath(),f[:1][0])
+                    try:
+                        l = btcomp.getFileLength(self.getFilename(rp))
+                    except IOError:
+                        need.add(rp)
+                        continue
+                    # XXX The following sanity check is a weak hack.
+                    # In reality, we want to check a signature here.
+                    if l != f[3:4][0]:
+                        # We got a bad .torrent file. Disable BitTorrent.
+                        logging.warn("Disable BitTorrent, bad metadata file!")
+                        thandy.bt_compat.BtCompat.setUseBt(False)
+                    btMetadataDict[f[:1][0]] = rp
+
+        if need:
+            return need, False
+
         # Finally, we have some packages.  Do we have their underlying
         # files?
         for pfile in packages.values():
@@ -514,8 +544,12 @@ class LocalRepository:
                     logging.info("Hash for %s not as expected; must load.", rp)
                     need.add(rp)
                 else:
+                    # XXX What if not? Maybe this should always be true.
+                    # if that works, we can get rid of the second return
+                    # value and just use installableDict from the caller.
                     if pkgItems.has_key(rp):
                         installableDict.setdefault(pkg_rp, {})[rp] = pkgItems[rp]
 
+
         # Okay; these are the files we need.
-        return need
+        return need, True
-- 
1.5.6.5



More information about the tor-commits mailing list