[tor-commits] [ooni-probe/master] Refactor verification of input file and implement caching

art at torproject.org art at torproject.org
Tue Aug 27 09:21:51 UTC 2013


commit fb8a90c6f2ec136eb477bbb2bf91d61456e66c80
Author: Arturo Filastò <art at fuffa.org>
Date:   Thu Aug 22 17:38:38 2013 +0200

    Refactor verification of input file and implement caching
---
 ooni/deck.py                   |   28 +++---------
 ooni/oonibclient.py            |   93 ++++++++++++++++++++++++++++++++--------
 ooni/tests/test_oonibclient.py |    2 +-
 3 files changed, 83 insertions(+), 40 deletions(-)

diff --git a/ooni/deck.py b/ooni/deck.py
index f99916b..ef1b58a 100644
--- a/ooni/deck.py
+++ b/ooni/deck.py
@@ -1,6 +1,5 @@
 #-*- coding: utf-8 -*-
 
-from hashlib import sha1
 from ooni.nettest import NetTestLoader
 from ooni.settings import config
 from ooni.utils.txagentwithsocks import Agent
@@ -10,14 +9,6 @@ import os
 import re
 import yaml
 
-def verifyFile(filePath):
-    # get the filename component of the file path
-    digest = os.path.basename(filePath)
-    with open(filePath) as f:
-        sha1digest = sha1(f.read())
-        return sha1digest.hexdigest() == digest
-    return False
-
 class Deck(object):
     def __init__(self, oonibclient, deckFile=None):
         self.netTestLoaders = []
@@ -51,17 +42,10 @@ class Deck(object):
             if 'url' in input_file:
                 oonib = OONIBClient(input_file['address'])
 
-                cached_input_dir = os.path.join(config.advanced.data_dir,
-                        'inputs')
-                cached_path = os.path.join(cached_input_dir, input_file['hash'])
-                self.inputs.append(cached_path)
-
-                if os.path.exists(cached_path) and verifyFile(cached_path):
-                        test_class.localOptions[inputArg] = cached_path
-                        continue
-                yield oonib.downloadInput(input_file['hash'], cached_path)
-                if verifyFile(cached_path):
-                    test_class.localOptions[input_file['key']] = cached_path
-                    continue
+                input_file = yield oonib.downloadInput(input_file['hash'])
+                try:
+                    input_file.verify()
+                except AssertionError:
+                    raise UnableToLoadDeckInput, cached_path
                 
-                raise UnableToLoadDeckInput, cached_path
+                test_class.localOptions[input_file['key']] = input_file.cached_file
diff --git a/ooni/oonibclient.py b/ooni/oonibclient.py
index 4f555b2..9665ee1 100644
--- a/ooni/oonibclient.py
+++ b/ooni/oonibclient.py
@@ -1,30 +1,67 @@
-from hashlib import sha256
-
+import os
 import json
 
+from hashlib import sha256
+
 from twisted.internet import defer, reactor
 from twisted.web.client import Agent
 
+from ooni.settings import config
 from ooni.utils import log
 from ooni.utils.net import BodyReceiver, StringProducer, Downloader
 
 class InputFile(object):
-    def __init__(self, descriptor):
-        self.id = descriptor['id']
+    def __init__(self, input_hash):
+        self.id = input_hash
+        cached_input_dir = os.path.join(config.advanced.data_dir,
+                'inputs')
+        cache_path = os.path.join(cached_input_dir, input_hash)
+        self.cached_file = cache_path
+        self.cached_descriptor = cache_path + '.desc'
+    
+    @property
+    def descriptorCached(self):
+        if os.path.exists(self.cached_descriptor):
+            with open(self.cached_descriptor) as f:
+                descriptor = json.load(f)
+                self.load(descriptor)
+            return True
+        return False
+    
+    @property
+    def fileCached(self):
+        if os.path.exists(self.cached_file):
+            try:
+                self.verify()
+            except AssertionError:
+                log.err("The input %s failed validation. Going to consider it not cached." % self.id)
+                return False
+            return True
+        return False
+
+    def save(self):
+        with open(self.cached_descriptor, 'w+') as f:
+            json.dump({
+                'name': self.name,
+                'id': self.id,
+                'version': self.version,
+                'author': self.author,
+                'date': self.date,
+                'description': self.description
+            }, f)
+
+    def load(self, descriptor):
         self.name = descriptor['name']
         self.version = descriptor['version']
         self.author = descriptor['author']
         self.date = descriptor['date']
         self.description = descriptor['description']
 
-        self.file_path = None
-
     def verify(self):
-        digest = os.path.basename(self.file_path)
-        with open(self.file_path) as f:
+        digest = os.path.basename(self.cached_file)
+        with open(self.cached_file) as f:
             file_hash = sha256(f.read())
-            return file_hash.hexdigest() == digest
-        return False
+            assert file_hash.hexdigest() == digest
 
 class OONIBClient(object):
     def __init__(self, address):
@@ -73,25 +110,47 @@ class OONIBClient(object):
         pass
 
     def getInput(self, input_hash):
-        try:
-            return defer.succeed(self.input_files[input_hash])
-        except KeyError:
+        input_file = InputFile(input_hash)
+        if input_file.descriptorCached:
+            return defer.succeed(input_file)
+        else:
             d = self.queryBackend('GET', '/input/' + input_hash)
+
             @d.addCallback
             def cb(descriptor):
-                self.input_files[input_hash] = InputFile(descriptor)
-                return self.input_files[input_hash]
+                input_file.load(descriptor)
+                input_file.save()
+                return input_file
+
             @d.addErrback
             def err(err):
                 log.err("Failed to get descriptor for input %s" % input_hash)
                 log.exception(err)
+
             return d
 
     def getInputList(self):
         return self.queryBackend('GET', '/input')
 
-    def downloadInput(self, input_hash, download_path):
-        return self.download('/input/'+input_hash+'/file', download_path)
+    def downloadInput(self, input_hash):
+        input_file = InputFile(input_hash)
+
+        if input_file.fileCached:
+            return defer.succeed(input_file)
+        else:
+            d = self.download('/input/'+input_hash+'/file', input_file.cached_file)
+
+            @d.addCallback
+            def cb(res):
+                input_file.verify()
+                return input_file
+
+            @d.addErrback
+            def err(err):
+                log.err("Failed to download the input file %s" % input_hash)
+                log.exception(err)
+
+            return d
 
     def getInputPolicy(self):
         pass
diff --git a/ooni/tests/test_oonibclient.py b/ooni/tests/test_oonibclient.py
index 02f9f8f..26e2f62 100644
--- a/ooni/tests/test_oonibclient.py
+++ b/ooni/tests/test_oonibclient.py
@@ -28,7 +28,7 @@ class TestOONIBClient(unittest.TestCase):
 
     @defer.inlineCallbacks
     def test_download_input(self):
-        yield self.oonibclient.downloadInput(input_id, input_id)
+        yield self.oonibclient.downloadInput(input_id)
 
     @defer.inlineCallbacks
     def test_get_deck_list(self):





More information about the tor-commits mailing list