[or-cvs] r19289: {torflow} Wrap pickle in some error handling to avoid botched saves+reloads (torflow/trunk/NetworkScanners)

mikeperry at seul.org
Sat Apr 11 13:05:37 UTC 2009


Author: mikeperry
Date: 2009-04-11 09:05:37 -0400 (Sat, 11 Apr 2009)
New Revision: 19289

Modified:
   torflow/trunk/NetworkScanners/libsoat.py
   torflow/trunk/NetworkScanners/snakeinspector.py
   torflow/trunk/NetworkScanners/soat.py
Log:

Wrap pickle in some error handling to avoid botched
saves+reloads and to better encapsulate our
depickle_upgrade() versioning pattern.


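For reference, the depickle_upgrade() pattern this change encapsulates
is a version stamp on each pickled class: instances carry a pickle
revision, and depickle_upgrade() migrates objects written by older code
once they are loaded. A minimal sketch of the idea (the class name,
field, and revision numbers are illustrative assumptions, not copied
from the torflow source):

  class ExampleResult:
    # Current on-disk revision of this class; bump it whenever the
    # pickled layout changes.  (Hypothetical example.)
    _pickle_revision = 2

    def __init__(self):
      self._pickle_revision = ExampleResult._pickle_revision
      self.new_field = None  # field added in revision 2

    def depickle_upgrade(self):
      # Objects pickled before versioning carry no revision attribute.
      if not hasattr(self, "_pickle_revision"):
        self._pickle_revision = 1
      if self._pickle_revision < 2:
        # Backfill the field that revision-1 pickles lack.
        self.new_field = None
        self._pickle_revision = 2
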

Modified: torflow/trunk/NetworkScanners/libsoat.py
===================================================================
--- torflow/trunk/NetworkScanners/libsoat.py	2009-04-11 12:06:27 UTC (rev 19288)
+++ torflow/trunk/NetworkScanners/libsoat.py	2009-04-11 13:05:37 UTC (rev 19289)
@@ -195,10 +195,7 @@
 
   def __str__(self):
     ret = TestResult.__str__(self)
-    ssl_file = open(self.ssl_file, 'r')
-    ssl_domain = pickle.load(ssl_file)
-    ssl_domain.depickle_upgrade()
-    ssl_file.close()
+    ssl_domain = SnakePickler.load(self.ssl_file)
     ret += " Rotates: "+str(ssl_domain.cert_rotates)
     ret += " Changed: "+str(ssl_domain.cert_changed)+"\n" 
     if self.verbose:
@@ -448,8 +445,7 @@
 
       if soup and tor_soup and old_soup:
         if self.soupdiffer and os.path.exists(self.soupdiffer):
-          soupdiff = pickle.load(open(self.soupdiffer, 'r'))
-          soupdiff.depickle_upgrade()
+          soupdiff = SnakePickler.load(self.soupdiffer)
         else:
           soupdiff = SoupDiffer(old_soup, soup)
 
@@ -597,18 +593,13 @@
     for root, dirs, files in os.walk(rdir):
       for f in files:
         if f.endswith('.result'):
-          fh = open(os.path.join(root, f))
-          result = pickle.load(fh)
-          result.depickle_upgrade()
+          result = SnakePickler.load(os.path.join(root, f))
           result.rebase(self.data_dir)
           results.append(result)
     return results
 
   def getResult(self, file):
-    fh = open(file, 'r')
-    res = pickle.load(fh)
-    res.depickle_upgrade()
-    return res
+    return SnakePickler.load(file)
 
   def uniqueFilename(afile):
     (prefix,suffix)=os.path.splitext(afile)
@@ -656,9 +647,7 @@
   def saveResult(self, result):
     ''' generic method for saving test results '''
     result.filename = self.__resultFilename(result)
-    result_file = open(result.filename, 'w')
-    pickle.dump(result, result_file)
-    result_file.close()
+    SnakePickler.dump(result, result.filename)
 
   def __testFilename(self, test, position=-1):
     if position == -1:
@@ -673,19 +662,14 @@
       while os.path.exists(filename+"."+str(i)+".test"):
         i+=1
       position = i-1
-
-    test_file = open(filename+"."+str(position)+".test", 'r')
-    test = pickle.load(test_file)
-    test.depickle_upgrade()
-    test_file.close()
+    
+    test = SnakePickler.load(filename+"."+str(position)+".test")
     return test
 
   def saveTest(self, test):
     if not test.filename:
       test.filename = self.__testFilename(test)
-    test_file = open(test.filename, 'w')
-    pickle.dump(test, test_file)
-    test_file.close()
+    SnakePickler.dump(test, test.filename)
 
 # These three bits are needed to fully recursively strain the parsed soup.
 # For some reason, the SoupStrainer does not get applied recursively..
@@ -733,6 +717,48 @@
     soup.append(tag)
   return soup      
 
+class SnakePickler:
+  def dump(obj, filename):
+    if not "depickle_upgrade" in dir(obj.__class__):
+      plog("WARN", "Pickling instance of "+obj.__class__.__name__+" without upgrade method")
+    f = file(filename, "w")
+    try:
+      pickle.dump(obj, f)
+    except KeyboardInterrupt:
+      finished = False
+      while not finished:
+        try:
+          f.close()
+          f = file(filename, "w")
+          pickle.dump(obj, f)
+          f.close()
+          finished = True
+        except KeyboardInterrupt:
+          pass
+      raise KeyboardInterrupt
+    except Exception, e:
+      plog("WARN", "Exception during pickle dump: "+e)
+      try:
+        os.unlink(filename)
+      except: pass
+    f.close()
+  dump = Callable(dump)
+
+  def load(filename):
+    f = file(filename, "r")
+    try:
+      obj = pickle.load(f)
+    except Exception, e:
+      plog("WARN", "Error loading object from "+filename+": "+str(e))
+      return None
+    if not "depickle_upgrade" in dir(obj.__class__):
+      plog("WARN", "De-pickling instance of "+obj.__class__.__name__+" without upgrade method")
+    else:
+      obj.depickle_upgrade()
+    f.close()
+    return obj
+  load = Callable(load)
+     
 class SoupDiffer:
   """ Diff two soup tag sets, optionally writing diffs to outfile. """
   def __init__(self, soup_old, soup_new):

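With SnakePickler in place, each load/store site above collapses to a
one-line call (a usage sketch; the path is hypothetical, and Callable()
is the existing TorFlow wrapper for invoking these methods without an
instance):

  # Load: unpickles, runs depickle_upgrade() when the class defines it,
  # and returns None (logging a WARN) if the file cannot be unpickled.
  soupdiff = SnakePickler.load("soat_data/example.soupdiff")

  # Dump: retries the write if interrupted mid-save, and unlinks any
  # half-written file on other failures, so a botched save is not left
  # behind to break the next load.
  SnakePickler.dump(soupdiff, "soat_data/example.soupdiff")
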
Modified: torflow/trunk/NetworkScanners/snakeinspector.py
===================================================================
--- torflow/trunk/NetworkScanners/snakeinspector.py	2009-04-11 12:06:27 UTC (rev 19288)
+++ torflow/trunk/NetworkScanners/snakeinspector.py	2009-04-11 13:05:37 UTC (rev 19289)
@@ -22,6 +22,11 @@
 
 TorCtl.TorUtil.loglevel="NOTICE"
 
+if TorCtl.TorUtil.loglevels[TorCtl.TorUtil.loglevel] > TorCtl.TorUtil.loglevels["INFO"]:
+  # Kill stderr (jsdiffer and exception noise) if our loglevel is above INFO
+  sys.stderr = file("/dev/null", "w")
+
+
 def usage(argv):
   print "Usage: "+argv[0]+" with 0 or more of the following filters: "
   print "  --dir <datadir>"
@@ -124,6 +129,8 @@
        (not resultfilter or r.__class__.__name__ == resultfilter):
       try:
         print r
+      except KeyboardInterrupt:
+        raise KeyboardInterrupt
       except IOError, e:
         traceback.print_exc()
       except Exception, e:

Modified: torflow/trunk/NetworkScanners/soat.py
===================================================================
--- torflow/trunk/NetworkScanners/soat.py	2009-04-11 12:06:27 UTC (rev 19288)
+++ torflow/trunk/NetworkScanners/soat.py	2009-04-11 13:05:37 UTC (rev 19289)
@@ -45,7 +45,6 @@
 import sha
 import Queue
 import threading
-import pickle
 
 from libsoat import *
 
@@ -790,9 +789,7 @@
       added_cookie_jar.load(content_prefix+'.cookies', ignore_discard=True)
       self.cookie_jar.load(content_prefix+'.cookies', ignore_discard=True)
 
-      header_file = open(content_prefix+'.headerdiff', 'r')
-      headerdiffer = pickle.load(header_file)
-      header_file.close()
+      headerdiffer = SnakePickler.load(content_prefix+'.headerdiff')
 
       content = None
       mime_type = None 
@@ -822,11 +819,9 @@
       content_file = open(content_prefix+'.content', 'w')
       content_file.write(content)
       content_file.close()
-
-      header_file = open(content_prefix+'.headerdiff', 'w')
+      
       headerdiffer = HeaderDiffer(resp_headers)
-      pickle.dump(headerdiffer, header_file)
-      header_file.close()
+      SnakePickler.dump(headerdiffer, content_prefix+'.headerdiff')
       
       # Need to do set subtraction and only save new cookies.. 
       # or extract/make_cookies
@@ -996,9 +991,7 @@
     headerdiffer.prune_differences(resp_headers_new)
     hdiffs = headerdiffer.show_differences(presp_headers)
 
-    header_file = open(content_prefix+'.headerdiff', 'w')
-    pickle.dump(headerdiffer, header_file)
-    header_file.close()
+    SnakePickler.dump(headerdiffer, content_prefix+'.headerdiff')
 
     sha1sum_new = sha.sha(content_new)
 
@@ -1307,14 +1300,13 @@
 
     if os.path.exists(content_prefix+".jsdiff"):
       plog("DEBUG", "Loading jsdiff for "+address)
-      jsdiff = pickle.load(open(content_prefix+".jsdiff", 'r'))
-      jsdiff.depickle_upgrade()
+      jsdiff = SnakePickler.load(content_prefix+".jsdiff")
     else:
       plog("DEBUG", "No jsdiff for "+address+". Creating+dumping")
       jsdiff = JSDiffer(orig_js)
     
     jsdiff.prune_differences(new_js)
-    pickle.dump(jsdiff, open(content_prefix+".jsdiff", 'w'))
+    SnakePickler.dump(jsdiff, content_prefix+".jsdiff")
 
     has_js_changes = jsdiff.contains_differences(tor_js)
 
@@ -1415,14 +1407,13 @@
     #    were added to additional tags
     if os.path.exists(content_prefix+".soupdiff"):
       plog("DEBUG", "Loading soupdiff for "+address)
-      soupdiff = pickle.load(open(content_prefix+".soupdiff", 'r'))
-      soupdiff.depickle_upgrade()
+      soupdiff = SnakePickler.load(content_prefix+".soupdiff")
       soupdiff.prune_differences(new_soup)
     else:
       plog("DEBUG", "No soupdiff for "+address+". Creating+dumping")
       soupdiff = SoupDiffer(orig_soup, new_soup)
 
-    pickle.dump(soupdiff, open(content_prefix+".soupdiff", 'w'))
+    SnakePickler.dump(soupdiff, content_prefix+".soupdiff")
     
     more_tags = soupdiff.show_changed_tags(tor_soup)     
     more_attrs = soupdiff.show_changed_attrs(tor_soup)
@@ -1443,14 +1434,13 @@
     if false_positive:
       if os.path.exists(content_prefix+".jsdiff"):
         plog("DEBUG", "Loading jsdiff for "+address)
-        jsdiff = pickle.load(open(content_prefix+".jsdiff", 'r'))
-        jsdiff.depickle_upgrade()
+        jsdiff = SnakePickler.load(content_prefix+".jsdiff")
       else:
         plog("DEBUG", "No jsdiff for "+address+". Creating+dumping")
         jsdiff = JSSoupDiffer(orig_soup)
       
       jsdiff.prune_differences(new_soup)
-      pickle.dump(jsdiff, open(content_prefix+".jsdiff", 'w'))
+      SnakePickler.dump(jsdiff, content_prefix+".jsdiff")
 
       differences = jsdiff.show_differences(tor_soup)
       false_positive = not differences
@@ -1580,10 +1570,7 @@
     # load the original cert and compare
     # if we don't have the original cert yet, get it
     try:
-      ssl_file = open(ssl_file_name, 'r')
-      ssl_domain = pickle.load(ssl_file)
-      ssl_domain.depickle_upgrade()
-      ssl_file.close()
+      ssl_domain = SnakePickler.load(ssl_file_name)
     except IOError:
       ssl_domain = SSLDomain(address)
 
@@ -1607,9 +1594,7 @@
       return TEST_INCONCLUSIVE
 
     if self._update_cert_list(ssl_domain, check_ips):
-      ssl_file = open(ssl_file_name, 'w')
-      pickle.dump(ssl_domain, ssl_file)
-      ssl_file.close()
+      SnakePickler.dump(ssl_domain, ssl_file_name)
 
     if not ssl_domain.cert_map:
       plog('WARN', 'Error getting the correct cert for ' + address)
@@ -1620,9 +1605,7 @@
       ssl_domain = SSLDomain(address)
       plog('INFO', 'Fetching all new certs for '+address)
       if self._update_cert_list(ssl_domain, check_ips):
-        ssl_file = open(ssl_file_name, 'w')
-        pickle.dump(ssl_domain, ssl_file)
-        ssl_file.close()
+        SnakePickler.dump(ssl_domain, ssl_file_name)
       if ssl_domain.cert_changed:
         plog("NOTICE", "Fully dynamic certificate host "+address)
 


