[or-cvs] r20279: {torflow} Begin refactoring, reorganizing, and streamlining soat in pre (in torflow/trunk/NetworkScanners: . BwAuthority ExitAuthority ExitAuthority/data ExitAuthority/data/soat ExitAuthority/libs data)

mikeperry at seul.org
Thu Aug 13 19:34:13 UTC 2009


Author: mikeperry
Date: 2009-08-13 15:34:12 -0400 (Thu, 13 Aug 2009)
New Revision: 20279

Added:
   torflow/trunk/NetworkScanners/ExitAuthority/
   torflow/trunk/NetworkScanners/ExitAuthority/README.ExitScanning
   torflow/trunk/NetworkScanners/ExitAuthority/data/
   torflow/trunk/NetworkScanners/ExitAuthority/data/soat/
   torflow/trunk/NetworkScanners/ExitAuthority/libs/
   torflow/trunk/NetworkScanners/ExitAuthority/libsoat.py
   torflow/trunk/NetworkScanners/ExitAuthority/snakeinspector.py
   torflow/trunk/NetworkScanners/ExitAuthority/soat.py
   torflow/trunk/NetworkScanners/ExitAuthority/soatstats.py
   torflow/trunk/NetworkScanners/ExitAuthority/wordlist.txt
Removed:
   torflow/trunk/NetworkScanners/README.ExitScanning
   torflow/trunk/NetworkScanners/data/soat/
   torflow/trunk/NetworkScanners/libs/
   torflow/trunk/NetworkScanners/libsoat.py
   torflow/trunk/NetworkScanners/snakeinspector.py
   torflow/trunk/NetworkScanners/soat.py
   torflow/trunk/NetworkScanners/soatstats.py
   torflow/trunk/NetworkScanners/wordlist.txt
Modified:
   torflow/trunk/NetworkScanners/BwAuthority/bwauthority.py
Log:

Begin refactoring, reorganizing, and streamlining soat in
preparation to eventually become an Exit Authority component.
Note that this commit is broken and things may go in and out of
brokenness for a while. Start using the stable branch
if this bothers you :)



Modified: torflow/trunk/NetworkScanners/BwAuthority/bwauthority.py
===================================================================
--- torflow/trunk/NetworkScanners/BwAuthority/bwauthority.py	2009-08-13 18:29:55 UTC (rev 20278)
+++ torflow/trunk/NetworkScanners/BwAuthority/bwauthority.py	2009-08-13 19:34:12 UTC (rev 20279)
@@ -30,7 +30,7 @@
 #import gc
 #gc.set_debug(gc.DEBUG_COLLECTABLE|gc.DEBUG_UNCOLLECTABLE|gc.DEBUG_INSTANCES|gc.DEBUG_OBJECTS)
  
-from TorCtl import PathSupport,SQLSupport,TorCtl,TorUtil
+from TorCtl import ScanSupport,PathSupport,SQLSupport,TorCtl,TorUtil
 
 sys.path.append("../libs")
 from SocksiPy import socks
@@ -98,145 +98,32 @@
       #return "https://86.59.21.36/torbrowser/dist/tor-im-browser-1.2.0_ru_split/tor-im-browser-1.2.0_ru_split.part01.exe"
   raise PathSupport.NoNodesRemain("No nodes left for url choice!")
 
-# Note: be careful writing functions for this class. Remember that
-# the PathBuilder has its own thread that it recieves events on
-# independent from your thread that calls into here.
-class BwScanHandler(PathSupport.PathBuilder):
-  def get_exit_node(self):
-    return copy.copy(self.last_exit) # GIL FTW
+def http_request(address):
+  ''' perform an http GET-request and return 1 for success or 0 for failure '''
 
-  def attach_sql_listener(self, db_uri):
-    plog("DEBUG", "Got sqlite: "+db_uri)
-    SQLSupport.setup_db(db_uri, echo=False, drop=True)
-    self.sql_consensus_listener = SQLSupport.ConsensusTrackerListener()
-    self.add_event_listener(self.sql_consensus_listener)
-    self.add_event_listener(SQLSupport.StreamListener())
+  request = urllib2.Request(address)
+  request.add_header('User-Agent', user_agent)
 
-  def write_sql_stats(self, rfilename=None, stats_filter=None):
-    if not rfilename:
-      rfilename="./data/stats/sql-"+time.strftime("20%y-%m-%d-%H:%M:%S")
-    cond = threading.Condition()
-    def notlambda(h):
-      cond.acquire()
-      SQLSupport.RouterStats.write_stats(file(rfilename, "w"),
-                            0, 100, order_by=SQLSupport.RouterStats.sbw,
-                            recompute=True, disp_clause=stats_filter)
-      cond.notify()
-      cond.release()
-    cond.acquire()
-    self.schedule_low_prio(notlambda)
-    cond.wait()
-    cond.release()
+  try:
+    reply = urllib2.urlopen(request)
+    decl_length = reply.info().get("Content-Length")
+    read_len = len(reply.read())
+    plog("DEBUG", "Read: "+str(read_len)+" of declared "+str(decl_length))
+    return 1
+  except (ValueError, urllib2.URLError):
+    plog('ERROR', 'The http-request address ' + address + ' is malformed')
+    return 0
+  except (IndexError, TypeError):
+    plog('ERROR', 'An error occured while negotiating socks5 with Tor')
+    return 0
+  except KeyboardInterrupt:
+    raise KeyboardInterrupt
+  except:
+    plog('ERROR', 'An unknown HTTP error occured')
+    traceback.print_exc()
+    return 0
 
-  def write_strm_bws(self, rfilename=None, slice_num=0, stats_filter=None):
-    if not rfilename:
-      rfilename="./data/stats/bws-"+time.strftime("20%y-%m-%d-%H:%M:%S")
-    cond = threading.Condition()
-    def notlambda(this):
-      cond.acquire()
-      f=file(rfilename, "w")
-      f.write("slicenum="+str(slice_num)+"\n")
-      SQLSupport.RouterStats.write_bws(f, 0, 100,
-                            order_by=SQLSupport.RouterStats.sbw,
-                            recompute=False, disp_clause=stats_filter)
-      f.close()
-      cond.notify()
-      cond.release()
-    cond.acquire()
-    self.schedule_low_prio(notlambda)
-    cond.wait()
-    cond.release()
-
-  def set_pct_rstr(self, percent_skip, percent_fast):
-    def notlambda(sm):
-      sm.percent_fast=percent_fast
-      sm.percent_skip=percent_skip
-    self.schedule_selmgr(notlambda)
-
-  def reset_stats(self):
-    def notlambda(this): 
-      this.reset()
-    self.schedule_low_prio(notlambda)
-
-  def save_sql_file(self, sql_file, new_file):
-    cond = threading.Condition()
-    def notlambda(this):
-      cond.acquire()
-      SQLSupport.tc_session.close()
-      try:
-        shutil.copy(sql_file, new_file)
-      except Exception,e:
-        plog("WARN", "Error moving sql file: "+str(e))
-      SQLSupport.reset_all()
-      cond.notify()
-      cond.release()
-    cond.acquire()
-    self.schedule_low_prio(notlambda)
-    cond.wait()
-    cond.release()
-
-  def commit(self):
-    plog("INFO", "Scanner committing jobs...")
-    cond = threading.Condition()
-    def notlambda2(this):
-      cond.acquire()
-      this.run_all_jobs = False
-      plog("INFO", "Commit done.")
-      cond.notify()
-      cond.release()
-
-    def notlambda1(this):
-      plog("INFO", "Committing jobs...")
-      this.run_all_jobs = True
-      self.schedule_low_prio(notlambda2)
-
-    cond.acquire()
-    self.schedule_immediate(notlambda1)
-
-    cond.wait()
-    cond.release()
-    plog("INFO", "Scanner commit done.")
-
-  def close_circuits(self):
-    cond = threading.Condition()
-    def notlambda(this):
-      cond.acquire()
-      this.close_all_circuits()
-      cond.notify()
-      cond.release()
-    cond.acquire()
-    self.schedule_low_prio(notlambda)
-    cond.wait()
-    cond.release()
-
-  def close_streams(self, reason):
-    cond = threading.Condition()
-    plog("NOTICE", "Wedged Tor stream. Closing all streams")
-    def notlambda(this):
-      cond.acquire()
-      this.close_all_streams(reason)
-      cond.notify()
-      cond.release()
-    cond.acquire()
-    self.schedule_low_prio(notlambda)
-    cond.wait()
-    cond.release()
-
-  def new_exit(self):
-    cond = threading.Condition()
-    def notlambda(this):
-      cond.acquire()
-      this.new_nym = True
-      lines = this.c.sendAndRecv("SIGNAL CLEARDNSCACHE\r\n")
-      for _,msg,more in lines:
-        plog("DEBUG", msg)
-      cond.notify()
-      cond.release()
-    cond.acquire()
-    self.schedule_low_prio(notlambda)
-    cond.wait()
-    cond.release()
-
+class BwScanHandler(ScanSupport.ScanHandler):
   def is_count_met(self, count, position=0):
     cond = threading.Condition()
     cond._finished = True # lol python haxx. Could make subclass, but why?? :)
@@ -266,76 +153,6 @@
     cond.release()
     return cond._finished
 
-  def rank_to_percent(self, rank):
-    cond = threading.Condition()
-    def notlambda(this):
-      cond.acquire()
-      cond._pct = (100.0*rank)/len(this.sorted_r) # lol moar haxx
-      cond.notify()
-      cond.release()
-    cond.acquire()
-    self.schedule_low_prio(notlambda)
-    cond.wait()
-    cond.release()
-    return cond._pct
-
-  def percent_to_rank(self, pct):
-    cond = threading.Condition()
-    def notlambda(this):
-      cond.acquire()
-      cond._rank = int(round((pct*len(this.sorted_r))/100.0,0)) # lol moar haxx
-      cond.notify()
-      cond.release()
-    cond.acquire()
-    self.schedule_low_prio(notlambda)
-    cond.wait()
-    cond.release()
-    return cond._rank
-
-  def wait_for_consensus(self):
-    cond = threading.Condition()
-    def notlambda(this):
-      if this.sql_consensus_listener.last_desc_at \
-                 != SQLSupport.ConsensusTrackerListener.CONSENSUS_DONE:
-        this.sql_consensus_listener.wait_for_signal = False
-        plog("INFO", "Waiting on consensus result: "+str(this.run_all_jobs))
-        this.schedule_low_prio(notlambda)
-      else:
-        cond.acquire()
-        this.sql_consensus_listener.wait_for_signal = True
-        cond.notify()
-        cond.release()
-    cond.acquire()
-    self.schedule_low_prio(notlambda)
-    cond.wait()
-    cond.release()
-    plog("INFO", "Consensus OK")
-
-def http_request(address):
-  ''' perform an http GET-request and return 1 for success or 0 for failure '''
-
-  request = urllib2.Request(address)
-  request.add_header('User-Agent', user_agent)
-
-  try:
-    reply = urllib2.urlopen(request)
-    decl_length = reply.info().get("Content-Length")
-    read_len = len(reply.read())
-    plog("DEBUG", "Read: "+str(read_len)+" of declared "+str(decl_length))
-    return 1
-  except (ValueError, urllib2.URLError):
-    plog('ERROR', 'The http-request address ' + address + ' is malformed')
-    return 0
-  except (IndexError, TypeError):
-    plog('ERROR', 'An error occured while negotiating socks5 with Tor')
-    return 0
-  except KeyboardInterrupt:
-    raise KeyboardInterrupt
-  except:
-    plog('ERROR', 'An unknown HTTP error occured')
-    traceback.print_exc()
-    return 0 
-
 def speedrace(hdlr, start_pct, stop_pct, circs_per_node, save_every, out_dir,
               max_fetch_time, sleep_start_tp, sleep_stop_tp, slice_num,
               min_streams, sql_file):

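The methods removed from BwScanHandler above (and now presumably provided by ScanSupport.ScanHandler, which it subclasses) all follow the same cross-thread pattern still visible in is_count_met: work is queued onto the PathBuilder event thread and the caller blocks on a Condition. A minimal sketch of that pattern, assuming only the schedule_low_prio() call shown in this diff; the do_something() work item is a placeholder:

  import threading

  def run_on_event_thread(handler):
    # Block the calling thread until the PathBuilder event thread has run our closure.
    cond = threading.Condition()
    def notlambda(this):                  # executed later on the event thread
      cond.acquire()
      this.do_something()                 # placeholder for the real work (write stats, close circuits, ...)
      cond.notify()
      cond.release()
    cond.acquire()
    handler.schedule_low_prio(notlambda)  # queue the closure for the event thread
    cond.wait()                           # sleep until notify() fires
    cond.release()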
Copied: torflow/trunk/NetworkScanners/ExitAuthority/README.ExitScanning (from rev 20213, torflow/trunk/NetworkScanners/README.ExitScanning)
===================================================================
--- torflow/trunk/NetworkScanners/ExitAuthority/README.ExitScanning	                        (rev 0)
+++ torflow/trunk/NetworkScanners/ExitAuthority/README.ExitScanning	2009-08-13 19:34:12 UTC (rev 20279)
@@ -0,0 +1,209 @@
+              How to run the Snakes on a Tor Exit Scanner
+
+
+
+I. Introduction
+
+The Snakes on a Tor Exit Scanner scans the Tor network for misbehaving
+and misconfigured exit nodes. It has several tests that it performs,
+including HTML, javascript, arbitrary HTTP, SSL and DNS scans. The
+mechanisms by which these scans operate will be covered in another
+document. This document concerns itself only with running the scanner.
+
+
+
+II. Prerequisites
+
+Python 2.4+
+Tor 0.2.1.13 (r18556 or later)
+Super Secret SoaT Sauce
+py-openssl/pyOpenSSL
+Bonus: Secondary external IP address
+
+Having a second external IP address will allow your scanner to filter
+out false positives for dynamic pages that arise due to pages encoding
+your IP address in documents.
+
+
+
+III. Setup
+
+A. Compiling Tor
+
+To run SoaT you will need Tor 0.2.1.13. SVN r18516 contains a timeout
+fix that is essential to scanning the network in any reasonable amount
+of time.
+
+It is also strongly recommended that you have a custom Tor instance that
+is devoted only to exit scanning, and is not performing any other
+function (including serving as a relay or a directory authority).
+
+
+B. Configuring SoaT
+
+To configure SoaT (and even to get it to run), you will need to obtain
+Super Secret SoaT Sauce from Mike Perry's Super Secret SoaT Sauce Stash.
+It contains the necessary pheromones you will need to enable you to
+properly hunt some motherfuckin snakes.
+
+Once you have the Sauce, you should copy it to soat_config.py and have a
+look at its contents. In particular, you'll want to change 'refetch_ip'
+to be set to your secondary IP address. If you don't have a secondary
+IP, set it to None.
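
For illustration, the relevant soat_config.py line might look like the
following; the address is a documentation placeholder, not a real
scanner IP:

  refetch_ip = "203.0.113.45"   # your secondary external IP, or None if you only have one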
+
+If you're feeling ambitious, you can edit soat_config.py to change the
+set of 'scan_filetypes' and increase 'max_content_size' to something
+large enough to support these filetypes. However, you should balance
+this with our more immediate need for the scanner to run quickly so that
+the code is exercised and can stabilize quickly.
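
If you do widen the scan that way, the two settings named above might
look roughly like this; the values are purely illustrative:

  scan_filetypes = ['pdf','doc','exe']   # filetypes to hunt for
  max_content_size = 5*1024*1024         # bytes; large enough for the filetypes above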
+
+You'll also want to edit ./wordlist.txt and change its contents to be a
+smattering of random and/or commonly censored words. If you speak other
+languages (especially any that have unicode characters), using keywords
+from them would be especially useful for testing and scanning. Note that
+these queries WILL be issued in plaintext via non-Tor, and the resulting
+urls fetched via non-Tor as well, so bear that and your server's legal
+jurisdiction in mind when choosing keywords.
+
+You can also separate out the wordlist.txt file into three files by
+changing the soat_config.py settings 'ssl_wordlist_file',
+'html_wordlist_file', and 'filetype_wordlist_file'. This will allow
+you to use separate keywords for obtaining SSL, HTML, and Filetype
+urls. This can be useful if you believe it likely for an adversary to
+target only certain keywords/concepts/sites in a particular context.
+
+You can edit the contents of the wordlist files while SoaT runs. It will
+pick up the changes after it completes a full network scan with the old 
+list.
+
+
+IV. Running Tor, The Metatroller, and SoaT
+
+Once you have everything compiled and configured, you should be ready to
+run the pieces. You probably want to do this as a separate, unprivileged
+user.
+
+First, start up your custom Tor with the sample torrc provided in the
+TorFlow svn root:
+
+# ~/src/tor-trunk/src/or/tor -f ~/src/torflow-trunk/torrc >& tor.log &
+
+Then, start up the Metatroller:
+
+# ~/src/torflow-trunk/metatroller.py >& mt.log &
+
+Finally, start up SoaT:
+
+# ./soat.py --ssl --html --http --dnsrebind >& soat.log &
+
+
+If your machine is shared, you probably want to set a control port
+password. For metatroller and SoaT, this is the control_pass variable
+at the top of torflow-trunk/TorCtl/TorUtil.py. Of course, you also
+have to set it in the custom torrc as well.
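
As a sketch, the TorUtil.py side of that could look like the line
below (the password is a placeholder); the torrc side is the matching
HashedControlPassword line, which you can generate with
tor --hash-password:

  control_pass = "my-scanner-password"   # near the top of torflow-trunk/TorCtl/TorUtil.py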
+
+
+V. Monitoring and Results
+
+A. Watching for Captcha Problems
+
+You'll need to keep an eye on the beginning of the soat.log to make sure
+it is actually retrieving urls from Google. Google's servers can
+periodically decide that you are not worthy to query them, especially if
+you restart soat several times in a row. If this happens, open up
+soat_config.py and change the line:
+
+default_search_mode = google_search_mode
+
+to
+
+default_search_mode = yahoo_search_mode
+
+and remove the --ssl from the soat command line until Google decides it
+hates you a little less (this usually takes less than a day). The SSL
+scanner is hardcoded to use google_search_mode regardless of the
+default_search_mode because Yahoo's "inurl:" modifier does not apply to
+the scheme of the url, which we need in order to obtain fresh https
+urls.
+
+It is possible that changing default_search_mode to yahoo_search_mode
+BEFORE Google starts to hate you (while still using --ssl) will let you
+restart soat more times than with Google alone, but if both Yahoo and
+Google begin to hate you, you won't be able to scan at all.
+
+
+B. Handling Crashes
+
+At this stage in the game, your primary task will be to periodically
+check the scanner for exceptions and hangs. For that you'll just want
+to tail the soat.log file to make sure it is putting out recent loglines
+and is continuing to run. If there are any issues, please mail me your
+soat.log.
+
+If/When SoaT crashes, you should be able to resume it exactly where it
+left off with:
+
+# ./soat.py --resume --ssl --html --http --dnsrebind >& soat.log &
+
+Keeping the same options during a --resume is a Really Good Idea.
+
+Soat actually saves a snapshot to a unique name each time you run it
+without --resume, so you can suspend and resume arbitrary runs by
+specifying their number:
+
+# ls ./data/soat/
+# ./soat.py --resume 2 --ssl --html --http --dnsrebind >& soat.log &
+
+
+C. Handling Results
+
+As things stabilize, you'll want to begin grepping your soat.log for
+ERROR lines. These indicate serious scanning errors and content
+modifications. There will likely be false positives at first, and these
+will require you to tar up your ./data directory and soat.log and send
+them to me so I can improve the filters:
+
+# tar -jcf soat-data.tbz2 ./data/soat ./soat.log
+
+If you're feeling adventurous, you can inspect the results yourself by
+running snakeinspector.py. Running it with no arguments will dump all
+failures to your screen in a semi-human readable format. You can add a
+--verbose to get unified diffs of content modifications, filter on
+specific TestResult types with --resultfilter, and filter on specific
+exit idhexes with --exit. Ex:
+
+# ./snakeinspector.py --verbose --exit 80972D30FE33CB8AD60726C5272AFCEBB05CD6F7
+   --resultfilter SSLTestResult 
+
+or just:
+
+# ./snakeinspector.py | less
+
+At some point in the future, I hope to have a script prepared that will
+mail false positives and actual results to me when you run it. Later
+still, soat will automatically mail these results to an email list we
+are all subscribed to as they happen.
+
+
+D. Verifying Results
+
+If you would like to verify a set of results, you can use the --rescan
+option of soat, which crawls your data directory, builds a list of
+nodes to scan consisting only of failures, and then scans those nodes
+with fresh URLs:
+
+# ./soat.py --rescan --ssl --html --http --dnsrebind >& soat.log &
+
+Rescans can also be resumed with --resume should they fail.
+
+SoaT can also do a rescan at the end of every loop through the node
+list. This is governed by the rescan_at_finish soat_config option.
+
+Note that rescanning does not prune out geolocated URLs that differ
+across the majority of exit nodes. It can thus cause many more false
+positives to accumulate than a regular scan.
+
+
+
+Alright that covers the basics. Let's get those motherfuckin snakes off
+this motherfuckin Tor!


Property changes on: torflow/trunk/NetworkScanners/ExitAuthority/README.ExitScanning
___________________________________________________________________
Added: svn:mergeinfo
   + 


Property changes on: torflow/trunk/NetworkScanners/ExitAuthority/data/soat
___________________________________________________________________
Added: svn:mergeinfo
   + 


Property changes on: torflow/trunk/NetworkScanners/ExitAuthority/libs
___________________________________________________________________
Added: svn:externals
   + 

Added: svn:mergeinfo
   + 

Copied: torflow/trunk/NetworkScanners/ExitAuthority/libsoat.py (from rev 20213, torflow/trunk/NetworkScanners/libsoat.py)
===================================================================
--- torflow/trunk/NetworkScanners/ExitAuthority/libsoat.py	                        (rev 0)
+++ torflow/trunk/NetworkScanners/ExitAuthority/libsoat.py	2009-08-13 19:34:12 UTC (rev 20279)
@@ -0,0 +1,1077 @@
+#!/usr/bin/python
+#
+# Common code to soat
+
+import operator
+import os
+import pickle
+import sys
+import time
+import traceback
+import difflib
+import re
+import copy
+import socket
+import struct
+sys.path.append("./libs")
+from OpenSSL import crypto
+from BeautifulSoup.BeautifulSoup import Tag, SoupStrainer
+
+import sets
+from sets import Set
+from soat_config import *
+
+sys.path.append("../")
+from TorCtl.TorUtil import *
+
+# Antlr stuff
+sys.path.append("./libs/jsparser/")
+import antlr3
+from JavaScriptParser import tokenNames as JSTokenNames
+from JavaScriptLexer import JavaScriptLexer
+from JavaScriptParser import JavaScriptParser
+
+class LoggingJSParser(JavaScriptParser):
+  def __init__(self, tokens):
+    JavaScriptParser.__init__(self, tokens)
+    self.parse_errors__ = []
+  def displayRecognitionError(self, tokens, e):
+    self.parse_errors__.append(e)
+    JavaScriptParser.displayRecognitionError(self, tokens, e)
+class LoggingJSLexer(JavaScriptLexer):
+  def __init__(self, tokens):
+    JavaScriptLexer.__init__(self, tokens)
+    self.lex_errors__ = []
+  def displayRecognitionError(self, tokens, e):
+    self.lex_errors__.append(e)
+    JavaScriptLexer.displayRecognitionError(self, tokens, e)
+
+# constants
+
+TEST_SUCCESS = 0
+TEST_INCONCLUSIVE = 1
+TEST_FAILURE = 2
+
+# Sorry, we sort of rely on the ordinal nature of the above constants
+RESULT_STRINGS = {TEST_SUCCESS:"Success", TEST_INCONCLUSIVE:"Inconclusive", TEST_FAILURE:"Failure"}
+RESULT_CODES=dict([v,k] for k,v in RESULT_STRINGS.iteritems())
+
+# Inconclusive reasons
+INCONCLUSIVE_NOLOCALCONTENT = "InconclusiveNoLocalContent"
+INCONCLUSIVE_DYNAMICSSL = "InconclusiveDynamicSSL"
+INCONCLUSIVE_TORBREAKAGE = "InconclusiveTorBreakage"
+INCONCLUSIVE_NOEXIT = "InconclusiveNoExit"
+
+# Failed reasons
+FAILURE_EXITONLY = "FailureExitOnly"
+FAILURE_DYNAMIC = "FailureDynamic" 
+FAILURE_COOKIEMISMATCH = "FailureCookieMismatch"
+FAILURE_BADHTTPCODE = "FailureBadHTTPCode"
+FAILURE_NOEXITCONTENT = "FailureNoExitContent"
+FAILURE_EXITTRUNCATION = "FailureExitTruncation"
+FAILURE_SOCKSERROR = "FailureSocksError"
+FAILURE_HOSTUNREACH = "FailureHostUnreach" # aka DNS issue
+FAILURE_NETUNREACH = "FailureNetUnreach"
+FAILURE_EXITPOLICY = "FailureExitPolicy"
+FAILURE_CONNREFUSED = "FailureConnRefused"
+FAILURE_CONNERROR = "FailureConnError"
+FAILURE_URLERROR = "FailureURLError"
+FAILURE_CRYPTOERROR = "FailureCryptoError"
+FAILURE_TIMEOUT = "FailureTimeout"
+FAILURE_HEADERCHANGE = "FailureHeaderChange"
+FAILURE_MISCEXCEPTION = "FailureMiscException"
+
+# False positive reasons
+FALSEPOSITIVE_HTTPERRORS = "FalsePositiveHTTPErrors"
+FALSEPOSITIVE_DYNAMIC = "FalsePositiveDynamic"
+FALSEPOSITIVE_DYNAMIC_TOR = "FalsePositiveDynamicTor"
+FALSEPOSITIVE_DEADSITE = "FalsePositiveDeadSite"
+
+# classes to use with pickle to dump test results into files
+
+class TestResult(object):
+  ''' Parent class for all test result classes '''
+  def __init__(self, exit_obj, site, status, reason=None):
+    if exit_obj:
+      self.exit_node = exit_obj.idhex
+      self.exit_name = exit_obj.nickname
+      self.exit_ip = exit_obj.ip
+      self.contact = exit_obj.contact
+    else:
+      self.exit_node = "[No Exit Used]"
+      self.exit_name = ""
+      self.exit_ip = 0
+      self.contact = "[No Exit Used]"
+    self.exit_obj = exit_obj
+    self.site = site
+    self.timestamp = time.time()
+    self.status = status
+    self.reason = reason
+    self.extra_info = None
+    self.false_positive=False
+    self.false_positive_reason="None"
+    self.verbose=0
+    self.from_rescan = False
+    self.filename=None
+    self._pickle_revision = 5
+
+  def depickle_upgrade(self):
+    if not "_pickle_revision" in self.__dict__: # upgrade to v0
+      self._pickle_revision = 0
+    if self._pickle_revision < 1:
+      self._pickle_revision = 1
+    if self._pickle_revision < 2:
+      self._pickle_revision = 2
+      self.exit_name = "NameNotStored!"
+    if self._pickle_revision < 3:
+      self._pickle_revision = 3
+      self.exit_ip = "\x00\x00\x00\x00"
+      self.exit_obj = None
+    if self._pickle_revision < 4:
+      self._pickle_revision = 4
+      self.contact = None
+    if self._pickle_revision < 5:
+      self._pickle_revision = 5
+      if type(self.exit_ip) == str or not self.exit_ip: self.exit_ip = 0
+
+
+  def _rebase(self, filename, new_data_root):
+    if not filename: return filename
+    filename = os.path.normpath(filename)
+    split_file = filename.split("/")
+    return os.path.normpath(os.path.join(new_data_root, *split_file[1:]))
+
+  def rebase(self, new_data_root):
+    self.filename = self._rebase(self.filename, new_data_root)
+ 
+  def mark_false_positive(self, reason):
+    self.false_positive=True
+    self.false_positive_reason=reason
+
+  def move_file(self, file, to_dir):
+    if not file: return None
+    try:
+      basename = os.path.basename(file)
+      new_file = to_dir+basename
+      if not os.path.exists(file) and os.path.exists(new_file):
+        return new_file # Already moved by another test (ex: content file)
+      os.rename(file, new_file)
+      return new_file
+    except Exception, e:
+      traceback.print_exc()
+      plog("WARN", "Error moving "+file+" to "+to_dir)
+      return file
+
+  def __str__(self):
+    ret = self.__class__.__name__+" for "+self.site+"\n"
+    ret += " Time: "+time.ctime(self.timestamp)+"\n"
+    ret += " Exit: "+socket.inet_ntoa(struct.pack(">I",self.exit_obj.ip))+" "+self.exit_node+" ("+self.exit_name+")\n"
+    ret += " Contact: "+str(self.contact)+"\n"  
+    ret += " "+str(RESULT_STRINGS[self.status])
+    if self.reason:
+      ret += " Reason: "+self.reason
+    if self.extra_info:
+      ret += "\n Extra info: "+self.extra_info 
+    if self.false_positive:
+      ret += "\n Removed as False Positive: "+self.false_positive_reason
+    if self.from_rescan:
+      ret += "\n From rescan: "+str(self.from_rescan)
+    ret += "\n"
+    return ret
+
+class SSLTestResult(TestResult):
+  ''' Represents the result of an openssl test '''
+  def __init__(self, exit_obj, ssl_site, ssl_file, status, 
+               reason=None, exit_ip=None, exit_cert_pem=None):
+    super(SSLTestResult, self).__init__(exit_obj, ssl_site, status, reason)
+    self.ssl_file = ssl_file
+    self.exit_cert = exit_cert_pem # Meh, not that much space
+    self.exit_ip = exit_ip # XXX: Wrong!
+    self.proto = "ssl"
+
+  def rebase(self, new_data_root):
+    self.ssl_file = self._rebase(self.ssl_file, new_data_root)
+
+  def mark_false_positive(self, reason):
+    TestResult.mark_false_positive(self, reason)
+    self.ssl_file=self.move_file(self.ssl_file, ssl_falsepositive_dir)
+
+  def _dump_cert(self, cert):
+    ret = ""
+    x509 = crypto.load_certificate(crypto.FILETYPE_PEM, cert)
+    ret += "Issuer: "+str(x509.get_issuer())+"\n"
+    ret += "Subject: "+str(x509.get_subject())+"\n"
+    return ret
+
+  def __str__(self):
+    ret = TestResult.__str__(self)
+    ssl_domain = SnakePickler.load(self.ssl_file)
+    ret += " Rotates: "+str(ssl_domain.cert_rotates)
+    ret += " Changed: "+str(ssl_domain.cert_changed)+"\n" 
+    if self.verbose:
+      if self.exit_cert:
+        for cert in ssl_domain.cert_map.iterkeys():
+          ret += "\nCert for "+ssl_domain.cert_map[cert]+":\n"
+          if self.verbose > 1: ret += cert
+          ret += self._dump_cert(cert)
+        if self.exit_ip: 
+          ret += "\nExit node's cert for "+self.exit_ip+":\n"
+        else:
+          ret += "\nExit node's cert:\n"
+        if self.verbose > 1: ret += self.exit_cert
+        ret += self._dump_cert(self.exit_cert)
+    return ret 
+
+class SSLDomain:
+  def __init__(self, domain):
+    self.domain = domain
+    self.cert_map = {}
+    self.ip_map = {}
+    self.cert_rotates = False
+    self.cert_changed = False
+
+  def depickle_upgrade(self):
+    pass
+
+  def add_cert(self, ip, cert_string):
+    if ip in self.ip_map and self.ip_map[ip] != cert_string:
+      plog("NOTICE", self.domain+" has changed certs.")
+      self.cert_changed = True
+    if len(self.cert_map) and cert_string not in self.cert_map:
+      plog("NOTICE", self.domain+" is rotating certs.")
+      self.cert_rotates = True
+    self.cert_map[cert_string] = ip
+    self.ip_map[ip] = cert_string
+    
+  def seen_cert(self, cert_string):
+    return cert_string in self.cert_map
+
+  def seen_ip(self, ip):
+    return ip in self.ip_map
+
+  def num_certs(self):
+    return len(self.cert_map)
+
+class HttpTestResult(TestResult):
+  ''' Represents the result of a http test '''
+  def __init__(self, exit_obj, website, status, reason=None, 
+               sha1sum=None, exit_sha1sum=None, content=None, 
+               content_exit=None, content_old=None, sha1sum_old=None):
+    super(HttpTestResult, self).__init__(exit_obj, website, status, reason)
+    self.proto = "http"
+    self.sha1sum = sha1sum
+    self.sha1sum_old = sha1sum_old
+    self.exit_sha1sum = exit_sha1sum
+    self.content = content
+    self.content_exit = content_exit
+    self.content_old = content_old
+
+  def rebase(self, new_data_root):
+    self.content = self._rebase(self.content, new_data_root)
+    self.content_exit = self._rebase(self.content_exit, new_data_root)
+    self.content_old = self._rebase(self.content_old, new_data_root)
+
+  def mark_false_positive(self, reason):
+    TestResult.mark_false_positive(self, reason)
+    self.content=self.move_file(self.content, http_falsepositive_dir)
+    self.content_old=self.move_file(self.content_old, http_falsepositive_dir)
+    self.content_exit=self.move_file(self.content_exit,http_falsepositive_dir)
+
+  def remove_files(self):
+    try: os.unlink(self.content)
+    except: pass
+    try: os.unlink(self.content_old)
+    except: pass
+    try: os.unlink(self.content_exit)
+    except: pass
+
+  def __str__(self):
+    ret = TestResult.__str__(self)
+    if self.content:
+      ret += " "+self.content+" (SHA1: "+self.sha1sum+")\n"
+    if self.content_old:
+      ret += " "+self.content_old+" (SHA1: "+self.sha1sum_old+")\n"
+    if self.content_exit:
+      ret += " "+self.content_exit+" (SHA1: "+self.exit_sha1sum+")\n"
+    return ret
+
+class CookieTestResult(TestResult):
+  def __init__(self, exit_obj, status, reason, plain_cookies, 
+               tor_cookies):
+    super(CookieTestResult, self).__init__(exit_obj, "cookies", status)
+    self.proto = "http"
+    self.reason = reason
+    self.tor_cookies = tor_cookies
+    self.plain_cookies = plain_cookies
+
+  def __str__(self):
+    ret = TestResult.__str__(self)
+    ret += " Plain Cookies:"+self.plain_cookies
+    ret += " Tor Cookies:"+self.tor_cookies
+    return ret
+
+class JsTestResult(TestResult):
+  ''' Represents the result of a JS test '''
+  def __init__(self, exit_obj, website, status, reason=None, 
+               content=None, content_exit=None, content_old=None,
+               jsdiffer=None):
+    super(JsTestResult, self).__init__(exit_obj, website, status, reason)
+    self.proto = "http"
+    self.content = content
+    self.content_exit = content_exit
+    self.content_old = content_old
+    self.jsdiffer = jsdiffer
+
+  def depickle_upgrade(self):
+    if not "_pickle_revision" in self.__dict__ or self._pickle_revision < 1:
+      self.jsdiffer = None
+    TestResult.depickle_upgrade(self)
+
+  def rebase(self, new_data_root):
+    self.content = self._rebase(self.content, new_data_root)
+    self.content_exit = self._rebase(self.content_exit, new_data_root)
+    self.content_old = self._rebase(self.content_old, new_data_root)
+    self.jsdiffer = self._rebase(self.jsdiffer, new_data_root)
+
+  def mark_false_positive(self, reason):
+    TestResult.mark_false_positive(self, reason)
+    self.content=self.move_file(self.content, http_falsepositive_dir)
+    self.content_old=self.move_file(self.content_old, http_falsepositive_dir)
+    self.content_exit=self.move_file(self.content_exit,http_falsepositive_dir)
+    self.jsdiffer=self.move_file(self.jsdiffer,http_falsepositive_dir)
+
+  def remove_files(self):
+    try: os.unlink(self.content)
+    except: pass
+    try: os.unlink(self.content_old)
+    except: pass
+    try: os.unlink(self.content_exit)
+    except: pass
+
+  def __str__(self):
+    ret = TestResult.__str__(self)
+    if self.verbose:
+      if self.content and self.content_old:
+        diff = difflib.unified_diff(open(self.content).read().split("\n"),
+                             open(self.content_old).read().split("\n"), 
+                             "Non-Tor1", "Non-Tor2",
+                             lineterm="")
+        for line in diff:
+          ret+=line+"\n"
+      if self.content and self.content_exit:
+        diff = difflib.unified_diff(open(self.content).read().split("\n"),
+                             open(self.content_exit).read().split("\n"), 
+                              "Non-Tor", "Exit",
+                              lineterm="")
+        for line in diff:
+          ret+=line+"\n"
+    else:
+      if self.content:
+        ret += " "+self.content+"\n"
+      if self.content_old:
+        ret += " "+self.content_old+"\n"
+      if self.content_exit:
+        ret += " "+self.content_exit+"\n"
+    return ret
+
+class HtmlTestResult(TestResult):
+  ''' Represents the result of a http test '''
+  def __init__(self, exit_obj, website, status, reason=None, 
+               content=None, content_exit=None, content_old=None, 
+               soupdiffer=None, jsdiffer=None):
+    super(HtmlTestResult, self).__init__(exit_obj, website, status, reason)
+    self.proto = "http"
+    self.content = content
+    self.content_exit = content_exit
+    self.content_old = content_old
+    self.soupdiffer = soupdiffer
+    self.jsdiffer = jsdiffer
+
+  def depickle_upgrade(self):
+    if not "_pickle_revision" in self.__dict__ or self._pickle_revision < 1:
+      self.soupdiffer = None
+      self.jsdiffer = None
+    TestResult.depickle_upgrade(self)
+
+  def rebase(self, new_data_root):
+    self.content = self._rebase(self.content, new_data_root)
+    self.content_exit = self._rebase(self.content_exit, new_data_root)
+    self.content_old = self._rebase(self.content_old, new_data_root)
+    self.soupdiffer = self._rebase(self.soupdiffer, new_data_root)
+    self.jsdiffer = self._rebase(self.jsdiffer, new_data_root)
+
+  def mark_false_positive(self, reason):
+    TestResult.mark_false_positive(self, reason)
+    self.content=self.move_file(self.content,http_falsepositive_dir)
+    self.content_old=self.move_file(self.content_old, http_falsepositive_dir)
+    self.content_exit=self.move_file(self.content_exit,http_falsepositive_dir)
+    self.soupdiffer=self.move_file(self.soupdiffer,http_falsepositive_dir)
+    self.jsdiffer=self.move_file(self.jsdiffer,http_falsepositive_dir)
+
+  def remove_files(self):
+    try: os.unlink(self.content)
+    except: pass
+    try: os.unlink(self.content_old)
+    except: pass
+    try: os.unlink(self.content_exit)
+    except: pass
+
+  def __str__(self):
+    ret = TestResult.__str__(self)
+    if self.verbose:
+      soup = old_soup = tor_soup = None
+      if self.content:
+        content = open(self.content).read().decode('ascii', 'ignore')
+        soup = FullyStrainedSoup(content)
+
+      if self.content_old:
+        content_old = open(self.content_old).read().decode('ascii', 'ignore')
+        old_soup = FullyStrainedSoup(content_old)
+
+      if self.content_exit:
+        content_exit = open(self.content_exit).read().decode('ascii', 'ignore')
+        tor_soup = FullyStrainedSoup(content_exit)
+
+      if self.verbose > 1:
+        ret += " Content: "+str(self.content)+"\n"
+        ret += " Content old: "+str(self.content_old)+"\n"
+        ret += " Exit: "+str(self.content_exit)+"\n"
+
+        if self.content and self.content_old:
+          tags = map(str, soup.findAll())
+          old_tags = map(str, old_soup.findAll())
+          diff = difflib.unified_diff(old_tags, tags, "Non-Tor1", "Non-Tor2",
+                                      lineterm="")
+          for line in diff:
+            ret+=line+"\n"
+
+        if self.content and self.content_exit:
+          tags = map(str, soup.findAll())
+          tor_tags = map(str, tor_soup.findAll())
+          diff = difflib.unified_diff(tags, tor_tags, "Non-Tor", "Exit",
+                                      lineterm="")
+          for line in diff:
+            ret+=line+"\n"
+
+      if soup and tor_soup and old_soup:
+        if self.soupdiffer and os.path.exists(self.soupdiffer):
+          soupdiff = SnakePickler.load(self.soupdiffer)
+        else:
+          soupdiff = SoupDiffer(old_soup, soup)
+
+        more_tags = soupdiff.show_changed_tags(tor_soup)     
+        more_attrs = soupdiff.show_changed_attrs(tor_soup)
+        more_content = soupdiff.show_changed_content(tor_soup)
+
+        if more_tags:
+          ret += "\nTor changed tags:\n"
+          ret += more_tags
+        if more_attrs:
+          ret += "\nTor changed attrs:\n"
+          ret += more_attrs
+        if not soupdiff.content_changed and more_content:
+          ret += "\nChanged Content:\n"
+          ret += "\n".join(more_content)+"\n"
+        if (soupdiff.content_changed or not more_content) and not more_tags and not more_attrs:
+          ret += "\nSoupDiffer claims false positive.\n"
+          jsdiff = JSSoupDiffer(old_soup)
+          jsdiff.prune_differences(soup)
+          jsdifferences = jsdiff.show_differences(tor_soup)
+          if not jsdifferences: jsdifferences = "None."
+          ret += "Javascript Differences: "+jsdifferences+"\n"
+    else:
+      if self.content:
+        ret += " "+self.content+"\n"
+      if self.content_old:
+        ret += " "+self.content_old+"\n"
+      if self.content_exit:
+        ret += " "+self.content_exit+"\n"
+    return ret
+
+class SSHTestResult(TestResult):
+  ''' Represents the result of an ssh test '''
+  def __init__(self, exit_obj, ssh_site, status):
+    super(SSHTestResult, self).__init__(exit_obj, ssh_site, status)
+    self.proto = "ssh"
+
+class DNSTestResult(TestResult):
+  ''' Represents the result of a dns test '''
+  def __init__(self, exit_obj, dns_site, status):
+    super(DNSTestResult, self).__init__(exit_obj, dns_site, status)
+    self.proto = "dns"
+
+class DNSRebindTestResult(TestResult):
+  ''' Represents the result of a dns rebind test '''
+  def __init__(self, exit_obj, dns_rebind_site, status):
+    super(DNSRebindTestResult, self).__init__(exit_obj, dns_rebind_site, status)
+    self.proto = "dns"
+
+class SMTPTestResult(TestResult):
+  ''' Represents the result of an smtp test '''
+  def __init__(self, exit_obj, smtp_site, status):
+    super(SMTPTestResult, self).__init__(exit_obj, smtp_site, status)
+    self.proto = "smtp"
+
+class IMAPTestResult(TestResult):
+  ''' Represents the result of an imap test '''
+  def __init__(self, exit_obj, imap_site, status):
+    super(IMAPTestResult, self).__init__(exit_obj, imap_site, status)
+    self.proto = "imap"
+
+class POPTestResult(TestResult):
+  ''' Represents the result of a pop test '''
+  def __init__(self, exit_obj, pop_site, status):
+    super(POPTestResult, self).__init__(exit_obj, pop_site, status)
+    self.proto = "pop"
+
+class DataHandler:
+  def __init__(self, my_data_dir=data_dir):
+    self.data_dir = my_data_dir
+
+  ''' Class for saving and managing test result data '''
+  def filterResults(self, results, protocols=[], show_good=False, 
+      show_bad=False, show_inconclusive=False):
+    ''' filter results based on protocol and success level ''' 
+
+    protocol_filters = []
+    status_filters = []
+
+    for protocol in protocols:
+      protocol_filters.append(lambda x, p=protocol: x.__class__.__name__.lower()[:-10].endswith(p))
+    if show_good:
+      status_filters.append(lambda x: x.status == TEST_SUCCESS)
+    if show_bad:
+      status_filters.append(lambda x: x.status == TEST_FAILURE)
+    if show_inconclusive:
+      status_filters.append(lambda x: x.status == TEST_INCONCLUSIVE)
+
+    if len(protocol_filters) == 0 or len(status_filters) == 0:
+      return []
+     
+    protocol_filter = lambda x: reduce(operator.__or__, [f(x) for f in protocol_filters])
+    status_filter = lambda x: reduce(operator.__or__, [f(x) for f in status_filters])
+
+    return [x for x in results if (protocol_filter(x) and status_filter(x))]
+    
+  def filterByNode(self, results, id):
+    ''' filter by node'''
+    return filter(lambda x: x.exit_node == id, results)
+
+  def getAll(self):
+    ''' get all available results'''
+    return self.__getResults(self.data_dir)
+
+  def getSsh(self):
+    ''' get results of ssh tests '''
+    return self.__getResults(self.data_dir + 'ssh/')
+    
+  def getHttp(self):
+    ''' get results of http tests '''
+    return self.__getResults(self.data_dir + 'http/')
+
+  def getSsl(self):
+    ''' get results of ssl tests '''
+    return self.__getResults(self.data_dir + 'ssl/')
+
+  def getSmtp(self):
+    ''' get results of smtp tests '''
+    return self.__getResults(self.data_dir + 'smtp/')
+
+  def getPop(self):
+    ''' get results of pop tests '''
+    return self.__getResults(self.data_dir + 'pop/')
+
+  def getImap(self):
+    ''' get results of imap tests '''
+    return self.__getResults(self.data_dir + 'imap/')
+
+  def getDns(self):
+    ''' get results of basic dns tests '''
+    return self.__getResults(self.data_dir + 'dns')
+
+  def getDnsRebind(self):
+    ''' get results of dns rebind tests '''
+    return self.__getResults(self.data_dir + 'dnsbrebind/')
+
+  def __getResults(self, rdir):
+    ''' 
+    recursively traverse the directory tree starting with dir
+    gather test results from files ending with .result
+    '''
+    results = []
+
+    for root, dirs, files in os.walk(rdir):
+      for f in files:
+        if f.endswith('.result'):
+          result = SnakePickler.load(os.path.join(root, f))
+          result.rebase(self.data_dir)
+          results.append(result)
+    return results
+
+  def getResult(self, file):
+    return SnakePickler.load(file)
+
+  def uniqueFilename(afile):
+    (prefix,suffix)=os.path.splitext(afile)
+    i=0
+    while os.path.exists(prefix+"."+str(i)+suffix):
+      i+=1
+    return prefix+"."+str(i)+suffix
+  uniqueFilename = Callable(uniqueFilename)
+  
+  def safeFilename(unsafe_file):
+    ''' 
+    remove characters illegal in some systems 
+    and trim the string to a reasonable length
+    '''
+    unsafe_file = unsafe_file.decode('ascii', 'ignore')
+    safe_file = re.sub(unsafe_filechars, "_", unsafe_file)
+    return str(safe_file[:200])
+  safeFilename = Callable(safeFilename)
+
+  def __resultFilename(self, result):
+    address = ''
+    if result.__class__.__name__ == 'HtmlTestResult' or result.__class__.__name__ == 'HttpTestResult':
+      address = DataHandler.safeFilename(result.site[7:])
+    elif result.__class__.__name__ == 'SSLTestResult':
+      address = DataHandler.safeFilename(result.site[8:])
+    elif 'TestResult' in result.__class__.__name__:
+      address = DataHandler.safeFilename(result.site)
+    else:
+      raise Exception, 'This doesn\'t seem to be a result instance.'
+
+    rdir = self.data_dir+result.proto.lower()+'/'
+    if result.false_positive:
+      rdir += 'falsepositive/'
+    elif result.from_rescan:
+      rdir += 'rescan/'
+    elif result.status == TEST_SUCCESS:
+      rdir += 'successful/'
+    elif result.status == TEST_INCONCLUSIVE:
+      rdir += 'inconclusive/'
+    elif result.status == TEST_FAILURE:
+      rdir += 'failed/'
+
+    return DataHandler.uniqueFilename(str((rdir+address+'.'+result.exit_node[1:]+".result").decode('ascii', 'ignore')))
+
+  def saveResult(self, result):
+    ''' generic method for saving test results '''
+    result.filename = self.__resultFilename(result)
+    SnakePickler.dump(result, result.filename)
+
+  def __testFilename(self, test, position=-1):
+    if position == -1:
+      return DataHandler.uniqueFilename(self.data_dir+test.__class__.__name__+".test")
+    else:
+      return self.data_dir+test.__class__.__name__+"."+str(position)+".test"
+
+  def loadTest(self, testname, position=-1):
+    filename = self.data_dir+testname
+    if position == -1:
+      i=0
+      while os.path.exists(filename+"."+str(i)+".test"):
+        i+=1
+      position = i-1
+    
+    test = SnakePickler.load(filename+"."+str(position)+".test")
+    return test
+
+  def saveTest(self, test):
+    if not test.filename:
+      test.filename = self.__testFilename(test)
+    SnakePickler.dump(test, test.filename)
+
+# These three bits are needed to fully recursively strain the parsed soup.
+# For some reason, the SoupStrainer does not get applied recursively..
+__first_strainer = SoupStrainer(lambda name, attrs: name in tags_to_check or 
+   len(Set(map(lambda a: a[0], attrs)).intersection(Set(attrs_to_check))) > 0)
+
+def __tag_not_worthy(tag):
+  if tag.name in tags_to_check:
+    return False
+  for attr in tag.attrs:
+    if attr[0] in attrs_to_check_map:
+      return False
+  return True
+
+def FullyStrainedSoup(html):
+  """ Remove all tags that are of no interest. Also remove content """
+  soup = TheChosenSoup(html, __first_strainer)
+  to_extract = []
+  for tag in soup.findAll():
+    to_prune = []
+    for attr in tag.attrs:
+      if attr[0] in attrs_to_prune:
+        to_prune.append(attr)
+    for attr in to_prune:
+      tag.attrs.remove(attr)
+    if __tag_not_worthy(tag):
+      to_extract.append(tag)
+    if tag.name not in tags_preserve_inner:
+      for child in tag.childGenerator():
+        if not isinstance(child, Tag) or __tag_not_worthy(child):
+          to_extract.append(child)
+  for tag in to_extract:
+    if isinstance(tag, Tag):
+      parent = tag.findParent()
+      for child in tag.findChildren():
+        parent.append(child)
+  for tag in to_extract:
+    tag.extract()
+  # Also flatten the tag structure
+  flattened_tags = soup.findAll()
+  for tag in flattened_tags:
+    if isinstance(tag, Tag): # Don't extract script/CSS strings.
+      tag.extract() 
+  for tag in flattened_tags:
+    soup.append(tag)
+  return soup      
+
+class SnakePickler:
+  def dump(obj, filename):
+    if not "depickle_upgrade" in dir(obj.__class__):
+      plog("WARN", "Pickling instance of "+obj.__class__.__name__+" without upgrade method")
+    f = file(filename, "w")
+    try:
+      pickle.dump(obj, f)
+    except KeyboardInterrupt:
+      finished = False
+      while not finished:
+        try:
+          f.close()
+          f = file(filename, "w")
+          pickle.dump(obj, f)
+          f.close()
+          finished = True
+        except KeyboardInterrupt:
+          pass
+      raise KeyboardInterrupt
+    except Exception, e:
+      plog("WARN", "Exception during pickle dump: "+e)
+      try:
+        os.unlink(filename)
+      except: pass
+    f.close()
+  dump = Callable(dump)
+
+  def load(filename):
+    f = file(filename, "r")
+    try:
+      obj = pickle.load(f)
+    except Exception, e:
+      plog("WARN", "Error loading object from "+filename+": "+str(e))
+      return None
+    if not "depickle_upgrade" in dir(obj.__class__):
+      plog("WARN", "De-pickling instance of "+obj.__class__.__name__+" without upgrade method")
+    else:
+      obj.depickle_upgrade()
+    f.close()
+    return obj
+  load = Callable(load)
+     
+class SoupDiffer:
+  """ Diff two soup tag sets, optionally writing diffs to outfile. """
+  def __init__(self, soup_old, soup_new):
+    tags_old = self._get_tags(soup_old)
+    tags_new = self._get_tags(soup_new)
+    self.tag_pool = tags_new | tags_old
+    self.changed_tag_map = {}
+    self._update_changed_tag_map(tags_old, tags_new)
+    self._update_changed_tag_map(tags_new, tags_old)
+
+    attrs_new = self._get_attributes(soup_new)
+    attrs_old = self._get_attributes(soup_old)
+    self.attr_pool = attrs_new | attrs_old
+    self.changed_attr_map = {}
+    self._update_changed_attr_map(attrs_new, attrs_old)
+    self._update_changed_attr_map(attrs_old, attrs_new)
+
+    cntnt_new = self._get_content(soup_new)
+    cntnt_old = self._get_content(soup_old)
+    self.content_pool = cntnt_new | cntnt_old
+    self.content_changed = bool(cntnt_new ^ cntnt_old) 
+    self._pickle_revision = 0    
+
+  def depickle_upgrade(self):
+    pass
+
+  def _get_tags(self, soup):
+    return sets.Set(map(str, 
+           [tag for tag in soup.findAll() if isinstance(tag, Tag)]))
+
+  def _get_attributes(self, soup):
+    attr_soup = [(tag.name, tag.attrs) for tag in soup.findAll()]
+    attrs = sets.Set([])
+    for (tag, attr_list) in attr_soup:
+      for at in attr_list:
+        attrs.add((tag, at)) 
+    return attrs
+
+  def _get_content(self, soup):
+    return sets.Set(map(str, 
+      [tag for tag in soup.findAll() if not isinstance(tag, Tag)]))
+  
+  def _update_changed_tag_map(self, tags_old, tags_new):
+    """ Create a map of changed tags to ALL attributes that tag
+        has ever had (changed or not) """
+    changed_tags = list(tags_new - tags_old)
+    for tags in map(TheChosenSoup, changed_tags):
+      for t in tags.findAll():
+        if t.name not in changed_tags:
+          self.changed_tag_map[t.name] = sets.Set([])
+        for attr in t.attrs:
+          self.changed_tag_map[t.name].add(attr[0])
+
+  def _update_changed_attr_map(self, attrs_old, attrs_new):
+    """ Transform the list of (tag, attribute) pairings for new/changed
+        attributes into a map. This allows us to quickly see
+        if any attributes changed for a specific tag. """
+    changed_attributes = list(attrs_new - attrs_old)
+    for (tag, attr) in changed_attributes:
+      if tag not in self.changed_attr_map:
+        self.changed_attr_map[tag] = sets.Set([])
+      self.changed_attr_map[tag].add(attr[0])
+
+  def _update_changed_content(self, content_old, content_new):
+    # FIXME: This could be tracked by parent tag+attr
+    if not self.content_changed:
+      self.content_changed = bool(content_old ^ content_new)
+
+  def prune_differences(self, soup):
+    tags = self._get_tags(soup)
+    attrs = self._get_attributes(soup)
+    cntnt = self._get_content(soup)
+
+    self._update_changed_tag_map(self.tag_pool, tags)
+    self._update_changed_attr_map(self.attr_pool, attrs)
+    self._update_changed_content(self.content_pool, cntnt)
+    self.tag_pool.union_update(tags)
+    self.attr_pool.union_update(attrs)
+    self.content_pool.union_update(cntnt)
+
+  def show_changed_tags(self, soup):
+    soup_tags = self._get_tags(soup)
+    new_tags = soup_tags - self.tag_pool
+    ret = ""
+    for tags in map(TheChosenSoup, new_tags):
+      for t in tags.findAll():
+        if t.name not in self.changed_tag_map:
+          ret += " New Tag: "+str(t)+"\n"
+        else:
+          for attr in t.attrs:
+            if attr[0] not in self.changed_tag_map[t.name] \
+                 and attr[0] in attrs_to_check_map:
+              ret += " New Attr "+attr[0]+": "+str(t)+"\n"
+    return ret
+
+  def show_changed_attrs(self, soup):
+    soup_attrs = self._get_attributes(soup)
+    new_attrs = soup_attrs - self.attr_pool
+    ret = ""
+    for (tag, attr) in new_attrs:
+      if tag in self.changed_attr_map:
+        if attr[0] not in self.changed_attr_map[tag] \
+            and attr[0] in attrs_to_check_map:
+          ret += " New Attr "+attr[0]+": "+tag+" "+attr[0]+'="'+attr[1]+'"\n'
+      else:
+        ret += " New Tag: "+tag+" "+attr[0]+'="'+attr[1]+'"\n'
+    return ret
+
+  def show_changed_content(self, soup):
+    """ Return a list of tag contents changed in soup_new """
+    content = self._get_content(soup)
+    ret = list(content - self.content_pool)
+    ret.sort()
+    return ret
+
+class HeaderDiffer:
+  def __init__(self, orig_headers):
+    self.header_pool = sets.Set(orig_headers)
+    self.changed_headers = sets.Set([])
+    self._pickle_revision = 0
+ 
+  def filter_headers(headers):
+    ret = []
+    for h in headers:
+      matched = False
+      for i in ignore_http_headers:
+        if re.match(i, h[0]):
+          matched = True
+      if not matched: ret.append(h)
+    return sets.Set(ret)
+  filter_headers = Callable(filter_headers)
+ 
+  def depickle_upgrade(self):
+    pass
+
+  def prune_differences(self, new_headers):
+    new_headers = sets.Set(new_headers)
+    changed = new_headers - self.header_pool
+    for i in changed:
+      self.changed_headers.add(i[0])
+    self.header_pool.union_update(new_headers)
+
+  def show_differences(self, new_headers):
+    ret = ""
+    changed = sets.Set(new_headers) - self.header_pool
+    for i in changed:
+      if i[0] not in self.changed_headers:
+        ret += " "+i[0]+": "+i[1]+"\n"
+    if ret:
+      return "New HTTP Headers:\n"+ret
+    else: 
+      return ret
+
+class JSDiffer:
+  def __init__(self, js_string):
+    self._pickle_revision = 0    
+    self.ast_cnts = self._count_ast_elements(js_string)
+
+  def depickle_upgrade(self):
+    pass
+
+  def _ast_recursive_worker(ast, ast_cnts):
+    node = JSTokenNames[ast.getType()]
+    if not node in ast_cnts:
+      ast_cnts[node] = 1
+    else: ast_cnts[node] += 1
+
+    for child in ast.getChildren():
+      JSDiffer._ast_recursive_worker(child, ast_cnts)
+  _ast_recursive_worker = Callable(_ast_recursive_worker)
+
+  def _antlr_parse(self, js_string):
+    char_stream = antlr3.ANTLRStringStream(js_string)
+    lexer = LoggingJSLexer(char_stream)
+    tokens = antlr3.CommonTokenStream(lexer)
+    parser = LoggingJSParser(tokens)
+    program = parser.program()
+    program.tree.parse_errors = parser.parse_errors__
+    program.tree.lex_errors = lexer.lex_errors__
+    return program.tree
+                            
+  def _count_ast_elements(self, js_string, name="global"):
+    ast_cnts = {}
+    try:
+      js_string = js_string.replace("\n\r","\n").replace("\r\n","\n").replace("\r","\n")+";"
+      
+      ast = self._antlr_parse(js_string)
+      JSDiffer._ast_recursive_worker(ast, ast_cnts)
+      for e in ast.lex_errors+ast.parse_errors:
+        name+=":"+e.__class__.__name__
+        if "line" in e.__dict__: 
+          name+=":"+str(e.line)
+        if "token" in e.__dict__ and e.token \
+            and "type" in e.token.__dict__: 
+          name+=":"+JSTokenNames[e.token.type]
+        # XXX: Any other things we want to add?
+        plog("INFO", "Parse error "+name+" on "+js_string)
+        if not "ParseError:"+name in ast_cnts:
+          ast_cnts["ParseError:"+name] = 1
+        else: ast_cnts["ParseError:"+name] += 1
+    except UnicodeDecodeError, e:
+      name+=":"+e.__class__.__name__
+      plog("INFO", "Unicode error "+name+" on "+js_string)
+      if not "ParseError:"+name in ast_cnts:
+        ast_cnts["ParseError:"+name] = 1
+      else: ast_cnts["ParseError:"+name] +=1
+    return ast_cnts
+
+  def _difference_pruner(self, other_cnts):
+    for node in self.ast_cnts.iterkeys():
+      if node not in other_cnts:
+        self.ast_cnts[node] = 0
+      elif self.ast_cnts[node] != other_cnts[node]:
+        self.ast_cnts[node] = 0
+    for node in other_cnts.iterkeys():
+      if node not in self.ast_cnts:
+        self.ast_cnts[node] = 0
+
+  def _difference_checker(self, other_cnts):
+    for node in self.ast_cnts.iterkeys():
+      if not self.ast_cnts[node]: continue # pruned difference
+      if node not in other_cnts:
+        return True
+      elif self.ast_cnts[node] != other_cnts[node]:
+        return True
+    for node in other_cnts.iterkeys():
+      if node not in self.ast_cnts:
+        return True
+    return False
+
+  def _difference_printer(self, other_cnts):
+    ret = ""
+    missing = []
+    miscount = []
+    new = []
+    for node in self.ast_cnts.iterkeys():
+      if not self.ast_cnts[node]: continue # pruned difference
+      if node not in other_cnts:
+        missing.append(str(node))
+      elif self.ast_cnts[node] != other_cnts[node]:
+        miscount.append(str(node))
+    for node in other_cnts.iterkeys():
+      if node not in self.ast_cnts:
+        new.append(str(node))
+    if missing:
+      ret += "\nMissing: "
+      for node in missing: ret += node+" "
+    if new:
+      ret += "\nNew: "
+      for node in new: ret += node+" "
+    if miscount:
+      ret += "\nMiscount: "
+      for node in miscount: ret += node+" "
+    return ret
+
+  def prune_differences(self, other_string):
+    other_cnts = self._count_ast_elements(other_string)
+    self._difference_pruner(other_cnts)
+
+  def contains_differences(self, other_string):
+    other_cnts = self._count_ast_elements(other_string)
+    return self._difference_checker(other_cnts) 
+
+  def show_differences(self, other_string):
+    other_cnts = self._count_ast_elements(other_string)
+    return self._difference_printer(other_cnts) 
+
+
+class JSSoupDiffer(JSDiffer):
+  def _add_cnts(tag_cnts, ast_cnts):
+    ret_cnts = {}
+    for n in tag_cnts.iterkeys():
+      if n in ast_cnts:
+        ret_cnts[n] = tag_cnts[n]+ast_cnts[n]
+      else:
+        ret_cnts[n] = tag_cnts[n]
+    for n in ast_cnts.iterkeys():
+      if n not in tag_cnts:
+        ret_cnts[n] = ast_cnts[n]
+    return ret_cnts
+  _add_cnts = Callable(_add_cnts)
+
+  def _count_ast_elements(self, soup, name="Soup"):
+    ast_cnts = {}
+    for tag in soup.findAll():
+      if tag.name == 'script':
+        for child in tag.childGenerator():
+          if isinstance(child, Tag):
+            plog("ERROR", "Script tag with subtag!")
+          else:
+            script = str(child).replace("<!--", "").replace("-->", "").replace("<![CDATA[", "").replace("]]>", "")
+            tag_cnts = JSDiffer._count_ast_elements(self, script, tag.name)
+            ast_cnts = JSSoupDiffer._add_cnts(tag_cnts, ast_cnts)
+      for attr in tag.attrs:
+        # hrmm.. %-encoding too? Firefox negs on it..
+        parse = ""
+        if attr[1].replace(" ","")[:11] == "javascript:":
+          split_at = attr[1].find(":")+1
+          parse = str(attr[1][split_at:])
+        elif attr[0] in attrs_with_raw_script_map:
+          parse = str(attr[1])
+        if not parse: continue
+        tag_cnts = JSDiffer._count_ast_elements(self,parse,tag.name+":"+attr[0])
+        ast_cnts = JSSoupDiffer._add_cnts(tag_cnts, ast_cnts)
+    return ast_cnts
+


Property changes on: torflow/trunk/NetworkScanners/ExitAuthority/libsoat.py
___________________________________________________________________
Added: svn:mergeinfo
   + 

Copied: torflow/trunk/NetworkScanners/ExitAuthority/snakeinspector.py (from rev 20213, torflow/trunk/NetworkScanners/snakeinspector.py)
===================================================================
--- torflow/trunk/NetworkScanners/ExitAuthority/snakeinspector.py	                        (rev 0)
+++ torflow/trunk/NetworkScanners/ExitAuthority/snakeinspector.py	2009-08-13 19:34:12 UTC (rev 20279)
@@ -0,0 +1,141 @@
+#!/usr/bin/python
+
+import dircache
+import operator
+import os
+import pickle
+import sys
+import time
+import traceback
+
+import sets
+from sets import Set
+
+import getopt
+
+import libsoat
+from libsoat import *
+
+sys.path.append("../")
+
+import TorCtl.TorUtil
+from TorCtl.TorUtil import *
+
+TorCtl.TorUtil.loglevel="INFO"
+
+if TorCtl.TorUtil.loglevels[TorCtl.TorUtil.loglevel] > TorCtl.TorUtil.loglevels["INFO"]:
+  # Kill stderr (jsdiffer and exception noise) if our loglevel is above INFO
+  sys.stderr = file("/dev/null", "w")
+
+
+def usage(argv):
+  print "Usage: "+argv[0]+" with 0 or more of the following filters: "
+  print "  --dir <datadir>"
+  print "  --file <.result file>"
+  print "  --exit <idhex>"
+  print "  --before <timestamp as string>"
+  print "  --after <timestamp as string>"
+  print "  --reason <soat failure reason>    # may be repeated"
+  print "  --noreason <soat failure reason>  # may be repeated"
+  print "  --proto <protocol>"
+  print "  --resultfilter <TestResult class name>"
+  print "  --statuscode <'Failure' or 'Inconclusive'>"
+  print "  --sortby <'proto' or 'url' or 'exit' or 'reason'>"
+  print "  --falsepositives"
+  print "  --verbose"
+  sys.exit(1)
+
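+# Example invocation (hypothetical values, for illustration only):
+#   ./snakeinspector.py --dir ./data/ --proto HTTP --sortby exit --verbose
+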
+def getargs(argv):
+  try:
+    opts,args = getopt.getopt(argv[1:],"d:f:e:r:vt:p:s:o:n:a:b:F", 
+             ["dir=", "file=", "exit=", "reason=", "resultfilter=", "proto=", 
+              "verbose", "statuscode=", "sortby=", "noreason=", "after=",
+              "before=", "falsepositives"])
+  except getopt.GetoptError,err:
+    print str(err)
+    usage(argv)
+  # FIXME: make all these repeatable
+  use_dir="./data/"
+  use_file=None
+  node=None
+  reasons=[]
+  noreasons=[]
+  result=2
+  verbose=1
+  proto=None
+  resultfilter=None
+  before = 0xffffffff
+  after = 0
+  sortby="proto"
+  falsepositives=False
+  for o,a in opts:
+    if o == '-d' or o == '--dir':
+      use_dir = a
+    elif o == '-f' or o == '--file':
+      use_file = a
+    elif o == '-b' or o == '--before':
+      before = time.mktime(time.strptime(a))
+    elif o == '-a' or o == '--after': 
+      after = time.mktime(time.strptime(a))
+    elif o == '-r' or o == '--reason': 
+      reasons.append(a)
+    elif o == '-n' or o == '--noreason': 
+      noreasons.append(a)
+    elif o == '-e' or o == '--exit':
+      node = a
+    elif o == '-v' or o == '--verbose': 
+      verbose += 1
+    elif o == '-t' or o == '--resultfilter':
+      resultfilter = a
+    elif o == '-p' or o == '--proto':
+      proto = a
+    elif o == '-F' or o == '--falsepositives':
+      falsepositives = True
+    elif o == '-s' or o == '--sortby': 
+      if a not in ["proto", "url", "exit", "reason"]:
+        usage(argv)
+      else: sortby = a 
+    elif o == '-o' or o == '--statuscode': 
+      try:
+        result = int(a)
+      except ValueError:
+        result = RESULT_CODES[a]
+  return use_dir,use_file,node,reasons,noreasons,result,verbose,resultfilter,proto,sortby,before,after,falsepositives
+ 
+def main(argv):
+  use_dir,use_file,node,reasons,noreasons,result,verbose,resultfilter,proto,sortby,before,after,falsepositives=getargs(argv)
+  dh = DataHandler(use_dir)
+  print dh.data_dir
+
+  if use_file:
+    results = [dh.getResult(use_file)]
+  elif node:
+    results = dh.filterByNode(dh.getAll(), "$"+node)
+  else:
+    results = dh.getAll()
+
+  if sortby == "url":
+    results.sort(lambda x, y: cmp(x.site, y.site))
+  elif sortby == "reason":
+    results.sort(lambda x, y: cmp(x.reason, y.reason))
+  elif sortby == "exit":
+    results.sort(lambda x, y: cmp(x.exit_node, y.exit_node))
+
+  for r in results:
+    r.verbose = verbose
+    if r.reason in noreasons: continue
+    if reasons and r.reason not in reasons: continue
+    if r.timestamp < after or before < r.timestamp: continue
+    if (falsepositives) ^ r.false_positive: continue
+    if (not result or r.status == result) and \
+       (not proto or r.proto == proto) and \
+       (not resultfilter or r.__class__.__name__ == resultfilter):
+      try:
+        print r
+      except KeyboardInterrupt:
+        raise KeyboardInterrupt
+      except IOError, e:
+        traceback.print_exc()
+      except Exception, e:
+        traceback.print_exc()
+      print "\n-----------------------------\n"
+
+if __name__ == "__main__":
+  main(sys.argv)


Property changes on: torflow/trunk/NetworkScanners/ExitAuthority/snakeinspector.py
___________________________________________________________________
Added: svn:executable
   + *
Added: svn:mergeinfo
   + 

Copied: torflow/trunk/NetworkScanners/ExitAuthority/soat.py (from rev 20213, torflow/trunk/NetworkScanners/soat.py)
===================================================================
--- torflow/trunk/NetworkScanners/ExitAuthority/soat.py	                        (rev 0)
+++ torflow/trunk/NetworkScanners/ExitAuthority/soat.py	2009-08-13 19:34:12 UTC (rev 20279)
@@ -0,0 +1,2784 @@
+#!/usr/bin/python
+#
+# 2008 Aleksei Gorny, mentored by Mike Perry
+
+'''
+Snakes on a Tor exit node scanner
+
+The SoaT scanner checks whether exit nodes behave correctly by initiating
+connections to semi-randomly chosen targets over several protocols (HTTP,
+HTTPS, SSH, SMTP, IMAP, etc.) and comparing the content received directly
+with the content received via Tor.
+
+It talks to metatroller and the Tor control port to stay aware of the current
+network status.
+
+To run SoaT: 
+1) make sure you have py-openssl packages installed (see README)
+2) open Tor control port in the torrc
+3) start metatroller in the background (python ./metatroller.py)
+4) start soat (python ./soat.py) with some testing flags (run it without any flags
+    to see which options are available)
+5) check the results later by running soatstats (python ./soatstats.py)
+
+'''
+
+__all__ = ["ExitNodeScanner", "DNSRebindScanner", "load_wordlist"]
+
+import commands
+import getopt
+import os
+import random
+import re
+from sets import Set
+import smtplib
+import socket
+import sys
+import time
+import urllib
+import urllib2
+import httplib
+import traceback
+import copy
+import StringIO
+import zlib,gzip
+import urlparse
+import cookielib
+import sha
+import Queue
+import threading
+import atexit
+
+from libsoat import *
+
+sys.path.append("../")
+
+from TorCtl import TorUtil, TorCtl, PathSupport, ScanSupport
+from TorCtl.TorUtil import meta_port, meta_host, control_port, control_host, tor_port, tor_host
+from TorCtl.TorUtil import *
+from TorCtl.PathSupport import *
+from TorCtl.TorCtl import Connection, EventHandler, ConsensusTracker
+
+import OpenSSL
+from OpenSSL import *
+
+
+sys.path.append("./libs/")
+from BeautifulSoup.BeautifulSoup import SoupStrainer, Tag
+from SocksiPy import socks
+import Pyssh.pyssh
+
+from soat_config import *
+
+search_cookies=None
+scanhdlr=None
+datahandler=None
+linebreak = '\r\n'
+
+# Do NOT modify this object directly after it is handed to PathBuilder
+# Use PathBuilder.schedule_selmgr instead.
+# (Modifying the arguments here is OK)
+__selmgr = PathSupport.SelectionManager(
+      pathlen=2,
+      order_exits=True,
+      percent_fast=10, # XXX: This is fingerprintable..
+      percent_skip=0,
+      min_bw=1,
+      use_all_exits=True,
+      uniform=False,
+      use_exit=None,
+      use_guards=False,
+      exit_ports=[443])
+
+# Oh yeah. so dirty. Blame this guy if you hate me:
+# http://mail.python.org/pipermail/python-bugs-list/2008-October/061202.html
+_origsocket = socket.socket
+class BindingSocket(_origsocket):
+  bind_to = None
+  def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None):
+    _origsocket.__init__(self, family, type, proto, _sock)
+    if BindingSocket.bind_to:
+      plog("DEBUG", "Binding socket to "+BindingSocket.bind_to)
+      self.bind((BindingSocket.bind_to, 0))
+socket.socket = BindingSocket 
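+# From this point on, every socket the process creates is a BindingSocket and
+# will bind to BindingSocket.bind_to whenever that is set (e.g. for refetches).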
+
+# Nice.. HTTPConnection.connect is doing DNS for us! Fix that:
+# Hrmm.. suppose we could also bind here.. but BindingSocket is 
+# more general and may come in handy for other tests.
+class NoDNSHTTPConnection(httplib.HTTPConnection):
+  def connect(self):
+    try:
+      self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
+      self.sock.settimeout(read_timeout) # Mnemotronic tonic
+      if self.debuglevel > 0:
+        print "connect: (%s, %s)" % (self.host, self.port)
+      self.sock.connect((str(self.host), self.port))
+    except socket.error, msg:
+      if self.debuglevel > 0:
+        print 'connect fail:', (self.host, self.port)
+      if self.sock:
+        self.sock.close()
+      self.sock = None
+    if not self.sock:
+      raise socket.error, msg
+
+class NoDNSHTTPHandler(urllib2.HTTPHandler):
+  def http_open(self, req):
+    return self.do_open(NoDNSHTTPConnection, req)
+
+class ExitScanHandler(ScanSupport.ScanHandler):
+  def __init__(self, selmgr):
+    ScanSupport.ScanHandler.__init__(self, selmgr)
+    self.rlock = threading.Lock()
+    self.new_nodes=True
+
+  def has_new_nodes(self):
+    # XXX: Hrmm.. could do this with conditions instead..
+    ret = False
+    plog("DEBUG", "has_new_nodes begin")
+    try:
+      self.rlock.acquire()
+      ret = self.new_nodes
+      self.new_nodes = False
+    finally:
+      self.rlock.release()
+    plog("DEBUG", "has_new_nodes end")
+    return ret
+
+  def get_nodes_for_port(self, port):
+    ''' return a list of nodes that allow exiting to a given port '''
+    plog("DEBUG", "get_nodes_for_port begin")
+    cond = threading.Condition()
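+    # notlambda() runs on the scan handler's own event thread (via
+    # schedule_low_prio); we block on the condition until it fills cond._result.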
+    def notlambda(this):
+      cond.acquire()
+      restriction = NodeRestrictionList(
+          [FlagsRestriction(["Running", "Valid", "Fast"]),
+           MinBWRestriction(min_node_bw),
+           ExitPolicyRestriction('255.255.255.255', port)])
+      cond._result = [x for x in self.sorted_r if restriction.r_is_ok(x)]
+      self._sanity_check(cond._result)
+      cond.notify()
+      cond.release()
+    cond.acquire()
+    self.schedule_low_prio(notlambda)
+    cond.wait()
+    cond.release()
+    plog("DEBUG", "get_nodes_for_port end")
+    return cond._result
+
+  def new_consensus_event(self, n):
+    plog("DEBUG", "newconsensus_event begin")
+    try:
+      self.rlock.acquire()
+      ScanSupport.ScanHandler.new_consensus_event(self, n)
+      self.new_nodes = True
+    finally:
+      self.rlock.release()
+    plog("DEBUG", "newconsensus_event end")
+
+  def new_desc_event(self, d):
+    plog("DEBUG", "newdesc_event begin")
+    try:
+      self.rlock.acquire()
+      if ScanSupport.ScanHandler.new_desc_event(self, d):
+        self.new_nodes = True
+    finally:
+      self.rlock.release()
+    plog("DEBUG", "newdesc_event end")
+
+  # FIXME: Hrmm is this in the right place?
+  def check_all_exits_port_consistency(self):
+    '''
+    An independent test that finds nodes which allow connections over a
+    common protocol while disallowing connections over its secure
+    counterpart (for instance HTTP vs. HTTPS).
+    '''
+
+    # get the structure
+    routers = self.control.read_routers(self.control.get_network_status())
+    bad_exits = Set([])
+    specific_bad_exits = [None]*len(ports_to_check)
+    for i in range(len(ports_to_check)):
+      specific_bad_exits[i] = []
+
+    # check exit policies
+    for router in routers:
+      for i in range(len(ports_to_check)):
+        [common_protocol, common_restriction, secure_protocol, secure_restriction] = ports_to_check[i]
+        if common_restriction.r_is_ok(router) and not secure_restriction.r_is_ok(router):
+          bad_exits.add(router)
+          specific_bad_exits[i].append(router)
+          #plog('INFO', 'Router ' + router.nickname + ' allows ' + common_protocol + ' but not ' + secure_protocol)
+  
+
+    for i,exits in enumerate(specific_bad_exits):
+      [common_protocol, common_restriction, secure_protocol, secure_restriction] = ports_to_check[i]
+      plog("NOTICE", "Nodes allowing "+common_protocol+" but not "+secure_protocol+":\n\t"+"\n\t".join(map(lambda r: r.nickname+"="+r.idhex, exits)))
+      #plog('INFO', 'Router ' + router.nickname + ' allows ' + common_protocol + ' but not ' + secure_protocol)
+     
+
+    # report results
+    plog('INFO', 'Total nodes: ' + `len(routers)`)
+    for i in range(len(ports_to_check)):
+      [common_protocol, _, secure_protocol, _] = ports_to_check[i]
+      plog('INFO', 'Exits with ' + common_protocol + ' / ' + secure_protocol + ' problem: ' + `len(specific_bad_exits[i])` + ' (~' + `(len(specific_bad_exits[i]) * 100 / len(routers))` + '%)')
+    plog('INFO', 'Total bad exits: ' + `len(bad_exits)` + ' (~' + `(len(bad_exits) * 100 / len(routers))` + '%)')
+
+  # FIXME: Hrmm is this in the right place?
+  def check_dns_rebind(self):
+    ''' 
+    A DNS-rebind attack test that runs in the background and monitors REMAP events
+    The test makes sure that external hosts are not resolved to private addresses  
+    '''
+    plog('INFO', 'Monitoring REMAP events for weirdness')
+    # establish a control port connection
+    try:
+      s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+      s.connect((control_host, control_port))
+      c = Connection(s)
+      c.authenticate()
+    except socket.error, e:
+      plog('ERROR', 'Couldn\'t connect to the control port')
+      plog('ERROR', e)
+      exit()
+    except AttributeError, e:
+      plog('ERROR', 'A service other than the Tor control port is listening on ' + control_host + ':' + str(control_port))
+      plog('ERROR', e)
+      exit()
+
+    self.__dnshandler = DNSRebindScanner(self, c)
+
+
+
+
+# Http request handling
+def http_request(address, cookie_jar=None, headers=firefox_headers):
+  ''' perform an HTTP GET request and return the content received '''
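+  # Returns a 5-tuple: (code, filtered_headers, new_cookies, mime_type, content).
+  # On failure, code is a negative float rather than an HTTP status
+  # (e.g. -6.0 timeout, -13.0 empty status line, -23.0 malformed URL, -666.0 other).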
+  request = urllib2.Request(address)
+  for h in headers:
+    request.add_header(h[0], h[1])
+
+  content = ""
+  new_cookies = []
+  mime_type = ""
+  try:
+    plog("DEBUG", "Starting request for: "+address)
+    if cookie_jar != None:
+      opener = urllib2.build_opener(NoDNSHTTPHandler, urllib2.HTTPCookieProcessor(cookie_jar))
+      reply = opener.open(request)
+      if "__filename" in cookie_jar.__dict__:
+        cookie_jar.save(cookie_jar.__filename, ignore_discard=True)
+      new_cookies = cookie_jar.make_cookies(reply, request)
+    else:
+      reply = urllib2.urlopen(request)
+
+    length = reply.info().get("Content-Length")
+    if length and int(length) > max_content_size:
+      plog("WARN", "Max content size exceeded for "+address+": "+length)
+      return (reply.code, None, [], "", "")
+    mime_type = reply.info().type.lower()
+    reply_headers = HeaderDiffer.filter_headers(reply.info().items())
+    reply_headers.add(("mime-type", mime_type))
+    plog("DEBUG", "Mime type is "+mime_type+", length "+str(length))
+    content = decompress_response_data(reply)
+  except socket.timeout, e:
+    plog("WARN", "Socket timeout for "+address+": "+str(e))
+    traceback.print_exc()
+    return (-6.0, None, [], "", e.__class__.__name__+str(e))
+  except httplib.BadStatusLine, e:
+    plog('NOTICE', "HTTP Error during request of "+address+": "+str(e))
+    if not e.line: 
+      return (-13.0, None, [], "", e.__class__.__name__+"(None)") 
+    else:
+      traceback.print_exc()
+      return (-666.0, None, [], "", e.__class__.__name__+str(e)) 
+  except urllib2.HTTPError, e:
+    plog('NOTICE', "HTTP Error during request of "+address+": "+str(e))
+    if str(e) == "<urlopen error timed out>": # Yah, super ghetto...
+      return (-6.0, None, [], "", e.__class__.__name__+str(e)) 
+    else:
+      traceback.print_exc()
+      return (e.code, None, [], "", e.__class__.__name__+str(e)) 
+  except (ValueError, urllib2.URLError), e:
+    plog('WARN', 'The http-request address ' + address + ' is malformed')
+    if str(e) == "<urlopen error timed out>": # Yah, super ghetto...
+      return (-6.0, None, [], "", e.__class__.__name__+str(e)) 
+    else:
+      traceback.print_exc()
+      return (-23.0, None, [], "", e.__class__.__name__+str(e))
+  except socks.Socks5Error, e:
+    plog('WARN', 'A SOCKS5 error '+str(e.value[0])+' occurred for '+address+": "+str(e))
+    return (-float(e.value[0]), None, [], "", e.__class__.__name__+str(e))
+  except KeyboardInterrupt:
+    raise KeyboardInterrupt
+  except Exception, e:
+    plog('WARN', 'An unknown HTTP error occurred for '+address+": "+str(e))
+    traceback.print_exc()
+    return (-666.0, None, [], "", e.__class__.__name__+str(e))
+
+  return (reply.code, reply_headers, new_cookies, mime_type, content)
+
+class Test:
+  """ Base class for our tests """
+  def __init__(self, proto, port):
+    self.proto = proto
+    self.port = port
+    self.min_targets = min_targets
+    self.filename = None
+    self.rescan_nodes = sets.Set([])
+    self.nodes = sets.Set([])
+    self.node_map = {}
+    self.banned_targets = sets.Set([])
+    self.total_nodes = 0
+    self.scan_nodes = 0
+    self.nodes_to_mark = 0
+    self.tests_per_node = num_tests_per_node
+    self._reset()
+    self._pickle_revision = 6 # Will increment as fields are added
+
+  def run_test(self): 
+    raise NotImplementedError()
+
+  def get_targets(self): 
+    raise NotImplementedError()
+
+  def depickle_upgrade(self):
+    if self._pickle_revision < 1:
+      # Convert self.successes table from integers to sets.
+      # Yes, this is a hack, and yes, it will bias results
+      # away from the filter, but hey, at least it will still run.
+      self._pickle_revision = 1
+      
+      for addr in self.successes.keys():
+        if type(self.successes[addr]) == int:
+          self.successes[addr] = sets.Set(xrange(0,self.successes[addr]))
+      plog("INFO", "Upgraded "+self.__class__.__name__+" to v1")
+    if self._pickle_revision < 2: 
+      self._pickle_revision = 2
+    if self._pickle_revision < 3:
+      self.timeout_fails = {}
+      self._pickle_revision = 3
+    if self._pickle_revision < 4:
+      self.connect_fails = {}
+      self._pickle_revision = 4
+    if self._pickle_revision < 5:
+      self.dns_fails = {}
+      self._pickle_revision = 5
+    if self._pickle_revision < 6:
+      self.dns_fails_per_exit = self.dns_fails
+      self.timeout_fails_per_exit = self.timeout_fails
+      self.connect_fails_per_exit = {}
+      self._pickle_revision = 6
+
+  def refill_targets(self):
+    if len(self.targets) < self.min_targets:
+      plog("NOTICE", self.proto+" scanner short on targets. Adding more")
+      self.targets.extend(self.get_targets())
+
+  def _remove_target_addr(self, target):
+    if target in self.targets: self.targets.remove(target)
+
+  def remove_target(self, target, reason="None"):
+    self.banned_targets.add(target)
+    self.refill_targets()
+    self._remove_target_addr(target)
+    if target in self.dynamic_fails: del self.dynamic_fails[target]
+    if target in self.successes: del self.successes[target]
+    if target in self.exit_fails: del self.exit_fails[target]
+    if target in self.connect_fails: del self.connect_fails[target]
+    kill_results = []
+    for r in self.results: 
+      if r.site == target:
+        kill_results.append(r)
+    for r in kill_results:
+      # XXX: Need to re-add this node to our test set 
+      # (If it is still up)
+      if r.status == TEST_FAILURE:
+        # Save this new result file in false positive dir 
+        # and remove old one
+        try:
+          os.unlink(r.filename)
+        except:
+          pass
+        r.mark_false_positive(reason)
+        datahandler.saveResult(r)
+      self.results.remove(r)
+
+  def load_rescan(self, type, since=None):
+    self.rescan_nodes = sets.Set([])
+    results = datahandler.getAll()
+    for r in results:
+      if r.status == type:
+        if not since or r.timestamp >= since:
+          self.rescan_nodes.add(r.exit_node[1:])
+    plog("INFO", "Loaded "+str(len(self.rescan_nodes))+" nodes to rescan")
+    if self.nodes and self.rescan_nodes:
+      self.nodes &= self.rescan_nodes
+    self.scan_nodes = len(self.nodes)
+    self.tests_per_node = num_rescan_tests_per_node
+    self.nodes_to_mark = self.scan_nodes*self.tests_per_node
+
+  def toggle_rescan(self):
+    if self.rescan_nodes:
+      plog("NOTICE", self.proto+" rescan complete. Switching back to normal scan")
+      self.rescan_nodes = sets.Set([])
+      self.tests_per_node = num_tests_per_node
+      self.update_nodes()
+    else:
+      plog("NOTICE", self.proto+" switching to recan mode.")
+      self.load_rescan(TEST_FAILURE, self.run_start)
+
+  def get_node(self):
+    return random.choice(list(self.nodes))
+
+  def update_nodes(self):
+    nodes = scanhdlr.get_nodes_for_port(self.port)
+    self.node_map = {}
+    for n in nodes: 
+      self.node_map[n.idhex] = n
+    self.total_nodes = len(nodes)
+    self.nodes = sets.Set(map(lambda n: n.idhex, nodes))
+    marked_nodes = sets.Set(self.node_results.keys())
+    self.nodes -= marked_nodes # Remove marked nodes
+    # Only scan the stuff loaded from the rescan
+    if self.rescan_nodes: self.nodes &= self.rescan_nodes
+    if not self.nodes:
+      plog("ERROR", "No nodes remain after rescan load!")
+    self.scan_nodes = len(self.nodes)
+    self.nodes_to_mark = self.scan_nodes*self.tests_per_node
+    scanhdlr._sanity_check(map(lambda id: self.node_map[id], 
+                     self.nodes))
+
+  def mark_chosen(self, node, result):
+    exit_node = scanhdlr.get_exit_node()[1:]
+    if exit_node != node:
+      plog("ERROR", "Asked to mark a node that is not current: "+node+" vs "+exit_node)
+    plog("INFO", "Marking "+node+" with result "+str(result))
+    self.nodes_marked += 1
+    if not node in self.node_results: self.node_results[node] = []
+    self.node_results[node].append(result)
+    if len(self.node_results[node]) >= self.tests_per_node:
+      self.nodes.remove(node)
+      self.scan_nodes = len(self.nodes)
+      self.nodes_to_mark = self.scan_nodes*self.tests_per_node
+      plog("INFO", "Removed node "+node+". "+str(len(self.nodes))+" nodes remain")
+    else:
+      plog("DEBUG", "Keeping node "+node+". "+str(len(self.nodes))+" nodes remain. Tests: "+str(len(self.node_results[node]))+"/"+str(self.tests_per_node))
+
+     
+  def finished(self):
+    return not self.nodes
+   
+  def percent_complete(self):
+    return round(100.0 - (100.0*self.scan_nodes)/self.total_nodes, 1)
+
+  def _remove_false_positive_type(self, failset, failtype, max_rate):
+    if self.rescan_nodes: return
+    to_remove = copy.copy(failset)
+    for address in to_remove:
+      fails = len(failset[address])
+
+      if (100.0*fails)/(self.site_tests(address)) > max_rate:
+        plog("NOTICE", "Excessive "+self.proto+" "+failtype+" ("+str(fails)+"/"+str(self.site_tests(address))+") for "+address+". Removing.")
+        self.remove_target(address, failtype)
+
+  def remove_false_positives(self):
+    if self.rescan_nodes: 
+      plog("INFO", "Not removing false positives for rescan of "+self.__class__.__name__)
+    else:
+      plog("INFO", "Removing false positives for "+self.__class__.__name__)
+    self._remove_false_positive_type(self.exit_fails,
+                                     FALSEPOSITIVE_DYNAMIC_TOR,
+                                     max_exit_fail_pct)
+    self._remove_false_positive_type(self.dynamic_fails,
+                                     FALSEPOSITIVE_DYNAMIC,
+                                     max_dynamic_fail_pct)
+    self._remove_false_positive_type(self.connect_fails,
+                                     FALSEPOSITIVE_DEADSITE,
+                                     max_connect_fail_pct)
+
+  def _reset(self):
+    self.results = []
+    self.targets = []
+    self.tests_run = 0
+    self.nodes_marked = 0
+    self.run_start = time.time()
+    # These are indexed by idhex
+    self.connect_fails_per_exit = {}
+    self.timeout_fails_per_exit = {}
+    self.dns_fails_per_exit = {}
+    self.node_results = {}
+    # These are indexed by site url:
+    self.connect_fails = {}
+    self.exit_fails = {}
+    self.successes = {}
+    self.dynamic_fails = {}
+ 
+  def rewind(self):
+    self._reset()
+    self.update_nodes()
+    self.targets = self.get_targets()
+    if not self.targets:
+      raise NoURLsFound("No URLS found for protocol "+self.proto)
+    if type(self.targets) == dict:
+      for subtype in self.targets.iterkeys():
+        targets = "\n\t".join(self.targets[subtype])
+        plog("INFO", "Using the following urls for "+self.proto+"/"+subtype+" scan:\n\t"+targets) 
+        
+    else:
+      targets = "\n\t".join(self.targets)
+      plog("INFO", "Using the following urls for "+self.proto+" scan:\n\t"+targets) 
+
+  def site_tests(self, site):
+    tot_cnt = 0
+    if site in self.successes:
+      tot_cnt += len(self.successes[site])
+    if site in self.exit_fails:
+      tot_cnt += len(self.exit_fails[site])
+    if site in self.dynamic_fails:
+      tot_cnt += len(self.dynamic_fails[site])
+    if site in self.connect_fails:
+      tot_cnt += len(self.connect_fails[site])
+    return tot_cnt
+
+  def register_success(self, result):
+    if self.rescan_nodes: result.from_rescan = True
+    #datahandler.saveResult(result)
+    if result.site in self.successes: 
+      self.successes[result.site].add(result.exit_node)
+    else: self.successes[result.site]=sets.Set([result.exit_node])
+
+    win_cnt = len(self.successes[result.site])
+    
+    plog("INFO", self.proto+" success at "+result.exit_node+". This makes "+str(win_cnt)+"/"+str(self.site_tests(result.site))+" node successes for "+result.site)
+
+  def _register_site_connect_failure(self, result): 
+    if self.rescan_nodes: result.from_rescan = True
+    self.results.append(result)
+    datahandler.saveResult(result)
+    if result.site in self.connect_fails:
+      self.connect_fails[result.site].add(result.exit_node)
+    else:
+      self.connect_fails[result.site] = sets.Set([result.exit_node])
+    
+    err_cnt = len(self.connect_fails[result.site])
+
+    plog("ERROR", self.proto+" connection fail of "+result.reason+" at "+result.exit_node+". This makes "+str(err_cnt)+"/"+str(self.site_tests(result.site))+" node failures for "+result.site)
+
+  def register_connect_failure(self, result):
+    if self.rescan_nodes: result.from_rescan = True
+    if result.exit_node not in self.connect_fails_per_exit:
+      self.connect_fails_per_exit[result.exit_node] = 0
+    self.connect_fails_per_exit[result.exit_node] += 1
+
+    c_cnt = self.connect_fails_per_exit[result.exit_node]
+   
+    if c_cnt > num_connfails_per_node:
+      if result.extra_info:
+        result.extra_info = str(result.extra_info) + " count: "+str(c_cnt)
+      else: 
+        result.extra_info = str(c_cnt)
+      self._register_site_connect_failure(result)
+      del self.connect_fails_per_exit[result.exit_node]
+      return TEST_FAILURE
+    else:
+      plog("NOTICE", self.proto+" connect fail at "+result.exit_node+". This makes "+str(c_cnt)+" fails")
+      return TEST_INCONCLUSIVE
+
+  def register_dns_failure(self, result):
+    if self.rescan_nodes: result.from_rescan = True
+    if result.exit_node not in self.dns_fails_per_exit:
+      self.dns_fails_per_exit[result.exit_node] = 0
+    self.dns_fails_per_exit[result.exit_node] += 1
+
+    d_cnt = self.dns_fails_per_exit[result.exit_node]
+   
+    if d_cnt > num_dnsfails_per_node:
+      if result.extra_info:
+        result.extra_info = str(result.extra_info) + " count: "+str(d_cnt)
+      else: 
+        result.extra_info = str(d_cnt)
+      self._register_site_connect_failure(result)
+      del self.dns_fails_per_exit[result.exit_node]
+      return TEST_FAILURE
+    else:
+      plog("NOTICE", self.proto+" dns fail at "+result.exit_node+". This makes "+str(d_cnt)+" fails")
+      return TEST_INCONCLUSIVE
+
+  def register_timeout_failure(self, result):
+    if self.rescan_nodes: result.from_rescan = True
+    if result.exit_node not in self.timeout_fails_per_exit:
+      self.timeout_fails_per_exit[result.exit_node] = 0
+    self.timeout_fails_per_exit[result.exit_node] += 1
+
+    t_cnt = self.timeout_fails_per_exit[result.exit_node]
+   
+    if t_cnt > num_timeouts_per_node:
+      if result.extra_info:
+        result.extra_info = str(result.extra_info) + " count: "+str(t_cnt)
+      else: 
+        result.extra_info = str(t_cnt)
+      self._register_site_connect_failure(result)
+      del self.timeout_fails_per_exit[result.exit_node]
+      return TEST_FAILURE
+    else:
+      plog("NOTICE", self.proto+" timeout at "+result.exit_node+". This makes "+str(t_cnt)+" timeouts")
+      return TEST_INCONCLUSIVE
+
+  def register_exit_failure(self, result):
+    if self.rescan_nodes: result.from_rescan = True
+    datahandler.saveResult(result)
+    self.results.append(result)
+
+    if result.site in self.exit_fails: 
+      self.exit_fails[result.site].add(result.exit_node)
+    else: self.exit_fails[result.site] = sets.Set([result.exit_node])
+
+    err_cnt = len(self.exit_fails[result.site])
+
+    plog("ERROR", self.proto+" exit-only fail of "+result.reason+" at "+result.exit_node+". This makes "+str(err_cnt)+"/"+str(self.site_tests(result.site))+" node failures for "+result.site)
+
+  def register_dynamic_failure(self, result):
+    if self.rescan_nodes: result.from_rescan = True
+    self.results.append(result)
+    datahandler.saveResult(result)
+    if result.site in self.dynamic_fails:
+      self.dynamic_fails[result.site].add(result.exit_node)
+    else:
+      self.dynamic_fails[result.site] = sets.Set([result.exit_node])
+
+    err_cnt = len(self.dynamic_fails[result.site])
+
+    plog("ERROR", self.proto+" dynamic fail of "+result.reason+" at "+result.exit_node+". This makes "+str(err_cnt)+"/"+str(self.site_tests(result.site))+" node failures for "+result.site)
+
+
+class SearchBasedTest(Test):
+  def __init__(self, proto, port, wordlist_file):
+    self.wordlist_file = wordlist_file
+    Test.__init__(self, proto, port)
+
+  def rewind(self):
+    self.wordlist = load_wordlist(self.wordlist_file)
+    Test.rewind(self)
+
+  def _is_useable_url(self, url, valid_schemes=None, filetypes=None):
+    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
+    if netloc.rfind(":") != -1:
+      # FIXME: %-encoding?
+      port = netloc[netloc.rfind(":")+1:]
+      try:
+        if int(port) != self.port:
+          plog("DEBUG", "Unusable port "+port+" in "+url)
+          return False
+      except:
+        traceback.print_exc()
+        plog("WARN", "Unparseable port "+port+" in "+url)
+        return False
+    if valid_schemes and scheme not in valid_schemes:
+      plog("DEBUG", "Unusable scheme "+scheme+" in "+url)
+      return False
+    if url in self.banned_targets:
+      plog("DEBUG", "Banned url "+url)
+      return False
+    if filetypes: # Must be checked last
+      for filetype in filetypes:
+        if url[-len(filetype):] == filetype:
+          return True
+      plog("DEBUG", "Bad filetype for "+url)
+      return False
+    return True
+
+  def get_search_urls(self, protocol='any', results_per_type=10, host_only=False, filetypes=['any'], search_mode=default_search_mode):
+    ''' 
+    construct a list of urls based on the wordlist, filetypes and protocol. 
+    '''
+    plog('INFO', 'Searching google for relevant sites...')
+  
+    urllist = Set([])
+    for filetype in filetypes:
+      type_urls = Set([])
+  
+      while len(type_urls) < results_per_type:
+        query = random.choice(self.wordlist)
+        if filetype != 'any':
+          query += " "+search_mode["filetype"]+filetype
+        if protocol != 'any' and search_mode["inurl"]:
+          query += " "+search_mode["inurl"]+protocol # this isn't too reliable, but we'll re-filter results later
+        #query += '&num=' + `g_results_per_page` 
+  
+        # search google for relevant pages
+        # note: google only accepts requests from identified browsers
+        host = search_mode["host"]
+        params = urllib.urlencode({search_mode["query"] : query})
+        search_path = '/search' + '?' + params
+        search_url = "http://"+host+search_path
+         
+        plog("INFO", "Search url: "+search_url)
+        try:
+          if search_mode["useragent"]:
+            (code, resp_headers, new_cookies, mime_type, content) = http_request(search_url, search_cookies)
+          else:
+            headers = filter(lambda h: h[0] != "User-Agent", 
+                             copy.copy(firefox_headers))
+            (code, resp_headers, new_cookies, mime_type, content) = http_request(search_url, search_cookies, headers)
+        except socket.gaierror:
+          plog('ERROR', 'Scraping of http://'+host+search_path+" failed")
+          traceback.print_exc()
+          return list(urllist)
+        except:
+          plog('ERROR', 'Scraping of http://'+host+search_path+" failed")
+          traceback.print_exc()
+          # Bloody hack just to run some tests overnight
+          return [protocol+"://www.eff.org", protocol+"://www.fastmail.fm", protocol+"://www.torproject.org", protocol+"://secure.wikileaks.org/"]
+  
+        links = SoupStrainer('a')
+        try:
+          soup = TheChosenSoup(content, parseOnlyThese=links)
+        except Exception:
+          plog('ERROR', 'Soup-scraping of http://'+host+search_path+" failed")
+          traceback.print_exc()
+          print "Content is: "+str(content)
+          return [protocol+"://www.eff.org", protocol+"://www.fastmail.fm", protocol+"://www.torproject.org", protocol+"://secure.wikileaks.org/"] 
+        # get the links and do some additional filtering
+        for link in soup.findAll('a'):
+          skip = True
+          for a in link.attrs:
+            if a[0] == "class" and search_mode["class"] in a[1]:
+              skip = False
+              break
+          if skip: continue
+          if link.has_key(search_mode['realtgt']):
+            url = link[search_mode['realtgt']]
+          else:
+            url = link['href']
+          if protocol == 'any': prot_list = None
+          else: prot_list = [protocol]
+          if filetype == 'any': file_list = None
+          else: file_list = filetypes
+
+          if self._is_useable_url(url, prot_list, file_list):
+            if host_only:
+              # FIXME: %-encoding, @'s, etc?
+              host = urlparse.urlparse(url)[1]
+              # Have to check again here after parsing the url: 
+              if host not in self.banned_targets:
+                type_urls.add(host)
+            else:
+              type_urls.add(url)
+          else:
+            pass
+        plog("INFO", "Have "+str(len(type_urls))+"/"+str(results_per_type)+" google urls so far..") 
+
+      # make sure we don't get more urls than needed
+      if len(type_urls) > results_per_type:
+        type_urls = Set(random.sample(type_urls, results_per_type))
+      urllist.union_update(type_urls)
+       
+    return list(urllist)
+
+class HTTPTest(SearchBasedTest):
+  def __init__(self, wordlist, filetypes=scan_filetypes):
+    # FIXME: Handle http urls w/ non-80 ports..
+    SearchBasedTest.__init__(self, "HTTP", 80, wordlist)
+    self.fetch_targets = urls_per_filetype
+    self.httpcode_fails = {}
+    self.scan_filetypes = filetypes
+
+  def _reset(self):
+    SearchBasedTest._reset(self)
+    self.targets = {}
+
+  def rewind(self):
+    SearchBasedTest.rewind(self)
+    self.httpcode_fails = {}
+
+  def check_cookies(self):
+    # FIXME: This test is badly broken..
+    # We probably only want to do this on a per-url basis.. Then
+    # we can do the dynamic compare..
+    return TEST_SUCCESS
+    tor_cookies = "\n"
+    plain_cookies = "\n"
+    # FIXME: do we need to sort these? So far we have worse problems..
+    for cookie in self.tor_cookie_jar:
+      tor_cookies += "\t"+cookie.name+":"+cookie.domain+cookie.path+" discard="+str(cookie.discard)+"\n"
+    for cookie in self.cookie_jar:
+      plain_cookies += "\t"+cookie.name+":"+cookie.domain+cookie.path+" discard="+str(cookie.discard)+"\n"
+    if tor_cookies != plain_cookies:
+      exit_node = scanhdlr.get_exit_node()
+      plog("ERROR", "Cookie mismatch at "+exit_node+":\nTor Cookies:"+tor_cookies+"\nPlain Cookies:\n"+plain_cookies)
+      result = CookieTestResult(self.node_map[exit_node[1:]],
+                          TEST_FAILURE, FAILURE_COOKIEMISMATCH, plain_cookies, 
+                          tor_cookies)
+      if self.rescan_nodes: result.from_rescan = True
+      self.results.append(result)
+      datahandler.saveResult(result)
+      return TEST_FAILURE
+    return TEST_SUCCESS
+
+
+  def run_test(self):
+    # A single test should have a single cookie jar
+    self.tor_cookie_jar = cookielib.MozillaCookieJar()
+    self.cookie_jar = cookielib.MozillaCookieJar()
+    self.headers = copy.copy(firefox_headers)
+   
+    self.tests_run += 1
+
+    n_tests = random.choice(xrange(1,len(self.targets)+1))
+    filetypes = random.sample(self.targets.keys(), n_tests)
+    
+    plog("INFO", "HTTPTest decided to fetch "+str(n_tests)+" urls of types: "+str(filetypes))
+
+    n_success = n_fail = n_inconclusive = 0 
+    for ftype in filetypes:
+      # FIXME: Set referrer to random or none for each of these
+      address = random.choice(self.targets[ftype])
+      result = self.check_http(address)
+      if result == TEST_INCONCLUSIVE: n_inconclusive += 1
+      if result == TEST_FAILURE: n_fail += 1
+      if result == TEST_SUCCESS: n_success += 1
+
+    # Cookie jars contain locks and can't be pickled. Clear them away.
+    self.tor_cookie_jar = None
+    self.cookie_jar = None
+  
+    if n_fail: return TEST_FAILURE
+    elif n_inconclusive > 2*n_success: # > 66% inconclusive -> redo
+      return TEST_INCONCLUSIVE
+    else:
+      return TEST_SUCCESS 
+
+  def _remove_target_addr(self, target):
+    for ftype in self.targets:
+      if target in self.targets[ftype]: self.targets[ftype].remove(target)
+
+  def remove_target(self, address, reason):
+    SearchBasedTest.remove_target(self, address, reason)
+    if address in self.httpcode_fails: del self.httpcode_fails[address]
+
+  def refill_targets(self):
+    for ftype in self.targets:
+      if len(self.targets[ftype]) < self.fetch_targets:
+        plog("NOTICE", self.proto+" scanner short on "+ftype+" targets. Adding more")
+        raw_urls = self.get_search_urls('http', self.fetch_targets, 
+                                        filetypes=[ftype])
+        self.targets[ftype].extend(raw_urls)
+
+    
+  def get_targets(self):
+    raw_urls = self.get_search_urls('http', self.fetch_targets, 
+                                     filetypes=self.scan_filetypes)
+    urls = {} 
+    # Slow, but meh..
+    for ftype in self.scan_filetypes: urls[ftype] = []
+    for url in raw_urls:
+      for ftype in self.scan_filetypes:
+        if url[-len(ftype):] == ftype:
+          urls[ftype].append(url)
+    return urls     
+
+  def remove_false_positives(self):
+    SearchBasedTest.remove_false_positives(self)
+    self._remove_false_positive_type(self.httpcode_fails,
+                                     FALSEPOSITIVE_HTTPERRORS,
+                                     max_httpcode_fail_pct)
+  def site_tests(self, site):
+    tot_cnt = SearchBasedTest.site_tests(self, site) 
+    if site in self.httpcode_fails:
+      tot_cnt += len(self.httpcode_fails[site])
+    return tot_cnt
+    
+  def register_http_failure(self, result): # XXX: Currently deadcode
+    if self.rescan_nodes: result.from_rescan = True
+    self.results.append(result)
+    datahandler.saveResult(result)
+    if result.site in self.httpcode_fails:
+      self.httpcode_fails[result.site].add(result.exit_node)
+    else:
+      self.httpcode_fails[result.site] = sets.Set([result.exit_node])
+    
+    err_cnt = len(self.httpcode_fails[result.site])
+
+    plog("ERROR", self.proto+" http error code fail of "+result.reason+" at "+result.exit_node+". This makes "+str(err_cnt)+"/"+str(self.site_tests(result.site))+" node failures for "+result.site)
+    
+
+  def check_http_nodynamic(self, address, nocontent=False):
+    # TODO: use nocontent to cause us to not load content into memory.
+    # This will require refactoring http_response though.
+    ''' check whether an HTTP connection to a given address is molested '''
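+    # Rough flow: (1) load or fetch a direct (non-Tor) copy of the page,
+    # (2) fetch the same URL through Tor's SOCKS port, (3) on mismatch,
+    # refetch directly (optionally from refetch_ip) to rule out dynamic
+    # content before blaming the exit node.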
+
+    # an address representation acceptable for a filename 
+    address_file = DataHandler.safeFilename(address[7:])
+    content_prefix = http_content_dir+address_file
+    
+    # Keep a copy of the cookie jar before mods for refetch or
+    # to restore on errors that cancel a fetch
+    orig_cookie_jar = cookielib.MozillaCookieJar()
+    for cookie in self.cookie_jar: orig_cookie_jar.set_cookie(cookie)
+    orig_tor_cookie_jar = cookielib.MozillaCookieJar()
+    for cookie in self.tor_cookie_jar: orig_tor_cookie_jar.set_cookie(cookie)
+
+    try:
+      # Load content from disk and compute its SHA-1
+      content_file = open(content_prefix+'.content', 'r')
+      sha1sum = sha.sha()
+      buf = content_file.read(4096)
+      while buf:
+        sha1sum.update(buf)
+        buf = content_file.read(4096)
+      content_file.close()
+      
+      added_cookie_jar = cookielib.MozillaCookieJar()
+      added_cookie_jar.load(content_prefix+'.cookies', ignore_discard=True)
+      self.cookie_jar.load(content_prefix+'.cookies', ignore_discard=True)
+
+      headerdiffer = SnakePickler.load(content_prefix+'.headerdiff')
+
+      content = None
+      mime_type = None 
+
+    except IOError:
+      (code, resp_headers, new_cookies, mime_type, content) = http_request(address, self.cookie_jar, self.headers)
+
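+      # Anything outside the 2xx range (including the negative error codes
+      # from http_request) means the direct fetch failed.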
+      if code - (code % 100) != 200:
+        plog("NOTICE", "Non-tor HTTP error "+str(code)+" fetching content for "+address)
+        # Just remove it
+        self.remove_target(address, FALSEPOSITIVE_HTTPERRORS)
+        # Restore cookie jars
+        self.cookie_jar = orig_cookie_jar
+        self.tor_cookie_jar = orig_tor_cookie_jar
+        return TEST_INCONCLUSIVE
+
+      if not content:
+        plog("WARN", "Failed to direct load "+address)
+        # Just remove it
+        self.remove_target(address, INCONCLUSIVE_NOLOCALCONTENT)
+        # Restore cookie jar
+        self.cookie_jar = orig_cookie_jar
+        self.tor_cookie_jar = orig_tor_cookie_jar
+        return TEST_INCONCLUSIVE 
+      sha1sum = sha.sha(content)
+
+      content_file = open(content_prefix+'.content', 'w')
+      content_file.write(content)
+      content_file.close()
+      
+      headerdiffer = HeaderDiffer(resp_headers)
+      SnakePickler.dump(headerdiffer, content_prefix+'.headerdiff')
+      
+      # Need to do set subtraction and only save new cookies.. 
+      # or extract/make_cookies
+      added_cookie_jar = cookielib.MozillaCookieJar()
+      for cookie in new_cookies: added_cookie_jar.set_cookie(cookie)
+      try:
+        added_cookie_jar.save(content_prefix+'.cookies', ignore_discard=True)
+      except:
+        traceback.print_exc()
+        plog("WARN", "Error saving cookies in "+str(self.cookie_jar)+" to "+content_prefix+".cookies")
+
+    except TypeError, e:
+      plog('ERROR', 'Failed obtaining the shasum for ' + address)
+      plog('ERROR', e)
+      # Restore cookie jars
+      self.cookie_jar = orig_cookie_jar
+      self.tor_cookie_jar = orig_tor_cookie_jar
+      return TEST_INCONCLUSIVE
+
+    defaultsocket = socket.socket
+    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
+    socket.socket = socks.socksocket
+
+    (pcode, presp_headers, pnew_cookies, pmime_type, pcontent) = http_request(address, self.tor_cookie_jar, self.headers)
+    psha1sum = sha.sha(pcontent)
+
+    # reset the connection to direct
+    socket.socket = defaultsocket
+
+    exit_node = scanhdlr.get_exit_node()
+    if exit_node == 0 or exit_node == '0' or not exit_node:
+      plog('NOTICE', 'We had no exit node to test, skipping to the next test.')
+      result = HttpTestResult(None, 
+                              address, TEST_INCONCLUSIVE, INCONCLUSIVE_NOEXIT)
+      if self.rescan_nodes: result.from_rescan = True
+      self.results.append(result)
+      datahandler.saveResult(result)
+
+      # Restore cookie jars
+      self.cookie_jar = orig_cookie_jar
+      self.tor_cookie_jar = orig_tor_cookie_jar
+      return TEST_INCONCLUSIVE
+
+    if pcode - (pcode % 100) != 200:
+      plog("NOTICE", exit_node+" had error "+str(pcode)+" fetching content for "+address)
+      # Restore cookie jars
+      # XXX: This is odd and possibly wrong for the refetch
+      self.cookie_jar = orig_cookie_jar
+      self.tor_cookie_jar = orig_tor_cookie_jar
+      BindingSocket.bind_to = refetch_ip
+      (code_new, resp_headers_new, new_cookies_new, mime_type_new, content_new) = http_request(address, orig_tor_cookie_jar, self.headers)
+      BindingSocket.bind_to = None
+      
+      if code_new == pcode:
+        plog("NOTICE", "Non-tor HTTP error "+str(code_new)+" fetching content for "+address)
+        # Just remove it
+        self.remove_target(address, FALSEPOSITIVE_HTTPERRORS)
+        return TEST_INCONCLUSIVE 
+
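+      # Negative float codes are the synthetic error values from http_request()
+      # (SOCKS and connection-level failures), not real HTTP status codes.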
+      if pcode < 0 and type(pcode) == float:
+        if pcode == -1: # "General socks error"
+          fail_reason = FAILURE_CONNERROR
+        elif pcode == -2: # "connection not allowed" aka ExitPolicy
+          fail_reason = FAILURE_EXITPOLICY
+        elif pcode == -3: # "Net Unreach" ??
+          fail_reason = FAILURE_NETUNREACH
+        elif pcode == -4: # "Host Unreach" aka RESOLVEFAILED
+          fail_reason = FAILURE_HOSTUNREACH
+          result = HttpTestResult(self.node_map[exit_node[1:]],
+                                 address, TEST_FAILURE, fail_reason)
+          return self.register_dns_failure(result)
+        elif pcode == -5: # Connection refused
+          fail_reason = FAILURE_CONNREFUSED
+          result = HttpTestResult(self.node_map[exit_node[1:]], 
+                              address, TEST_FAILURE, fail_reason)
+          self.register_exit_failure(result)
+          return TEST_FAILURE
+        elif pcode == -6: # timeout
+          fail_reason = FAILURE_TIMEOUT
+          result = HttpTestResult(self.node_map[exit_node[1:]],
+                                 address, TEST_FAILURE, fail_reason)
+          return self.register_timeout_failure(result)
+        elif pcode == -13:
+          fail_reason = FAILURE_NOEXITCONTENT
+          result = HttpTestResult(self.node_map[exit_node[1:]], 
+                              address, TEST_FAILURE, fail_reason)
+          self.register_exit_failure(result)
+          return TEST_FAILURE
+        elif pcode == -23: 
+          fail_reason = FAILURE_URLERROR
+        else:
+          fail_reason = FAILURE_MISCEXCEPTION
+      else: 
+        fail_reason = FAILURE_BADHTTPCODE+str(pcode)
+      result = HttpTestResult(self.node_map[exit_node[1:]], 
+                            address, TEST_FAILURE, fail_reason)
+      result.extra_info = str(pcontent)
+      self.register_connect_failure(result)
+      return TEST_FAILURE
+
+    # if we have no content, we had a connection error
+    if pcontent == "":
+      result = HttpTestResult(self.node_map[exit_node[1:]], 
+                              address, TEST_FAILURE, FAILURE_NOEXITCONTENT)
+      self.register_exit_failure(result)
+      # Restore cookie jars
+      self.cookie_jar = orig_cookie_jar
+      self.tor_cookie_jar = orig_tor_cookie_jar
+      return TEST_FAILURE
+
+    hdiffs = headerdiffer.show_differences(presp_headers)
+    if hdiffs:
+      plog("NOTICE", "Header differences for "+address+": \n"+hdiffs)
+
+    # compare the content
+    # if content matches, everything is ok
+    if not hdiffs and psha1sum.hexdigest() == sha1sum.hexdigest():
+      result = HttpTestResult(self.node_map[exit_node[1:]], 
+                              address, TEST_SUCCESS)
+      self.register_success(result)
+      return TEST_SUCCESS
+
+    # Check for a simple truncation failure, which seems
+    # common with many nodes
+    if not content and not nocontent:
+      load_file = content_prefix+'.content'
+      content_file = open(load_file, 'r')
+      content = content_file.read()
+      content_file.close()
+    
+    if content and len(pcontent) < len(content):
+      if content[0:len(pcontent)] == pcontent[0:len(pcontent)]:
+        failed_prefix = http_failed_dir+address_file
+        exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.content'), 'w')
+        exit_content_file.write(pcontent)
+        exit_content_file.close()
+        result = HttpTestResult(self.node_map[exit_node[1:]], 
+                                address, TEST_FAILURE, FAILURE_EXITTRUNCATION, 
+                                sha1sum.hexdigest(), psha1sum.hexdigest(), 
+                                content_prefix+".content",
+                                exit_content_file.name)
+        self.register_exit_failure(result)
+        # Restore cookie jars
+        self.cookie_jar = orig_cookie_jar
+        self.tor_cookie_jar = orig_tor_cookie_jar
+        return TEST_FAILURE
+
+    # if content doesn't match, update the direct content and use new cookies
+    # If we have alternate IPs to bind to on this box, use them?
+    # Sometimes pages have the client IP encoded in them..
+    # Also, use the Tor cookies, since those identifiers are
+    # probably embedded in the Tor page as well.
+    BindingSocket.bind_to = refetch_ip
+    (code_new, resp_headers_new, new_cookies_new, mime_type_new, content_new) = http_request(address, orig_tor_cookie_jar, self.headers)
+    BindingSocket.bind_to = None
+    
+    if not content_new:
+      plog("WARN", "Failed to re-frech "+address+" outside of Tor. Did our network fail?")
+      result = HttpTestResult(self.node_map[exit_node[1:]], 
+                              address, TEST_INCONCLUSIVE, 
+                              INCONCLUSIVE_NOLOCALCONTENT)
+      if self.rescan_nodes: result.from_rescan = True
+      self.results.append(result)
+      datahandler.saveResult(result)
+      return TEST_INCONCLUSIVE
+
+    headerdiffer.prune_differences(resp_headers_new)
+    hdiffs = headerdiffer.show_differences(presp_headers)
+
+    SnakePickler.dump(headerdiffer, content_prefix+'.headerdiff')
+
+    sha1sum_new = sha.sha(content_new)
+
+    if sha1sum.hexdigest() != sha1sum_new.hexdigest():
+      # if content has changed outside of tor, update the saved file
+      os.rename(content_prefix+'.content', content_prefix+'.content-old')
+      new_content_file = open(content_prefix+'.content', 'w')
+      new_content_file.write(content_new)
+      new_content_file.close()
+
+    # Need to do set subtraction and only save new cookies.. 
+    # or extract/make_cookies
+    
+    self.cookie_jar = orig_cookie_jar
+    new_cookie_jar = cookielib.MozillaCookieJar()
+    for cookie in new_cookies_new: 
+      new_cookie_jar.set_cookie(cookie)
+      self.cookie_jar.set_cookie(cookie) # Update..
+    os.rename(content_prefix+'.cookies', content_prefix+'.cookies-old')
+    try:
+      new_cookie_jar.save(content_prefix+'.cookies', ignore_discard=True)
+    except:
+      traceback.print_exc()
+      plog("WARN", "Error saving cookies in "+str(new_cookie_jar)+" to "+content_prefix+".cookies")
+
+    if hdiffs:
+      # XXX: We probably should store the header differ + exit headers 
+      # for later comparison (ie if the header differ picks up more diffs)
+      plog("NOTICE", "Post-refetch header changes for "+address+": \n"+hdiffs)
+      result = HttpTestResult(self.node_map[exit_node[1:]], 
+                              address, TEST_FAILURE, FAILURE_HEADERCHANGE)
+      result.extra_info = hdiffs
+      self.register_dynamic_failure(result)
+      # Lets let the rest of the tests run too actually
+      #return TEST_FAILURE 
+
+    # compare the node content and the new content
+    # if it matches, everything is ok
+    if psha1sum.hexdigest() == sha1sum_new.hexdigest():
+      result = HttpTestResult(self.node_map[exit_node[1:]], 
+                              address, TEST_SUCCESS)
+      self.register_success(result)
+      return TEST_SUCCESS
+ 
+    if not content and not nocontent:
+      if sha1sum.hexdigest() != sha1sum_new.hexdigest():
+        load_file = content_prefix+'.content-old'
+      else:
+        load_file = content_prefix+'.content'
+      content_file = open(load_file, 'r')
+      content = content_file.read()
+      content_file.close()
+    
+    if not ((mime_type == mime_type_new or not mime_type) \
+               and mime_type_new == pmime_type):
+      if not mime_type: mime_type = "text/disk"
+      plog("WARN", "Mime type change: 1st: "+mime_type+", 2nd: "+mime_type_new+", Tor: "+pmime_type)
+      # TODO: If this actually happens, store a result.
+
+    # Dirty dirty dirty...
+    return (mime_type_new, pcontent, psha1sum, content, sha1sum, content_new, 
+            sha1sum_new, exit_node)
+
+  def check_http(self, address):
+    plog('INFO', 'Conducting an http test with destination ' + address)
+    ret = self.check_http_nodynamic(address)
+    if type(ret) == int:
+      return ret
+    return self._check_http_worker(address, ret) 
+
+  def _check_http_worker(self, address, http_ret):
+    (mime_type,pcontent,psha1sum,content,sha1sum,content_new,sha1sum_new,exit_node) = http_ret
+     
+    address_file = DataHandler.safeFilename(address[7:])
+    content_prefix = http_content_dir+address_file
+    failed_prefix = http_failed_dir+address_file
+
+    # compare the new and old direct content
+    # if they match, the content is static, so the exit node must have changed it
+    if sha1sum.hexdigest() == sha1sum_new.hexdigest():
+      exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.content'), 'w')
+      exit_content_file.write(pcontent)
+      exit_content_file.close()
+
+      result = HttpTestResult(self.node_map[exit_node[1:]],
+                              address, TEST_FAILURE, FAILURE_EXITONLY, 
+                              sha1sum.hexdigest(), psha1sum.hexdigest(), 
+                              content_prefix+".content", exit_content_file.name)
+      self.register_exit_failure(result)
+      return TEST_FAILURE
+
+    exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.dyn-content'),'w')
+    exit_content_file.write(pcontent)
+    exit_content_file.close()
+
+    result = HttpTestResult(self.node_map[exit_node[1:]], 
+                            address, TEST_FAILURE, FAILURE_DYNAMIC, 
+                            sha1sum_new.hexdigest(), psha1sum.hexdigest(), 
+                            content_prefix+".content", exit_content_file.name, 
+                            content_prefix+'.content-old',
+                            sha1sum.hexdigest())
+    if self.rescan_nodes: result.from_rescan = True
+    self.results.append(result)
+    datahandler.saveResult(result)
+
+    # The HTTP Test should remove address immediately...
+    plog("WARN", "HTTP Test is removing dynamic URL "+address)
+    self.remove_target(address, FALSEPOSITIVE_DYNAMIC)
+    return TEST_FAILURE
+
+class HTMLTest(HTTPTest):
+  def __init__(self, wordlist, recurse_filetypes=scan_filetypes):
+    HTTPTest.__init__(self, wordlist, recurse_filetypes)
+    self.fetch_targets = num_html_urls
+    self.proto = "HTML"
+    self.recurse_filetypes = recurse_filetypes
+    self.fetch_queue = []
+   
+  def _reset(self):
+    HTTPTest._reset(self)
+    self.targets = [] # FIXME: Lame..
+    self.soupdiffer_files = {} # XXX: These two are now deprecated
+    self.jsdiffer_files = {}
+ 
+  def depickle_upgrade(self):
+    if self._pickle_revision < 2:
+      self.soupdiffer_files = {}
+      self.jsdiffer_files = {}
+    SearchBasedTest.depickle_upgrade(self)
+
+  def run_test(self):
+    # A single test should have a single cookie jar
+    self.tor_cookie_jar = cookielib.MozillaCookieJar()
+    self.cookie_jar = cookielib.MozillaCookieJar()
+    self.headers = copy.copy(firefox_headers)
+
+    use_referers = False
+    first_referer = None    
+    if random.randint(1,100) < referer_chance_pct:
+      use_referers = True
+      # FIXME: Hrmm.. May want to do this a bit better..
+      first_referer = random.choice(self.targets)
+      plog("INFO", "Chose random referer "+first_referer)
+    
+    self.tests_run += 1
+    address = random.choice(self.targets)
+    
+    # Keep a trail log for this test and check for loops
+    fetched = sets.Set([])
+
+    self.fetch_queue.append(("html", address, first_referer))
+    n_success = n_fail = n_inconclusive = 0 
+    while self.fetch_queue:
+      (test, url, referer) = self.fetch_queue.pop(0)
+      if url in fetched:
+        plog("INFO", "Already fetched "+url+", skipping")
+        continue
+      fetched.add(url)
+      if use_referers and referer: 
+        self.headers.append(('Referer', referer))
+      # Technically both html and js tests check and dispatch via mime types
+      # but I want to know when link tags lie
+      if test == "html" or test == "http": result = self.check_html(url)
+      elif test == "js": result = self.check_js(url)
+      elif test == "image":
+        accept_hdr = filter(lambda h: h[0] == "Accept", self.headers)[0]
+        orig_accept = accept_hdr[1]
+        accept_hdr[1] = image_accept_hdr
+        result = self.check_http(url)
+        accept_hdr[1] = orig_accept
+      else: 
+        plog("WARN", "Unknown test type: "+test+" for "+url)
+        result = TEST_SUCCESS
+      if result == TEST_INCONCLUSIVE: n_inconclusive += 1
+      if result == TEST_FAILURE: n_fail += 1
+      if result == TEST_SUCCESS: n_success += 1
+
+    # Need to clear because the cookiejars use locks...
+    self.tor_cookie_jar = None
+    self.cookie_jar = None
+
+    if n_fail: return TEST_FAILURE
+    elif 2*n_inconclusive > n_success: # > 33% inconclusive -> redo
+      return TEST_INCONCLUSIVE
+    else:
+      return TEST_SUCCESS 
+
+  # FIXME: This is pretty lame.. We should change how
+  # the HTTPTest stores URLs so we don't have to do this.
+  def _remove_target_addr(self, target):
+    Test._remove_target_addr(self, target)
+    if target in self.soupdiffer_files: del self.soupdiffer_files[target]
+    if target in self.jsdiffer_files: del self.jsdiffer_files[target]
+
+  def refill_targets(self):
+    Test.refill_targets(self)
+
+  def get_targets(self):
+    return self.get_search_urls('http', self.fetch_targets) 
+
+  def _add_recursive_targets(self, soup, orig_addr):
+    # Only pull at most one filetype from the list of 'a' links
+    targets = []
+    got_type = {}
+    found_favicon = False
+    # Hrmm, if we recursively strained only these tags, this might be faster
+    for tag in tags_to_recurse:
+      tags = soup.findAll(tag)
+      for t in tags:
+        #plog("DEBUG", "Got tag: "+str(t))
+        for a in t.attrs:
+          attr_name = a[0]
+          attr_tgt = a[1]
+          if attr_name in attrs_to_recurse:
+            if t.name in recurse_html:
+              targets.append(("html", urlparse.urljoin(orig_addr, attr_tgt)))
+            elif t.name in recurse_script:
+              if t.name == "link":
+                for a in t.attrs:
+                  a = map(lambda x: x.lower(), a)
+                  # Special case CSS and favicons
+                  if (a[0] == "type" and a[1] == "text/css") or \
+                   ((a[0] == "rel" or a[0] == "rev") and a[1] == "stylesheet"):
+                    plog("INFO", "Adding CSS of: "+str(t))
+                    targets.append(("http", urlparse.urljoin(orig_addr, attr_tgt)))
+                  elif (a[0] == "rel" or a[0] == "rev") and \
+                       ("shortcut" in a[1] or "icon" in a[1]):
+                    plog("INFO", "Adding favicon of: "+str(t))
+                    found_favicon = True
+                    targets.append(("image", urlparse.urljoin(orig_addr, attr_tgt)))
+                  elif a[0] == "type" and self.is_script(a[1], ""):
+                    plog("INFO", "Adding link script of: "+str(t))
+                    targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
+              else:
+                plog("INFO", "Adding script tag of: "+str(t))
+                targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
+            elif t.name in recurse_image:
+              plog("INFO", "Adding image tag of: "+str(t))
+              targets.append(("image", urlparse.urljoin(orig_addr, attr_tgt)))
+            elif t.name == 'a':
+              if attr_name == "href":
+                for f in self.recurse_filetypes:
+                  if f not in got_type and attr_tgt[-len(f):] == f:
+                    got_type[f] = 1
+                    targets.append(("http", urlparse.urljoin(orig_addr, attr_tgt)))
+            else:
+              targets.append(("http", urlparse.urljoin(orig_addr, attr_tgt)))
+    
+    if not found_favicon:
+      targets.insert(0, ("image", urlparse.urljoin(orig_addr, "/favicon.ico")))
+
+    loaded = sets.Set([])
+
+    for i in targets:
+      if i[1] in loaded: continue
+      loaded.add(i[1])
+      if self._is_useable_url(i[1], html_schemes):
+        plog("NOTICE", "Adding "+i[0]+" target: "+i[1])
+        self.fetch_queue.append((i[0], i[1], orig_addr))
+      else:
+        plog("NOTICE", "Skipping "+i[0]+" target: "+i[1])
+
+  def check_js(self, address):
+    plog('INFO', 'Conducting a js test with destination ' + address)
+
+    accept_hdr = filter(lambda h: h[0] == "Accept", self.headers)[0]
+    orig_accept = accept_hdr[1]
+    accept_hdr[1] = script_accept_hdr
+    ret = self.check_http_nodynamic(address)
+    accept_hdr[1] = orig_accept
+
+    if type(ret) == int:
+      return ret
+    return self._check_js_worker(address, ret)
+
+  def is_html(self, mime_type, content):
+    is_html = False
+    for type_match in html_mime_types:
+      if re.match(type_match, mime_type.lower()): 
+        is_html = True
+        break
+    return is_html
+ 
+  def is_script(self, mime_type, content):
+    is_script = False
+    for type_match in script_mime_types:
+      if re.match(type_match, mime_type.lower()): 
+        is_script = True
+        break
+    return is_script
+
+  def _check_js_worker(self, address, http_ret):
+    (mime_type, tor_js, tsha, orig_js, osha, new_js, nsha, exit_node) = http_ret
+
+    if not self.is_script(mime_type, orig_js):
+      plog("WARN", "Non-script mime type "+mime_type+" fed to JS test for "+address)
+     
+      if self.is_html(mime_type, orig_js):
+        return self._check_html_worker(address, http_ret)
+      else:
+        return self._check_http_worker(address, http_ret)
+
+    address_file = DataHandler.safeFilename(address[7:])
+    content_prefix = http_content_dir+address_file
+    failed_prefix = http_failed_dir+address_file
+
+    if os.path.exists(content_prefix+".jsdiff"):
+      plog("DEBUG", "Loading jsdiff for "+address)
+      jsdiff = SnakePickler.load(content_prefix+".jsdiff")
+    else:
+      plog("DEBUG", "No jsdiff for "+address+". Creating+dumping")
+      jsdiff = JSDiffer(orig_js)
+    
+    jsdiff.prune_differences(new_js)
+    SnakePickler.dump(jsdiff, content_prefix+".jsdiff")
+
+    has_js_changes = jsdiff.contains_differences(tor_js)
+
+    if not has_js_changes:
+      result = JsTestResult(self.node_map[exit_node[1:]], 
+                            address, TEST_SUCCESS)
+      self.register_success(result)
+      return TEST_SUCCESS
+    else:
+      exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.dyn-content'), 'w')
+      exit_content_file.write(tor_js)
+      exit_content_file.close()
+
+      result = JsTestResult(self.node_map[exit_node[1:]], 
+                             address, TEST_FAILURE, FAILURE_DYNAMIC, 
+                             content_prefix+".content", exit_content_file.name, 
+                             content_prefix+'.content-old',
+                             content_prefix+".jsdiff")
+      self.register_dynamic_failure(result)
+      return TEST_FAILURE
+
+  def check_html(self, address):
+    plog('INFO', 'Conducting an html test with destination ' + address)
+    ret = self.check_http_nodynamic(address)
+    
+    if type(ret) == int:
+      return ret
+
+    return self._check_html_worker(address, ret)
+
+  def _check_html_worker(self, address, http_ret):
+    (mime_type,tor_html,tsha,orig_html,osha,new_html,nsha,exit_node)=http_ret
+
+    if not self.is_html(mime_type, orig_html):
+      # XXX: Keep an eye on this logline.
+      plog("WARN", "Non-html mime type "+mime_type+" fed to HTML test for "+address)
+      if self.is_script(mime_type, orig_html):
+        return self._check_js_worker(address, http_ret)
+      else:
+        return self._check_http_worker(address, http_ret)
+
+    # an address representation acceptable for a filename 
+    address_file = DataHandler.safeFilename(address[7:])
+    content_prefix = http_content_dir+address_file
+    failed_prefix = http_failed_dir+address_file
+
+    orig_soup = FullyStrainedSoup(orig_html.decode('ascii', 'ignore'))
+    tor_soup = FullyStrainedSoup(tor_html.decode('ascii', 'ignore'))
+
+    # Also find recursive urls
+    recurse_elements = SoupStrainer(lambda name, attrs: 
+        name in tags_to_recurse and 
+       len(Set(map(lambda a: a[0], attrs)).intersection(Set(attrs_to_recurse))) > 0)
+    self._add_recursive_targets(TheChosenSoup(tor_html.decode('ascii',
+                                   'ignore'), recurse_elements), address) 
+
+    # compare the content
+    # if content matches, everything is ok
+    if str(orig_soup) == str(tor_soup):
+      plog("INFO", "Successful soup comparison after SHA1 fail for "+address+" via "+exit_node)
+      result = HtmlTestResult(self.node_map[exit_node[1:]], 
+                              address, TEST_SUCCESS)
+      self.register_success(result)
+
+      return TEST_SUCCESS
+
+    content_new = new_html.decode('ascii', 'ignore')
+    if not content_new:
+      plog("WARN", "Failed to re-frech "+address+" outside of Tor. Did our network fail?")
+      result = HtmlTestResult(self.node_map[exit_node[1:]], 
+                              address, TEST_INCONCLUSIVE, 
+                              INCONCLUSIVE_NOLOCALCONTENT)
+      if self.rescan_nodes: result.from_rescan = True
+      self.results.append(result)
+      datahandler.saveResult(result)
+      return TEST_INCONCLUSIVE
+
+    new_soup = FullyStrainedSoup(content_new)
+
+    # compare the new and old content
+    # if they match, it means the exit node has been changing the content
+    if str(orig_soup) == str(new_soup):
+      exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.content'), 'w')
+      exit_content_file.write(tor_html)
+      exit_content_file.close()
+
+      result = HtmlTestResult(self.node_map[exit_node[1:]], 
+                              address, TEST_FAILURE, FAILURE_EXITONLY, 
+                              content_prefix+".content", exit_content_file.name)
+      self.register_exit_failure(result)
+      return TEST_FAILURE
+
+    # Let's try getting just the tag differences
+    # 1. Take difference between old and new tags both ways
+    # 2. Make map of tags that change to their attributes
+    # 3. Compare list of changed tags for tor vs new and
+    #    see if any extra tags changed or if new attributes
+    #    were added to additional tags
+    if os.path.exists(content_prefix+".soupdiff"):
+      plog("DEBUG", "Loading soupdiff for "+address)
+      soupdiff = SnakePickler.load(content_prefix+".soupdiff")
+      soupdiff.prune_differences(new_soup)
+    else:
+      plog("DEBUG", "No soupdiff for "+address+". Creating+dumping")
+      soupdiff = SoupDiffer(orig_soup, new_soup)
+
+    SnakePickler.dump(soupdiff, content_prefix+".soupdiff")
+    
+    more_tags = soupdiff.show_changed_tags(tor_soup)     
+    more_attrs = soupdiff.show_changed_attrs(tor_soup)
+    more_content = soupdiff.show_changed_content(tor_soup)
+
+    # Verify all of our changed tags are present here 
+    if more_tags or more_attrs or (more_content and not soupdiff.content_changed):
+      false_positive = False
+      plog("NOTICE", "SoupDiffer finds differences for "+address)
+      plog("NOTICE", "New Tags:\n"+more_tags)
+      plog("NOTICE", "New Attrs:\n"+more_attrs)
+      if more_content and not soupdiff.content_changed:
+        plog("NOTICE", "New Content:\n"+more_content)
+    else:
+      plog("INFO", "SoupDiffer predicts false_positive")
+      false_positive = True
+
+    if false_positive:
+      if os.path.exists(content_prefix+".jsdiff"):
+        plog("DEBUG", "Loading jsdiff for "+address)
+        jsdiff = SnakePickler.load(content_prefix+".jsdiff")
+      else:
+        plog("DEBUG", "No jsdiff for "+address+". Creating+dumping")
+        jsdiff = JSSoupDiffer(orig_soup)
+      
+      jsdiff.prune_differences(new_soup)
+      SnakePickler.dump(jsdiff, content_prefix+".jsdiff")
+
+      differences = jsdiff.show_differences(tor_soup)
+      false_positive = not differences
+      plog("INFO", "JSSoupDiffer predicts false_positive="+str(false_positive))
+      if not false_positive:
+        plog("NOTICE", "JSSoupDiffer finds differences: "+differences)
+
+    if false_positive:
+      plog("NOTICE", "False positive detected for dynamic change at "+address+" via "+exit_node)
+      result = HtmlTestResult(self.node_map[exit_node[1:]], 
+                              address, TEST_SUCCESS)
+      self.register_success(result)
+      return TEST_SUCCESS
+
+    exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.dyn-content'),'w')
+    exit_content_file.write(tor_html)
+    exit_content_file.close()
+ 
+    if os.path.exists(content_prefix+".jsdiff"):
+      jsdiff_file = content_prefix+".jsdiff"
+    else: jsdiff_file = None
+    if os.path.exists(content_prefix+".soupdiff"):
+      soupdiff_file = content_prefix+".soupdiff"
+    else: soupdiff_file = None
+
+    result = HtmlTestResult(self.node_map[exit_node[1:]], 
+                            address, TEST_FAILURE, FAILURE_DYNAMIC, 
+                            content_prefix+".content", exit_content_file.name, 
+                            content_prefix+'.content-old',
+                            soupdiff_file, jsdiff_file)
+    self.register_dynamic_failure(result)
+    return TEST_FAILURE
+    
+
+class SSLTest(SearchBasedTest):
+  def __init__(self, wordlist):
+    self.test_hosts = num_ssl_hosts
+    SearchBasedTest.__init__(self, "SSL", 443, wordlist)
+
+  def run_test(self):
+    self.tests_run += 1
+    return self.check_openssl(random.choice(self.targets))
+
+  def get_targets(self):
+    return self.get_search_urls('https', self.test_hosts, True, search_mode=google_search_mode) 
+
+  def ssl_request(self, address):
+    ''' initiate an ssl connection and return the server certificate '''
+    address=str(address) # Unicode hostnames not supported..
+     
+    # specify the context
+    ctx = SSL.Context(SSL.TLSv1_METHOD)
+    ctx.set_timeout(int(read_timeout))
+    ctx.set_verify_depth(1)
+
+    # ready the certificate request
+    request = crypto.X509Req()
+
+    # open an ssl connection
+    # FIXME: Hrmmm. handshake considerations
+    try:
+      s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+      # SSL has its own timeouts handled above. Undo ours from BindingSocket
+      s.settimeout(None) 
+      c = SSL.Connection(ctx, s)
+      c.set_connect_state()
+      c.connect((address, 443)) # DNS OK.
+      c.send(crypto.dump_certificate_request(crypto.FILETYPE_PEM,request))
+      # return the cert
+      return (0, c.get_peer_certificate(), None)
+    except socket.timeout, e:
+      plog('WARN','Socket timeout for '+address+": "+str(e))
+      return (-6.0, None,  e.__class__.__name__+str(e))
+    except socket.error, e:
+      plog('WARN','An error occurred while opening an ssl connection to '+address+": "+str(e))
+      return (-666.0, None,  e.__class__.__name__+str(e))
+    except socks.Socks5Error, e:
+      plog('WARN', 'A SOCKS5 error '+str(e.value[0])+' occurred for '+address+": "+str(e))
+      return (-float(e.value[0]), None,  e.__class__.__name__+str(e))
+    except KeyboardInterrupt:
+      raise KeyboardInterrupt
+    except OpenSSL.crypto.Error, e:
+      traceback.print_exc()
+      return (-23.0, None, e.__class__.__name__+str(e)) 
+    except Exception, e:
+      plog('WARN', 'An unknown SSL error occurred for '+address+': '+str(e))
+      traceback.print_exc()
+      return (-666.0, None,  e.__class__.__name__+str(e))
+
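+  # Illustrative use of ssl_request() above (a sketch only; the address below is
+  # a placeholder, not something the scanner actually targets):
+  #   (code, cert, err) = self.ssl_request("198.51.100.10")
+  #   if cert:
+  #     pem = crypto.dump_certificate(crypto.FILETYPE_PEM, cert)
+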
+  def get_resolved_ip(self, hostname):
+    # XXX: This is some extreme GIL abuse.. It may have race conditions
+    # on control port shutdown.. but at that point it's game over for
+    # us anyway.
+    mappings = scanhdlr.c.get_address_mappings("cache")
+    ret = None
+    for m in mappings:
+      if m.from_addr == hostname:
+        if ret:
+          plog("WARN", "Multiple maps for "+hostname)
+        ret = m.to_addr
+    return ret
+
+  def _update_cert_list(self, ssl_domain, check_ips):
+    changed = False
+    for ip in check_ips:
+      #let's always check.
+      #if not ssl_domain.seen_ip(ip):
+      plog('INFO', 'Ssl connection to new ip '+ip+" for "+ssl_domain.domain)
+      (code, raw_cert, exc) = self.ssl_request(ip)
+      if not raw_cert:
+        plog('WARN', 'Error getting the correct cert for '+ssl_domain.domain+":"+ip+" "+str(code)+"("+str(exc)+")")
+        continue
+      try:
+        ssl_domain.add_cert(ip,
+             crypto.dump_certificate(crypto.FILETYPE_PEM, raw_cert))
+        changed = True # Always save new copy.
+      except Exception, e:
+        traceback.print_exc()
+        plog('WARN', 'Error dumping cert for '+ssl_domain.domain+":"+ip+" E:"+str(e))
+    return changed
+
+  def check_openssl(self, address):
+    ''' check whether an https connection to a given address is molested '''
+    plog('INFO', 'Conducting an ssl test with destination ' + address)
+
+    # an address representation acceptable for a filename 
+    address_file = DataHandler.safeFilename(address[8:])
+    ssl_file_name = ssl_certs_dir + address_file + '.ssl'
+
+    # load the original cert and compare
+    # if we don't have the original cert yet, get it
+    try:
+      ssl_domain = SnakePickler.load(ssl_file_name)
+    except IOError:
+      ssl_domain = SSLDomain(address)
+
+    check_ips = []
+    # Make 3 resolution attempts
+    for attempt in xrange(1,4):
+      try:
+        resolved = []
+        resolved = socket.getaddrinfo(address, 443)
+        break
+      except socket.gaierror:
+        plog("NOTICE", "Local resolution failure #"+str(attempt)+" for "+address)
+       
+    for res in resolved:
+      if res[0] == socket.AF_INET and res[2] == socket.IPPROTO_TCP:
+        check_ips.append(res[4][0])
+
+    if not check_ips:
+      plog("WARN", "Local resolution failure for "+address)
+      self.remove_target(address, INCONCLUSIVE_NOLOCALCONTENT)
+      return TEST_INCONCLUSIVE
+
+    if self._update_cert_list(ssl_domain, check_ips):
+      SnakePickler.dump(ssl_domain, ssl_file_name)
+
+    if not ssl_domain.cert_map:
+      plog('WARN', 'Error getting the correct cert for ' + address)
+      self.remove_target(address, INCONCLUSIVE_NOLOCALCONTENT)
+      return TEST_INCONCLUSIVE
+
+    if ssl_domain.cert_changed:
+      ssl_domain = SSLDomain(address)
+      plog('INFO', 'Fetching all new certs for '+address)
+      if self._update_cert_list(ssl_domain, check_ips):
+        SnakePickler.dump(ssl_domain, ssl_file_name)
+      if ssl_domain.cert_changed:
+        plog("NOTICE", "Fully dynamic certificate host "+address)
+
+        result = SSLTestResult("NoExit", "NotStored!", address, ssl_file_name, 
+                               TEST_INCONCLUSIVE,
+                               INCONCLUSIVE_DYNAMICSSL)
+        if self.rescan_nodes: result.from_rescan = True
+        datahandler.saveResult(result)
+        self.results.append(result)
+        self.remove_target(address, FALSEPOSITIVE_DYNAMIC)
+        return TEST_INCONCLUSIVE
+
+    if not ssl_domain.num_certs():
+        plog("NOTICE", "No non-tor certs available for "+address)
+        result = SSLTestResult("NoExit", "NoStored!", address, ssl_file_name, 
+                               TEST_INCONCLUSIVE,
+                               INCONCLUSIVE_NOLOCALCONTENT)
+        if self.rescan_nodes: result.from_rescan = True
+        datahandler.saveResult(result)
+        self.results.append(result)
+        self.remove_target(address, FALSEPOSITIVE_DEADSITE)
+        return TEST_INCONCLUSIVE
+
+    # get the cert via tor
+    defaultsocket = socket.socket
+    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
+    socket.socket = socks.socksocket
+
+    (code, cert, exc) = self.ssl_request(address)
+
+    # reset the connection method back to direct
+    socket.socket = defaultsocket
+
+    exit_node = scanhdlr.get_exit_node()
+    if not exit_node or exit_node == '0':
+      plog('NOTICE', 'We had no exit node to test, skipping to the next test.')
+      result = SSLTestResult(None, 
+                              address, ssl_file_name, TEST_INCONCLUSIVE,
+                              INCONCLUSIVE_NOEXIT)
+      if self.rescan_nodes: result.from_rescan = True
+      self.results.append(result)
+      datahandler.saveResult(result)
+      return TEST_INCONCLUSIVE
+
+    if not cert:
+      if code < 0 and type(code) == float:
+        if code == -1: # "General socks error"
+          fail_reason = FAILURE_CONNERROR
+        elif code == -2: # "connection not allowed" aka ExitPolicy
+          fail_reason = FAILURE_EXITPOLICY
+        elif code == -3: # "Net Unreach" ??
+          fail_reason = FAILURE_NETUNREACH
+        elif code == -4: # "Host Unreach" aka RESOLVEFAILED
+          fail_reason = FAILURE_HOSTUNREACH
+          result = SSLTestResult(self.node_map[exit_node[1:]], address,
+                                ssl_file_name, TEST_FAILURE, fail_reason)
+          return self.register_dns_failure(result)
+        elif code == -5: # Connection refused
+          fail_reason = FAILURE_CONNREFUSED
+          result = SSLTestResult(self.node_map[exit_node[1:]],
+                       address, ssl_file_name, TEST_FAILURE, fail_reason)
+          self.extra_info=exc
+          self.register_exit_failure(result)
+          return TEST_FAILURE
+        elif code == -6: # timeout
+          fail_reason = FAILURE_TIMEOUT
+          result = SSLTestResult(self.node_map[exit_node[1:]], address,
+                                ssl_file_name, TEST_FAILURE, fail_reason)
+          return self.register_timeout_failure(result)
+        elif code == -13:
+          fail_reason = FAILURE_NOEXITCONTENT # shouldn't happen here
+          result = SSLTestResult(self.node_map[exit_node[1:]],
+                       address, ssl_file_name, TEST_FAILURE, fail_reason)
+          self.extra_info=exc
+          self.register_exit_failure(result)
+          return TEST_FAILURE
+        elif code == -23: 
+          fail_reason = FAILURE_CRYPTOERROR
+          result = SSLTestResult(self.node_map[exit_node[1:]],
+                       address, ssl_file_name, TEST_FAILURE, fail_reason)
+          self.extra_info=exc
+          self.register_exit_failure(result)
+          return TEST_FAILURE
+        else:
+          fail_reason = FAILURE_MISCEXCEPTION
+      else:
+          fail_reason = FAILURE_MISCEXCEPTION
+
+      result = SSLTestResult(self.node_map[exit_node[1:]], 
+                             address, ssl_file_name, TEST_FAILURE, fail_reason) 
+      result.extra_info = exc
+      self.register_connect_failure(result)
+      return TEST_FAILURE
+
+    try:
+      # get an easily comparable representation of the certs
+      cert_pem = crypto.dump_certificate(crypto.FILETYPE_PEM, cert)
+    except OpenSSL.crypto.Error, e:
+      result = SSLTestResult(self.node_map[exit_node[1:]],
+                   address, ssl_file_name, TEST_FAILURE, FAILURE_CRYPTOERROR)
+      self.extra_info=e.__class__.__name__+str(e)
+      self.register_exit_failure(result)
+      return TEST_FAILURE
+
+    # if certs match, everything is ok
+    if ssl_domain.seen_cert(cert_pem):
+      result = SSLTestResult(self.node_map[exit_node[1:]], 
+                             address, ssl_file_name, TEST_SUCCESS)
+      self.register_success(result)
+      return TEST_SUCCESS
+
+    # False positive case.. Can't help it if the cert rotates AND we have a
+    # failure... Need to prune all results for this cert and give up.
+    if ssl_domain.cert_rotates:
+      result = SSLTestResult(self.node_map[exit_node[1:]], 
+                             address, ssl_file_name, TEST_FAILURE, 
+                             FAILURE_DYNAMIC, self.get_resolved_ip(address), 
+                             cert_pem)
+      self.register_dynamic_failure(result)
+      return TEST_FAILURE
+
+    # if the certs don't match, the exit node has been messing with the cert
+    result = SSLTestResult(self.node_map[exit_node[1:]], 
+                           address, ssl_file_name, TEST_FAILURE,
+                           FAILURE_EXITONLY, self.get_resolved_ip(address), 
+                           cert_pem)
+    self.register_exit_failure(result)
+    return TEST_FAILURE
+
+class POP3STest(Test):
+  def __init__(self):
+    Test.__init__(self, "POP3S", 110)
+
+  def run_test(self):
+    self.tests_run += 1
+    return self.check_pop(random.choice(self.targets))
+
+  def get_targets(self):
+    return [] 
+
+  def check_pop(self, address, port=''):
+    ''' 
+    check whether a pop + tls connection to a given address is molested 
+    it is assumed that the server reads/sends messages compliant with RFC 1939 & RFC 2449
+    '''
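+    # Sketch of the exchange driven below (only the commands are sent verbatim;
+    # the server reply wording shown here is assumed):
+    #   C: CAPA   S: +OK ... STLS ... "."
+    #   C: STLS   S: +OK
+    #   <TLS handshake>  then  C: CAPA   S: +OK ...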
+
+    plog('INFO', 'Conducting a pop test with destination ' + address)
+
+    if not port:
+      port = 110
+
+    defaultsocket = socket.socket
+    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
+    socket.socket = socks.socksocket
+
+    capabilities_ok = False
+    starttls_present = False
+    tls_started = None
+    tls_succeeded = None
+
+    try:
+      pop = Client(address, port)
+    
+      # read the server greeting
+      server_greeting = pop.readline()
+
+      # get the server capabilities
+      pop.writeline('CAPA')
+      capabilities = ''
+      while 1:
+        curr = pop.readline()
+        if '+OK' in curr:
+          capabilities_ok = True
+        elif curr == '.':
+          break
+        elif 'STLS' in curr:
+          starttls_present = True
+      
+      if not capabilities_ok:
+        return TEST_INCONCLUSIVE
+
+      # try to start tls negotiation
+      if starttls_present:
+        pop.writeline('STLS')
+        starttls_response = pop.readline()
+        tls_started = '+OK' in starttls_response
+
+      # negotiate TLS and issue some request to feel good about it
+      # TODO check certs? 
+      ctx = SSL.Context(SSL.SSLv23_METHOD)
+      c = SSL.Connection(ctx, pop.sock)
+      c.set_connect_state()
+      c.do_handshake()
+      c.send('CAPA' + linebreak)
+      
+      while tls_succeeded == None:
+        line = ''
+        char = None
+        while char != '\n':
+          char = c.read(1)
+          if not char:
+            break
+          elif char == '.':
+            tls_succeeded = False
+          line += char
+
+        if '-ERR' in line:
+          tls_succeeded = False
+        elif '+OK' in line:
+          tls_succeeded = True
+        elif not line:
+          tls_succeeded = False
+
+    except socket.error, e: 
+      plog('WARN', 'Connection to ' + address + ':' + str(port) + ' refused')
+      plog('WARN', e)
+      socket.socket = defaultsocket
+      return TEST_INCONCLUSIVE
+    except OpenSSL.SSL.SysCallError, e:
+      plog('WARN', 'Error while negotiating an SSL connection to ' + address + ':' + str(port))
+      plog('WARN', e)
+      socket.socket = defaultsocket
+      return TEST_INCONCLUSIVE
+
+    # reset the connection to default
+    socket.socket = defaultsocket
+
+    # check whether the test was valid at all
+    exit_node = scanhdlr.get_exit_node()
+    if exit_node == 0 or exit_node == '0':
+      plog('INFO', 'We had no exit node to test, skipping to the next test.')
+      return TEST_INCONCLUSIVE
+
+    # do the same for the direct connection
+
+    capabilities_ok_d = False
+    starttls_present_d = False
+    tls_started_d = None
+    tls_succeeded_d = None
+
+    try:
+      pop = Client(address, port)
+    
+      # read the server greeting
+      server_greeting = pop.readline()
+
+      # get the server capabilities
+      pop.writeline('CAPA')
+      capabilities = ''
+      while 1:
+        curr = pop.readline()
+        if '+OK' in curr:
+          capabilities_ok_d = True
+        elif curr == '.':
+          break
+        elif 'STLS' in curr:
+          starttls_present_d = True
+      
+      if not capabilities_ok_d:
+        return TEST_INCONCLUSIVE
+
+      # try to start tls negotiation
+      if starttls_present_d:
+        pop.writeline('STLS')
+        starttls_response_d = pop.readline()
+        tls_started_d = '+OK' in starttls_response_d
+
+      # negotiate TLS, issue some request to feel good about it
+      ctx = SSL.Context(SSL.SSLv23_METHOD)
+      c = SSL.Connection(ctx, pop.sock)
+      c.set_connect_state()
+      c.do_handshake()
+      c.send('CAPA' + linebreak)
+      
+      while tls_succeeded_d == None:
+        line = ''
+        char = None
+        while char != '\n':
+          char = c.read(1)
+          if not char:
+            break
+          elif char == '.':
+            tls_succeeded_d = False
+          line += char
+
+        if '-ERR' in line:
+          tls_succeeded_d = False
+        elif '+OK' in line:
+          tls_succeeded_d = True
+        elif not line:
+          tls_succeeded_d = False
+
+    except socket.error, e: 
+      plog('WARN', 'Connection to ' + address + ':' + str(port) + ' refused')
+      plog('WARN', e)
+      socket.socket = defaultsocket
+      return TEST_INCONCLUSIVE
+    except OpenSSL.SSL.SysCallError, e:
+      plog('WARN', 'Error while negotiating an SSL connection to ' + address + ':' + str(port))
+      plog('WARN', e)
+      socket.socket = defaultsocket
+      return TEST_INCONCLUSIVE
+
+    # compare
+    if (capabilities_ok != capabilities_ok_d or starttls_present != starttls_present_d or 
+        tls_started != tls_started_d or tls_succeeded != tls_succeeded_d):
+      result = POPTestResult(self.node_map[exit_node[1:]], address, TEST_FAILURE)
+      datahandler.saveResult(result)
+      return TEST_FAILURE
+    
+    result = POPTestResult(self.node_map[exit_node[1:]], address, TEST_SUCCESS)
+    datahandler.saveResult(result)
+    return TEST_SUCCESS
+
+class SMTPSTest(Test):
+  def __init__(self):
+    Test.__init__(self, "SMTPS", 587)
+
+  def run_test(self):
+    self.tests_run += 1
+    return self.check_smtp(*random.choice(self.targets)) # targets are (host, port) tuples
+
+  def get_targets(self):
+    return [('smtp.gmail.com','587')]
+
+  def check_smtp(self, address, port=''):
+    ''' 
+    check whether an smtp + tls connection to a given address is molested
+    this is done by going through the STARTTLS sequence and comparing server
+    responses for the direct and tor connections
+    '''
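+    # The sequence compared below, once via Tor and once directly (the reply
+    # codes are what the code checks; everything else here is illustrative):
+    #   EHLO      -> 250   (ehlo1_reply)
+    #   STARTTLS  -> TLS handshake
+    #   EHLO      -> 250   (ehlo2_reply)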
+
+    plog('INFO', 'Conducting an smtp test with destination ' + address)
+
+    defaultsocket = socket.socket
+    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
+    socket.socket = socks.socksocket
+
+    ehlo1_reply = 0
+    has_starttls = 0
+    ehlo2_reply = 0
+
+    try:
+      s = smtplib.SMTP(address, port)
+      ehlo1_reply = s.ehlo()[0]
+      if ehlo1_reply != 250:
+        raise smtplib.SMTPException('First ehlo failed')
+      has_starttls = s.has_extn('starttls')
+      if not has_starttls:
+        raise smtplib.SMTPException('It seems the server doesn\'t support starttls')
+      s.starttls()
+      # TODO check certs?
+      ehlo2_reply = s.ehlo()[0]
+      if ehlo2_reply != 250:
+        raise smtplib.SMTPException('Second ehlo failed')
+    except socket.gaierror, e:
+      plog('WARN', 'A connection error occurred while testing smtp at ' + address)
+      plog('WARN', e)
+      socket.socket = defaultsocket
+      return TEST_INCONCLUSIVE
+    except smtplib.SMTPException, e:
+      plog('WARN','An error occurred while testing smtp at ' + address)
+      plog('WARN', e)
+      return TEST_INCONCLUSIVE
+    # reset the connection method back to direct
+    socket.socket = defaultsocket 
+
+    # check whether the test was valid at all
+    exit_node = scanhdlr.get_exit_node()
+    if exit_node == 0 or exit_node == '0':
+      plog('INFO', 'We had no exit node to test, skipping to the next test.')
+      return TEST_INCONCLUSIVE
+
+    # now directly
+
+    ehlo1_reply_d = 0
+    has_starttls_d = 0
+    ehlo2_reply_d = 0
+
+    try:
+      s = smtplib.SMTP(address, port)
+      ehlo1_reply_d = s.ehlo()[0]
+      if ehlo1_reply_d != 250:
+        raise smtplib.SMTPException('First ehlo failed')
+      has_starttls_d = s.has_extn('starttls')
+      if not has_starttls_d:
+        raise smtplib.SMTPException('It seems that the server doesn\'t support starttls')
+      s.starttls()
+      ehlo2_reply_d = s.ehlo()[0]
+      if ehlo2_reply_d != 250:
+        raise smtplib.SMTPException('Second ehlo failed')
+    except socket.gaierror, e:
+      plog('WARN', 'A connection error occurred while testing smtp at ' + address)
+      plog('WARN', e)
+      socket.socket = defaultsocket
+      return TEST_INCONCLUSIVE
+    except smtplib.SMTPException, e:
+      plog('WARN', 'An error occurred while testing smtp at ' + address)
+      plog('WARN', e)
+      return TEST_INCONCLUSIVE
+
+    print ehlo1_reply, ehlo1_reply_d, has_starttls, has_starttls_d, ehlo2_reply, ehlo2_reply_d
+
+    # compare
+    if ehlo1_reply != ehlo1_reply_d or has_starttls != has_starttls_d or ehlo2_reply != ehlo2_reply_d:
+      result = SMTPTestResult(self.node_map[exit_node[1:]], address, TEST_FAILURE)
+      datahandler.saveResult(result)
+      return TEST_FAILURE
+
+    result = SMTPTestResult(self.node_map[exit_node[1:]], address, TEST_SUCCESS)
+    datahandler.saveResult(result)
+    return TEST_SUCCESS
+
+
+class IMAPSTest(Test):
+  def __init__(self):
+    Test.__init__(self, "IMAPS", 143)
+
+  def run_test(self):
+    self.tests_run += 1
+    return self.check_imap(random.choice(self.targets))
+
+  def get_targets(self):
+    return []
+
+  def check_imap(self, address, port=''):
+    ''' 
+    check whether an imap + tls connection to a given address is molested 
+    it is assumed that the server reads/sends messages compliant with RFC 3501
+    ''' 
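+    # Sketch of the exchange (tags a001-a003 match the commands sent below;
+    # server reply wording is assumed):
+    #   C: a001 CAPABILITY   S: * CAPABILITY ... STARTTLS / a001 OK
+    #   C: a002 STARTTLS     S: a002 OK, then TLS handshake
+    #   C: a003 CAPABILITY   S: ... a003 OK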
+    plog('INFO', 'Conducting an imap test with destination ' + address)
+
+    if not port:
+      port = 143
+
+    defaultsocket = socket.socket
+    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
+    socket.socket = socks.socksocket
+    
+    capabilities_ok = None
+    starttls_present = None
+    tls_started = None
+    tls_succeeded = None
+
+    try:
+      imap = Client(address, port)
+
+      # read server greeting
+      server_greeting = imap.readline()
+
+      # get server capabilities
+      imap.writeline('a001 CAPABILITY')
+      capabilities = imap.readline() # first line - list of capabilities
+      capabilities_ok = 'OK' in imap.readline() # second line - the request status
+    
+      if not capabilities_ok:
+        return TEST_INCONCLUSIVE
+
+      # check if starttls is present
+      starttls_present = 'STARTTLS' in capabilities
+
+      if starttls_present:
+        imap.writeline('a002 STARTTLS')
+        tls_started = 'OK' in imap.readline()
+
+      # negotiate TLS, issue a request to feel good about it
+      # TODO: check the cert as well?
+      ctx = SSL.Context(SSL.SSLv23_METHOD)
+      c = SSL.Connection(ctx, imap.sock)
+      c.set_connect_state()
+      c.do_handshake()
+      c.send('a003 CAPABILITY' + linebreak)
+      
+      while tls_succeeded == None:
+        line = ''
+        char = None
+        while char != '\n':
+          char = c.read(1)
+          if not char:
+            break
+          line += char
+
+        if 'Error' in line or 'error' in line:
+          tls_succeeded = False
+        elif 'OK' in line:
+          tls_succeeded = True
+        elif not line:
+          tls_succeeded = False
+  
+    except socket.error, e: 
+      plog('WARN', 'Connection to ' + address + ':' + str(port) + ' refused')
+      plog('WARN', e)
+      socket.socket = defaultsocket
+      return TEST_INCONCLUSIVE
+    except OpenSSL.SSL.SysCallError, e:
+      plog('WARN', 'Error while negotiating an SSL connection to ' + address + ':' + str(port))
+      plog('WARN', e)
+      socket.socket = defaultsocket
+      return TEST_INCONCLUSIVE
+    
+    socket.socket = defaultsocket 
+
+    # check whether the test was valid at all
+    exit_node = scanhdlr.get_exit_node()
+    if exit_node == 0 or exit_node == '0':
+      plog('NOTICE', 'We had no exit node to test, skipping to the next test.')
+      return TEST_INCONCLUSIVE
+
+    # do the same for the direct connection
+    capabilities_ok_d = None
+    starttls_present_d = None
+    tls_started_d = None
+    tls_succeeded_d = None
+
+    try:
+      imap = Client(address, port)
+
+      # read server greeting
+      server_greeting = imap.readline()
+
+      # get server capabilities
+      imap.writeline('a001 CAPABILITY')
+      capabilities = imap.readline() # first line - list of capabilities
+      capabilities_ok_d = 'OK' in imap.readline() # second line - the request status
+
+      if not capabilities_ok_d:
+        return TEST_INCONCLUSIVE
+
+      # check if starttls is present
+      starttls_present_d = 'STARTTLS' in capabilities
+
+      if starttls_present_d:
+        imap.writeline('a002 STARTTLS')
+        tls_started_d = 'OK' in imap.readline()
+
+      # negotiate TLS, issue some request to feel good about it
+      ctx = SSL.Context(SSL.SSLv23_METHOD)
+      c = SSL.Connection(ctx, imap.sock)
+      c.set_connect_state()
+      c.do_handshake()
+      c.send('a003 CAPABILITY' + linebreak)
+
+      while tls_succeeded_d == None:
+        line = ''
+        char = None
+        while char != '\n':
+          char = c.read(1)
+          if not char:
+            break
+          line += char
+
+        if 'Error' in line or 'error' in line:
+          tls_succeeded_d = False
+        elif 'OK' in line:
+          tls_succeeded_d = True
+        elif not line:
+          tls_succeeded_d = False
+
+    except socket.error, e: 
+      plog('WARN', 'Connection to ' + address + ':' + str(port) + ' refused')
+      plog('WARN', e)
+      socket.socket = defaultsocket
+      return TEST_INCONCLUSIVE
+    except OpenSSL.SSL.SysCallError, e:
+      plog('WARN', 'Error while negotiating an SSL connection to ' + address + ':' + str(port))
+      plog('WARN', e)
+      socket.socket = defaultsocket
+      return TEST_INCONCLUSIVE
+
+    # compare
+    if (capabilities_ok != capabilities_ok_d or starttls_present != starttls_present_d or 
+      tls_started != tls_started_d or tls_succeeded != tls_succeeded_d):
+      result = IMAPTestResult(self.node_map[exit_node[1:]], address, TEST_FAILURE)
+      datahandler.saveResult(result)
+      return TEST_FAILURE
+
+    result = IMAPTestResult(self.node_map[exit_node[1:]], address, TEST_SUCCESS)
+    datahandler.saveResult(result)
+    return TEST_SUCCESS
+
+class DNSTest(Test):
+  def check_dns(self, address):
+    ''' A basic comparison DNS test. Rather unreliable. '''
+    # TODO: Spawns a lot of false positives (e.g. it doesn't work for google.com).
+    # TODO: This should be done passive like the DNSRebind test (possibly as
+    # part of it)
+    plog('INFO', 'Conducting a basic dns test for destination ' + address)
+
+    ip = tor_resolve(address)
+
+    # check whether the test was valid at all
+    exit_node = scanhdlr.get_exit_node()
+    if exit_node == 0 or exit_node == '0':
+      plog('INFO', 'We had no exit node to test, skipping to the next test.')
+      return TEST_INCONCLUSIVE
+
+    ips_d = Set([])
+    try:
+      results = socket.getaddrinfo(address,None)
+      for result in results:
+        ips_d.add(result[4][0])
+    except socket.gaierror, e:
+      plog('WARN', 'An error occurred while performing a basic dns test')
+      plog('WARN', e)
+      return TEST_INCONCLUSIVE
+
+    if ip in ips_d:
+      result = DNSTestResult(self.node_map[exit_node[1:]], address, TEST_SUCCESS)
+      return TEST_SUCCESS
+    else:
+      plog('ERROR', 'The basic DNS test suspects ' + exit_node + ' to be malicious.')
+      result = DNSTestResult(self.node_map[exit_node[1:]], address, TEST_FAILURE)
+      return TEST_FAILURE
+
+class SSHTest(Test):
+  def check_openssh(self, address):
+    ''' check whether an openssh connection to a given address is molested '''
+    # TODO
+    #ssh = pyssh.Ssh('username', 'host', 22)
+    #ssh.set_sshpath(pyssh.SSH_PATH)
+    #response = self.ssh.sendcmd('ls')
+    #print response
+
+    return 0 
+
+
+# a simple interface to handle a socket connection
+class Client:
+  def __init__(self, host, port):
+    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    self.sock.connect((host, port))
+    self.buffer = self.sock.makefile('rb')
+
+  def writeline(self, line):
+    self.sock.send(line + linebreak)
+
+  def readline(self):
+    response = self.buffer.readline()
+    if not response:
+      raise EOFError
+    elif response[-2:] == linebreak:
+      response = response[:-2]
+    elif response[-1:] in linebreak:
+      response = response[:-1]
+    return response 
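+
+# Illustrative use of Client (hypothetical host; nothing here is executed):
+#   c = Client('pop.example.com', 110)
+#   greeting = c.readline()   # server banner with the trailing linebreak stripped
+#   c.writeline('CAPA')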
+
+class DNSRebindScanner(EventHandler):
+  ''' 
+  A tor control event handler extending TorCtl.EventHandler 
+  Monitors for REMAP events (see check_dns_rebind())
+  '''
+  def __init__(self, mt, c):
+    EventHandler.__init__(self)
+    self.__mt = mt
+    c.set_event_handler(self)
+    c.set_events([TorCtl.EVENT_TYPE.STREAM], True)
+    self.c=c
+
+  def stream_status_event(self, event):
+    if event.status == 'REMAP':
+      octets = map(lambda x: int2bin(x).zfill(8), event.target_host.split('.'))
+      ipbin = ''.join(octets)
+      for network in ipv4_nonpublic:
+        if ipbin[:len(network)] == network:
+          handler = DataHandler()
+          node = self.__mt.get_exit_node()
+          plog("ERROR", "DNS Rebeind failure via "+node)
+
+          result = DNSRebindTestResult(self.__mt.node_manager.idhex_to_r(node), 
+                                       '', TEST_FAILURE)
+          handler.saveResult(result)
+    # TODO: This is currently handled via socks error codes,
+    # but stream events would give us more info...
+    #elif event.status == "FAILED" or event.status == "CLOSED":
+       # check remote_reason == "RESOLVEFAILED"
+       # getinfo.circuit_status()
+       # TODO: Check what we do in these detached cases..
+       #scanhdlr.name_to_idhex(exit)
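+
+  # Illustrative REMAP check (a sketch of what stream_status_event does above,
+  # assuming ipv4_nonpublic holds binary prefix strings):
+  #   '192.168.0.1' -> '11000000101010000000000000000001', which matches a
+  #   prefix such as '1100000010101000' (192.168/16) and is recorded as a rebind.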
+
+# some helpful methods
+
+def load_wordlist(file):
+  ''' load a list of strings from a file (which contains words separated by newlines) '''
+  plog('INFO', 'Loading the wordlist')
+  
+  wordlist = []
+  fh = None
+  try:
+    fh = open(file, 'r')
+  except IOError, e:
+    plog('ERROR', 'Reading the wordlist file failed.')
+    plog('ERROR', e)
+    return wordlist # avoid crashing on fh=None in the finally block below
+  
+  try:
+    for line in fh:
+      wordlist.append(line[:-1]) # get rid of the linebreaks
+  finally:
+    fh.close()
+
+  return wordlist
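+
+# Illustrative call (the wordlist.txt shipped alongside soat.py is one such file):
+#   wordlist = load_wordlist('./wordlist.txt')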
+
+
+def decompress_response_data(response):
+  encoding = None
+
+  # a response to an httplib.HTTPRequest
+  if (response.__class__.__name__ == "HTTPResponse"):
+    encoding = response.getheader("Content-Encoding")
+  # a response to urllib2.urlopen()
+  elif (response.__class__.__name__ == "addinfourl"):
+    encoding = response.info().get("Content-Encoding")
+
+  tot_len = response.info().get("Content-Length")
+  if not tot_len:
+    tot_len = "0"
+
+  start = 0
+  data = ""
+  while True:
+    data_read = response.read(500) # Cells are 495 bytes..
+    if not start: start = time.time()
+    # TODO: if this doesn't work, check stream observer for 
+    # lack of progress.. or for a sign we should read..
+    len_read = len(data)
+    now = time.time()
+
+    plog("DEBUG", "Read "+str(len_read)+"/"+str(tot_len))
+    # Wait 5 seconds before counting data
+    if (now-start) > 5 and len_read/(now-start) < min_rate:
+      plog("WARN", "Minimum xfer rate not maintained. Aborting xfer")
+      return ""
+      
+    if not data_read:
+      break
+    data += data_read 
+ 
+  if encoding == 'gzip' or encoding == 'x-gzip':
+    return gzip.GzipFile('', 'rb', 9, StringIO.StringIO(data)).read()
+  elif encoding == 'deflate':
+    return StringIO.StringIO(zlib.decompress(data)).read()
+  else:
+    return data
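+
+# Illustrative call (a sketch; 'req' would be a urllib2.Request built elsewhere):
+#   body = decompress_response_data(urllib2.urlopen(req))
+# An empty return value means the transfer fell below min_rate and was aborted.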
+
+def tor_resolve(address):
+  ''' performs a DNS query explicitly via tor '''
+  return commands.getoutput("tor-resolve " + address)
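+
+# Illustrative: tor_resolve('example.com') shells out to the tor-resolve binary
+# and returns whatever it prints, e.g. '192.0.2.1' (placeholder value).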
+
+def int2bin(n):
+  '''
+  simple decimal -> binary conversion, needed for comparing IP addresses 
+  '''
+  n = int(n)
+  if n < 0:
+    raise ValueError, "Negative values are not accepted."
+  elif n == 0:
+    return '0'
+  else:
+    bin = ''
+    while n > 0:
+      bin += str(n % 2)
+      n = n >> 1
+    return bin[::-1]
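+
+# Illustrative: int2bin(10) == '1010'; callers zfill(8) each octet, so
+# '10.0.0.1' maps to '00001010000000000000000000000001'.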
+
+
+class NoURLsFound(Exception):
+  pass
+
+
+def cleanup(c, f):
+  plog("INFO", "Resetting __LeaveStreamsUnattached=0 and FetchUselessDescriptors="+f)
+  try:
+    c.set_option("__LeaveStreamsUnattached", "0")
+    c.set_option("FetchUselessDescriptors", f)
+  except TorCtl.TorCtlClosed:
+    pass
+
+def setup_handler(out_dir, cookie_file):
+  plog('INFO', 'Connecting to Tor at '+TorUtil.control_host+":"+str(TorUtil.control_port))
+  s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+  s.connect((TorUtil.control_host,TorUtil.control_port))
+  c = PathSupport.Connection(s)
+  c.debug(file(out_dir+"/control.log", "w", buffering=0))
+  c.authenticate_cookie(file(cookie_file, "r"))
+  #f = c.get_option("__LeaveStreamsUnattached")[0]
+  h = ExitScanHandler(c, __selmgr)
+
+  c.set_event_handler(h)
+  #c.set_periodic_timer(2.0, "PULSE")
+
+  c.set_events([TorCtl.EVENT_TYPE.STREAM,
+          TorCtl.EVENT_TYPE.BW,
+          TorCtl.EVENT_TYPE.NEWCONSENSUS,
+          TorCtl.EVENT_TYPE.NEWDESC,
+          TorCtl.EVENT_TYPE.CIRC,
+          TorCtl.EVENT_TYPE.STREAM_BW], True)
+
+  c.set_option("__LeaveStreamsUnattached", "1")
+  f = c.get_option("FetchUselessDescriptors")[0][1]
+  c.set_option("FetchUselessDescriptors", "1")
+  atexit.register(cleanup, *(c, f))
+  return (c,h)
+
+
+# main logic
+def main(argv):
+  # make sure we have something to test for
+  if len(argv) < 2:
+    print ''
+    print 'Please provide at least one test option:'
+    print '--pernode <n>'
+    print '--resume [<n>]'
+    print '--rescan [<n>]'
+    print '--ssl'
+    print '--http'
+    print '--html'
+#    print '--ssh (doesn\'t work yet)'
+#    print '--smtp (~works)'
+#    print '--pop (~works)'
+#    print '--imap (~works)'
+    print '--dnsrebind (use with one or more of above tests)'
+    print '--policies'
+    print '--exit <exit>'
+    print ''
+    return
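+
+  # Illustrative invocations (flags as listed above; the script path is assumed):
+  #   ./soat.py --pernode 5 --ssl --http --html --dnsrebind
+  #   ./soat.py --resume --html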
+
+  opts = ['ssl','rescan', 'pernode=', 'resume', 'html','http','ssh','smtp','pop','imap','dns','dnsrebind','policies','exit=']
+  flags, trailer = getopt.getopt(argv[1:], [], opts)
+  
+  # get specific test types
+  do_resume = False
+  do_rescan = ('--rescan','') in flags
+  do_ssl = ('--ssl','') in flags
+  do_http = ('--http','') in flags
+  do_html = ('--html','') in flags
+  #do_ssh = ('--ssh','') in flags
+  #do_smtp = ('--smtp','') in flags
+  #do_pop = ('--pop','') in flags
+  #do_imap = ('--imap','') in flags
+  do_dns_rebind = ('--dnsrebind','') in flags
+  do_consistency = ('--policies','') in flags
+
+  scan_exit=None
+  for flag in flags:
+    if flag[0] == "--exit":
+      scan_exit = flag[1]
+    if flag[0] == "--pernode":
+      global num_tests_per_node
+      num_tests_per_node = int(flag[1])
+    if flag[0] == "--rescan" and flag[1]:
+      global num_rescan_tests_per_node
+      num_rescan_tests_per_node = int(flag[1])
+    if flag[0] == "--resume":
+      do_resume = True
+      if flag[1]:
+        resume_run=int(flag[1])
+      else:
+        resume_run=-1
+
+  # Make logs go to disk so resumes are less painful
+  #TorUtil.logfile = open(log_file_name, "a")
+
+  # initiate the connection to tor
+  try:
+    global scanhdlr
+    # XXX: sync with tor somehow..
+    (c,scanhdlr) = setup_handler(out_dir, tor_dir+"/control_auth_cookie")
+  except Exception, e:
+    traceback.print_exc()
+    plog("WARN", "Can't connect to Tor: "+str(e))
+
+  global datahandler
+  datahandler = DataHandler()
+
+  # initiate the passive dns rebind attack monitor
+  if do_dns_rebind:
+    scanhdlr.check_dns_rebind()
+
+  # check for sketchy exit policies
+  if do_consistency:
+    scanhdlr.check_all_exits_port_consistency()
+
+  # maybe only the consistency test was required
+  if not (do_ssl or do_html or do_http):
+    plog('INFO', 'Done.')
+    return
+
+  # Load the cookie jar
+  global search_cookies
+  search_cookies = cookielib.LWPCookieJar()
+  if os.path.isfile(search_cookie_file):
+    search_cookies.load(search_cookie_file, ignore_discard=True)
+  search_cookies.__filename = search_cookie_file
+
+  tests = {}
+
+  if do_resume:
+    plog("NOTICE", "Resuming previous SoaT run")
+    if do_ssl:
+      tests["SSL"] = datahandler.loadTest("SSLTest", resume_run)
+
+    if do_http:
+      tests["HTTP"] = datahandler.loadTest("HTTPTest", resume_run)
+
+    if do_html:
+      tests["HTML"] = datahandler.loadTest("HTMLTest", resume_run)
+  
+  else:
+    if do_ssl:
+      tests["SSL"] = SSLTest(ssl_wordlist_file)
+
+    if do_http:
+      tests["HTTP"] = HTTPTest(filetype_wordlist_file)
+
+    if do_html:
+      tests["HTML"] = HTMLTest(html_wordlist_file)
+
+
+  # maybe no tests could be initialized
+  if not tests:
+    plog('INFO', 'Done.')
+    sys.exit(0)
+
+  # Make sure refetch_ip is valid rather than exploding mid-test
+  global refetch_ip
+  BindingSocket.bind_to = refetch_ip
+  try:
+    socket.socket()
+  except socket.error:
+    plog("WARN", "Cannot bind to "+refetch_ip+". Ignoring refetch_ip setting.")
+    refetch_ip = None
+  BindingSocket.bind_to = None
+ 
+  if do_rescan:
+    plog("NOTICE", "Loading rescan.")
+    for test in tests.itervalues():
+      test.load_rescan(TEST_FAILURE)
+
+  if not do_resume:
+    for test in tests.itervalues():
+      test.rewind()
+ 
+  if scan_exit:
+    plog("NOTICE", "Scanning only "+scan_exit)
+    scanhdlr.set_exit_node(scan_exit)
+    scanhdlr.new_exit()
+
+    while 1:
+      for test in tests.values():
+        result = test.run_test()
+        plog("INFO", test.proto+" test via "+scan_exit+" has result "+str(result))
+
+  # start testing
+  while 1:
+    avail_tests = tests.values()
+    if scanhdlr.has_new_nodes():
+      plog("INFO", "Got signal for node update.")
+      for test in avail_tests:
+        test.update_nodes()
+      plog("INFO", "Node update complete.")
+
+    # Get as much mileage out of each exit as we safely can:
+    # Run a random subset of our tests in random order
+    n_tests = random.choice(xrange(1,len(avail_tests)+1))
+    
+    to_run = random.sample(avail_tests, n_tests)
+
+    common_nodes = None
+    # Do set intersection and reuse nodes for shared tests
+    for test in to_run:
+      if test.finished(): continue
+      if not common_nodes: common_nodes = copy.copy(test.nodes)
+      else: common_nodes &= test.nodes
+      scanhdlr._sanity_check(map(lambda id: test.node_map[id],
+                                             test.nodes))
+
+    if common_nodes:
+      current_exit_idhex = random.choice(list(common_nodes))
+      plog("DEBUG", "Chose to run "+str(n_tests)+" tests via "+current_exit_idhex+" (tests share "+str(len(common_nodes))+" exit nodes)")
+
+      scanhdlr.set_exit_node(current_exit_idhex)
+      scanhdlr.new_exit()
+      for test in to_run:
+        result = test.run_test()
+        if result != TEST_INCONCLUSIVE:
+          test.mark_chosen(current_exit_idhex, result)
+        datahandler.saveTest(test)
+        plog("INFO", test.proto+" test via "+current_exit_idhex+" has result "+str(result))
+        plog("INFO", test.proto+" attempts: "+str(test.tests_run)+".  Completed: "+str(test.total_nodes - test.scan_nodes)+"/"+str(test.total_nodes)+" ("+str(test.percent_complete())+"%)")
+    else:
+      plog("NOTICE", "No nodes in common between "+", ".join(map(lambda t: t.proto, to_run)))
+      for test in to_run:
+        if test.finished(): continue
+        current_exit = test.get_node()
+        scanhdlr.set_exit_node(current_exit.idhex)
+        scanhdlr.new_exit()
+        result = test.run_test()
+        if result != TEST_INCONCLUSIVE: 
+          test.mark_chosen(current_exit.idhex, result)
+        datahandler.saveTest(test)
+        plog("INFO", test.proto+" test via "+current_exit.idhex+" has result "+str(result))
+        plog("INFO", test.proto+" attempts: "+str(test.tests_run)+".  Completed: "+str(test.total_nodes - test.scan_nodes)+"/"+str(test.total_nodes)+" ("+str(test.percent_complete())+"%)")
+     
+    # Check each test for rewind 
+    for test in tests.itervalues():
+      if test.finished():
+        plog("NOTICE", test.proto+" test has finished all nodes.")
+        datahandler.saveTest(test)
+        test.remove_false_positives()
+        if not do_rescan and rescan_at_finish:
+          test.toggle_rescan()
+          test.rewind()
+        elif restart_at_finish:
+          test.rewind()
+    all_finished = True
+    for test in tests.itervalues():
+      if not test.finished():
+        all_finished = False
+    if all_finished:
+      plog("NOTICE", "All tests have finished. Exiting\n")
+      sys.exit(0)
+
+# initiate the program
+#
+if __name__ == '__main__':
+  try:
+    main(sys.argv)
+  except KeyboardInterrupt:
+    plog('INFO', "Ctrl + C was pressed. Exiting ... ")
+    traceback.print_exc()
+  except Exception, e:
+    plog('ERROR', "An unexpected error occured.")
+    traceback.print_exc()


Property changes on: torflow/trunk/NetworkScanners/ExitAuthority/soat.py
___________________________________________________________________
Added: svn:executable
   + *
Added: svn:mergeinfo
   + 

Copied: torflow/trunk/NetworkScanners/ExitAuthority/soatstats.py (from rev 20213, torflow/trunk/NetworkScanners/soatstats.py)
===================================================================
--- torflow/trunk/NetworkScanners/ExitAuthority/soatstats.py	                        (rev 0)
+++ torflow/trunk/NetworkScanners/ExitAuthority/soatstats.py	2009-08-13 19:34:12 UTC (rev 20279)
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+#
+# 2008 Aleksei Gorny, mentored by Mike Perry
+
+import dircache
+import operator
+import os
+import pickle
+import sys
+import time
+
+import sets
+from sets import Set
+
+import libsoat
+from libsoat import *
+
+sys.path.append("../")
+from TorCtl.TorUtil import *
+
+class ResultCount:
+  def __init__(self, type):
+    self.type = type
+    self.good = 0
+    self.bad = 0
+    self.inconclusive = 0
+
+class ResultNode:
+  def __init__(self, idhex):
+    self.total = ResultCount("All")
+    self.counts = {}
+    self.idhex = idhex 
+
+def main(argv):
+  dh = DataHandler()
+  data = dh.getAll()
+
+  reason_counts = {}
+  nodeResults = {}
+  tests = Set([])
+
+  total = len(data)
+
+  for result in data:
+    if result.exit_node in nodeResults:
+      rn = nodeResults[result.exit_node]
+    else:
+      rn = ResultNode(result.exit_node)
+      nodeResults[result.exit_node] = rn
+
+    tests.add(result.__class__.__name__) 
+    if result.__class__.__name__ not in rn.counts:
+      rn.counts[result.__class__.__name__] = ResultCount(result.__class__.__name__)
+
+    if result.status == TEST_SUCCESS:
+      rn.total.good += 1
+      rn.counts[result.__class__.__name__].good += 1
+    elif result.status == TEST_INCONCLUSIVE:
+      rn.total.inconclusive += 1
+      rn.counts[result.__class__.__name__].inconclusive += 1
+    elif result.status == TEST_FAILURE:
+      rn.total.bad += 1
+      rn.counts[result.__class__.__name__].bad += 1
+      if result.reason not in reason_counts:
+        reason_counts[result.reason] = 1
+      else:
+        reason_counts[result.reason] += 1
+    
+  # Sort by total counts, print out nodes with highest counts first
+  failed_nodes = nodeResults.values()
+  failed_nodes.sort(lambda x, y: cmp(y.total.bad, x.total.bad))
+
+  inconclusive_nodes = nodeResults.values()
+  inconclusive_nodes.sort(lambda x, y: cmp(y.total.inconclusive, x.total.inconclusive))
+
+  # Sort by individual test counts, print out nodes with highest counts first
+
+  failed_nodes_specific = {}
+  inconclusive_nodes_specific = {}
+  for test in tests:
+    tested = [node for node in nodeResults.values() if node.counts.get(test)]
+    failed_nodes_specific[test] = list(sorted(tested, lambda x, y: cmp(y.counts[test].bad, x.counts[test].bad)))
+    inconclusive_nodes_specific[test] = list(sorted(tested, lambda x, y: cmp(y.counts[test].inconclusive, x.counts[test].inconclusive)))
+
+  print "\nFailures"
+  for node in failed_nodes:
+    if node.total.bad != 0:
+      print `node.idhex` + "\t" + `node.total.bad`
+
+  #print "\nInconclusive test results"
+  #for node in inconclusive_nodes:
+  #  if node.total.inconclusive != 0:
+  #    print `node.idhex` + "\t" + `node.total.inconclusive`
+
+  for test in tests:
+    print "\n" + test[:(-6)] + " failures"
+    for node in failed_nodes_specific[test]:
+      if node.counts[test].bad != 0:
+        print `node.idhex` + "\t" + `node.counts[test].bad`
+
+  #for test in tests:
+  #  print "\n" + test[:(-6)] + " inconclusive results"
+  #  for node in inconclusive_nodes_specific[test]:
+  #    if node.counts[test].inconclusive != 0:
+  #      print `node.idhex` + "\t" + `node.counts[test].inconclusive`
+
+  print ""
+
+  reasons = sorted(reason_counts.iterkeys(), lambda x, y: cmp(reason_counts[x], reason_counts[y]))
+
+  for r in reasons:
+    print r+": "+str(reason_counts[r])
+
+if __name__ == "__main__":
+  main(sys.argv)


Property changes on: torflow/trunk/NetworkScanners/ExitAuthority/soatstats.py
___________________________________________________________________
Added: svn:executable
   + *
Added: svn:mergeinfo
   + 

Copied: torflow/trunk/NetworkScanners/ExitAuthority/wordlist.txt (from rev 20213, torflow/trunk/NetworkScanners/wordlist.txt)
===================================================================
--- torflow/trunk/NetworkScanners/ExitAuthority/wordlist.txt	                        (rev 0)
+++ torflow/trunk/NetworkScanners/ExitAuthority/wordlist.txt	2009-08-13 19:34:12 UTC (rev 20279)
@@ -0,0 +1,30 @@
+document
+important
+download
+setup
+install
+plugin
+file
+program
+run
+microsoft
+windows
+xp
+installer
+slides
+presentation
+paper
+browser
+winscp
+vidalia+bundle
+putty
+torpark
+firefox+setup
+mozilla
+privoxy
+privacy
+extension
+firefox+extension
+example
+sample
+censorship


Property changes on: torflow/trunk/NetworkScanners/ExitAuthority/wordlist.txt
___________________________________________________________________
Added: svn:mergeinfo
   + 

Deleted: torflow/trunk/NetworkScanners/README.ExitScanning
===================================================================
--- torflow/trunk/NetworkScanners/README.ExitScanning	2009-08-13 18:29:55 UTC (rev 20278)
+++ torflow/trunk/NetworkScanners/README.ExitScanning	2009-08-13 19:34:12 UTC (rev 20279)
@@ -1,209 +0,0 @@
-              How to run the Snakes on a Tor Exit Scanner
-
-
-
-I. Introduction
-
-The Snakes on a Tor Exit Scanner scans the Tor network for misbehaving
-and misconfigured exit nodes. It has several tests that it performs,
-including HTML, javascript, arbitrary HTTP, SSL and DNS scans. The
-mechanisms by which these scans operate will be covered in another
-document. This document concerns itself only with running the scanner.
-
-
-
-II. Prerequisites
-
-Python 2.4+
-Tor 0.2.1.13 (r18556 or later)
-Super Secret SoaT Sauce
-py-openssl/pyOpenSSL
-Bonus: Secondary external IP address
-
-Having a second external IP address will allow your scanner to filter
-out false positives for dynamic pages that arise due to pages encoding
-your IP address in documents.
-
-
-
-III. Setup
-
-A. Compiling Tor
-
-To run SoaT you will need Tor 0.2.1.13. SVN r18516 contains a timeout
-fix that is essential to scanning the network in any reasonable amount
-of time.
-
-It is also strongly recommended that you have a custom Tor instance that
-is devoted only to exit scanning, and is not performing any other
-function (including serving as a relay or a directory authority).
-
-
-B. Configuring SoaT
-
-To configure SoaT (and even to get it to run), you will need to obtain
-Super Secret SoaT Sauce from Mike Perry's Super Secret SoaT Sauce Stash.
-It contains the necessary pheromones you will need to enable you to
-properly hunt some motherfuckin snakes.
-
-Once you have the Sauce, you should copy it to soat_config.py and have a
-look at its contents. In particular, you'll want to change 'refetch_ip'
-to be set to your secondary IP address. If you don't have a secondary
-IP, set it to None.
-
-If you're feeling ambitious, you can edit soat_config.py to change the
-set of 'scan_filetypes' and increase 'max_content_size' to something
-large enough to support these filetypes. However, you should balance
-this with our more immediate need for the scanner to run quickly so that
-the code is exercised and can stabilize quickly.
-
-You'll also want to edit ./wordlist.txt and change its contents to be a
-smattering of random and/or commonly censored words. If you speak other
-languages (especially any that have unicode characters), using keywords
-from them would be especially useful for testing and scanning. Note that
-these queries WILL be issued in plaintext via non-Tor, and the resulting
-urls fetched via non-Tor as well, so bear that and your server's legal
-jurisdiction in mind when choosing keywords.
-
-You can also split the wordlist.txt file into three separate files by
-pointing the filetype, HTML, and SSL wordlist settings in
-soat_config.py at different files. This will allow you to use separate
-keywords for obtaining SSL, HTML, and Filetype urls, which can be
-useful if you believe an adversary is likely to target only certain
-keywords/concepts/sites in a particular context.
-
-You can edit the contents of the wordlist files while SoaT runs. It will
-pick up the changes after it completes a full network scan with the old 
-list.
-
-
-IV. Running Tor, The Metatroller, and SoaT
-
-Once you have everything compiled and configured, you should be ready to
-run the pieces. You probably want to do this as a separate, unprivileged
-user.
-
-First, start up your custom Tor with the sample torrc provided in the
-TorFlow svn root:
-
-# ~/src/tor-trunk/src/or/tor -f ~/src/torflow-trunk/torrc >& tor.log &
-
-Then, start up the Metatroller:
-
-# ~/src/torflow-trunk/metatroller.py >& mt.log &
-
-Finally, start up SoaT:
-
-# ./soat.py --ssl --html --http --dnsrebind >& soat.log &
-
-
-If your machine is shared, you probably want to set a control port 
-password. The location for this for metatroller and SoaT is the 
-control_pass variable at the top of torflow-trunk/TorCtl/TorUtil.py.
-Of course you also have to set it in the custom torrc as well.
-
-
-V. Monitoring and Results
-
-A. Watching for Captcha Problems
-
-You'll need to keep an eye on the beginning of the soat.log to make sure
-it is actually retrieving urls from Google. Google's servers can
-periodically decide that you are not worthy to query them, especially if
-you restart soat several times in a row. If this happens, open up
-soat_config.py and change the line:
-
-default_search_mode = google_search_mode
-
-to
-
-default_search_mode = yahoo_search_mode
-
-and remove the --ssl from the soat command line until Google decides it
-hates you a little less (this usually takes less than a day). The SSL
-scanner is hardcoded to use google_search_mode regardless of the
-default_search_mode because Yahoo's "inurl:" modifier does not apply to
-the scheme of the url, which we need in order to obtain fresh https
-urls.
-
-It is possible that changing default_search_mode to yahoo_search_mode
-BEFORE Google starts to hate you, while still using --ssl, will let
-you restart soat more times than with Google alone. However, if both
-Yahoo and Google begin to hate you, you won't be able to scan at all.
-
-
-B. Handling Crashes
-
-At this stage in the game, your primary task will be to periodically
-check the scanner for exceptions and hangs. For that you'll just want
-to tail the soat.log file to make sure it is putting out recent loglines
-and is continuing to run. If there are any issues, please mail me your
-soat.log.
-
-If/When SoaT crashes, you should be able to resume it exactly where it
-left off with:
-
-# ./soat.py --resume --ssl --html --http --dnsrebind >& soat.log &
-
-Keeping the same options during a --resume is a Really Good Idea.
-
-Soat actually saves a snapshot to a unique name each time you run it
-without --resume, so you can suspend and resume arbitrary runs by
-specifying their number:
-
-# ls ./data/soat/
-# ./soat.py --resume 2 --ssl --html --http --dnsrebind >& soat.log &
-
-
-C. Handling Results
-
-As things stabilize, you'll want to begin grepping your soat.log for
-ERROR lines. These indicate serious scanning errors and content
-modifications. There will likely be false positives at first, and these
-will require you to tar up your ./data directory and soat.log and send
-them to me so that I can improve the filters:
-
-# tar -jcf soat-data.tbz2 ./data/soat ./soat.log
-
-If you're feeling adventurous, you can inspect the results yourself by
-running snakeinspector.py. Running it with no arguments will dump all
-failures to your screen in a semi-human readable format. You can add a
---verbose to get unified diffs of content modifications, and you can
-filter on specific Test Result types with --resultfilter, and on
-specific exit idhexes with --exit. Ex:
-
-# ./snakeinspector.py --verbose --exit 80972D30FE33CB8AD60726C5272AFCEBB05CD6F7
-   --resultfilter SSLTestResult 
-
-or just:
-
-# ./snakeinspector.py | less
-
-At some point in the future, I hope to have a script prepared that will
-mail false positives and actual results to me when you run it. Later
-still, soat will automatically mail these results to an email list we
-are all subscribed to as they happen.
-
-
-D. Verifying Results
-
-If you would like to verify a set of results, you can use the --rescan
-option of soat, which crawls your data directory and creates a list of
-nodes to scan consisting only of prior failures, and then scans those with
-fresh URLs:
-
-# ./soat.py --rescan --ssl --html --http --dnsrebind >& soat.log &
-
-Rescans can also be resumed with --resume should they fail.
-
-SoaT can also do a rescan at the end of every loop through the node
-list. This is governed by the rescan_at_finish soat_config option.
-
-Note that rescanning does not prune out geolocated URLs that differ
-across the majority of exit nodes. It can thus cause many more false
-positives to accumulate than a regular scan.
-
-
-
-Alright that covers the basics. Let's get those motherfuckin snakes off
-this motherfuckin Tor!
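
For context on the configuration knobs the README above refers to by name,
here is a purely illustrative Python fragment in the spirit of soat_config.py;
the option names come from the README text, but every value shown is a made-up
example and the real config file defines many more settings:

# Hypothetical soat_config.py excerpt -- example values only.
refetch_ip = None            # or e.g. "203.0.113.5" if you have a second external IP
scan_filetypes = ["exe", "pdf", "doc"]   # example list; choose your own
max_content_size = 256*1024  # bytes; raise this if you add large filetypes
rescan_at_finish = True      # re-verify failures after each full pass
# default_search_mode is normally google_search_mode; the README above
# describes switching it to yahoo_search_mode when Google starts rejecting
# your queries.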

Deleted: torflow/trunk/NetworkScanners/libsoat.py
===================================================================
--- torflow/trunk/NetworkScanners/libsoat.py	2009-08-13 18:29:55 UTC (rev 20278)
+++ torflow/trunk/NetworkScanners/libsoat.py	2009-08-13 19:34:12 UTC (rev 20279)
@@ -1,1077 +0,0 @@
-#!/usr/bin/python
-#
-# Common code to soat
-
-import operator
-import os
-import pickle
-import sys
-import time
-import traceback
-import difflib
-import re
-import copy
-import socket
-import struct
-sys.path.append("./libs")
-from OpenSSL import crypto
-from BeautifulSoup.BeautifulSoup import Tag, SoupStrainer
-
-import sets
-from sets import Set
-from soat_config import *
-
-sys.path.append("../")
-from TorCtl.TorUtil import *
-
-# Antlr stuff
-sys.path.append("./libs/jsparser/")
-import antlr3
-from JavaScriptParser import tokenNames as JSTokenNames
-from JavaScriptLexer import JavaScriptLexer
-from JavaScriptParser import JavaScriptParser
-
-class LoggingJSParser(JavaScriptParser):
-  def __init__(self, tokens):
-    JavaScriptParser.__init__(self, tokens)
-    self.parse_errors__ = []
-  def displayRecognitionError(self, tokens, e):
-    self.parse_errors__.append(e)
-    JavaScriptParser.displayRecognitionError(self, tokens, e)
-class LoggingJSLexer(JavaScriptLexer):
-  def __init__(self, tokens):
-    JavaScriptLexer.__init__(self, tokens)
-    self.lex_errors__ = []
-  def displayRecognitionError(self, tokens, e):
-    self.lex_errors__.append(e)
-    JavaScriptLexer.displayRecognitionError(self, tokens, e)
-
-# constants
-
-TEST_SUCCESS = 0
-TEST_INCONCLUSIVE = 1
-TEST_FAILURE = 2
-
-# Sorry, we sort of rely on the ordinal nature of the above constants
-RESULT_STRINGS = {TEST_SUCCESS:"Success", TEST_INCONCLUSIVE:"Inconclusive", TEST_FAILURE:"Failure"}
-RESULT_CODES=dict([v,k] for k,v in RESULT_STRINGS.iteritems())
-
-# Inconclusive reasons
-INCONCLUSIVE_NOLOCALCONTENT = "InconclusiveNoLocalContent"
-INCONCLUSIVE_DYNAMICSSL = "InconclusiveDynamicSSL"
-INCONCLUSIVE_TORBREAKAGE = "InconclusiveTorBreakage"
-INCONCLUSIVE_NOEXIT = "InconclusiveNoExit"
-
-# Failed reasons
-FAILURE_EXITONLY = "FailureExitOnly"
-FAILURE_DYNAMIC = "FailureDynamic" 
-FAILURE_COOKIEMISMATCH = "FailureCookieMismatch"
-FAILURE_BADHTTPCODE = "FailureBadHTTPCode"
-FAILURE_NOEXITCONTENT = "FailureNoExitContent"
-FAILURE_EXITTRUNCATION = "FailureExitTruncation"
-FAILURE_SOCKSERROR = "FailureSocksError"
-FAILURE_HOSTUNREACH = "FailureHostUnreach" # aka DNS issue
-FAILURE_NETUNREACH = "FailureNetUnreach"
-FAILURE_EXITPOLICY = "FailureExitPolicy"
-FAILURE_CONNREFUSED = "FailureConnRefused"
-FAILURE_CONNERROR = "FailureConnError"
-FAILURE_URLERROR = "FailureURLError"
-FAILURE_CRYPTOERROR = "FailureCryptoError"
-FAILURE_TIMEOUT = "FailureTimeout"
-FAILURE_HEADERCHANGE = "FailureHeaderChange"
-FAILURE_MISCEXCEPTION = "FailureMiscException"
-
-# False positive reasons
-FALSEPOSITIVE_HTTPERRORS = "FalsePositiveHTTPErrors"
-FALSEPOSITIVE_DYNAMIC = "FalsePositiveDynamic"
-FALSEPOSITIVE_DYNAMIC_TOR = "FalsePositiveDynamicTor"
-FALSEPOSITIVE_DEADSITE = "FalsePositiveDeadSite"
-
-# classes to use with pickle to dump test results into files
-
-class TestResult(object):
-  ''' Parent class for all test result classes '''
-  def __init__(self, exit_obj, site, status, reason=None):
-    if exit_obj:
-      self.exit_node = exit_obj.idhex
-      self.exit_name = exit_obj.nickname
-      self.exit_ip = exit_obj.ip
-      self.contact = exit_obj.contact
-    else:
-      self.exit_node = "[No Exit Used]"
-      self.exit_name = ""
-      self.exit_ip = 0
-      self.contact = "[No Exit Used]"
-    self.exit_obj = exit_obj
-    self.site = site
-    self.timestamp = time.time()
-    self.status = status
-    self.reason = reason
-    self.extra_info = None
-    self.false_positive=False
-    self.false_positive_reason="None"
-    self.verbose=0
-    self.from_rescan = False
-    self.filename=None
-    self._pickle_revision = 5
-
-  def depickle_upgrade(self):
-    if not "_pickle_revision" in self.__dict__: # upgrade to v0
-      self._pickle_revision = 0
-    if self._pickle_revision < 1:
-      self._pickle_revision = 1
-    if self._pickle_revision < 2:
-      self._pickle_revision = 2
-      self.exit_name = "NameNotStored!"
-    if self._pickle_revision < 3:
-      self._pickle_revision = 3
-      self.exit_ip = "\x00\x00\x00\x00"
-      self.exit_obj = None
-    if self._pickle_revision < 4:
-      self._pickle_revision = 4
-      self.contact = None
-    if self._pickle_revision < 5:
-      self._pickle_revision = 5
-      if type(self.exit_ip) == str or not self.exit_ip: self.exit_ip = 0
-
-
-  def _rebase(self, filename, new_data_root):
-    if not filename: return filename
-    filename = os.path.normpath(filename)
-    split_file = filename.split("/")
-    return os.path.normpath(os.path.join(new_data_root, *split_file[1:]))
-
-  def rebase(self, new_data_root):
-    self.filename = self._rebase(self.filename, new_data_root)
- 
-  def mark_false_positive(self, reason):
-    self.false_positive=True
-    self.false_positive_reason=reason
-
-  def move_file(self, file, to_dir):
-    if not file: return None
-    try:
-      basename = os.path.basename(file)
-      new_file = to_dir+basename
-      if not os.path.exists(file) and os.path.exists(new_file):
-        return new_file # Already moved by another test (ex: content file)
-      os.rename(file, new_file)
-      return new_file
-    except Exception, e:
-      traceback.print_exc()
-      plog("WARN", "Error moving "+file+" to "+to_dir)
-      return file
-
-  def __str__(self):
-    ret = self.__class__.__name__+" for "+self.site+"\n"
-    ret += " Time: "+time.ctime(self.timestamp)+"\n"
-    ret += " Exit: "+socket.inet_ntoa(struct.pack(">I",self.exit_ip))+" "+self.exit_node+" ("+self.exit_name+")\n"
-    ret += " Contact: "+str(self.contact)+"\n"  
-    ret += " "+str(RESULT_STRINGS[self.status])
-    if self.reason:
-      ret += " Reason: "+self.reason
-    if self.extra_info:
-      ret += "\n Extra info: "+self.extra_info 
-    if self.false_positive:
-      ret += "\n Removed as False Positive: "+self.false_positive_reason
-    if self.from_rescan:
-      ret += "\n From rescan: "+str(self.from_rescan)
-    ret += "\n"
-    return ret
-
-class SSLTestResult(TestResult):
-  ''' Represents the result of an openssl test '''
-  def __init__(self, exit_obj, ssl_site, ssl_file, status, 
-               reason=None, exit_ip=None, exit_cert_pem=None):
-    super(SSLTestResult, self).__init__(exit_obj, ssl_site, status, reason)
-    self.ssl_file = ssl_file
-    self.exit_cert = exit_cert_pem # Meh, not that much space
-    self.exit_ip = exit_ip
-    self.proto = "ssl"
-
-  def rebase(self, new_data_root):
-    self.ssl_file = self._rebase(self.ssl_file, new_data_root)
-
-  def mark_false_positive(self, reason):
-    TestResult.mark_false_positive(self, reason)
-    self.ssl_file=self.move_file(self.ssl_file, ssl_falsepositive_dir)
-
-  def _dump_cert(self, cert):
-    ret = ""
-    x509 = crypto.load_certificate(crypto.FILETYPE_PEM, cert)
-    ret += "Issuer: "+str(x509.get_issuer())+"\n"
-    ret += "Subject: "+str(x509.get_subject())+"\n"
-    return ret
-
-  def __str__(self):
-    ret = TestResult.__str__(self)
-    ssl_domain = SnakePickler.load(self.ssl_file)
-    ret += " Rotates: "+str(ssl_domain.cert_rotates)
-    ret += " Changed: "+str(ssl_domain.cert_changed)+"\n" 
-    if self.verbose:
-      if self.exit_cert:
-        for cert in ssl_domain.cert_map.iterkeys():
-          ret += "\nCert for "+ssl_domain.cert_map[cert]+":\n"
-          if self.verbose > 1: ret += cert
-          ret += self._dump_cert(cert)
-        if self.exit_ip: 
-          ret += "\nExit node's cert for "+self.exit_ip+":\n"
-        else:
-          ret += "\nExit node's cert:\n"
-        if self.verbose > 1: ret += self.exit_cert
-        ret += self._dump_cert(self.exit_cert)
-    return ret 
-
-class SSLDomain:
-  def __init__(self, domain):
-    self.domain = domain
-    self.cert_map = {}
-    self.ip_map = {}
-    self.cert_rotates = False
-    self.cert_changed = False
-
-  def depickle_upgrade(self):
-    pass
-
-  def add_cert(self, ip, cert_string):
-    if ip in self.ip_map and self.ip_map[ip] != cert_string:
-      plog("NOTICE", self.domain+" has changed certs.")
-      self.cert_changed = True
-    if len(self.cert_map) and cert_string not in self.cert_map:
-      plog("NOTICE", self.domain+" is rotating certs.")
-      self.cert_rotates = True
-    self.cert_map[cert_string] = ip
-    self.ip_map[ip] = cert_string
-    
-  def seen_cert(self, cert_string):
-    return cert_string in self.cert_map
-
-  def seen_ip(self, ip):
-    return ip in self.ip_map
-
-  def num_certs(self):
-    return len(self.cert_map)
-
-class HttpTestResult(TestResult):
-  ''' Represents the result of a http test '''
-  def __init__(self, exit_obj, website, status, reason=None, 
-               sha1sum=None, exit_sha1sum=None, content=None, 
-               content_exit=None, content_old=None, sha1sum_old=None):
-    super(HttpTestResult, self).__init__(exit_obj, website, status, reason)
-    self.proto = "http"
-    self.sha1sum = sha1sum
-    self.sha1sum_old = sha1sum_old
-    self.exit_sha1sum = exit_sha1sum
-    self.content = content
-    self.content_exit = content_exit
-    self.content_old = content_old
-
-  def rebase(self, new_data_root):
-    self.content = self._rebase(self.content, new_data_root)
-    self.content_exit = self._rebase(self.content_exit, new_data_root)
-    self.content_old = self._rebase(self.content_old, new_data_root)
-
-  def mark_false_positive(self, reason):
-    TestResult.mark_false_positive(self, reason)
-    self.content=self.move_file(self.content, http_falsepositive_dir)
-    self.content_old=self.move_file(self.content_old, http_falsepositive_dir)
-    self.content_exit=self.move_file(self.content_exit,http_falsepositive_dir)
-
-  def remove_files(self):
-    try: os.unlink(self.content)
-    except: pass
-    try: os.unlink(self.content_old)
-    except: pass
-    try: os.unlink(self.content_exit)
-    except: pass
-
-  def __str__(self):
-    ret = TestResult.__str__(self)
-    if self.content:
-      ret += " "+self.content+" (SHA1: "+self.sha1sum+")\n"
-    if self.content_old:
-      ret += " "+self.content_old+" (SHA1: "+self.sha1sum_old+")\n"
-    if self.content_exit:
-      ret += " "+self.content_exit+" (SHA1: "+self.exit_sha1sum+")\n"
-    return ret
-
-class CookieTestResult(TestResult):
-  def __init__(self, exit_obj, status, reason, plain_cookies, 
-               tor_cookies):
-    super(CookieTestResult, self).__init__(exit_obj, "cookies", status)
-    self.proto = "http"
-    self.reason = reason
-    self.tor_cookies = tor_cookies
-    self.plain_cookies = plain_cookies
-
-  def __str__(self):
-    ret = TestResult.__str__(self)
-    ret += " Plain Cookies:"+self.plain_cookies
-    ret += " Tor Cookies:"+self.tor_cookies
-    return ret
-
-class JsTestResult(TestResult):
-  ''' Represents the result of a JS test '''
-  def __init__(self, exit_obj, website, status, reason=None, 
-               content=None, content_exit=None, content_old=None,
-               jsdiffer=None):
-    super(JsTestResult, self).__init__(exit_obj, website, status, reason)
-    self.proto = "http"
-    self.content = content
-    self.content_exit = content_exit
-    self.content_old = content_old
-    self.jsdiffer = jsdiffer
-
-  def depickle_upgrade(self):
-    if not "_pickle_revision" in self.__dict__ or self._pickle_revision < 1:
-      self.jsdiffer = None
-    TestResult.depickle_upgrade(self)
-
-  def rebase(self, new_data_root):
-    self.content = self._rebase(self.content, new_data_root)
-    self.content_exit = self._rebase(self.content_exit, new_data_root)
-    self.content_old = self._rebase(self.content_old, new_data_root)
-    self.jsdiffer = self._rebase(self.jsdiffer, new_data_root)
-
-  def mark_false_positive(self, reason):
-    TestResult.mark_false_positive(self, reason)
-    self.content=self.move_file(self.content, http_falsepositive_dir)
-    self.content_old=self.move_file(self.content_old, http_falsepositive_dir)
-    self.content_exit=self.move_file(self.content_exit,http_falsepositive_dir)
-    self.jsdiffer=self.move_file(self.jsdiffer,http_falsepositive_dir)
-
-  def remove_files(self):
-    try: os.unlink(self.content)
-    except: pass
-    try: os.unlink(self.content_old)
-    except: pass
-    try: os.unlink(self.content_exit)
-    except: pass
-
-  def __str__(self):
-    ret = TestResult.__str__(self)
-    if self.verbose:
-      if self.content and self.content_old:
-        diff = difflib.unified_diff(open(self.content).read().split("\n"),
-                             open(self.content_old).read().split("\n"), 
-                             "Non-Tor1", "Non-Tor2",
-                             lineterm="")
-        for line in diff:
-          ret+=line+"\n"
-      if self.content and self.content_exit:
-        diff = difflib.unified_diff(open(self.content).read().split("\n"),
-                             open(self.content_exit).read().split("\n"), 
-                              "Non-Tor", "Exit",
-                              lineterm="")
-        for line in diff:
-          ret+=line+"\n"
-    else:
-      if self.content:
-        ret += " "+self.content+"\n"
-      if self.content_old:
-        ret += " "+self.content_old+"\n"
-      if self.content_exit:
-        ret += " "+self.content_exit+"\n"
-    return ret
-
-class HtmlTestResult(TestResult):
-  ''' Represents the result of a http test '''
-  def __init__(self, exit_obj, website, status, reason=None, 
-               content=None, content_exit=None, content_old=None, 
-               soupdiffer=None, jsdiffer=None):
-    super(HtmlTestResult, self).__init__(exit_obj, website, status, reason)
-    self.proto = "http"
-    self.content = content
-    self.content_exit = content_exit
-    self.content_old = content_old
-    self.soupdiffer = soupdiffer
-    self.jsdiffer = jsdiffer
-
-  def depickle_upgrade(self):
-    if not "_pickle_revision" in self.__dict__ or self._pickle_revision < 1:
-      self.soupdiffer = None
-      self.jsdiffer = None
-    TestResult.depickle_upgrade(self)
-
-  def rebase(self, new_data_root):
-    self.content = self._rebase(self.content, new_data_root)
-    self.content_exit = self._rebase(self.content_exit, new_data_root)
-    self.content_old = self._rebase(self.content_old, new_data_root)
-    self.soupdiffer = self._rebase(self.soupdiffer, new_data_root)
-    self.jsdiffer = self._rebase(self.jsdiffer, new_data_root)
-
-  def mark_false_positive(self, reason):
-    TestResult.mark_false_positive(self, reason)
-    self.content=self.move_file(self.content,http_falsepositive_dir)
-    self.content_old=self.move_file(self.content_old, http_falsepositive_dir)
-    self.content_exit=self.move_file(self.content_exit,http_falsepositive_dir)
-    self.soupdiffer=self.move_file(self.soupdiffer,http_falsepositive_dir)
-    self.jsdiffer=self.move_file(self.jsdiffer,http_falsepositive_dir)
-
-  def remove_files(self):
-    try: os.unlink(self.content)
-    except: pass
-    try: os.unlink(self.content_old)
-    except: pass
-    try: os.unlink(self.content_exit)
-    except: pass
-
-  def __str__(self):
-    ret = TestResult.__str__(self)
-    if self.verbose:
-      soup = old_soup = tor_soup = None
-      if self.content:
-        content = open(self.content).read().decode('ascii', 'ignore')
-        soup = FullyStrainedSoup(content)
-
-      if self.content_old:
-        content_old = open(self.content_old).read().decode('ascii', 'ignore')
-        old_soup = FullyStrainedSoup(content_old)
-
-      if self.content_exit:
-        content_exit = open(self.content_exit).read().decode('ascii', 'ignore')
-        tor_soup = FullyStrainedSoup(content_exit)
-
-      if self.verbose > 1:
-        ret += " Content: "+str(self.content)+"\n"
-        ret += " Content old: "+str(self.content_old)+"\n"
-        ret += " Exit: "+str(self.content_exit)+"\n"
-
-        if self.content and self.content_old:
-          tags = map(str, soup.findAll())
-          old_tags = map(str, old_soup.findAll())
-          diff = difflib.unified_diff(old_tags, tags, "Non-Tor1", "Non-Tor2",
-                                      lineterm="")
-          for line in diff:
-            ret+=line+"\n"
-
-        if self.content and self.content_exit:
-          tags = map(str, soup.findAll())
-          tor_tags = map(str, tor_soup.findAll())
-          diff = difflib.unified_diff(tags, tor_tags, "Non-Tor", "Exit",
-                                      lineterm="")
-          for line in diff:
-            ret+=line+"\n"
-
-      if soup and tor_soup and old_soup:
-        if self.soupdiffer and os.path.exists(self.soupdiffer):
-          soupdiff = SnakePickler.load(self.soupdiffer)
-        else:
-          soupdiff = SoupDiffer(old_soup, soup)
-
-        more_tags = soupdiff.show_changed_tags(tor_soup)     
-        more_attrs = soupdiff.show_changed_attrs(tor_soup)
-        more_content = soupdiff.show_changed_content(tor_soup)
-
-        if more_tags:
-          ret += "\nTor changed tags:\n"
-          ret += more_tags
-        if more_attrs:
-          ret += "\nTor changed attrs:\n"
-          ret += more_attrs
-        if not soupdiff.content_changed and more_content:
-          ret += "\nChanged Content:\n"
-          ret += "\n".join(more_content)+"\n"
-        if (soupdiff.content_changed or not more_content) and not more_tags and not more_attrs:
-          ret += "\nSoupDiffer claims false positive.\n"
-          jsdiff = JSSoupDiffer(old_soup)
-          jsdiff.prune_differences(soup)
-          jsdifferences = jsdiff.show_differences(tor_soup)
-          if not jsdifferences: jsdifferences = "None."
-          ret += "Javascript Differences: "+jsdifferences+"\n"
-    else:
-      if self.content:
-        ret += " "+self.content+"\n"
-      if self.content_old:
-        ret += " "+self.content_old+"\n"
-      if self.content_exit:
-        ret += " "+self.content_exit+"\n"
-    return ret
-
-class SSHTestResult(TestResult):
-  ''' Represents the result of an ssh test '''
-  def __init__(self, exit_obj, ssh_site, status):
-    super(SSHTestResult, self).__init__(exit_obj, ssh_site, status)
-    self.proto = "ssh"
-
-class DNSTestResult(TestResult):
-  ''' Represents the result of a dns test '''
-  def __init__(self, exit_obj, dns_site, status):
-    super(DNSTestResult, self).__init__(exit_obj, dns_site, status)
-    self.proto = "dns"
-
-class DNSRebindTestResult(TestResult):
-  ''' Represents the result of a dns rebind test '''
-  def __init__(self, exit_obj, dns_rebind_site, status):
-    super(DNSRebindTestResult, self).__init__(exit_obj, dns_rebind_site, status)
-    self.proto = "dns"
-
-class SMTPTestResult(TestResult):
-  ''' Represents the result of an smtp test '''
-  def __init__(self, exit_obj, smtp_site, status):
-    super(SMTPTestResult, self).__init__(exit_obj, smtp_site, status)
-    self.proto = "smtp"
-
-class IMAPTestResult(TestResult):
-  ''' Represents the result of an imap test '''
-  def __init__(self, exit_obj, imap_site, status):
-    super(IMAPTestResult, self).__init__(exit_obj, imap_site, status)
-    self.proto = "imap"
-
-class POPTestResult(TestResult):
-  ''' Represents the result of a pop test '''
-  def __init__(self, exit_obj, pop_site, status):
-    super(POPTestResult, self).__init__(exit_obj, pop_site, status)
-    self.proto = "pop"
-
-class DataHandler:
-  def __init__(self, my_data_dir=data_dir):
-    self.data_dir = my_data_dir
-
-  ''' Class for saving and managing test result data '''
-  def filterResults(self, results, protocols=[], show_good=False, 
-      show_bad=False, show_inconclusive=False):
-    ''' filter results based on protocol and success level ''' 
-
-    protocol_filters = []
-    status_filters = []
-
-    for protocol in protocols:
-      protocol_filters.append(lambda x, p=protocol: x.__class__.__name__.lower()[:-10].endswith(p))
-    if show_good:
-      status_filters.append(lambda x: x.status == TEST_SUCCESS)
-    if show_bad:
-      status_filters.append(lambda x: x.status == TEST_FAILURE)
-    if show_inconclusive:
-      status_filters.append(lambda x: x.status == TEST_INCONCLUSIVE)
-
-    if len(protocol_filters) == 0 or len(status_filters) == 0:
-      return []
-     
-    protocol_filter = lambda x: reduce(operator.__or__, [f(x) for f in protocol_filters])
-    status_filter = lambda x: reduce(operator.__or__, [f(x) for f in status_filters])
-
-    return [x for x in results if (protocol_filter(x) and status_filter(x))]
-    
-  def filterByNode(self, results, id):
-    ''' filter by node'''
-    return filter(lambda x: x.exit_node == id, results)
-
-  def getAll(self):
-    ''' get all available results'''
-    return self.__getResults(self.data_dir)
-
-  def getSsh(self):
-    ''' get results of ssh tests '''
-    return self.__getResults(self.data_dir + 'ssh/')
-    
-  def getHttp(self):
-    ''' get results of http tests '''
-    return self.__getResults(self.data_dir + 'http/')
-
-  def getSsl(self):
-    ''' get results of ssl tests '''
-    return self.__getResults(self.data_dir + 'ssl/')
-
-  def getSmtp(self):
-    ''' get results of smtp tests '''
-    return self.__getResults(self.data_dir + 'smtp/')
-
-  def getPop(self):
-    ''' get results of pop tests '''
-    return self.__getResults(self.data_dir + 'pop/')
-
-  def getImap(self):
-    ''' get results of imap tests '''
-    return self.__getResults(self.data_dir + 'imap/')
-
-  def getDns(self):
-    ''' get results of basic dns tests '''
-    return self.__getResults(self.data_dir + 'dns')
-
-  def getDnsRebind(self):
-    ''' get results of dns rebind tests '''
-    return self.__getResults(self.data_dir + 'dnsbrebind/')
-
-  def __getResults(self, rdir):
-    ''' 
-    recursively traverse the directory tree starting with dir
-    gather test results from files ending with .result
-    '''
-    results = []
-
-    for root, dirs, files in os.walk(rdir):
-      for f in files:
-        if f.endswith('.result'):
-          result = SnakePickler.load(os.path.join(root, f))
-          result.rebase(self.data_dir)
-          results.append(result)
-    return results
-
-  def getResult(self, file):
-    return SnakePickler.load(file)
-
-  def uniqueFilename(afile):
-    (prefix,suffix)=os.path.splitext(afile)
-    i=0
-    while os.path.exists(prefix+"."+str(i)+suffix):
-      i+=1
-    return prefix+"."+str(i)+suffix
-  uniqueFilename = Callable(uniqueFilename)
-  
-  def safeFilename(unsafe_file):
-    ''' 
-    remove characters illegal in some systems 
-    and trim the string to a reasonable length
-    '''
-    unsafe_file = unsafe_file.decode('ascii', 'ignore')
-    safe_file = re.sub(unsafe_filechars, "_", unsafe_file)
-    return str(safe_file[:200])
-  safeFilename = Callable(safeFilename)
-
-  def __resultFilename(self, result):
-    address = ''
-    if result.__class__.__name__ == 'HtmlTestResult' or result.__class__.__name__ == 'HttpTestResult':
-      address = DataHandler.safeFilename(result.site[7:])
-    elif result.__class__.__name__ == 'SSLTestResult':
-      address = DataHandler.safeFilename(result.site[8:])
-    elif 'TestResult' in result.__class__.__name__:
-      address = DataHandler.safeFilename(result.site)
-    else:
-      raise Exception, 'This doesn\'t seem to be a result instance.'
-
-    rdir = self.data_dir+result.proto.lower()+'/'
-    if result.false_positive:
-      rdir += 'falsepositive/'
-    elif result.from_rescan:
-      rdir += 'rescan/'
-    elif result.status == TEST_SUCCESS:
-      rdir += 'successful/'
-    elif result.status == TEST_INCONCLUSIVE:
-      rdir += 'inconclusive/'
-    elif result.status == TEST_FAILURE:
-      rdir += 'failed/'
-
-    return DataHandler.uniqueFilename(str((rdir+address+'.'+result.exit_node[1:]+".result").decode('ascii', 'ignore')))
-
-  def saveResult(self, result):
-    ''' generic method for saving test results '''
-    result.filename = self.__resultFilename(result)
-    SnakePickler.dump(result, result.filename)
-
-  def __testFilename(self, test, position=-1):
-    if position == -1:
-      return DataHandler.uniqueFilename(self.data_dir+test.__class__.__name__+".test")
-    else:
-      return self.data_dir+test.__class__.__name__+"."+str(position)+".test"
-
-  def loadTest(self, testname, position=-1):
-    filename = self.data_dir+testname
-    if position == -1:
-      i=0
-      while os.path.exists(filename+"."+str(i)+".test"):
-        i+=1
-      position = i-1
-    
-    test = SnakePickler.load(filename+"."+str(position)+".test")
-    return test
-
-  def saveTest(self, test):
-    if not test.filename:
-      test.filename = self.__testFilename(test)
-    SnakePickler.dump(test, test.filename)
-
-# These three bits are needed to fully recursively strain the parsed soup.
-# For some reason, the SoupStrainer does not get applied recursively..
-__first_strainer = SoupStrainer(lambda name, attrs: name in tags_to_check or 
-   len(Set(map(lambda a: a[0], attrs)).intersection(Set(attrs_to_check))) > 0)
-
-def __tag_not_worthy(tag):
-  if tag.name in tags_to_check:
-    return False
-  for attr in tag.attrs:
-    if attr[0] in attrs_to_check_map:
-      return False
-  return True
-
-def FullyStrainedSoup(html):
-  """ Remove all tags that are of no interest. Also remove content """
-  soup = TheChosenSoup(html, __first_strainer)
-  to_extract = []
-  for tag in soup.findAll():
-    to_prune = []
-    for attr in tag.attrs:
-      if attr[0] in attrs_to_prune:
-        to_prune.append(attr)
-    for attr in to_prune:
-      tag.attrs.remove(attr)
-    if __tag_not_worthy(tag):
-      to_extract.append(tag)
-    if tag.name not in tags_preserve_inner:
-      for child in tag.childGenerator():
-        if not isinstance(child, Tag) or __tag_not_worthy(child):
-          to_extract.append(child)
-  for tag in to_extract:
-    if isinstance(tag, Tag):
-      parent = tag.findParent()
-      for child in tag.findChildren():
-        parent.append(child)
-  for tag in to_extract:
-    tag.extract()
-  # Also flatten the tag structure
-  flattened_tags = soup.findAll()
-  for tag in flattened_tags:
-    if isinstance(tag, Tag): # Don't extract script/CSS strings.
-      tag.extract() 
-  for tag in flattened_tags:
-    soup.append(tag)
-  return soup      
-
-class SnakePickler:
-  def dump(obj, filename):
-    if not "depickle_upgrade" in dir(obj.__class__):
-      plog("WARN", "Pickling instance of "+obj.__class__.__name__+" without upgrade method")
-    f = file(filename, "w")
-    try:
-      pickle.dump(obj, f)
-    except KeyboardInterrupt:
-      finished = False
-      while not finished:
-        try:
-          f.close()
-          f = file(filename, "w")
-          pickle.dump(obj, f)
-          f.close()
-          finished = True
-        except KeyboardInterrupt:
-          pass
-      raise KeyboardInterrupt
-    except Exception, e:
-      plog("WARN", "Exception during pickle dump: "+e)
-      try:
-        os.unlink(filename)
-      except: pass
-    f.close()
-  dump = Callable(dump)
-
-  def load(filename):
-    f = file(filename, "r")
-    try:
-      obj = pickle.load(f)
-    except Exception, e:
-      plog("WARN", "Error loading object from "+filename+": "+str(e))
-      return None
-    if not "depickle_upgrade" in dir(obj.__class__):
-      plog("WARN", "De-pickling instance of "+obj.__class__.__name__+" without upgrade method")
-    else:
-      obj.depickle_upgrade()
-    f.close()
-    return obj
-  load = Callable(load)
-     
-class SoupDiffer:
-  """ Diff two soup tag sets, optionally writing diffs to outfile. """
-  def __init__(self, soup_old, soup_new):
-    tags_old = self._get_tags(soup_old)
-    tags_new = self._get_tags(soup_new)
-    self.tag_pool = tags_new | tags_old
-    self.changed_tag_map = {}
-    self._update_changed_tag_map(tags_old, tags_new)
-    self._update_changed_tag_map(tags_new, tags_old)
-
-    attrs_new = self._get_attributes(soup_new)
-    attrs_old = self._get_attributes(soup_old)
-    self.attr_pool = attrs_new | attrs_old
-    self.changed_attr_map = {}
-    self._update_changed_attr_map(attrs_new, attrs_old)
-    self._update_changed_attr_map(attrs_old, attrs_new)
-
-    cntnt_new = self._get_content(soup_new)
-    cntnt_old = self._get_content(soup_old)
-    self.content_pool = cntnt_new | cntnt_old
-    self.content_changed = bool(cntnt_new ^ cntnt_old) 
-    self._pickle_revision = 0    
-
-  def depickle_upgrade(self):
-    pass
-
-  def _get_tags(self, soup):
-    return sets.Set(map(str, 
-           [tag for tag in soup.findAll() if isinstance(tag, Tag)]))
-
-  def _get_attributes(self, soup):
-    attr_soup = [(tag.name, tag.attrs) for tag in soup.findAll()]
-    attrs = sets.Set([])
-    for (tag, attr_list) in attr_soup:
-      for at in attr_list:
-        attrs.add((tag, at)) 
-    return attrs
-
-  def _get_content(self, soup):
-    return sets.Set(map(str, 
-      [tag for tag in soup.findAll() if not isinstance(tag, Tag)]))
-  
-  def _update_changed_tag_map(self, tags_old, tags_new):
-    """ Create a map of changed tags to ALL attributes that tag
-        has ever had (changed or not) """
-    changed_tags = list(tags_new - tags_old)
-    for tags in map(TheChosenSoup, changed_tags):
-      for t in tags.findAll():
-        if t.name not in changed_tags:
-          self.changed_tag_map[t.name] = sets.Set([])
-        for attr in t.attrs:
-          self.changed_tag_map[t.name].add(attr[0])
-
-  def _update_changed_attr_map(self, attrs_old, attrs_new):
-    """ Transform the list of (tag, attribute) pairings for new/changed
-        attributes into a map. This allows us to quickly see
-        if any attributes changed for a specific tag. """
-    changed_attributes = list(attrs_new - attrs_old)
-    for (tag, attr) in changed_attributes:
-      if tag not in self.changed_attr_map:
-        self.changed_attr_map[tag] = sets.Set([])
-      self.changed_attr_map[tag].add(attr[0])
-
-  def _update_changed_content(self, content_old, content_new):
-    # FIXME: This could be tracked by parent tag+attr
-    if not self.content_changed:
-      self.content_changed = bool(content_old ^ content_new)
-
-  def prune_differences(self, soup):
-    tags = self._get_tags(soup)
-    attrs = self._get_attributes(soup)
-    cntnt = self._get_content(soup)
-
-    self._update_changed_tag_map(self.tag_pool, tags)
-    self._update_changed_attr_map(self.attr_pool, attrs)
-    self._update_changed_content(self.content_pool, cntnt)
-    self.tag_pool.union_update(tags)
-    self.attr_pool.union_update(attrs)
-    self.content_pool.union_update(cntnt)
-
-  def show_changed_tags(self, soup):
-    soup_tags = self._get_tags(soup)
-    new_tags = soup_tags - self.tag_pool
-    ret = ""
-    for tags in map(TheChosenSoup, new_tags):
-      for t in tags.findAll():
-        if t.name not in self.changed_tag_map:
-          ret += " New Tag: "+str(t)+"\n"
-        else:
-          for attr in t.attrs:
-            if attr[0] not in self.changed_tag_map[t.name] \
-                 and attr[0] in attrs_to_check_map:
-              ret += " New Attr "+attr[0]+": "+str(t)+"\n"
-    return ret
-
-  def show_changed_attrs(self, soup):
-    soup_attrs = self._get_attributes(soup)
-    new_attrs = soup_attrs - self.attr_pool
-    ret = ""
-    for (tag, attr) in new_attrs:
-      if tag in self.changed_attr_map:
-        if attr[0] not in self.changed_attr_map[tag] \
-            and attr[0] in attrs_to_check_map:
-          ret += " New Attr "+attr[0]+": "+tag+" "+attr[0]+'="'+attr[1]+'"\n'
-      else:
-        ret += " New Tag: "+tag+" "+attr[0]+'="'+attr[1]+'"\n'
-    return ret
-
-  def show_changed_content(self, soup):
-    """ Return a list of tag contents changed in soup_new """
-    content = self._get_content(soup)
-    ret = list(content - self.content_pool)
-    ret.sort()
-    return ret
-
-class HeaderDiffer:
-  def __init__(self, orig_headers):
-    self.header_pool = sets.Set(orig_headers)
-    self.changed_headers = sets.Set([])
-    self._pickle_revision = 0
- 
-  def filter_headers(headers):
-    ret = []
-    for h in headers:
-      matched = False
-      for i in ignore_http_headers:
-        if re.match(i, h[0]):
-          matched = True
-      if not matched: ret.append(h)
-    return sets.Set(ret)
-  filter_headers = Callable(filter_headers)
- 
-  def depickle_upgrade(self):
-    pass
-
-  def prune_differences(self, new_headers):
-    new_headers = sets.Set(new_headers)
-    changed = new_headers - self.header_pool
-    for i in changed:
-      self.changed_headers.add(i[0])
-    self.header_pool.union_update(new_headers)
-
-  def show_differences(self, new_headers):
-    ret = ""
-    changed = sets.Set(new_headers) - self.header_pool
-    for i in changed:
-      if i[0] not in self.changed_headers:
-        ret += " "+i[0]+": "+i[1]+"\n"
-    if ret:
-      return "New HTTP Headers:\n"+ret
-    else: 
-      return ret
-
-class JSDiffer:
-  def __init__(self, js_string):
-    self._pickle_revision = 0    
-    self.ast_cnts = self._count_ast_elements(js_string)
-
-  def depickle_upgrade(self):
-    pass
-
-  def _ast_recursive_worker(ast, ast_cnts):
-    node = JSTokenNames[ast.getType()]
-    if not node in ast_cnts:
-      ast_cnts[node] = 1
-    else: ast_cnts[node] += 1
-
-    for child in ast.getChildren():
-      JSDiffer._ast_recursive_worker(child, ast_cnts)
-  _ast_recursive_worker = Callable(_ast_recursive_worker)
-
-  def _antlr_parse(self, js_string):
-    char_stream = antlr3.ANTLRStringStream(js_string)
-    lexer = LoggingJSLexer(char_stream)
-    tokens = antlr3.CommonTokenStream(lexer)
-    parser = LoggingJSParser(tokens)
-    program = parser.program()
-    program.tree.parse_errors = parser.parse_errors__
-    program.tree.lex_errors = lexer.lex_errors__
-    return program.tree
-                            
-  def _count_ast_elements(self, js_string, name="global"):
-    ast_cnts = {}
-    try:
-      js_string = js_string.replace("\n\r","\n").replace("\r\n","\n").replace("\r","\n")+";"
-      
-      ast = self._antlr_parse(js_string)
-      JSDiffer._ast_recursive_worker(ast, ast_cnts)
-      for e in ast.lex_errors+ast.parse_errors:
-        name+=":"+e.__class__.__name__
-        if "line" in e.__dict__: 
-          name+=":"+str(e.line)
-        if "token" in e.__dict__ and e.token \
-            and "type" in e.token.__dict__: 
-          name+=":"+JSTokenNames[e.token.type]
-        # XXX: Any other things we want to add?
-        plog("INFO", "Parse error "+name+" on "+js_string)
-        if not "ParseError:"+name in ast_cnts:
-          ast_cnts["ParseError:"+name] = 1
-        else: ast_cnts["ParseError:"+name] += 1
-    except UnicodeDecodeError, e:
-      name+=":"+e.__class__.__name__
-      plog("INFO", "Unicode error "+name+" on "+js_string)
-      if not "ParseError:"+name in ast_cnts:
-        ast_cnts["ParseError:"+name] = 1
-      else: ast_cnts["ParseError:"+name] +=1
-    return ast_cnts
-
-  def _difference_pruner(self, other_cnts):
-    for node in self.ast_cnts.iterkeys():
-      if node not in other_cnts:
-        self.ast_cnts[node] = 0
-      elif self.ast_cnts[node] != other_cnts[node]:
-        self.ast_cnts[node] = 0
-    for node in other_cnts.iterkeys():
-      if node not in self.ast_cnts:
-        self.ast_cnts[node] = 0
-
-  def _difference_checker(self, other_cnts):
-    for node in self.ast_cnts.iterkeys():
-      if not self.ast_cnts[node]: continue # pruned difference
-      if node not in other_cnts:
-        return True
-      elif self.ast_cnts[node] != other_cnts[node]:
-        return True
-    for node in other_cnts.iterkeys():
-      if node not in self.ast_cnts:
-        return True
-    return False
-
-  def _difference_printer(self, other_cnts):
-    ret = ""
-    missing = []
-    miscount = []
-    new = []
-    for node in self.ast_cnts.iterkeys():
-      if not self.ast_cnts[node]: continue # pruned difference
-      if node not in other_cnts:
-        missing.append(str(node))
-      elif self.ast_cnts[node] != other_cnts[node]:
-        miscount.append(str(node))
-    for node in other_cnts.iterkeys():
-      if node not in self.ast_cnts:
-        new.append(str(node))
-    if missing:
-      ret += "\nMissing: "
-      for node in missing: ret += node+" "
-    if new:
-      ret += "\nNew: "
-      for node in new: ret += node+" "
-    if miscount:
-      ret += "\nMiscount: "
-      for node in miscount: ret += node+" "
-    return ret
-
-  def prune_differences(self, other_string):
-    other_cnts = self._count_ast_elements(other_string)
-    self._difference_pruner(other_cnts)
-
-  def contains_differences(self, other_string):
-    other_cnts = self._count_ast_elements(other_string)
-    return self._difference_checker(other_cnts) 
-
-  def show_differences(self, other_string):
-    other_cnts = self._count_ast_elements(other_string)
-    return self._difference_printer(other_cnts) 
-
-
-class JSSoupDiffer(JSDiffer):
-  def _add_cnts(tag_cnts, ast_cnts):
-    ret_cnts = {}
-    for n in tag_cnts.iterkeys():
-      if n in ast_cnts:
-        ret_cnts[n] = tag_cnts[n]+ast_cnts[n]
-      else:
-        ret_cnts[n] = tag_cnts[n]
-    for n in ast_cnts.iterkeys():
-      if n not in tag_cnts:
-        ret_cnts[n] = ast_cnts[n]
-    return ret_cnts
-  _add_cnts = Callable(_add_cnts)
-
-  def _count_ast_elements(self, soup, name="Soup"):
-    ast_cnts = {}
-    for tag in soup.findAll():
-      if tag.name == 'script':
-        for child in tag.childGenerator():
-          if isinstance(child, Tag):
-            plog("ERROR", "Script tag with subtag!")
-          else:
-            script = str(child).replace("<!--", "").replace("-->", "").replace("<![CDATA[", "").replace("]]>", "")
-            tag_cnts = JSDiffer._count_ast_elements(self, script, tag.name)
-            ast_cnts = JSSoupDiffer._add_cnts(tag_cnts, ast_cnts)
-      for attr in tag.attrs:
-        # hrmm.. %-encoding too? Firefox negs on it..
-        parse = ""
-        if attr[1].replace(" ","")[:11] == "javascript:":
-          split_at = attr[1].find(":")+1
-          parse = str(attr[1][split_at:])
-        elif attr[0] in attrs_with_raw_script_map:
-          parse = str(attr[1])
-        if not parse: continue
-        tag_cnts = JSDiffer._count_ast_elements(self,parse,tag.name+":"+attr[0])
-        ast_cnts = JSSoupDiffer._add_cnts(tag_cnts, ast_cnts)
-    return ast_cnts
-

Deleted: torflow/trunk/NetworkScanners/snakeinspector.py
===================================================================
--- torflow/trunk/NetworkScanners/snakeinspector.py	2009-08-13 18:29:55 UTC (rev 20278)
+++ torflow/trunk/NetworkScanners/snakeinspector.py	2009-08-13 19:34:12 UTC (rev 20279)
@@ -1,141 +0,0 @@
-#!/usr/bin/python
-
-import dircache
-import operator
-import os
-import pickle
-import sys
-import time
-
-import sets
-from sets import Set
-
-import getopt
-
-import libsoat
-from libsoat import *
-
-sys.path.append("../")
-
-import TorCtl.TorUtil
-from TorCtl.TorUtil import *
-
-TorCtl.TorUtil.loglevel="NOTICE"
-
-if TorCtl.TorUtil.loglevels[TorCtl.TorUtil.loglevel] > TorCtl.TorUtil.loglevels["INFO"]:
-  # Kill stderr (jsdiffer and exception noise) if our loglevel is above INFO
-  sys.stderr = file("/dev/null", "w")
-
-
-def usage(argv):
-  print "Usage: "+argv[0]+" with 0 or more of the following filters: "
-  print "  --dir <datadir>"
-  print "  --file <.result file>"
-  print "  --exit <idhex>"
-  print "  --before <timestamp as string>"
-  print "  --after <timestamp as string>"
-  print "  --reason <soat failure reason>    # may be repeated"
-  print "  --noreason <soat failure reason>  # may be repeated"
-  print "  --proto <protocol>"
-  print "  --resultfilter <TestResult class name>"
-  print "  --statuscode <'Failure' or 'Inconclusive'>"
-  print "  --sortby <'proto' or 'url' or 'exit' or 'reason'>"
-  print "  --falsepositives"
-  print "  --verbose"
-  sys.exit(1)
-
-def getargs(argv):
-  try:
-    opts,args = getopt.getopt(argv[1:],"d:f:e:r:vt:p:s:o:n:a:b:F", 
-             ["dir=", "file=", "exit=", "reason=", "resultfilter=", "proto=", 
-              "verbose", "statuscode=", "sortby=", "noreason=", "after=",
-              "before=", "falsepositives"])
-  except getopt.GetoptError,err:
-    print str(err)
-    usage(argv)
-  # FIXME: make all these repeatable
-  use_dir="./data/"
-  use_file=None
-  node=None
-  reasons=[]
-  noreasons=[]
-  result=2
-  verbose=1
-  proto=None
-  resultfilter=None
-  before = 0xffffffff
-  after = 0
-  sortby="proto"
-  falsepositives=False
-  for o,a in opts:
-    if o == '-d' or o == '--dir':
-      use_dir = a
-    elif o == '-f' or o == '--file':
-      use_file = a
-    elif o == '-b' or o == '--before':
-      before = time.mktime(time.strptime(a))
-    elif o == '-a' or o == '--after': 
-      after = time.mktime(time.strptime(a))
-    elif o == '-r' or o == '--reason': 
-      reasons.append(a)
-    elif o == '-n' or o == '--noreason': 
-      noreasons.append(a)
-    elif o == '-v' or o == '--verbose': 
-      verbose += 1
-    elif o == '-t' or o == '--resultfilter':
-      resultfilter = a
-    elif o == '-p' or o == '--proto':
-      proto = a
-    elif o == '-F' or o == '--falsepositives':
-      falsepositives = True
-    elif o == '-s' or o == '--sortby': 
-      if a not in ["proto", "site", "exit", "reason"]:
-        usage(argv)
-      else: sortby = a 
-    elif o == '-o' or o == '--statuscode': 
-      try:
-        result = int(a)
-      except ValueError:
-        result = RESULT_CODES[a]
-  return use_dir,use_file,node,reasons,noreasons,result,verbose,resultfilter,proto,sortby,before,after,falsepositives
- 
-def main(argv):
-  use_dir,use_file,node,reasons,noreasons,result,verbose,resultfilter,proto,sortby,before,after,falsepositives=getargs(argv)
-  dh = DataHandler(use_dir)
-  print dh.data_dir
-
-  if use_file:
-    results = [dh.getResult(use_file)]
-  elif node:
-    results = dh.filterByNode(dh.getAll(), "$"+node)
-  else:
-    results = dh.getAll()
-
-  if sortby == "url":
-    results.sort(lambda x, y: cmp(x.site, y.site))
-  elif sortby == "reason":
-    results.sort(lambda x, y: cmp(x.reason, y.reason))
-  elif sortby == "exit":
-    results.sort(lambda x, y: cmp(x.exit_node, y.exit_node))
-
-  for r in results:
-    r.verbose = verbose
-    if r.reason in noreasons: continue
-    if reasons and r.reason not in reasons: continue
-    if r.timestamp < after or before < r.timestamp: continue
-    if (falsepositives) ^ r.false_positive: continue
-    if (not result or r.status == result) and \
-       (not proto or r.proto == proto) and \
-       (not resultfilter or r.__class__.__name__ == resultfilter):
-      try:
-        print r
-      except KeyboardInterrupt:
-        raise KeyboardInterrupt
-      except IOError, e:
-        traceback.print_exc()
-      except Exception, e:
-        traceback.print_exc()
-      print "\n-----------------------------\n"
-
-if __name__ == "__main__":
-  main(sys.argv)

Deleted: torflow/trunk/NetworkScanners/soat.py
===================================================================
--- torflow/trunk/NetworkScanners/soat.py	2009-08-13 18:29:55 UTC (rev 20278)
+++ torflow/trunk/NetworkScanners/soat.py	2009-08-13 19:34:12 UTC (rev 20279)
@@ -1,2848 +0,0 @@
-#!/usr/bin/python
-#
-# 2008 Aleksei Gorny, mentored by Mike Perry
-
-'''
-Snakes on a Tor exit node scanner
-
-The SoaT scanner checks whether exit nodes behave by initiating connections
-to semi-randomly chosen targets using several protocols (http, https, ssh, smtp, imap, etc)
-and comparing content received directly and via tor.
-
-It interacts with metatroller and the control port to be aware of the tor network status.
-
-To run SoaT: 
-1) make sure you have py-openssl packages installed (see README)
-2) open Tor control port in the torrc
-3) start metatroller in the background (python ./metatroller.py)
-4) start soat (python ./soat.py) with some testing flags (run it without any flags
-    to see which options are available)
-5) check the results later by running soatstats (python ./soatstats.py)
-
-'''
-
-__all__ = ["ExitNodeScanner", "DNSRebindScanner", "load_wordlist"]
-
-import commands
-import getopt
-import os
-import random
-import re
-from sets import Set
-import smtplib
-import socket
-import sys
-import time
-import urllib
-import urllib2
-import httplib
-import traceback
-import copy
-import StringIO
-import zlib,gzip
-import urlparse
-import cookielib
-import sha
-import Queue
-import threading
-
-from libsoat import *
-
-sys.path.append("../")
-
-from TorCtl import TorUtil, TorCtl, PathSupport
-from TorCtl.TorUtil import meta_port, meta_host, control_port, control_host, tor_port, tor_host
-from TorCtl.TorUtil import *
-from TorCtl.PathSupport import *
-from TorCtl.TorCtl import Connection, EventHandler, ConsensusTracker
-
-import OpenSSL
-from OpenSSL import *
-
-
-sys.path.append("./libs/")
-from BeautifulSoup.BeautifulSoup import SoupStrainer, Tag
-from SocksiPy import socks
-import Pyssh.pyssh
-
-from soat_config import *
-
-search_cookies=None
-metacon=None
-datahandler=None
-linebreak = '\r\n'
-
-
-# Oh yeah. so dirty. Blame this guy if you hate me:
-# http://mail.python.org/pipermail/python-bugs-list/2008-October/061202.html
-_origsocket = socket.socket
-class BindingSocket(_origsocket):
-  bind_to = None
-  def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None):
-    _origsocket.__init__(self, family, type, proto, _sock)
-    if BindingSocket.bind_to:
-      plog("DEBUG", "Binding socket to "+BindingSocket.bind_to)
-      self.bind((BindingSocket.bind_to, 0))
-socket.socket = BindingSocket 
-
-# Nice.. HTTPConnection.connect is doing DNS for us! Fix that:
-# Hrmm.. suppose we could also bind here.. but BindingSocket is 
-# more general and may come in handy for other tests.
-class NoDNSHTTPConnection(httplib.HTTPConnection):
-  def connect(self):
-    try:
-      self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
-      self.sock.settimeout(read_timeout) # Mnemotronic tonic
-      if self.debuglevel > 0:
-        print "connect: (%s, %s)" % (self.host, self.port)
-      self.sock.connect((str(self.host), self.port))
-    except socket.error, msg:
-      if self.debuglevel > 0:
-        print 'connect fail:', (self.host, self.port)
-      if self.sock:
-        self.sock.close()
-      self.sock = None
-    if not self.sock:
-      raise socket.error, msg
-
-class NoDNSHTTPHandler(urllib2.HTTPHandler):
-  def http_open(self, req):
-    return self.do_open(NoDNSHTTPConnection, req)
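
Together, the two hooks above let a test pin the local source address and connect
to a literal IP without any resolver involvement. A hedged usage sketch (the
addresses are placeholders):

  BindingSocket.bind_to = "192.0.2.10"        # new sockets now bind to this local IP
  opener = urllib2.build_opener(NoDNSHTTPHandler)
  reply = opener.open("http://203.0.113.7/")  # connects straight to the IP; no DNS lookup
  BindingSocket.bind_to = None                # stop binding subsequent sockets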
-
-# HTTP request handling
-def http_request(address, cookie_jar=None, headers=firefox_headers):
-  ''' perform an HTTP GET request and return (code, headers, new_cookies, mime_type, content) '''
-  request = urllib2.Request(address)
-  for h in headers:
-    request.add_header(h[0], h[1])
-
-  content = ""
-  new_cookies = []
-  mime_type = ""
-  try:
-    plog("DEBUG", "Starting request for: "+address)
-    if cookie_jar != None:
-      opener = urllib2.build_opener(NoDNSHTTPHandler, urllib2.HTTPCookieProcessor(cookie_jar))
-      reply = opener.open(request)
-      if "__filename" in cookie_jar.__dict__:
-        cookie_jar.save(cookie_jar.__filename, ignore_discard=True)
-      new_cookies = cookie_jar.make_cookies(reply, request)
-    else:
-      reply = urllib2.urlopen(request)
-
-    length = reply.info().get("Content-Length")
-    if length and int(length) > max_content_size:
-      plog("WARN", "Max content size exceeded for "+address+": "+length)
-      return (reply.code, None, [], "", "")
-    mime_type = reply.info().type.lower()
-    reply_headers = HeaderDiffer.filter_headers(reply.info().items())
-    reply_headers.add(("mime-type", mime_type))
-    plog("DEBUG", "Mime type is "+mime_type+", length "+str(length))
-    content = decompress_response_data(reply)
-  except socket.timeout, e:
-    plog("WARN", "Socket timeout for "+address+": "+str(e))
-    traceback.print_exc()
-    return (-6.0, None, [], "", e.__class__.__name__+str(e))
-  except httplib.BadStatusLine, e:
-    plog('NOTICE', "HTTP Error during request of "+address+": "+str(e))
-    if not e.line: 
-      return (-13.0, None, [], "", e.__class__.__name__+"(None)") 
-    else:
-      traceback.print_exc()
-      return (-666.0, None, [], "", e.__class__.__name__+str(e)) 
-  except urllib2.HTTPError, e:
-    plog('NOTICE', "HTTP Error during request of "+address+": "+str(e))
-    if str(e) == "<urlopen error timed out>": # Yah, super ghetto...
-      return (-6.0, None, [], "", e.__class__.__name__+str(e)) 
-    else:
-      traceback.print_exc()
-      return (e.code, None, [], "", e.__class__.__name__+str(e)) 
-  except (ValueError, urllib2.URLError), e:
-    plog('WARN', 'The http-request address ' + address + ' is malformed')
-    if str(e) == "<urlopen error timed out>": # Yah, super ghetto...
-      return (-6.0, None, [], "", e.__class__.__name__+str(e)) 
-    else:
-      traceback.print_exc()
-      return (-23.0, None, [], "", e.__class__.__name__+str(e))
-  except socks.Socks5Error, e:
-    plog('WARN', 'A SOCKS5 error '+str(e.value[0])+' occurred for '+address+": "+str(e))
-    return (-float(e.value[0]), None, [], "", e.__class__.__name__+str(e))
-  except KeyboardInterrupt:
-    raise KeyboardInterrupt
-  except Exception, e:
-    plog('WARN', 'An unknown HTTP error occurred for '+address+": "+str(e))
-    traceback.print_exc()
-    return (-666.0, None, [], "", e.__class__.__name__+str(e))
-
-  return (reply.code, reply_headers, new_cookies, mime_type, content)
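
A sketch of how callers unpack that 5-tuple; note that transport-level failures are
reported as negative float pseudo-codes rather than raised (the URL is only an example):

  jar = cookielib.MozillaCookieJar()
  (code, resp_headers, new_cookies, mime_type, content) = \
      http_request("http://www.torproject.org/", jar)
  if code == 200:
    plog("INFO", "Fetched "+str(len(content))+" bytes of "+mime_type)
  elif code < 0:
    plog("WARN", "Transport failure, pseudo-code "+str(code))
  else:
    plog("NOTICE", "HTTP error code "+str(code))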
-
-class Test:
-  """ Base class for our tests """
-  def __init__(self, proto, port):
-    self.proto = proto
-    self.port = port
-    self.min_targets = min_targets
-    self.filename = None
-    self.rescan_nodes = sets.Set([])
-    self.nodes = sets.Set([])
-    self.node_map = {}
-    self.banned_targets = sets.Set([])
-    self.total_nodes = 0
-    self.scan_nodes = 0
-    self.nodes_to_mark = 0
-    self.tests_per_node = num_tests_per_node
-    self._reset()
-    self._pickle_revision = 6 # Will increment as fields are added
-
-  def run_test(self):
-    raise NotImplementedError()
-
-  def get_targets(self):
-    raise NotImplementedError()
-
-  def depickle_upgrade(self):
-    if self._pickle_revision < 1:
-      # Convert self.successes table from integers to sets.
-      # Yes, this is a hack, and yes, it will bias results
-      # away from the filter, but hey, at least it will still run.
-      self._pickle_revision = 1
-      
-      for addr in self.successes.keys():
-        if type(self.successes[addr]) == int:
-          self.successes[addr] = sets.Set(xrange(0,self.successes[addr]))
-      plog("INFO", "Upgraded "+self.__class__.__name__+" to v1")
-    if self._pickle_revision < 2: 
-      self._pickle_revision = 2
-    if self._pickle_revision < 3:
-      self.timeout_fails = {}
-      self._pickle_revision = 3
-    if self._pickle_revision < 4:
-      self.connect_fails = {}
-      self._pickle_revision = 4
-    if self._pickle_revision < 5:
-      self.dns_fails = {}
-      self._pickle_revision = 5
-    if self._pickle_revision < 6:
-      self.dns_fails_per_exit = self.dns_fails
-      self.timeout_fails_per_exit = self.timeout_fails
-      self.connect_fails_per_exit = {}
-      self._pickle_revision = 6
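
The _pickle_revision logic above is staged schema migration for pickled state: each
saved object records the revision it was written under, and on load every missing
step is applied in order. A minimal sketch of the same idea (the class and field
names are made up for illustration):

  class SavedState:
    def __init__(self):
      self._pickle_revision = 2   # current schema version
      self.fails = {}
      self.successes = {}

    def depickle_upgrade(self):
      # Called after unpickling an object saved by an older version of the code
      if self._pickle_revision < 1:
        self.fails = {}           # field added in revision 1
        self._pickle_revision = 1
      if self._pickle_revision < 2:
        self.successes = {}       # field added in revision 2
        self._pickle_revision = 2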
-
-  def refill_targets(self):
-    if len(self.targets) < self.min_targets:
-      plog("NOTICE", self.proto+" scanner short on targets. Adding more")
-      self.targets.extend(self.get_targets())
-
-  def _remove_target_addr(self, target):
-    if target in self.targets: self.targets.remove(target)
-
-  def remove_target(self, target, reason="None"):
-    self.banned_targets.add(target)
-    self.refill_targets()
-    self._remove_target_addr(target)
-    if target in self.dynamic_fails: del self.dynamic_fails[target]
-    if target in self.successes: del self.successes[target]
-    if target in self.exit_fails: del self.exit_fails[target]
-    if target in self.connect_fails: del self.connect_fails[target]
-    kill_results = []
-    for r in self.results: 
-      if r.site == target:
-        kill_results.append(r)
-    for r in kill_results:
-      # XXX: Need to re-add this node to our test set 
-      # (If it is still up)
-      if r.status == TEST_FAILURE:
-        # Save this new result file in false positive dir 
-        # and remove old one
-        try:
-          os.unlink(r.filename)
-        except:
-          pass
-        r.mark_false_positive(reason)
-        datahandler.saveResult(r)
-      self.results.remove(r)
-
-  def load_rescan(self, type, since=None):
-    self.rescan_nodes = sets.Set([])
-    results = datahandler.getAll()
-    for r in results:
-      if r.status == type:
-        if not since or r.timestamp >= since:
-          self.rescan_nodes.add(r.exit_node[1:])
-    plog("INFO", "Loaded "+str(len(self.rescan_nodes))+" nodes to rescan")
-    if self.nodes and self.rescan_nodes:
-      self.nodes &= self.rescan_nodes
-    self.scan_nodes = len(self.nodes)
-    self.tests_per_node = num_rescan_tests_per_node
-    self.nodes_to_mark = self.scan_nodes*self.tests_per_node
-
-  def toggle_rescan(self):
-    if self.rescan_nodes:
-      plog("NOTICE", self.proto+" rescan complete. Switching back to normal scan")
-      self.rescan_nodes = sets.Set([])
-      self.tests_per_node = num_tests_per_node
-      self.update_nodes()
-    else:
-      plog("NOTICE", self.proto+" switching to recan mode.")
-      self.load_rescan(TEST_FAILURE, self.run_start)
-
-  def get_node(self):
-    return random.choice(list(self.nodes))
-
-  def update_nodes(self):
-    nodes = metacon.node_manager.get_nodes_for_port(self.port)
-    self.node_map = {}
-    for n in nodes: 
-      self.node_map[n.idhex] = n
-    self.total_nodes = len(nodes)
-    self.nodes = sets.Set(map(lambda n: n.idhex, nodes))
-    marked_nodes = sets.Set(self.node_results.keys())
-    self.nodes -= marked_nodes # Remove marked nodes
-    # Only scan the stuff loaded from the rescan
-    if self.rescan_nodes: self.nodes &= self.rescan_nodes
-    if not self.nodes:
-      plog("ERROR", "No nodes remain after rescan load!")
-    self.scan_nodes = len(self.nodes)
-    self.nodes_to_mark = self.scan_nodes*self.tests_per_node
-    metacon.node_manager._sanity_check(map(lambda id: self.node_map[id], 
-                     self.nodes))
-
-  def mark_chosen(self, node, result):
-    exit_node = metacon.get_exit_node()[1:]
-    if exit_node != node:
-      plog("ERROR", "Asked to mark a node that is not current: "+node+" vs "+exit_node)
-    plog("INFO", "Marking "+node+" with result "+str(result))
-    self.nodes_marked += 1
-    if not node in self.node_results: self.node_results[node] = []
-    self.node_results[node].append(result)
-    if len(self.node_results[node]) >= self.tests_per_node:
-      self.nodes.remove(node)
-      self.scan_nodes = len(self.nodes)
-      self.nodes_to_mark = self.scan_nodes*self.tests_per_node
-      plog("INFO", "Removed node "+node+". "+str(len(self.nodes))+" nodes remain")
-    else:
-      plog("DEBUG", "Keeping node "+node+". "+str(len(self.nodes))+" nodes remain. Tests: "+str(len(self.node_results[node]))+"/"+str(self.tests_per_node))
-
-     
-  def finished(self):
-    return not self.nodes
-   
-  def percent_complete(self):
-    return round(100.0 - (100.0*self.scan_nodes)/self.total_nodes, 1)
-
-  def _remove_false_positive_type(self, failset, failtype, max_rate):
-    if self.rescan_nodes: return
-    to_remove = copy.copy(failset)
-    for address in to_remove:
-      fails = len(failset[address])
-
-      if (100.0*fails)/(self.site_tests(address)) > max_rate:
-        plog("NOTICE", "Excessive "+self.proto+" "+failtype+" ("+str(fails)+"/"+str(self.site_tests(address))+") for "+address+". Removing.")
-        self.remove_target(address, failtype)
-
-  def remove_false_positives(self):
-    if self.rescan_nodes: 
-      plog("INFO", "Not removing false positives for rescan of "+self.__class__.__name__)
-    else:
-      plog("INFO", "Removing false positives for "+self.__class__.__name__)
-    self._remove_false_positive_type(self.exit_fails,
-                                     FALSEPOSITIVE_DYNAMIC_TOR,
-                                     max_exit_fail_pct)
-    self._remove_false_positive_type(self.dynamic_fails,
-                                     FALSEPOSITIVE_DYNAMIC,
-                                     max_dynamic_fail_pct)
-    self._remove_false_positive_type(self.connect_fails,
-                                     FALSEPOSITIVE_DEADSITE,
-                                     max_connect_fail_pct)
-
-  def _reset(self):
-    self.results = []
-    self.targets = []
-    self.tests_run = 0
-    self.nodes_marked = 0
-    self.run_start = time.time()
-    # These are indexed by idhex
-    self.connect_fails_per_exit = {}
-    self.timeout_fails_per_exit = {}
-    self.dns_fails_per_exit = {}
-    self.node_results = {}
-    # These are indexed by site url:
-    self.connect_fails = {}
-    self.exit_fails = {}
-    self.successes = {}
-    self.dynamic_fails = {}
- 
-  def rewind(self):
-    self._reset()
-    self.update_nodes()
-    self.targets = self.get_targets()
-    if not self.targets:
-      raise NoURLsFound("No URLs found for protocol "+self.proto)
-    if type(self.targets) == dict:
-      for subtype in self.targets.iterkeys():
-        targets = "\n\t".join(self.targets[subtype])
-        plog("INFO", "Using the following urls for "+self.proto+"/"+subtype+" scan:\n\t"+targets) 
-        
-    else:
-      targets = "\n\t".join(self.targets)
-      plog("INFO", "Using the following urls for "+self.proto+" scan:\n\t"+targets) 
-
-  def site_tests(self, site):
-    tot_cnt = 0
-    if site in self.successes:
-      tot_cnt += len(self.successes[site])
-    if site in self.exit_fails:
-      tot_cnt += len(self.exit_fails[site])
-    if site in self.dynamic_fails:
-      tot_cnt += len(self.dynamic_fails[site])
-    if site in self.connect_fails:
-      tot_cnt += len(self.connect_fails[site])
-    return tot_cnt
-
-  def register_success(self, result):
-    if self.rescan_nodes: result.from_rescan = True
-    #datahandler.saveResult(result)
-    if result.site in self.successes: 
-      self.successes[result.site].add(result.exit_node)
-    else: self.successes[result.site]=sets.Set([result.exit_node])
-
-    win_cnt = len(self.successes[result.site])
-    
-    plog("INFO", self.proto+" success at "+result.exit_node+". This makes "+str(win_cnt)+"/"+str(self.site_tests(result.site))+" node successes for "+result.site)
-
-  def _register_site_connect_failure(self, result): 
-    if self.rescan_nodes: result.from_rescan = True
-    self.results.append(result)
-    datahandler.saveResult(result)
-    if result.site in self.connect_fails:
-      self.connect_fails[result.site].add(result.exit_node)
-    else:
-      self.connect_fails[result.site] = sets.Set([result.exit_node])
-    
-    err_cnt = len(self.connect_fails[result.site])
-
-    plog("ERROR", self.proto+" connection fail of "+result.reason+" at "+result.exit_node+". This makes "+str(err_cnt)+"/"+str(self.site_tests(result.site))+" node failures for "+result.site)
-
-  def register_connect_failure(self, result):
-    if self.rescan_nodes: result.from_rescan = True
-    if result.exit_node not in self.connect_fails_per_exit:
-      self.connect_fails_per_exit[result.exit_node] = 0
-    self.connect_fails_per_exit[result.exit_node] += 1
-
-    c_cnt = self.connect_fails_per_exit[result.exit_node]
-   
-    if c_cnt > num_connfails_per_node:
-      if result.extra_info:
-        result.extra_info = str(result.extra_info) + " count: "+str(c_cnt)
-      else: 
-        result.extra_info = str(c_cnt)
-      self._register_site_connect_failure(result)
-      del self.connect_fails_per_exit[result.exit_node]
-      return TEST_FAILURE
-    else:
-      plog("NOTICE", self.proto+" connect fail at "+result.exit_node+". This makes "+str(c_cnt)+" fails")
-      return TEST_INCONCLUSIVE
-
-  def register_dns_failure(self, result):
-    if self.rescan_nodes: result.from_rescan = True
-    if result.exit_node not in self.dns_fails_per_exit:
-      self.dns_fails_per_exit[result.exit_node] = 0
-    self.dns_fails_per_exit[result.exit_node] += 1
-
-    d_cnt = self.dns_fails_per_exit[result.exit_node]
-   
-    if d_cnt > num_dnsfails_per_node:
-      if result.extra_info:
-        result.extra_info = str(result.extra_info) + " count: "+str(d_cnt)
-      else: 
-        result.extra_info = str(d_cnt)
-      self._register_site_connect_failure(result)
-      del self.dns_fails_per_exit[result.exit_node]
-      return TEST_FAILURE
-    else:
-      plog("NOTICE", self.proto+" dns fail at "+result.exit_node+". This makes "+str(d_cnt)+" fails")
-      return TEST_INCONCLUSIVE
-
-  def register_timeout_failure(self, result):
-    if self.rescan_nodes: result.from_rescan = True
-    if result.exit_node not in self.timeout_fails_per_exit:
-      self.timeout_fails_per_exit[result.exit_node] = 0
-    self.timeout_fails_per_exit[result.exit_node] += 1
-
-    t_cnt = self.timeout_fails_per_exit[result.exit_node]
-   
-    if t_cnt > num_timeouts_per_node:
-      if result.extra_info:
-        result.extra_info = str(result.extra_info) + " count: "+str(t_cnt)
-      else: 
-        result.extra_info = str(t_cnt)
-      self._register_site_connect_failure(result)
-      del self.timeout_fails_per_exit[result.exit_node]
-      return TEST_FAILURE
-    else:
-      plog("NOTICE", self.proto+" timeout at "+result.exit_node+". This makes "+str(t_cnt)+" timeouts")
-      return TEST_INCONCLUSIVE
-
-  def register_exit_failure(self, result):
-    if self.rescan_nodes: result.from_rescan = True
-    datahandler.saveResult(result)
-    self.results.append(result)
-
-    if result.site in self.exit_fails: 
-      self.exit_fails[result.site].add(result.exit_node)
-    else: self.exit_fails[result.site] = sets.Set([result.exit_node])
-
-    err_cnt = len(self.exit_fails[result.site])
-
-    plog("ERROR", self.proto+" exit-only fail of "+result.reason+" at "+result.exit_node+". This makes "+str(err_cnt)+"/"+str(self.site_tests(result.site))+" node failures for "+result.site)
-
-  def register_dynamic_failure(self, result):
-    if self.rescan_nodes: result.from_rescan = True
-    self.results.append(result)
-    datahandler.saveResult(result)
-    if result.site in self.dynamic_fails:
-      self.dynamic_fails[result.site].add(result.exit_node)
-    else:
-      self.dynamic_fails[result.site] = sets.Set([result.exit_node])
-
-    err_cnt = len(self.dynamic_fails[result.site])
-
-    plog("ERROR", self.proto+" dynamic fail of "+result.reason+" at "+result.exit_node+". This makes "+str(err_cnt)+"/"+str(self.site_tests(result.site))+" node failures for "+result.site)
-
-
-class SearchBasedTest(Test):
-  def __init__(self, proto, port, wordlist_file):
-    self.wordlist_file = wordlist_file
-    Test.__init__(self, proto, port)
-
-  def rewind(self):
-    self.wordlist = load_wordlist(self.wordlist_file)
-    Test.rewind(self)
-
-  def _is_useable_url(self, url, valid_schemes=None, filetypes=None):
-    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
-    if netloc.rfind(":") != -1:
-      # FIXME: %-encoding?
-      port = netloc[netloc.rfind(":")+1:]
-      try:
-        if int(port) != self.port:
-          plog("DEBUG", "Unusable port "+port+" in "+url)
-          return False
-      except:
-        traceback.print_exc()
-        plog("WARN", "Unparseable port "+port+" in "+url)
-        return False
-    if valid_schemes and scheme not in valid_schemes:
-      plog("DEBUG", "Unusable scheme "+scheme+" in "+url)
-      return False
-    if url in self.banned_targets:
-      plog("DEBUG", "Banned url "+url)
-      return False
-    if filetypes: # Must be checked last
-      for filetype in filetypes:
-        if url[-len(filetype):] == filetype:
-          return True
-      plog("DEBUG", "Bad filetype for "+url)
-      return False
-    return True
-
-  def get_search_urls(self, protocol='any', results_per_type=10, host_only=False, filetypes=['any'], search_mode=default_search_mode):
-    ''' 
-    construct a list of urls based on the wordlist, filetypes and protocol. 
-    '''
-    plog('INFO', 'Searching google for relevant sites...')
-  
-    urllist = Set([])
-    for filetype in filetypes:
-      type_urls = Set([])
-  
-      while len(type_urls) < results_per_type:
-        query = random.choice(self.wordlist)
-        if filetype != 'any':
-          query += " "+search_mode["filetype"]+filetype
-        if protocol != 'any' and search_mode["inurl"]:
-          query += " "+search_mode["inurl"]+protocol # this isn't too reliable, but we'll re-filter results later
-        #query += '&num=' + `g_results_per_page` 
-  
-        # search google for relevant pages
-        # note: google only accepts requests from identified browsers
-        host = search_mode["host"]
-        params = urllib.urlencode({search_mode["query"] : query})
-        search_path = '/search' + '?' + params
-        search_url = "http://"+host+search_path
-         
-        plog("INFO", "Search url: "+search_url)
-        try:
-          if search_mode["useragent"]:
-            (code, resp_headers, new_cookies, mime_type, content) = http_request(search_url, search_cookies)
-          else:
-            headers = filter(lambda h: h[0] != "User-Agent", 
-                             copy.copy(firefox_headers))
-            (code, resp_headers, new_cookies, mime_type, content) = http_request(search_url, search_cookies, headers)
-        except socket.gaierror:
-          plog('ERROR', 'Scraping of http://'+host+search_path+" failed")
-          traceback.print_exc()
-          return list(urllist)
-        except:
-          plog('ERROR', 'Scraping of http://'+host+search_path+" failed")
-          traceback.print_exc()
-          # Bloody hack just to run some tests overnight
-          return [protocol+"://www.eff.org", protocol+"://www.fastmail.fm", protocol+"://www.torproject.org", protocol+"://secure.wikileaks.org/"]
-  
-        links = SoupStrainer('a')
-        try:
-          soup = TheChosenSoup(content, parseOnlyThese=links)
-        except Exception:
-          plog('ERROR', 'Soup-scraping of http://'+host+search_path+" failed")
-          traceback.print_exc()
-          print "Content is: "+str(content)
-          return [protocol+"://www.eff.org", protocol+"://www.fastmail.fm", protocol+"://www.torproject.org", protocol+"://secure.wikileaks.org/"] 
-        # get the links and do some additional filtering
-        for link in soup.findAll('a'):
-          skip = True
-          for a in link.attrs:
-            if a[0] == "class" and search_mode["class"] in a[1]:
-              skip = False
-              break
-          if skip: continue
-          if link.has_key(search_mode['realtgt']):
-            url = link[search_mode['realtgt']]
-          else:
-            url = link['href']
-          if protocol == 'any': prot_list = None
-          else: prot_list = [protocol]
-          if filetype == 'any': file_list = None
-          else: file_list = filetypes
-
-          if self._is_useable_url(url, prot_list, file_list):
-            if host_only:
-              # FIXME: %-encoding, @'s, etc?
-              host = urlparse.urlparse(url)[1]
-              # Have to check again here after parsing the url: 
-              if host not in self.banned_targets:
-                type_urls.add(host)
-            else:
-              type_urls.add(url)
-          else:
-            pass
-        plog("INFO", "Have "+str(len(type_urls))+"/"+str(results_per_type)+" google urls so far..") 
-
-      # make sure we don't get more urls than needed
-      if len(type_urls) > results_per_type:
-        type_urls = Set(random.sample(type_urls, results_per_type))
-      urllist.union_update(type_urls)
-       
-    return list(urllist)
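
For reference, the call forms the concrete tests below use (the counts are
illustrative; see HTTPTest.get_targets and SSLTest.get_targets for the real calls):

  # Inside a Test subclass, after rewind() has loaded the wordlist:
  doc_urls = self.get_search_urls('http', 10, filetypes=['html', 'pdf'])
  ssl_hosts = self.get_search_urls('https', 10, host_only=True,
                                   search_mode=google_search_mode)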
-
-class HTTPTest(SearchBasedTest):
-  def __init__(self, wordlist, filetypes=scan_filetypes):
-    # FIXME: Handle http urls w/ non-80 ports..
-    SearchBasedTest.__init__(self, "HTTP", 80, wordlist)
-    self.fetch_targets = urls_per_filetype
-    self.httpcode_fails = {}
-    self.scan_filetypes = filetypes
-
-  def _reset(self):
-    SearchBasedTest._reset(self)
-    self.targets = {}
-
-  def rewind(self):
-    SearchBasedTest.rewind(self)
-    self.httpcode_fails = {}
-
-  def check_cookies(self):
-    # FIXME: This test is badly broken..
-    # We probably only want to do this on a per-url basis.. Then
-    # we can do the dynamic compare..
-    return TEST_SUCCESS
-    tor_cookies = "\n"
-    plain_cookies = "\n"
-    # FIXME: do we need to sort these? So far we have worse problems..
-    for cookie in self.tor_cookie_jar:
-      tor_cookies += "\t"+cookie.name+":"+cookie.domain+cookie.path+" discard="+str(cookie.discard)+"\n"
-    for cookie in self.cookie_jar:
-      plain_cookies += "\t"+cookie.name+":"+cookie.domain+cookie.path+" discard="+str(cookie.discard)+"\n"
-    if tor_cookies != plain_cookies:
-      exit_node = metacon.get_exit_node()
-      plog("ERROR", "Cookie mismatch at "+exit_node+":\nTor Cookies:"+tor_cookies+"\nPlain Cookies:\n"+plain_cookies)
-      result = CookieTestResult(self.node_map[exit_node[1:]],
-                          TEST_FAILURE, FAILURE_COOKIEMISMATCH, plain_cookies, 
-                          tor_cookies)
-      if self.rescan_nodes: result.from_rescan = True
-      self.results.append(result)
-      datahandler.saveResult(result)
-      return TEST_FAILURE
-    return TEST_SUCCESS
-
-
-  def run_test(self):
-    # A single test should have a single cookie jar
-    self.tor_cookie_jar = cookielib.MozillaCookieJar()
-    self.cookie_jar = cookielib.MozillaCookieJar()
-    self.headers = copy.copy(firefox_headers)
-   
-    self.tests_run += 1
-
-    n_tests = random.choice(xrange(1,len(self.targets)+1))
-    filetypes = random.sample(self.targets.keys(), n_tests)
-    
-    plog("INFO", "HTTPTest decided to fetch "+str(n_tests)+" urls of types: "+str(filetypes))
-
-    n_success = n_fail = n_inconclusive = 0 
-    for ftype in filetypes:
-      # FIXME: Set referrer to random or none for each of these
-      address = random.choice(self.targets[ftype])
-      result = self.check_http(address)
-      if result == TEST_INCONCLUSIVE: n_inconclusive += 1
-      if result == TEST_FAILURE: n_fail += 1
-      if result == TEST_SUCCESS: n_success += 1
-
-    # Cookie jars contain locks and can't be pickled. Clear them away.
-    self.tor_cookie_jar = None
-    self.cookie_jar = None
-  
-    if n_fail: return TEST_FAILURE
-    elif n_inconclusive > 2*n_success: # > 66% inconclusive -> redo
-      return TEST_INCONCLUSIVE
-    else:
-      return TEST_SUCCESS 
-
-  def _remove_target_addr(self, target):
-    for ftype in self.targets:
-      if target in self.targets[ftype]: self.targets[ftype].remove(target)
-
-  def remove_target(self, address, reason):
-    SearchBasedTest.remove_target(self, address, reason)
-    if address in self.httpcode_fails: del self.httpcode_fails[address]
-
-  def refill_targets(self):
-    for ftype in self.targets:
-      if len(self.targets[ftype]) < self.fetch_targets:
-        plog("NOTICE", self.proto+" scanner short on "+ftype+" targets. Adding more")
-        raw_urls = self.get_search_urls('http', self.fetch_targets, 
-                                        filetypes=[ftype])
-        self.targets[ftype].extend(raw_urls)
-
-    
-  def get_targets(self):
-    raw_urls = self.get_search_urls('http', self.fetch_targets, 
-                                     filetypes=self.scan_filetypes)
-    urls = {} 
-    # Slow, but meh..
-    for ftype in self.scan_filetypes: urls[ftype] = []
-    for url in raw_urls:
-      for ftype in self.scan_filetypes:
-        if url[-len(ftype):] == ftype:
-          urls[ftype].append(url)
-    return urls     
-
-  def remove_false_positives(self):
-    SearchBasedTest.remove_false_positives(self)
-    self._remove_false_positive_type(self.httpcode_fails,
-                                     FALSEPOSITIVE_HTTPERRORS,
-                                     max_httpcode_fail_pct)
-  def site_tests(self, site):
-    tot_cnt = SearchBasedTest.site_tests(self, site) 
-    if site in self.httpcode_fails:
-      tot_cnt += len(self.httpcode_fails[site])
-    return tot_cnt
-    
-  def register_http_failure(self, result): # XXX: Currently deadcode
-    if self.rescan_nodes: result.from_rescan = True
-    self.results.append(result)
-    datahandler.saveResult(result)
-    if result.site in self.httpcode_fails:
-      self.httpcode_fails[result.site].add(result.exit_node)
-    else:
-      self.httpcode_fails[result.site] = sets.Set([result.exit_node])
-    
-    err_cnt = len(self.httpcode_fails[result.site])
-
-    plog("ERROR", self.proto+" http error code fail of "+result.reason+" at "+result.exit_node+". This makes "+str(err_cnt)+"/"+str(self.site_tests(result.site))+" node failures for "+result.site)
-    
-
-  def check_http_nodynamic(self, address, nocontent=False):
-    # TODO: use nocontent to cause us to not load content into memory.
-    # This will require refactoring http_response though.
-    ''' check whether an HTTP connection to a given address is molested '''
-
-    # an address representation acceptable for a filename 
-    address_file = DataHandler.safeFilename(address[7:])
-    content_prefix = http_content_dir+address_file
-    
-    # Keep a copy of the cookie jar before mods for refetch or
-    # to restore on errors that cancel a fetch
-    orig_cookie_jar = cookielib.MozillaCookieJar()
-    for cookie in self.cookie_jar: orig_cookie_jar.set_cookie(cookie)
-    orig_tor_cookie_jar = cookielib.MozillaCookieJar()
-    for cookie in self.tor_cookie_jar: orig_tor_cookie_jar.set_cookie(cookie)
-
-    try:
-      # Load content from disk, md5
-      content_file = open(content_prefix+'.content', 'r')
-      sha1sum = sha.sha()
-      buf = content_file.read(4096)
-      while buf:
-        sha1sum.update(buf)
-        buf = content_file.read(4096)
-      content_file.close()
-      
-      added_cookie_jar = cookielib.MozillaCookieJar()
-      added_cookie_jar.load(content_prefix+'.cookies', ignore_discard=True)
-      self.cookie_jar.load(content_prefix+'.cookies', ignore_discard=True)
-
-      headerdiffer = SnakePickler.load(content_prefix+'.headerdiff')
-
-      content = None
-      mime_type = None 
-
-    except IOError:
-      (code, resp_headers, new_cookies, mime_type, content) = http_request(address, self.cookie_jar, self.headers)
-
-      if code - (code % 100) != 200:
-        plog("NOTICE", "Non-tor HTTP error "+str(code)+" fetching content for "+address)
-        # Just remove it
-        self.remove_target(address, FALSEPOSITIVE_HTTPERRORS)
-        # Restore cookie jars
-        self.cookie_jar = orig_cookie_jar
-        self.tor_cookie_jar = orig_tor_cookie_jar
-        return TEST_INCONCLUSIVE
-
-      if not content:
-        plog("WARN", "Failed to direct load "+address)
-        # Just remove it
-        self.remove_target(address, INCONCLUSIVE_NOLOCALCONTENT)
-        # Restore cookie jar
-        self.cookie_jar = orig_cookie_jar
-        self.tor_cookie_jar = orig_tor_cookie_jar
-        return TEST_INCONCLUSIVE 
-      sha1sum = sha.sha(content)
-
-      content_file = open(content_prefix+'.content', 'w')
-      content_file.write(content)
-      content_file.close()
-      
-      headerdiffer = HeaderDiffer(resp_headers)
-      SnakePickler.dump(headerdiffer, content_prefix+'.headerdiff')
-      
-      # Need to do set subtraction and only save new cookies.. 
-      # or extract/make_cookies
-      added_cookie_jar = cookielib.MozillaCookieJar()
-      for cookie in new_cookies: added_cookie_jar.set_cookie(cookie)
-      try:
-        added_cookie_jar.save(content_prefix+'.cookies', ignore_discard=True)
-      except:
-        traceback.print_exc()
-        plog("WARN", "Error saving cookies in "+str(self.cookie_jar)+" to "+content_prefix+".cookies")
-
-    except TypeError, e:
-      plog('ERROR', 'Failed obtaining the shasum for ' + address)
-      plog('ERROR', e)
-      # Restore cookie jars
-      self.cookie_jar = orig_cookie_jar
-      self.tor_cookie_jar = orig_tor_cookie_jar
-      return TEST_INCONCLUSIVE
-
-    defaultsocket = socket.socket
-    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
-    socket.socket = socks.socksocket
-
-    (pcode, presp_headers, pnew_cookies, pmime_type, pcontent) = http_request(address, self.tor_cookie_jar, self.headers)
-    psha1sum = sha.sha(pcontent)
-
-    # reset the connection to direct
-    socket.socket = defaultsocket
-
-    exit_node = metacon.get_exit_node()
-    if exit_node == 0 or exit_node == '0' or not exit_node:
-      plog('NOTICE', 'We had no exit node to test, skipping to the next test.')
-      result = HttpTestResult(None, 
-                              address, TEST_INCONCLUSIVE, INCONCLUSIVE_NOEXIT)
-      if self.rescan_nodes: result.from_rescan = True
-      self.results.append(result)
-      datahandler.saveResult(result)
-
-      # Restore cookie jars
-      self.cookie_jar = orig_cookie_jar
-      self.tor_cookie_jar = orig_tor_cookie_jar
-      return TEST_INCONCLUSIVE
-
-    if pcode - (pcode % 100) != 200:
-      plog("NOTICE", exit_node+" had error "+str(pcode)+" fetching content for "+address)
-      # Restore cookie jars
-      # XXX: This is odd and possibly wrong for the refetch
-      self.cookie_jar = orig_cookie_jar
-      self.tor_cookie_jar = orig_tor_cookie_jar
-      BindingSocket.bind_to = refetch_ip
-      (code_new, resp_headers_new, new_cookies_new, mime_type_new, content_new) = http_request(address, orig_tor_cookie_jar, self.headers)
-      BindingSocket.bind_to = None
-      
-      if code_new == pcode:
-        plog("NOTICE", "Non-tor HTTP error "+str(code_new)+" fetching content for "+address)
-        # Just remove it
-        self.remove_target(address, FALSEPOSITIVE_HTTPERRORS)
-        return TEST_INCONCLUSIVE 
-
-      if pcode < 0 and type(pcode) == float:
-        if pcode == -1: # "General socks error"
-          fail_reason = FAILURE_CONNERROR
-        elif pcode == -2: # "connection not allowed" aka ExitPolicy
-          fail_reason = FAILURE_EXITPOLICY
-        elif pcode == -3: # "Net Unreach" ??
-          fail_reason = FAILURE_NETUNREACH
-        elif pcode == -4: # "Host Unreach" aka RESOLVEFAILED
-          fail_reason = FAILURE_HOSTUNREACH
-          result = HttpTestResult(self.node_map[exit_node[1:]],
-                                 address, TEST_FAILURE, fail_reason)
-          return self.register_dns_failure(result)
-        elif pcode == -5: # Connection refused
-          fail_reason = FAILURE_CONNREFUSED
-          result = HttpTestResult(self.node_map[exit_node[1:]], 
-                              address, TEST_FAILURE, fail_reason)
-          self.register_exit_failure(result)
-          return TEST_FAILURE
-        elif pcode == -6: # timeout
-          fail_reason = FAILURE_TIMEOUT
-          result = HttpTestResult(self.node_map[exit_node[1:]],
-                                 address, TEST_FAILURE, fail_reason)
-          return self.register_timeout_failure(result)
-        elif pcode == -13:
-          fail_reason = FAILURE_NOEXITCONTENT
-          result = HttpTestResult(self.node_map[exit_node[1:]], 
-                              address, TEST_FAILURE, fail_reason)
-          self.register_exit_failure(result)
-          return TEST_FAILURE
-        elif pcode == -23: 
-          fail_reason = FAILURE_URLERROR
-        else:
-          fail_reason = FAILURE_MISCEXCEPTION
-      else: 
-        fail_reason = FAILURE_BADHTTPCODE+str(pcode)
-      result = HttpTestResult(self.node_map[exit_node[1:]], 
-                            address, TEST_FAILURE, fail_reason)
-      result.extra_info = str(pcontent)
-      self.register_connect_failure(result)
-      return TEST_FAILURE
-
-    # if we have no content, we had a connection error
-    if pcontent == "":
-      result = HttpTestResult(self.node_map[exit_node[1:]], 
-                              address, TEST_FAILURE, FAILURE_NOEXITCONTENT)
-      self.register_exit_failure(result)
-      # Restore cookie jars
-      self.cookie_jar = orig_cookie_jar
-      self.tor_cookie_jar = orig_tor_cookie_jar
-      return TEST_FAILURE
-
-    hdiffs = headerdiffer.show_differences(presp_headers)
-    if hdiffs:
-      plog("NOTICE", "Header differences for "+address+": \n"+hdiffs)
-
-    # compare the content
-    # if content matches, everything is ok
-    if not hdiffs and psha1sum.hexdigest() == sha1sum.hexdigest():
-      result = HttpTestResult(self.node_map[exit_node[1:]], 
-                              address, TEST_SUCCESS)
-      self.register_success(result)
-      return TEST_SUCCESS
-
-    # Check for a simple truncation failure, which seems
-    # common with many nodes
-    if not content and not nocontent:
-      load_file = content_prefix+'.content'
-      content_file = open(load_file, 'r')
-      content = content_file.read()
-      content_file.close()
-    
-    if content and len(pcontent) < len(content):
-      if content[0:len(pcontent)] == pcontent[0:len(pcontent)]:
-        failed_prefix = http_failed_dir+address_file
-        exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.content'), 'w')
-        exit_content_file.write(pcontent)
-        exit_content_file.close()
-        result = HttpTestResult(self.node_map[exit_node[1:]], 
-                                address, TEST_FAILURE, FAILURE_EXITTRUNCATION, 
-                                sha1sum.hexdigest(), psha1sum.hexdigest(), 
-                                content_prefix+".content",
-                                exit_content_file.name)
-        self.register_exit_failure(result)
-        # Restore cookie jars
-        self.cookie_jar = orig_cookie_jar
-        self.tor_cookie_jar = orig_tor_cookie_jar
-        return TEST_FAILURE
-
-    # if content doesn't match, update the direct content and use new cookies
-    # If we have alternate IPs to bind to on this box, use them?
-    # Sometimes pages have the client IP encoded in them..
-    # Also, use the Tor cookies, since those identifiers are
-    # probably embedded in the Tor page as well.
-    BindingSocket.bind_to = refetch_ip
-    (code_new, resp_headers_new, new_cookies_new, mime_type_new, content_new) = http_request(address, orig_tor_cookie_jar, self.headers)
-    BindingSocket.bind_to = None
-    
-    if not content_new:
-      plog("WARN", "Failed to re-frech "+address+" outside of Tor. Did our network fail?")
-      result = HttpTestResult(self.node_map[exit_node[1:]], 
-                              address, TEST_INCONCLUSIVE, 
-                              INCONCLUSIVE_NOLOCALCONTENT)
-      if self.rescan_nodes: result.from_rescan = True
-      self.results.append(result)
-      datahandler.saveResult(result)
-      return TEST_INCONCLUSIVE
-
-    headerdiffer.prune_differences(resp_headers_new)
-    hdiffs = headerdiffer.show_differences(presp_headers)
-
-    SnakePickler.dump(headerdiffer, content_prefix+'.headerdiff')
-
-    sha1sum_new = sha.sha(content_new)
-
-    if sha1sum.hexdigest() != sha1sum_new.hexdigest():
-      # if content has changed outside of tor, update the saved file
-      os.rename(content_prefix+'.content', content_prefix+'.content-old')
-      new_content_file = open(content_prefix+'.content', 'w')
-      new_content_file.write(content_new)
-      new_content_file.close()
-
-    # Need to do set subtraction and only save new cookies.. 
-    # or extract/make_cookies
-    
-    self.cookie_jar = orig_cookie_jar
-    new_cookie_jar = cookielib.MozillaCookieJar()
-    for cookie in new_cookies_new: 
-      new_cookie_jar.set_cookie(cookie)
-      self.cookie_jar.set_cookie(cookie) # Update..
-    os.rename(content_prefix+'.cookies', content_prefix+'.cookies-old')
-    try:
-      new_cookie_jar.save(content_prefix+'.cookies', ignore_discard=True)
-    except:
-      traceback.print_exc()
-      plog("WARN", "Error saving cookies in "+str(new_cookie_jar)+" to "+content_prefix+".cookies")
-
-    if hdiffs:
-      # XXX: We probably should store the header differ + exit headers 
-      # for later comparison (ie if the header differ picks up more diffs)
-      plog("NOTICE", "Post-refetch header changes for "+address+": \n"+hdiffs)
-      result = HttpTestResult(self.node_map[exit_node[1:]], 
-                              address, TEST_FAILURE, FAILURE_HEADERCHANGE)
-      result.extra_info = hdiffs
-      self.register_dynamic_failure(result)
-      # Let's let the rest of the tests run too actually
-      #return TEST_FAILURE 
-
-    # compare the node content and the new content
-    # if it matches, everything is ok
-    if psha1sum.hexdigest() == sha1sum_new.hexdigest():
-      result = HttpTestResult(self.node_map[exit_node[1:]], 
-                              address, TEST_SUCCESS)
-      self.register_success(result)
-      return TEST_SUCCESS
- 
-    if not content and not nocontent:
-      if sha1sum.hexdigest() != sha1sum_new.hexdigest():
-        load_file = content_prefix+'.content-old'
-      else:
-        load_file = content_prefix+'.content'
-      content_file = open(load_file, 'r')
-      content = content_file.read()
-      content_file.close()
-    
-    if not ((mime_type == mime_type_new or not mime_type) \
-               and mime_type_new == pmime_type):
-      if not mime_type: mime_type = "text/disk"
-      plog("WARN", "Mime type change: 1st: "+mime_type+", 2nd: "+mime_type_new+", Tor: "+pmime_type)
-      # TODO: If this actually happens, store a result.
-
-    # Dirty dirty dirty...
-    return (mime_type_new, pcontent, psha1sum, content, sha1sum, content_new, 
-            sha1sum_new, exit_node)
-
-  def check_http(self, address):
-    plog('INFO', 'Conducting an http test with destination ' + address)
-    ret = self.check_http_nodynamic(address)
-    if type(ret) == int:
-      return ret
-    return self._check_http_worker(address, ret) 
-
-  def _check_http_worker(self, address, http_ret):
-    (mime_type,pcontent,psha1sum,content,sha1sum,content_new,sha1sum_new,exit_node) = http_ret
-     
-    address_file = DataHandler.safeFilename(address[7:])
-    content_prefix = http_content_dir+address_file
-    failed_prefix = http_failed_dir+address_file
-
-    # compare the new and old content
-    # if they match, means the node has been changing the content
-    if sha1sum.hexdigest() == sha1sum_new.hexdigest():
-      exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.content'), 'w')
-      exit_content_file.write(pcontent)
-      exit_content_file.close()
-
-      result = HttpTestResult(self.node_map[exit_node[1:]],
-                              address, TEST_FAILURE, FAILURE_EXITONLY, 
-                              sha1sum.hexdigest(), psha1sum.hexdigest(), 
-                              content_prefix+".content", exit_content_file.name)
-      self.register_exit_failure(result)
-      return TEST_FAILURE
-
-    exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.dyn-content'),'w')
-    exit_content_file.write(pcontent)
-    exit_content_file.close()
-
-    result = HttpTestResult(self.node_map[exit_node[1:]], 
-                            address, TEST_FAILURE, FAILURE_DYNAMIC, 
-                            sha1sum_new.hexdigest(), psha1sum.hexdigest(), 
-                            content_prefix+".content", exit_content_file.name, 
-                            content_prefix+'.content-old',
-                            sha1sum.hexdigest())
-    if self.rescan_nodes: result.from_rescan = True
-    self.results.append(result)
-    datahandler.saveResult(result)
-
-    # The HTTP Test should remove address immediately...
-    plog("WARN", "HTTP Test is removing dynamic URL "+address)
-    self.remove_target(address, FALSEPOSITIVE_DYNAMIC)
-    return TEST_FAILURE
-
-class HTMLTest(HTTPTest):
-  def __init__(self, wordlist, recurse_filetypes=scan_filetypes):
-    HTTPTest.__init__(self, wordlist, recurse_filetypes)
-    self.fetch_targets = num_html_urls
-    self.proto = "HTML"
-    self.recurse_filetypes = recurse_filetypes
-    self.fetch_queue = []
-   
-  def _reset(self):
-    HTTPTest._reset(self)
-    self.targets = [] # FIXME: Lame..
-    self.soupdiffer_files = {} # XXX: These two are now deprecated
-    self.jsdiffer_files = {}
- 
-  def depickle_upgrade(self):
-    if self._pickle_revision < 2:
-      self.soupdiffer_files = {}
-      self.jsdiffer_files = {}
-    SearchBasedTest.depickle_upgrade(self)
-
-  def run_test(self):
-    # A single test should have a single cookie jar
-    self.tor_cookie_jar = cookielib.MozillaCookieJar()
-    self.cookie_jar = cookielib.MozillaCookieJar()
-    self.headers = copy.copy(firefox_headers)
-
-    use_referers = False
-    first_referer = None    
-    if random.randint(1,100) < referer_chance_pct:
-      use_referers = True
-      # FIXME: Hrmm.. May want to do this a bit better..
-      first_referer = random.choice(self.targets)
-      plog("INFO", "Chose random referer "+first_referer)
-    
-    self.tests_run += 1
-    address = random.choice(self.targets)
-    
-    # Keep a trail log for this test and check for loops
-    fetched = sets.Set([])
-
-    self.fetch_queue.append(("html", address, first_referer))
-    n_success = n_fail = n_inconclusive = 0 
-    while self.fetch_queue:
-      (test, url, referer) = self.fetch_queue.pop(0)
-      if url in fetched:
-        plog("INFO", "Already fetched "+url+", skipping")
-        continue
-      fetched.add(url)
-      if use_referers and referer: 
-        self.headers.append(('Referer', referer))
-      # Technically both html and js tests check and dispatch via mime types
-      # but I want to know when link tags lie
-      if test == "html" or test == "http": result = self.check_html(url)
-      elif test == "js": result = self.check_js(url)
-      elif test == "image":
-        accept_hdr = filter(lambda h: h[0] == "Accept", self.headers)[0]
-        orig_accept = accept_hdr[1]
-        accept_hdr[1] = image_accept_hdr
-        result = self.check_http(url)
-        accept_hdr[1] = orig_accept
-      else: 
-        plog("WARN", "Unknown test type: "+test+" for "+url)
-        result = TEST_SUCCESS
-      if result == TEST_INCONCLUSIVE: n_inconclusive += 1
-      if result == TEST_FAILURE: n_fail += 1
-      if result == TEST_SUCCESS: n_success += 1
-
-    # Need to clear because the cookiejars use locks...
-    self.tor_cookie_jar = None
-    self.cookie_jar = None
-
-    if n_fail: return TEST_FAILURE
-    elif 2*n_inconclusive > n_success: # > 33% inconclusive -> redo
-      return TEST_INCONCLUSIVE
-    else:
-      return TEST_SUCCESS 
-
-  # FIXME: This is pretty lame.. We should change how
-  # the HTTPTest stores URLs so we don't have to do this.
-  def _remove_target_addr(self, target):
-    Test._remove_target_addr(self, target)
-    if target in self.soupdiffer_files: del self.soupdiffer_files[target]
-    if target in self.jsdiffer_files: del self.jsdiffer_files[target]
-
-  def refill_targets(self):
-    Test.refill_targets(self)
-
-  def get_targets(self):
-    return self.get_search_urls('http', self.fetch_targets) 
-
-  def _add_recursive_targets(self, soup, orig_addr):
-    # Only pull at most one filetype from the list of 'a' links
-    targets = []
-    got_type = {}
-    found_favicon = False
-    # Hrmm, if we recursively strained only these tags, this might be faster
-    for tag in tags_to_recurse:
-      tags = soup.findAll(tag)
-      for t in tags:
-        #plog("DEBUG", "Got tag: "+str(t))
-        for a in t.attrs:
-          attr_name = a[0]
-          attr_tgt = a[1]
-          if attr_name in attrs_to_recurse:
-            if t.name in recurse_html:
-              targets.append(("html", urlparse.urljoin(orig_addr, attr_tgt)))
-            elif t.name in recurse_script:
-              if t.name == "link":
-                for a in t.attrs:
-                  a = map(lambda x: x.lower(), a)
-                  # Special case CSS and favicons
-                  if (a[0] == "type" and a[1] == "text/css") or \
-                   ((a[0] == "rel" or a[0] == "rev") and a[1] == "stylesheet"):
-                    plog("INFO", "Adding CSS of: "+str(t))
-                    targets.append(("http", urlparse.urljoin(orig_addr, attr_tgt)))
-                  elif (a[0] == "rel" or a[0] == "rev") and \
-                       ("shortcut" in a[1] or "icon" in a[1]):
-                    plog("INFO", "Adding favicon of: "+str(t))
-                    found_favicon = True
-                    targets.append(("image", urlparse.urljoin(orig_addr, attr_tgt)))
-                  elif a[0] == "type" and self.is_script(a[1], ""):
-                    plog("INFO", "Adding link script of: "+str(t))
-                    targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
-              else:
-                plog("INFO", "Adding script tag of: "+str(t))
-                targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
-            elif t.name in recurse_image:
-              plog("INFO", "Adding image tag of: "+str(t))
-              targets.append(("image", urlparse.urljoin(orig_addr, attr_tgt)))
-            elif t.name == 'a':
-              if attr_name == "href":
-                for f in self.recurse_filetypes:
-                  if f not in got_type and attr_tgt[-len(f):] == f:
-                    got_type[f] = 1
-                    targets.append(("http", urlparse.urljoin(orig_addr, attr_tgt)))
-            else:
-              targets.append(("http", urlparse.urljoin(orig_addr, attr_tgt)))
-    
-    if not found_favicon:
-      targets.insert(0, ("image", urlparse.urljoin(orig_addr, "/favicon.ico")))
-
-    loaded = sets.Set([])
-
-    for i in targets:
-      if i[1] in loaded: continue
-      loaded.add(i[1])
-      if self._is_useable_url(i[1], html_schemes):
-        plog("NOTICE", "Adding "+i[0]+" target: "+i[1])
-        self.fetch_queue.append((i[0], i[1], orig_addr))
-      else:
-        plog("NOTICE", "Skipping "+i[0]+" target: "+i[1])
-
-  def check_js(self, address):
-    plog('INFO', 'Conducting a js test with destination ' + address)
-
-    accept_hdr = filter(lambda h: h[0] == "Accept", self.headers)[0]
-    orig_accept = accept_hdr[1]
-    accept_hdr[1] = script_accept_hdr
-    ret = self.check_http_nodynamic(address)
-    accept_hdr[1] = orig_accept
-
-    if type(ret) == int:
-      return ret
-    return self._check_js_worker(address, ret)
-
-  def is_html(self, mime_type, content):
-    is_html = False
-    for type_match in html_mime_types:
-      if re.match(type_match, mime_type.lower()): 
-        is_html = True
-        break
-    return is_html
- 
-  def is_script(self, mime_type, content):
-    is_script = False
-    for type_match in script_mime_types:
-      if re.match(type_match, mime_type.lower()): 
-        is_script = True
-        break
-    return is_script
-
-  def _check_js_worker(self, address, http_ret):
-    (mime_type, tor_js, tsha, orig_js, osha, new_js, nsha, exit_node) = http_ret
-
-    if not self.is_script(mime_type, orig_js):
-      plog("WARN", "Non-script mime type "+mime_type+" fed to JS test for "+address)
-     
-      if self.is_html(mime_type, orig_js):
-        return self._check_html_worker(address, http_ret)
-      else:
-        return self._check_http_worker(address, http_ret)
-
-    address_file = DataHandler.safeFilename(address[7:])
-    content_prefix = http_content_dir+address_file
-    failed_prefix = http_failed_dir+address_file
-
-    if os.path.exists(content_prefix+".jsdiff"):
-      plog("DEBUG", "Loading jsdiff for "+address)
-      jsdiff = SnakePickler.load(content_prefix+".jsdiff")
-    else:
-      plog("DEBUG", "No jsdiff for "+address+". Creating+dumping")
-      jsdiff = JSDiffer(orig_js)
-    
-    jsdiff.prune_differences(new_js)
-    SnakePickler.dump(jsdiff, content_prefix+".jsdiff")
-
-    has_js_changes = jsdiff.contains_differences(tor_js)
-
-    if not has_js_changes:
-      result = JsTestResult(self.node_map[exit_node[1:]], 
-                            address, TEST_SUCCESS)
-      self.register_success(result)
-      return TEST_SUCCESS
-    else:
-      exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.dyn-content'), 'w')
-      exit_content_file.write(tor_js)
-      exit_content_file.close()
-
-      result = JsTestResult(self.node_map[exit_node[1:]], 
-                             address, TEST_FAILURE, FAILURE_DYNAMIC, 
-                             content_prefix+".content", exit_content_file.name, 
-                             content_prefix+'.content-old',
-                             content_prefix+".jsdiff")
-      self.register_dynamic_failure(result)
-      return TEST_FAILURE
-
-  def check_html(self, address):
-    plog('INFO', 'Conducting an html test with destination ' + address)
-    ret = self.check_http_nodynamic(address)
-    
-    if type(ret) == int:
-      return ret
-
-    return self._check_html_worker(address, ret)
-
-  def _check_html_worker(self, address, http_ret):
-    (mime_type,tor_html,tsha,orig_html,osha,new_html,nsha,exit_node)=http_ret
-
-    if not self.is_html(mime_type, orig_html):
-      # XXX: Keep an eye on this logline.
-      plog("WARN", "Non-html mime type "+mime_type+" fed to HTML test for "+address)
-      if self.is_script(mime_type, orig_html):
-        return self._check_js_worker(address, http_ret)
-      else:
-        return self._check_http_worker(address, http_ret)
-
-    # an address representation acceptable for a filename 
-    address_file = DataHandler.safeFilename(address[7:])
-    content_prefix = http_content_dir+address_file
-    failed_prefix = http_failed_dir+address_file
-
-    orig_soup = FullyStrainedSoup(orig_html.decode('ascii', 'ignore'))
-    tor_soup = FullyStrainedSoup(tor_html.decode('ascii', 'ignore'))
-
-    # Also find recursive urls
-    recurse_elements = SoupStrainer(lambda name, attrs: 
-        name in tags_to_recurse and 
-       len(Set(map(lambda a: a[0], attrs)).intersection(Set(attrs_to_recurse))) > 0)
-    self._add_recursive_targets(TheChosenSoup(tor_html.decode('ascii',
-                                   'ignore'), recurse_elements), address) 
-
-    # compare the content
-    # if content matches, everything is ok
-    if str(orig_soup) == str(tor_soup):
-      plog("INFO", "Successful soup comparison after SHA1 fail for "+address+" via "+exit_node)
-      result = HtmlTestResult(self.node_map[exit_node[1:]], 
-                              address, TEST_SUCCESS)
-      self.register_success(result)
-
-      return TEST_SUCCESS
-
-    content_new = new_html.decode('ascii', 'ignore')
-    if not content_new:
-      plog("WARN", "Failed to re-frech "+address+" outside of Tor. Did our network fail?")
-      result = HtmlTestResult(self.node_map[exit_node[1:]], 
-                              address, TEST_INCONCLUSIVE, 
-                              INCONCLUSIVE_NOLOCALCONTENT)
-      if self.rescan_nodes: result.from_rescan = True
-      self.results.append(result)
-      datahandler.saveResult(result)
-      return TEST_INCONCLUSIVE
-
-    new_soup = FullyStrainedSoup(content_new)
-
-    # compare the new and old content
-    # if they match, means the node has been changing the content
-    if str(orig_soup) == str(new_soup):
-      exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.content'), 'w')
-      exit_content_file.write(tor_html)
-      exit_content_file.close()
-
-      result = HtmlTestResult(self.node_map[exit_node[1:]], 
-                              address, TEST_FAILURE, FAILURE_EXITONLY, 
-                              content_prefix+".content", exit_content_file.name)
-      self.register_exit_failure(result)
-      return TEST_FAILURE
-
-    # Let's try getting just the tag differences
-    # 1. Take difference between old and new tags both ways
-    # 2. Make map of tags that change to their attributes
-    # 3. Compare list of changed tags for tor vs new and
-    #    see if any extra tags changed or if new attributes
-    #    were added to additional tags
-    if os.path.exists(content_prefix+".soupdiff"):
-      plog("DEBUG", "Loading soupdiff for "+address)
-      soupdiff = SnakePickler.load(content_prefix+".soupdiff")
-      soupdiff.prune_differences(new_soup)
-    else:
-      plog("DEBUG", "No soupdiff for "+address+". Creating+dumping")
-      soupdiff = SoupDiffer(orig_soup, new_soup)
-
-    SnakePickler.dump(soupdiff, content_prefix+".soupdiff")
-    
-    more_tags = soupdiff.show_changed_tags(tor_soup)     
-    more_attrs = soupdiff.show_changed_attrs(tor_soup)
-    more_content = soupdiff.show_changed_content(tor_soup)
-
-    # Verify all of our changed tags are present here 
-    if more_tags or more_attrs or (more_content and not soupdiff.content_changed):
-      false_positive = False
-      plog("NOTICE", "SoupDiffer finds differences for "+address)
-      plog("NOTICE", "New Tags:\n"+more_tags)
-      plog("NOTICE", "New Attrs:\n"+more_attrs)
-      if more_content and not soupdiff.content_changed:
-        plog("NOTICE", "New Content:\n"+more_content)
-    else:
-      plog("INFO", "SoupDiffer predicts false_positive")
-      false_positive = True
-
-    if false_positive:
-      if os.path.exists(content_prefix+".jsdiff"):
-        plog("DEBUG", "Loading jsdiff for "+address)
-        jsdiff = SnakePickler.load(content_prefix+".jsdiff")
-      else:
-        plog("DEBUG", "No jsdiff for "+address+". Creating+dumping")
-        jsdiff = JSSoupDiffer(orig_soup)
-      
-      jsdiff.prune_differences(new_soup)
-      SnakePickler.dump(jsdiff, content_prefix+".jsdiff")
-
-      differences = jsdiff.show_differences(tor_soup)
-      false_positive = not differences
-      plog("INFO", "JSSoupDiffer predicts false_positive="+str(false_positive))
-      if not false_positive:
-        plog("NOTICE", "JSSoupDiffer finds differences: "+differences)
-
-    if false_positive:
-      plog("NOTICE", "False positive detected for dynamic change at "+address+" via "+exit_node)
-      result = HtmlTestResult(self.node_map[exit_node[1:]], 
-                              address, TEST_SUCCESS)
-      self.register_success(result)
-      return TEST_SUCCESS
-
-    exit_content_file = open(DataHandler.uniqueFilename(failed_prefix+'.'+exit_node[1:]+'.dyn-content'),'w')
-    exit_content_file.write(tor_html)
-    exit_content_file.close()
- 
-    if os.path.exists(content_prefix+".jsdiff"):
-      jsdiff_file = content_prefix+".jsdiff"
-    else: jsdiff_file = None
-    if os.path.exists(content_prefix+".soupdiff"):
-      soupdiff_file = content_prefix+".soupdiff"
-    else: soupdiff_file = None
-
-    result = HtmlTestResult(self.node_map[exit_node[1:]], 
-                            address, TEST_FAILURE, FAILURE_DYNAMIC, 
-                            content_prefix+".content", exit_content_file.name, 
-                            content_prefix+'.content-old',
-                            soupdiff_file, jsdiff_file)
-    self.register_dynamic_failure(result)
-    return TEST_FAILURE
-    
-
-class SSLTest(SearchBasedTest):
-  def __init__(self, wordlist):
-    self.test_hosts = num_ssl_hosts
-    SearchBasedTest.__init__(self, "SSL", 443, wordlist)
-
-  def run_test(self):
-    self.tests_run += 1
-    return self.check_openssl(random.choice(self.targets))
-
-  def get_targets(self):
-    return self.get_search_urls('https', self.test_hosts, True, search_mode=google_search_mode) 
-
-  def ssl_request(self, address):
-    ''' initiate an ssl connection and return the server certificate '''
-    address=str(address) # Unicode hostnames not supported..
-     
-    # specify the context
-    ctx = SSL.Context(SSL.TLSv1_METHOD)
-    ctx.set_timeout(int(read_timeout))
-    ctx.set_verify_depth(1)
-
-    # ready the certificate request
-    request = crypto.X509Req()
-
-    # open an ssl connection
-    # FIXME: Hrmmm. handshake considerations
-    try:
-      s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-      # SSL has its own timeouts handled above. Undo ours from BindingSocket
-      s.settimeout(None) 
-      c = SSL.Connection(ctx, s)
-      c.set_connect_state()
-      c.connect((address, 443)) # DNS OK.
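-      # Writing some application data is what drives the TLS handshake to
-      # completion here, so get_peer_certificate() below has a cert to return.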
-      c.send(crypto.dump_certificate_request(crypto.FILETYPE_PEM,request))
-      # return the cert
-      return (0, c.get_peer_certificate(), None)
-    except socket.timeout, e:
-      plog('WARN','Socket timeout for '+address+": "+str(e))
-      return (-6.0, None,  e.__class__.__name__+str(e))
-    except socket.error, e:
-      plog('WARN','An error occurred while opening an SSL connection to '+address+": "+str(e))
-      return (-666.0, None,  e.__class__.__name__+str(e))
-    except socks.Socks5Error, e:
-      plog('WARN', 'A SOCKS5 error '+str(e.value[0])+' occurred for '+address+": "+str(e))
-      return (-float(e.value[0]), None,  e.__class__.__name__+str(e))
-    except KeyboardInterrupt:
-      raise KeyboardInterrupt
-    except OpenSSL.crypto.Error, e:
-      traceback.print_exc()
-      return (-23.0, None, e.__class__.__name__+str(e)) 
-    except Exception, e:
-      plog('WARN', 'An unknown SSL error occurred for '+address+': '+str(e))
-      traceback.print_exc()
-      return (-666.0, None,  e.__class__.__name__+str(e))
-
-  def get_resolved_ip(self, hostname):
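-    # Look up Tor's cached address mappings to find which IP this hostname
-    # was resolved to over the Tor circuit.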
-    mappings = metacon.control.get_address_mappings("cache")
-    ret = None
-    for m in mappings:
-      if m.from_addr == hostname:
-        if ret:
-          plog("WARN", "Multiple maps for "+hostname)
-        ret = m.to_addr
-    return ret
-
-  def _update_cert_list(self, ssl_domain, check_ips):
-    changed = False
-    for ip in check_ips:
-      #let's always check.
-      #if not ssl_domain.seen_ip(ip):
-      plog('INFO', 'SSL connection to new IP '+ip+" for "+ssl_domain.domain)
-      (code, raw_cert, exc) = self.ssl_request(ip)
-      if not raw_cert:
-        plog('WARN', 'Error getting the correct cert for '+ssl_domain.domain+":"+ip+" "+str(code)+"("+str(exc)+")")
-        continue
-      try:
-        ssl_domain.add_cert(ip,
-             crypto.dump_certificate(crypto.FILETYPE_PEM, raw_cert))
-        changed = True # Always save new copy.
-      except Exception, e:
-        traceback.print_exc()
-        plog('WARN', 'Error dumping cert for '+ssl_domain.domain+":"+ip+" E:"+str(e))
-    return changed
-
-  def check_openssl(self, address):
-    ''' check whether an https connection to a given address is molested '''
-    plog('INFO', 'Conducting an ssl test with destination ' + address)
-
-    # an address representation acceptable for a filename 
-    address_file = DataHandler.safeFilename(address[8:])
-    ssl_file_name = ssl_certs_dir + address_file + '.ssl'
-
-    # load the original cert and compare
-    # if we don't have the original cert yet, get it
-    try:
-      ssl_domain = SnakePickler.load(ssl_file_name)
-    except IOError:
-      ssl_domain = SSLDomain(address)
-
-    check_ips = []
-    # Make 3 resolution attempts
-    for attempt in xrange(1,4):
-      try:
-        resolved = []
-        resolved = socket.getaddrinfo(address, 443)
-        break
-      except socket.gaierror:
-        plog("NOTICE", "Local resolution failure #"+str(attempt)+" for "+address)
-       
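-    # keep only the IPv4/TCP answers so we can fetch reference certs from
-    # each address directly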
-    for res in resolved:
-      if res[0] == socket.AF_INET and res[2] == socket.IPPROTO_TCP:
-        check_ips.append(res[4][0])
-
-    if not check_ips:
-      plog("WARN", "Local resolution failure for "+address)
-      self.remove_target(address, INCONCLUSIVE_NOLOCALCONTENT)
-      return TEST_INCONCLUSIVE
-
-    if self._update_cert_list(ssl_domain, check_ips):
-      SnakePickler.dump(ssl_domain, ssl_file_name)
-
-    if not ssl_domain.cert_map:
-      plog('WARN', 'Error getting the correct cert for ' + address)
-      self.remove_target(address, INCONCLUSIVE_NOLOCALCONTENT)
-      return TEST_INCONCLUSIVE
-
-    if ssl_domain.cert_changed:
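-      # The stored certs no longer all match what the site serves us directly;
-      # rebuild the cert list from scratch and see whether it stabilizes.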
-      ssl_domain = SSLDomain(address)
-      plog('INFO', 'Fetching all new certs for '+address)
-      if self._update_cert_list(ssl_domain, check_ips):
-        SnakePickler.dump(ssl_domain, ssl_file_name)
-      if ssl_domain.cert_changed:
-        plog("NOTICE", "Fully dynamic certificate host "+address)
-
-        result = SSLTestResult("NoExit", "NotStored!", address, ssl_file_name, 
-                               TEST_INCONCLUSIVE,
-                               INCONCLUSIVE_DYNAMICSSL)
-        if self.rescan_nodes: result.from_rescan = True
-        datahandler.saveResult(result)
-        self.results.append(result)
-        self.remove_target(address, FALSEPOSITIVE_DYNAMIC)
-        return TEST_INCONCLUSIVE
-
-    if not ssl_domain.num_certs():
-        plog("NOTICE", "No non-tor certs available for "+address)
-        result = SSLTestResult("NoExit", "NoStored!", address, ssl_file_name, 
-                               TEST_INCONCLUSIVE,
-                               INCONCLUSIVE_NOLOCALCONTENT)
-        if self.rescan_nodes: result.from_rescan = True
-        datahandler.saveResult(result)
-        self.results.append(result)
-        self.remove_target(address, FALSEPOSITIVE_DEADSITE)
-        return TEST_INCONCLUSIVE
-
-    # get the cert via tor
-    defaultsocket = socket.socket
-    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
-    socket.socket = socks.socksocket
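-    # socket.socket is now monkey-patched, so the ssl_request() below goes
-    # through Tor's SOCKS port; the direct socket is restored right after.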
-
-    (code, cert, exc) = self.ssl_request(address)
-
-    # reset the connection method back to direct
-    socket.socket = defaultsocket
-
-    exit_node = metacon.get_exit_node()
-    if not exit_node or exit_node == '0':
-      plog('NOTICE', 'We had no exit node to test, skipping to the next test.')
-      result = SSLTestResult(None, 
-                              address, ssl_file_name, TEST_INCONCLUSIVE,
-                              INCONCLUSIVE_NOEXIT)
-      if self.rescan_nodes: result.from_rescan = True
-      self.results.append(result)
-      datahandler.saveResult(result)
-      return TEST_INCONCLUSIVE
-
-    if not cert:
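-      # Negative float codes are the SOCKS5 reply codes (and local sentinel
-      # values) returned by ssl_request() above; map them to failure reasons.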
-      if code < 0 and type(code) == float:
-        if code == -1: # "General socks error"
-          fail_reason = FAILURE_CONNERROR
-        elif code == -2: # "connection not allowed" aka ExitPolicy
-          fail_reason = FAILURE_EXITPOLICY
-        elif code == -3: # "Net Unreach" ??
-          fail_reason = FAILURE_NETUNREACH
-        elif code == -4: # "Host Unreach" aka RESOLVEFAILED
-          fail_reason = FAILURE_HOSTUNREACH
-          result = SSLTestResult(self.node_map[exit_node[1:]], address,
-                                ssl_file_name, TEST_FAILURE, fail_reason)
-          return self.register_dns_failure(result)
-        elif code == -5: # Connection refused
-          fail_reason = FAILURE_CONNREFUSED
-          result = SSLTestResult(self.node_map[exit_node[1:]],
-                       address, ssl_file_name, TEST_FAILURE, fail_reason)
-          self.extra_info=exc
-          self.register_exit_failure(result)
-          return TEST_FAILURE
-        elif code == -6: # timeout
-          fail_reason = FAILURE_TIMEOUT
-          result = SSLTestResult(self.node_map[exit_node[1:]], address,
-                                ssl_file_name, TEST_FAILURE, fail_reason)
-          return self.register_timeout_failure(result)
-        elif code == -13:
-          fail_reason = FAILURE_NOEXITCONTENT # shouldn't happen here
-          result = SSLTestResult(self.node_map[exit_node[1:]],
-                       address, ssl_file_name, TEST_FAILURE, fail_reason)
-          self.extra_info=exc
-          self.register_exit_failure(result)
-          return TEST_FAILURE
-        elif code == -23: 
-          fail_reason = FAILURE_CRYPTOERROR
-          result = SSLTestResult(self.node_map[exit_node[1:]],
-                       address, ssl_file_name, TEST_FAILURE, fail_reason)
-          self.extra_info=exc
-          self.register_exit_failure(result)
-          return TEST_FAILURE
-        else:
-          fail_reason = FAILURE_MISCEXCEPTION
-      else:
-          fail_reason = FAILURE_MISCEXCEPTION
-
-      result = SSLTestResult(self.node_map[exit_node[1:]], 
-                             address, ssl_file_name, TEST_FAILURE, fail_reason) 
-      result.extra_info = exc
-      self.register_connect_failure(result)
-      return TEST_FAILURE
-
-    try:
-      # get an easily comparable representation of the certs
-      cert_pem = crypto.dump_certificate(crypto.FILETYPE_PEM, cert)
-    except OpenSSL.crypto.Error, e:
-      result = SSLTestResult(self.node_map[exit_node[1:]],
-                   address, ssl_file_name, TEST_FAILURE, FAILURE_CRYPTOERROR)
-      self.extra_info=e.__class__.__name__+str(e)
-      self.register_exit_failure(result)
-      return TEST_FAILURE
-
-    # if certs match, everything is ok
-    if ssl_domain.seen_cert(cert_pem):
-      result = SSLTestResult(self.node_map[exit_node[1:]], 
-                             address, ssl_file_name, TEST_SUCCESS)
-      self.register_success(result)
-      return TEST_SUCCESS
-
-    # False positive case.. Can't help it if the cert rotates AND we have a
-    # failure... Need to prune all results for this cert and give up.
-    if ssl_domain.cert_rotates:
-      result = SSLTestResult(self.node_map[exit_node[1:]], 
-                             address, ssl_file_name, TEST_FAILURE, 
-                             FAILURE_DYNAMIC, self.get_resolved_ip(address), 
-                             cert_pem)
-      self.register_dynamic_failure(result)
-      return TEST_FAILURE
-
-    # if certs dont match, means the exit node has been messing with the cert
-    result = SSLTestResult(self.node_map[exit_node[1:]], 
-                           address, ssl_file_name, TEST_FAILURE,
-                           FAILURE_EXITONLY, self.get_resolved_ip(address), 
-                           cert_pem)
-    self.register_exit_failure(result)
-    return TEST_FAILURE
-
-class POP3STest(Test):
-  def __init__(self):
-    Test.__init__(self, "POP3S", 110)
-
-  def run_test(self):
-    self.tests_run += 1
-    return self.check_pop(random.choice(self.targets))
-
-  def get_targets(self):
-    return [] 
-
-  def check_pop(self, address, port=''):
-    ''' 
-    check whether a pop + tls connection to a given address is molested 
-    it is implied that the server reads/sends messages compliant with RFC1939 & RFC2449
-    '''
-
-    plog('INFO', 'Conducting a pop test with destination ' + address)
-
-    if not port:
-      port = 110
-
-    defaultsocket = socket.socket
-    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
-    socket.socket = socks.socksocket
-
-    capabilities_ok = False
-    starttls_present = False
-    tls_started = None
-    tls_succeeded = None
-
-    try:
-      pop = Client(address, port)
-    
-      # read the server greeting
-      server_greeting = pop.readline()
-
-      # get the server capabilities
-      pop.writeline('CAPA')
-      capabilities = ''
-      while 1:
-        curr = pop.readline()
-        if '+OK' in curr:
-          capabilities_ok = True
-        elif curr == '.':
-          break
-        elif 'STLS' in curr:
-          starttls_present = True
-      
-      if not capabilities_ok:
-        return TEST_INCONCLUSIVE
-
-      # try to start tls negotiation
-      if starttls_present:
-        pop.writeline('STLS')
-
-      starttls_response = pop.readline()
-      tls_started = '+OK' in starttls_response
-
-      # negotiate TLS and issue some request to feel good about it
-      # TODO check certs? 
-      ctx = SSL.Context(SSL.SSLv23_METHOD)
-      c = SSL.Connection(ctx, pop.sock)
-      c.set_connect_state()
-      c.do_handshake()
-      c.send('CAPA' + linebreak)
-      
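-      # Read the CAPA reply one character at a time over the TLS channel:
-      # '+OK' means TLS works end to end; '-ERR', a '.' or an empty read
-      # mean it does not.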
-      while tls_succeeded == None:
-        line = ''
-        char = None
-        while char != '\n':
-          char = c.read(1)
-          if not char:
-            break
-          elif char == '.':
-            tls_succeeded = False
-          line += char
-
-        if '-ERR' in line:
-          tls_succeeded = False
-        elif '+OK' in line:
-          tls_succeeded = True
-        elif not line:
-          tls_succeeded = False
-
-    except socket.error, e: 
-      plog('WARN', 'Connection to ' + address + ':' + port + ' refused')
-      plog('WARN', e)
-      socket.socket = defaultsocket
-      return TEST_INCONCLUSIVE
-    except OpenSSL.SSL.SysCallError, e:
-      plog('WARN', 'Error while negotiating an SSL connection to ' + address + ':' + port)
-      plog('WARN', e)
-      socket.socket = defaultsocket
-      return TEST_INCONCLUSIVE
-
-    # reset the connection to default
-    socket.socket = defaultsocket
-
-    # check whether the test was valid at all
-    exit_node = metacon.get_exit_node()
-    if exit_node == 0 or exit_node == '0':
-      plog('INFO', 'We had no exit node to test, skipping to the next test.')
-      return TEST_INCONCLUSIVE
-
-    # do the same for the direct connection
-
-    capabilities_ok_d = False
-    starttls_present_d = False
-    tls_started_d = None
-    tls_succeeded_d = None
-
-    try:
-      pop = Client(address, port)
-    
-      # read the server greeting
-      server_greeting = pop.readline()
-
-      # get the server capabilities
-      pop.writeline('CAPA')
-      capabilities = ''
-      while 1:
-        curr = pop.readline()
-        if '+OK' in curr:
-          capabilities_ok_d = True
-        elif curr == '.':
-          break
-        elif 'STLS' in curr:
-          starttls_present_d = True
-      
-      if not capabilities_ok_d:
-        return TEST_INCONCLUSIVE
-
-      # try to start tls negotiation
-      if starttls_present_d:
-        pop.writeline('STLS')
-
-      starttls_response_d = pop.readline()
-      tls_started_d = '+OK' in starttls_response_d
-
-      # negotiate TLS, issue some request to feel good about it
-      ctx = SSL.Context(SSL.SSLv23_METHOD)
-      c = SSL.Connection(ctx, pop.sock)
-      c.set_connect_state()
-      c.do_handshake()
-      c.send('CAPA' + linebreak)
-      
-      while tls_succeeded_d == None:
-        line = ''
-        char = None
-        while char != '\n':
-          char = c.read(1)
-          if not char:
-            break
-          elif char == '.':
-            tls_succeeded_d = False
-          line += char
-
-        if '-ERR' in line:
-          tls_succeeded_d = False
-        elif '+OK' in line:
-          tls_succeeded_d = True
-        elif not line:
-          tls_succeeded_d = False
-
-    except socket.error, e: 
-      plog('WARN', 'Connection to ' + address + ':' + port + ' refused')
-      plog('WARN', e)
-      socket.socket = defaultsocket
-      return TEST_INCONCLUSIVE
-    except OpenSSL.SSL.SysCallError, e:
-      plog('WARN', 'Error while negotiating an SSL connection to ' + address + ':' + port)
-      plog('WARN', e)
-      socket.socket = defaultsocket
-      return TEST_INCONCLUSIVE
-
-    # compare
-    if (capabilities_ok != capabilities_ok_d or starttls_present != starttls_present_d or 
-        tls_started != tls_started_d or tls_succeeded != tls_succeeded_d):
-      result = POPTestResult(self.node_map[exit_node[1:]], address, TEST_FAILURE)
-      datahandler.saveResult(result)
-      return TEST_FAILURE
-    
-    result = POPTestResult(self.node_map[exit_node[1:]], address, TEST_SUCCESS)
-    datahandler.saveResult(result)
-    return TEST_SUCCESS
-
-class SMTPSTest(Test):
-  def __init__(self):
-    Test.__init__(self, "SMTPS", 587)
-
-  def run_test(self):
-    self.tests_run += 1
-    return self.check_smtp(random.choice(self.targets))
-
-  def get_targets(self):
-    return [('smtp.gmail.com','587')]
-
-  def check_smtp(self, address, port=''):
-    ''' 
-    check whether smtp + tls connection to a given address is molested
-    this is done by going through the STARTTLS sequence and comparing server
-    responses for the direct and tor connections
-    '''
-
-    plog('INFO', 'Conducting an smtp test with destination ' + address)
-
-    defaultsocket = socket.socket
-    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
-    socket.socket = socks.socksocket
-
-    ehlo1_reply = 0
-    has_starttls = 0
-    ehlo2_reply = 0
-
-    try:
-      s = smtplib.SMTP(address, port)
-      ehlo1_reply = s.ehlo()[0]
-      if ehlo1_reply != 250:
-        raise smtplib.SMTPException('First ehlo failed')
-      has_starttls = s.has_extn('starttls')
-      if not has_starttls:
-        raise smtplib.SMTPException('It seems the server doesn\'t support starttls')
-      s.starttls()
-      # TODO check certs?
-      ehlo2_reply = s.ehlo()[0]
-      if ehlo2_reply != 250:
-        raise smtplib.SMTPException('Second ehlo failed')
-    except socket.gaierror, e:
-      plog('WARN', 'A connection error occurred while testing smtp at ' + address)
-      plog('WARN', e)
-      socket.socket = defaultsocket
-      return TEST_INCONCLUSIVE
-    except smtplib.SMTPException, e:
-      plog('WARN','An error occurred while testing smtp at ' + address)
-      plog('WARN', e)
-      return TEST_INCONCLUSIVE
-    # reset the connection method back to direct
-    socket.socket = defaultsocket 
-
-    # check whether the test was valid at all
-    exit_node = metacon.get_exit_node()
-    if exit_node == 0 or exit_node == '0':
-      plog('INFO', 'We had no exit node to test, skipping to the next test.')
-      return TEST_INCONCLUSIVE
-
-    # now directly
-
-    ehlo1_reply_d = 0
-    has_starttls_d = 0
-    ehlo2_reply_d = 0
-
-    try:
-      s = smtplib.SMTP(address, port)
-      ehlo1_reply_d = s.ehlo()[0]
-      if ehlo1_reply_d != 250:
-        raise smtplib.SMTPException('First ehlo failed')
-      has_starttls_d = s.has_extn('starttls')
-      if not has_starttls_d:
-        raise smtplib.SMTPException('It seems that the server doesn\'t support starttls')
-      s.starttls()
-      ehlo2_reply_d = s.ehlo()[0]
-      if ehlo2_reply_d != 250:
-        raise smtplib.SMTPException('Second ehlo failed')
-    except socket.gaierror, e:
-      plog('WARN', 'A connection error occurred while testing smtp at ' + address)
-      plog('WARN', e)
-      socket.socket = defaultsocket
-      return TEST_INCONCLUSIVE
-    except smtplib.SMTPException, e:
-      plog('WARN', 'An error occurred while testing smtp at ' + address)
-      plog('WARN', e)
-      return TEST_INCONCLUSIVE
-
-    print ehlo1_reply, ehlo1_reply_d, has_starttls, has_starttls_d, ehlo2_reply, ehlo2_reply_d
-
-    # compare
-    if ehlo1_reply != ehlo1_reply_d or has_starttls != has_starttls_d or ehlo2_reply != ehlo2_reply_d:
-      result = SMTPTestResult(self.node_map[exit_node[1:]], address, TEST_FAILURE)
-      datahandler.saveResult(result)
-      return TEST_FAILURE
-
-    result = SMTPTestResult(self.node_map[exit_node[1:]], address, TEST_SUCCESS)
-    datahandler.saveResult(result)
-    return TEST_SUCCESS
-
-
-class IMAPSTest(Test):
-  def __init__(self):
-    Test.__init__(self, "IMAPS", 143)
-
-  def run_test(self):
-    self.tests_run += 1
-    return self.check_imap(random.choice(self.targets))
-
-  def get_targets(self):
-    return []
-
-  def check_imap(self, address, port=''):
-    ''' 
-    check whether an imap + tls connection to a given address is molested 
-    it is implied that the server reads/sends messages compliant with RFC3501
-    ''' 
-    plog('INFO', 'Conducting an imap test with destination ' + address)
-
-    if not port:
-      port = 143
-
-    defaultsocket = socket.socket
-    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
-    socket.socket = socks.socksocket
-    
-    capabilities_ok = None
-    starttls_present = None
-    tls_started = None
-    tls_succeeded = None
-
-    try:
-      imap = Client(address, port)
-
-      # read server greeting
-      server_greeting = imap.readline()
-
-      # get server capabilities
-      imap.writeline('a001 CAPABILITY')
-      capabilities = imap.readline() # first line - list of capabilities
-      capabilities_ok = 'OK' in imap.readline() # second line - the request status
-    
-      if not capabilities_ok:
-         return TEST_INCONCLUSIVE
-
-      # check if starttls is present
-      starttls_present = 'STARTTLS' in capabilities
-
-      if starttls_present:
-        imap.writeline('a002 STARTTLS')
-        tls_started = 'OK' in imap.readline()
-
-      # negotiate TLS, issue a request to feel good about it
-      # TODO check the cert as well?
-      ctx = SSL.Context(SSL.SSLv23_METHOD)
-      c = SSL.Connection(ctx, imap.sock)
-      c.set_connect_state()
-      c.do_handshake()
-      c.send('a003 CAPABILITY' + linebreak)
-      
-      while tls_succeeded == None:
-        line = ''
-        char = None
-        while char != '\n':
-          char = c.read(1)
-          if not char:
-            break
-          line += char
-
-        if 'Error' in line or 'error' in line:
-          tls_succeeded = False
-        elif 'OK' in line:
-          tls_succeeded = True
-        elif not line:
-          tls_succeeded = False
-  
-    except socket.error, e: 
-      plog('WARN', 'Connection to ' + address + ':' + port + ' refused')
-      plog('WARN', e)
-      socket.socket = defaultsocket
-      return TEST_INCONCLUSIVE
-    except OpenSSL.SSL.SysCallError, e:
-      plog('WARN', 'Error while negotiating an SSL connection to ' + address + ':' + port)
-      plog('WARN', e)
-      socket.socket = defaultsocket
-      return TEST_INCONCLUSIVE
-    
-    socket.socket = defaultsocket 
-
-    # check whether the test was valid at all
-    exit_node = metacon.get_exit_node()
-    if exit_node == 0 or exit_node == '0':
-      plog('NOTICE', 'We had no exit node to test, skipping to the next test.')
-      return TEST_INCONCLUSIVE
-
-    # do the same for the direct connection
-    capabilities_ok_d = None
-    starttls_present_d = None
-    tls_started_d = None
-    tls_succeeded_d = None
-
-    try:
-      imap = Client(address, port)
-
-      # read server greeting
-      server_greeting = imap.readline()
-
-      # get server capabilities
-      imap.writeline('a001 CAPABILITY')
-      capabilities = imap.readline() # first line - list of capabilities
-      capabilities_ok_d = 'OK' in imap.readline() # second line - the request status
-
-      if not capabilities_ok_d:
-        return TEST_INCONCLUSIVE
-
-      # check if starttls is present
-      starttls_present_d = 'STARTTLS' in capabilities
-
-      if starttls_present_d:
-        imap.writeline('a002 STARTTLS')
-        tls_started_d = 'OK' in imap.readline()
-
-      # negotiate TLS, issue some request to feel good about it
-      ctx = SSL.Context(SSL.SSLv23_METHOD)
-      c = SSL.Connection(ctx, imap.sock)
-      c.set_connect_state()
-      c.do_handshake()
-      c.send('a003 CAPABILITY' + linebreak)
-
-      while tls_succeeded_d == None:
-        line = ''
-        char = None
-        while char != '\n':
-          char = c.read(1)
-          if not char:
-            break
-          line += char
-
-        if 'Error' in line or 'error' in line:
-          tls_succeeded_d = False
-        elif 'OK' in line:
-          tls_succeeded_d = True
-        elif not line:
-          tls_succeeded_d = False
-
-    except socket.error, e: 
-      plog('WARN', 'Connection to ' + address + ':' + port + ' refused')
-      plog('WARN', e)
-      socket.socket = defaultsocket
-      return TEST_INCONCLUSIVE
-    except OpenSSL.SSL.SysCallError, e:
-      plog('WARN', 'Error while negotiating an SSL connection to ' + address + ':' + port)
-      plog('WARN', e)
-      socket.socket = defaultsocket
-      return TEST_INCONCLUSIVE
-
-    # compare
-    if (capabilities_ok != capabilities_ok_d or starttls_present != starttls_present_d or 
-      tls_started != tls_started_d or tls_succeeded != tls_succeeded_d):
-      result = IMAPTestResult(self.node_map[exit_node[1:]], address, TEST_FAILURE)
-      datahandler.saveResult(result)
-      return TEST_FAILURE
-
-    result = IMAPTestResult(self.node_map[exit_node[1:]], address, TEST_SUCCESS)
-    datahandler.saveResult(result)
-    return TEST_SUCCESS
-
-class DNSTest(Test):
-  def check_dns(self, address):
-    ''' A basic comparison DNS test. Rather unreliable. '''
-    # TODO Spawns a lot of false positives (for ex. doesn't work for google.com). 
-    # TODO: This should be done passively, like the DNSRebind test (possibly as
-    # part of it)
-    plog('INFO', 'Conducting a basic dns test for destination ' + address)
-
-    ip = tor_resolve(address)
-
-    # check whether the test was valid at all
-    exit_node = metacon.get_exit_node()
-    if exit_node == 0 or exit_node == '0':
-      plog('INFO', 'We had no exit node to test, skipping to the next test.')
-      return TEST_SUCCESS
-
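-    # resolve the same name directly and accept the exit's answer only if it
-    # appears among the locally observed addresses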
-    ips_d = Set([])
-    try:
-      results = socket.getaddrinfo(address,None)
-      for result in results:
-        ips_d.add(result[4][0])
-    except socket.herror, e:
-      plog('WARN', 'An error occurred while performing a basic DNS test')
-      plog('WARN', e)
-      return TEST_INCONCLUSIVE
-
-    if ip in ips_d:
-      result = DNSTestResult(self.node_map[exit_node[1:]], address, TEST_SUCCESS)
-      return TEST_SUCCESS
-    else:
-      plog('ERROR', 'The basic DNS test suspects ' + exit_node + ' to be malicious.')
-      result = DNSTestResult(self.node_map[exit_node[1:]], address, TEST_FAILURE)
-      return TEST_FAILURE
-
-class SSHTest(Test):
-  def check_openssh(self, address):
-    ''' check whether an openssh connection to a given address is molested '''
-    # TODO
-    #ssh = pyssh.Ssh('username', 'host', 22)
-    #ssh.set_sshpath(pyssh.SSH_PATH)
-    #response = self.ssh.sendcmd('ls')
-    #print response
-
-    return 0 
-
-
-# a simple interface to handle a socket connection
-class Client:
-  def __init__(self, host, port):
-    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    self.sock.connect((host, port))
-    self.buffer = self.sock.makefile('rb')
-
-  def writeline(self, line):
-    self.sock.send(line + linebreak)
-
-  def readline(self):
-    response = self.buffer.readline()
-    if not response:
-      raise EOFError
-    elif response[-2:] == linebreak:
-      response = response[:-2]
-    elif response[-1:] in linebreak:
-      response = response[:-1]
-    return response 
-
-class NodeManager(ConsensusTracker):
-  ''' 
-  A tor control event handler extending ConsensusTracker.
-  Monitors NEWCONSENSUS and NEWDESC events, and updates each test
-  with new nodes
-  '''
-  def __init__(self, c):
-    ConsensusTracker.__init__(self, c)
-    self.rlock = threading.Lock()
-    self.new_nodes=True
-    c.set_event_handler(self)
-    c.set_events([TorCtl.EVENT_TYPE.NEWCONSENSUS,
-                  TorCtl.EVENT_TYPE.NEWDESC], True)
-
-  def idhex_to_r(self, idhex):
-    self.rlock.acquire()
-    result = None
-    try:
-      if idhex in self.routers:
-        result = self.routers[idhex]
-    finally:
-      self.rlock.release()
-    return result
-
-  def name_to_idhex(self, nick):
-    self.rlock.acquire()
-    result = None
-    try:
-      if nick in self.name_to_key:
-        result = self.name_to_key[nick]
-    finally:
-      self.rlock.release()
-    return result
-
-  def has_new_nodes(self):
-    ret = False
-    plog("DEBUG", "has_new_nodes begin")
-    try:
-      self.rlock.acquire()
-      ret = self.new_nodes
-      self.new_nodes = False
-    finally:
-      self.rlock.release()
-    plog("DEBUG", "has_new_nodes end")
-    return ret
-
-  def get_nodes_for_port(self, port):
-    ''' return a list of nodes that allow exiting to a given port '''
-    plog("DEBUG", "get_nodes_for_port begin")
-    restriction = NodeRestrictionList([FlagsRestriction(["Running", "Valid",
-"Fast"]), MinBWRestriction(min_node_bw), ExitPolicyRestriction('255.255.255.255', port)])
-    try:
-      self.rlock.acquire()
-      ret = [x for x in self.sorted_r if restriction.r_is_ok(x)]
-      # XXX: Can remove.
-      self._sanity_check(ret)
-    finally:
-      self.rlock.release()
-    plog("DEBUG", "get_nodes_for_port end")
-    return ret
- 
-  def new_consensus_event(self, n):
-    plog("DEBUG", "newconsensus_event begin")
-    try:
-      self.rlock.acquire()
-      ConsensusTracker.new_consensus_event(self, n)
-      self.new_nodes = True
-    finally:
-      self.rlock.release()
-    plog("DEBUG", "newconsensus_event end")
-
-  def new_desc_event(self, d):
-    plog("DEBUG", "newdesc_event begin")
-    try:
-      self.rlock.acquire()
-      if ConsensusTracker.new_desc_event(self, d):
-        self.new_nodes = True
-    finally:
-      self.rlock.release()
-    plog("DEBUG", "newdesc_event end")
-
-class DNSRebindScanner(EventHandler):
-  ''' 
-  A tor control event handler extending TorCtl.EventHandler 
-  Monitors for REMAP events (see check_dns_rebind())
-  '''
-  def __init__(self, mt, c):
-    EventHandler.__init__(self)
-    self.__mt = mt
-    c.set_event_handler(self)
-    c.set_events([TorCtl.EVENT_TYPE.STREAM], True)
-    self.c=c
-
-  def stream_status_event(self, event):
-    if event.status == 'REMAP':
-      octets = map(lambda x: int2bin(x).zfill(8), event.target_host.split('.'))
-      ipbin = ''.join(octets)
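-      # prefix-match the binary form of the remapped address against the
-      # known private/reserved ranges; a hit means the exit resolved an
-      # external hostname to a non-public IP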
-      for network in ipv4_nonpublic:
-        if ipbin[:len(network)] == network:
-          handler = DataHandler()
-          node = self.__mt.get_exit_node()
-          plog("ERROR", "DNS Rebeind failure via "+node)
-
-          result = DNSRebindTestResult(self.__mt.node_manager.idhex_to_r(node), 
-                                       '', TEST_FAILURE)
-          handler.saveResult(result)
-    # TODO: This is currently handled via socks error codes,
-    # but stream events would give us more info...
-    #elif event.status == "FAILED" or event.status == "CLOSED":
-       # check remote_reason == "RESOLVEFAILED"
-       # getinfo.circuit_status()
-       # TODO: Check what we do in these detached cases..
-       #metacon.node_manager.name_to_idhex(exit)
-
-class Metaconnection:
-  ''' Abstracts operations with the Metatroller '''
-  def __init__(self):
-    ''' 
-    Establish a connection to metatroller & control port, 
-    configure metatroller, load the number of previously tested nodes 
-    '''
-    # establish a metatroller connection
-    try:
-      self.__meta = Client(meta_host, meta_port)
-    except socket.error:
-      plog('ERROR', 'Couldn\'t connect to metatroller. Is it on?')
-      exit()
-  
-    # skip two lines of metatroller introduction
-    data = self.__meta.readline()
-    data = self.__meta.readline()
-    
-    # configure metatroller
-    commands = [
-      'PATHLEN 2',
-      'PERCENTFAST 10', # Cheat to win!
-      'USEALLEXITS 1',
-      'UNIFORM 0',
-      'BWCUTOFF 1',
-      'ORDEREXITS 1',
-      'GUARDNODES 0',
-      'RESETSTATS']
-
-    for c in commands:
-      self.__meta.writeline(c)
-      reply = self.__meta.readline()
-      if reply[:3] != '250': # first three chars indicate the reply code
-        reply += self.__meta.readline()
-        plog('ERROR', 'Error configuring metatroller (' + c + ' failed)')
-        plog('ERROR', reply)
-        exit()
-
-    # establish a control port connection
-    try:
-      s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-      s.connect((control_host, control_port))
-      c = Connection(s)
-      c.authenticate()
-      self.control = c
-    except socket.error, e:
-      plog('ERROR', 'Couldn\'t connect to the control port')
-      plog('ERROR', e)
-      exit()
-    except AttributeError, e:
-      plog('ERROR', 'A service other than the Tor control port is listening on ' + control_host + ':' + control_port)
-      plog('ERROR', e)
-      exit()
-    self.node_manager = NodeManager(c)
-   
-
-  def get_exit_node(self):
-    ''' ask metatroller for the last exit used '''
-    self.__meta.writeline("GETLASTEXIT")
-    reply = self.__meta.readline()
-    
-    if reply[:3] != '250':
-      reply += self.__meta.readline()
-      plog('ERROR', reply)
-      return 0
-    
-    p = re.compile('250 LASTEXIT=[\S]+')
-    m = p.match(reply)
-    self.__exit = m.group()[13:] # drop the irrelevant characters  
-    plog('INFO','Current node: ' + self.__exit)
-    return self.__exit
-
-  def get_new_circuit(self):
-    ''' tell metatroller to close the current circuit and open a new one '''
-    plog('DEBUG', 'Trying to construct a new circuit')
-    self.__meta.writeline("NEWEXIT")
-    reply = self.__meta.readline()
-
-    if reply[:3] != '250':
-      plog('ERROR', 'Choosing a new exit failed')
-      plog('ERROR', reply)
-
-  def set_new_exit(self, exit):
-    ''' 
-    tell metatroller to set the given node as the exit in the next circuit 
-    '''
-    plog('DEBUG', 'Trying to set ' + `exit` + ' as the exit for the next circuit')
-    self.__meta.writeline("SETEXIT $"+exit)
-    reply = self.__meta.readline()
-
-    if reply[:3] != '250':
-      plog('ERROR', 'Setting ' + exit + ' as the new exit failed')
-      plog('ERROR', reply)
-
-  def report_bad_exit(self, exit):
-    ''' 
-    report an evil exit to the control port using AuthDirBadExit 
-    Note: currently not used  
-    '''
-    # self.__contol.set_option('AuthDirBadExit', exit) ?
-    pass
-
-  # FIXME: Hrmm is this in the right place?
-  def check_all_exits_port_consistency(self):
-    ''' 
-    an independent test that finds nodes that allow connections over a common protocol
-    while disallowing connections over its secure version (for instance http/https)
-    '''
-
-    # get the structure
-    routers = self.control.read_routers(self.control.get_network_status())
-    bad_exits = Set([])
-    specific_bad_exits = [None]*len(ports_to_check)
-    for i in range(len(ports_to_check)):
-      specific_bad_exits[i] = []
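-    # one list per (plain protocol, secure protocol) pair in ports_to_check,
-    # filled below with routers whose exit policy allows only the plain port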
-
-    # check exit policies
-    for router in routers:
-      for i in range(len(ports_to_check)):
-        [common_protocol, common_restriction, secure_protocol, secure_restriction] = ports_to_check[i]
-        if common_restriction.r_is_ok(router) and not secure_restriction.r_is_ok(router):
-          bad_exits.add(router)
-          specific_bad_exits[i].append(router)
-          #plog('INFO', 'Router ' + router.nickname + ' allows ' + common_protocol + ' but not ' + secure_protocol)
-  
-
-    for i,exits in enumerate(specific_bad_exits):
-      [common_protocol, common_restriction, secure_protocol, secure_restriction] = ports_to_check[i]
-      plog("NOTICE", "Nodes allowing "+common_protocol+" but not "+secure_protocol+":\n\t"+"\n\t".join(map(lambda r: r.nickname+"="+r.idhex, exits)))
-      #plog('INFO', 'Router ' + router.nickname + ' allows ' + common_protocol + ' but not ' + secure_protocol)
-     
-
-    # report results
-    plog('INFO', 'Total nodes: ' + `len(routers)`)
-    for i in range(len(ports_to_check)):
-      [common_protocol, _, secure_protocol, _] = ports_to_check[i]
-      plog('INFO', 'Exits with ' + common_protocol + ' / ' + secure_protocol + ' problem: ' + `len(specific_bad_exits[i])` + ' (~' + `(len(specific_bad_exits[i]) * 100 / len(routers))` + '%)')
-    plog('INFO', 'Total bad exits: ' + `len(bad_exits)` + ' (~' + `(len(bad_exits) * 100 / len(routers))` + '%)')
-
-  # FIXME: Hrmm is this in the right place?
-  def check_dns_rebind(self):
-    ''' 
-    A DNS-rebind attack test that runs in the background and monitors REMAP events
-    The test makes sure that external hosts are not resolved to private addresses  
-    '''
-    plog('INFO', 'Monitoring REMAP events for weirdness')
-    # establish a control port connection
-    try:
-      s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-      s.connect((control_host, control_port))
-      c = Connection(s)
-      c.authenticate()
-    except socket.error, e:
-      plog('ERROR', 'Couldn\'t connect to the control port')
-      plog('ERROR', e)
-      exit()
-    except AttributeError, e:
-      plog('ERROR', 'A service other than the Tor control port is listening on ' + control_host + ':' + control_port)
-      plog('ERROR', e)
-      exit()
-
-    self.__dnshandler = DNSRebindScanner(self, c)
-
-
-# some helpful methods
-
-def load_wordlist(file):
-  ''' load a list of strings from a file (which contains words separated by newlines) '''
-  plog('INFO', 'Loading the wordlist')
-  
-  wordlist = []
-  fh = None
-  try:
-    fh = open(file, 'r')
-  except IOError, e:
-    plog('ERROR', 'Reading the wordlist file failed.')
-    plog('ERROR', e)
-    exit()
-  
-  try:
-    for line in fh:
-      wordlist.append(line[:-1]) # get rid of the linebreaks
-  finally:
-    fh.close()
-
-  return wordlist
-
-
-def decompress_response_data(response):
-  encoding = None
-
-  # a response from httplib (an httplib.HTTPResponse)
-  if (response.__class__.__name__ == "HTTPResponse"):
-    encoding = response.getheader("Content-Encoding")
-  # a response to urllib2.urlopen()
-  elif (response.__class__.__name__ == "addinfourl"):
-    encoding = response.info().get("Content-Encoding")
-
-  tot_len = response.info().get("Content-Length")
-  if not tot_len:
-    tot_len = "0"
-
-  start = 0
-  data = ""
-  while True:
-    data_read = response.read(500) # Cells are 495 bytes..
-    if not start: start = time.time()
-    # TODO: if this doesn't work, check stream observer for 
-    # lack of progress.. or for a sign we should read..
-    len_read = len(data)
-    now = time.time()
-
-    plog("DEBUG", "Read "+str(len_read)+"/"+str(tot_len))
-    # Wait 5 seconds before counting data
-    if (now-start) > 5 and len_read/(now-start) < min_rate:
-      plog("WARN", "Minimum xfer rate not maintained. Aborting xfer")
-      return ""
-      
-    if not data_read:
-      break
-    data += data_read 
- 
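-  # transparently decompress gzip/deflate bodies so later content comparisons
-  # operate on the same plaintext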
-  if encoding == 'gzip' or encoding == 'x-gzip':
-    return gzip.GzipFile('', 'rb', 9, StringIO.StringIO(data)).read()
-  elif encoding == 'deflate':
-    return StringIO.StringIO(zlib.decompress(data)).read()
-  else:
-    return data
-
-def tor_resolve(address):
-  ''' performs a DNS query explicitly via tor '''
-  return commands.getoutput("tor-resolve " + address)
-
-def int2bin(n):
-  '''
-  simple decimal -> binary conversion, needed for comparing IP addresses 
-  '''
-  n = int(n)
-  if n < 0:
-    raise ValueError, "Negative values are not accepted."
-  elif n == 0:
-    return '0'
-  else:
-    bin = ''
-    while n > 0:
-      bin += str(n % 2)
-      n = n >> 1
-    return bin[::-1]
-
-
-class NoURLsFound(Exception):
-  pass
-
-#
-# main logic
-#
-def main(argv):
-  # make sure we have something to test for
-  if len(argv) < 2:
-    print ''
-    print 'Please provide at least one test option:'
-    print '--pernode <n>'
-    print '--resume [<n>]'
-    print '--rescan [<n>]'
-    print '--ssl'
-    print '--http'
-    print '--html'
-#    print '--ssh (doesn\'t work yet)'
-#    print '--smtp (~works)'
-#    print '--pop (~works)'
-#    print '--imap (~works)'
-    print '--dnsrebind (use with one or more of above tests)'
-    print '--policies'
-    print '--exit <exit>'
-    print ''
-    return
-
-  opts = ['ssl','rescan', 'pernode=', 'resume', 'html','http','ssh','smtp','pop','imap','dns','dnsrebind','policies','exit=']
-  flags, trailer = getopt.getopt(argv[1:], [], opts)
-  
-  # get specific test types
-  do_resume = False
-  do_rescan = ('--rescan','') in flags
-  do_ssl = ('--ssl','') in flags
-  do_http = ('--http','') in flags
-  do_html = ('--html','') in flags
-  #do_ssh = ('--ssh','') in flags
-  #do_smtp = ('--smtp','') in flags
-  #do_pop = ('--pop','') in flags
-  #do_imap = ('--imap','') in flags
-  do_dns_rebind = ('--dnsrebind','') in flags
-  do_consistency = ('--policies','') in flags
-
-  scan_exit=None
-  for flag in flags:
-    if flag[0] == "--exit":
-      scan_exit = flag[1]
-    if flag[0] == "--pernode":
-      global num_tests_per_node
-      num_tests_per_node = int(flag[1])
-    if flag[0] == "--rescan" and flag[1]:
-      global num_rescan_tests_per_node
-      num_rescan_tests_per_node = int(flag[1])
-    if flag[0] == "--resume":
-      do_resume = True
-      if flag[1]:
-        resume_run=int(flag[1])
-      else:
-        resume_run=-1
-
-  # Make logs go to disk so resumes are less painful
-  #TorUtil.logfile = open(log_file_name, "a")
-
-  # initiate the connection to the metatroller
-  global metacon
-  metacon = Metaconnection()
-  global datahandler
-  datahandler = DataHandler()
-
-  # initiate the passive dns rebind attack monitor
-  if do_dns_rebind:
-    metacon.check_dns_rebind()
-
-  # check for sketchy exit policies
-  if do_consistency:
-    metacon.check_all_exits_port_consistency()
-
-  # maybe only the consistency test was required
-  if not (do_ssl or do_html or do_http):
-    plog('INFO', 'Done.')
-    return
-
-  # Load the cookie jar
-  global search_cookies
-  search_cookies = cookielib.LWPCookieJar()
-  if os.path.isfile(search_cookie_file):
-    search_cookies.load(search_cookie_file, ignore_discard=True)
-  search_cookies.__filename = search_cookie_file
-
-  tests = {}
-
-  if do_resume:
-    plog("NOTICE", "Resuming previous SoaT run")
-    if do_ssl:
-      tests["SSL"] = datahandler.loadTest("SSLTest", resume_run)
-
-    if do_http:
-      tests["HTTP"] = datahandler.loadTest("HTTPTest", resume_run)
-
-    if do_html:
-      tests["HTML"] = datahandler.loadTest("HTMLTest", resume_run)
-  
-  else:
-    if do_ssl:
-      tests["SSL"] = SSLTest(ssl_wordlist_file)
-
-    if do_http:
-      tests["HTTP"] = HTTPTest(filetype_wordlist_file)
-
-    if do_html:
-      tests["HTML"] = HTMLTest(html_wordlist_file)
-
-
-  # maybe no tests could be initialized
-  if not tests:
-    plog('INFO', 'Done.')
-    sys.exit(0)
-
-  # Make sure refetch_ip is valid rather than exploding mid-test
-  global refetch_ip
-  BindingSocket.bind_to = refetch_ip
-  try:
-    socket.socket()
-  except socket.error:
-    plog("WARN", "Cannot bind to "+refetch_ip+". Ignoring refetch_ip setting.")
-    refetch_ip = None
-  BindingSocket.bind_to = None
- 
-  if do_rescan:
-    plog("NOTICE", "Loading rescan.")
-    for test in tests.itervalues():
-      test.load_rescan(TEST_FAILURE)
-
-  if not do_resume:
-    for test in tests.itervalues():
-      test.rewind()
- 
-  if scan_exit:
-    plog("NOTICE", "Scanning only "+scan_exit)
-    metacon.set_new_exit(scan_exit)
-    metacon.get_new_circuit()
-
-    while 1:
-      for test in tests.values():
-        result = test.run_test()
-        plog("INFO", test.proto+" test via "+scan_exit+" has result "+str(result))
-
-  # start testing
-  while 1:
-    avail_tests = tests.values()
-    if metacon.node_manager.has_new_nodes():
-      plog("INFO", "Got signal for node update.")
-      for test in avail_tests:
-        test.update_nodes()
-      plog("INFO", "Node update complete.")
-
-    # Get as much mileage out of each exit as we safely can:
-    # Run a random subset of our tests in random order
-    n_tests = random.choice(xrange(1,len(avail_tests)+1))
-    
-    to_run = random.sample(avail_tests, n_tests)
-
-    common_nodes = None
-    # Do set intersection and reuse nodes for shared tests
-    for test in to_run:
-      if test.finished(): continue
-      if not common_nodes: common_nodes = copy.copy(test.nodes)
-      else: common_nodes &= test.nodes
-      metacon.node_manager._sanity_check(map(lambda id: test.node_map[id], 
-                                             test.nodes))
-
-    if common_nodes:
-      current_exit_idhex = random.choice(list(common_nodes))
-      plog("DEBUG", "Chose to run "+str(n_tests)+" tests via "+current_exit_idhex+" (tests share "+str(len(common_nodes))+" exit nodes)")
-
-      metacon.set_new_exit(current_exit_idhex)
-      metacon.get_new_circuit()
-      for test in to_run:
-        result = test.run_test()
-        if result != TEST_INCONCLUSIVE:
-          test.mark_chosen(current_exit_idhex, result)
-        datahandler.saveTest(test)
-        plog("INFO", test.proto+" test via "+current_exit_idhex+" has result "+str(result))
-        plog("INFO", test.proto+" attempts: "+str(test.tests_run)+".  Completed: "+str(test.total_nodes - test.scan_nodes)+"/"+str(test.total_nodes)+" ("+str(test.percent_complete())+"%)")
-    else:
-      plog("NOTICE", "No nodes in common between "+", ".join(map(lambda t: t.proto, to_run)))
-      for test in to_run:
-        if test.finished(): continue
-        current_exit = test.get_node()
-        metacon.set_new_exit(current_exit.idhex)
-        metacon.get_new_circuit()
-        result = test.run_test()
-        if result != TEST_INCONCLUSIVE: 
-          test.mark_chosen(current_exit.idhex, result)
-        datahandler.saveTest(test)
-        plog("INFO", test.proto+" test via "+current_exit.idhex+" has result "+str(result))
-        plog("INFO", test.proto+" attempts: "+str(test.tests_run)+".  Completed: "+str(test.total_nodes - test.scan_nodes)+"/"+str(test.total_nodes)+" ("+str(test.percent_complete())+"%)")
-     
-    # Check each test for rewind 
-    for test in tests.itervalues():
-      if test.finished():
-        plog("NOTICE", test.proto+" test has finished all nodes.")
-        datahandler.saveTest(test)
-        test.remove_false_positives()
-        if not do_rescan and rescan_at_finish:
-          test.toggle_rescan()
-          test.rewind()
-        elif restart_at_finish:
-          test.rewind()
-    all_finished = True
-    for test in tests.itervalues():
-      if not test.finished():
-        all_finished = False
-    if all_finished:
-      plog("NOTICE", "All tests have finished. Exiting\n")
-      sys.exit(0)
-
-# initiate the program
-#
-if __name__ == '__main__':
-  try:
-    main(sys.argv)
-  except KeyboardInterrupt:
-    plog('INFO', "Ctrl + C was pressed. Exiting ... ")
-    traceback.print_exc()
-  except Exception, e:
-    plog('ERROR', "An unexpected error occured.")
-    traceback.print_exc()

Deleted: torflow/trunk/NetworkScanners/soatstats.py
===================================================================
--- torflow/trunk/NetworkScanners/soatstats.py	2009-08-13 18:29:55 UTC (rev 20278)
+++ torflow/trunk/NetworkScanners/soatstats.py	2009-08-13 19:34:12 UTC (rev 20279)
@@ -1,116 +0,0 @@
-#!/usr/bin/python
-#
-# 2008 Aleksei Gorny, mentored by Mike Perry
-
-import dircache
-import operator
-import os
-import pickle
-import sys
-import time
-
-import sets
-from sets import Set
-
-import libsoat
-from libsoat import *
-
-sys.path.append("../")
-from TorCtl.TorUtil import *
-
-class ResultCount:
-  def __init__(self, type):
-    self.type = type
-    self.good = 0
-    self.bad = 0
-    self.inconclusive = 0
-
-class ResultNode:
-  def __init__(self, idhex):
-    self.total = ResultCount("All")
-    self.counts = {}
-    self.idhex = idhex 
-
-def main(argv):
-  dh = DataHandler()
-  data = dh.getAll()
-
-  reason_counts = {}
-  nodeResults = {}
-  tests = Set([])
-
-  total = len(data)
-
-  for result in data:
-    if result.exit_node in nodeResults:
-      rn = nodeResults[result.exit_node]
-    else:
-      rn = ResultNode(result.exit_node)
-      nodeResults[result.exit_node] = rn
-
-    tests.add(result.__class__.__name__) 
-    if result.__class__.__name__ not in rn.counts:
-      rn.counts[result.__class__.__name__] = ResultCount(result.__class__.__name__)
-
-    if result.status == TEST_SUCCESS:
-      rn.total.good += 1
-      rn.counts[result.__class__.__name__].good += 1
-    elif result.status == TEST_INCONCLUSIVE:
-      rn.total.inconclusive += 1
-      rn.counts[result.__class__.__name__].inconclusive += 1
-    elif result.status == TEST_FAILURE:
-      rn.total.bad += 1
-      rn.counts[result.__class__.__name__].bad += 1
-      if result.reason not in reason_counts:
-        reason_counts[result.reason] = 1
-      else:
-        reason_counts[result.reason] += 1
-    
-  # Sort by total counts, print out nodes with highest counts first
-  failed_nodes = nodeResults.values()
-  failed_nodes.sort(lambda x, y: cmp(y.total.bad, x.total.bad))
-
-  inconclusive_nodes = nodeResults.values()
-  inconclusive_nodes.sort(lambda x, y: cmp(y.total.inconclusive, x.total.inconclusive))
-
-  # Sort by individual test counts, print out nodes with highest counts first
-
-  failed_nodes_specific = {}
-  inconclusive_nodes_specific = {}
-  for test in tests:
-    tested = [node for node in nodeResults.values() if node.counts.get(test)]
-    failed_nodes_specific[test] = list(sorted(tested, lambda x, y: cmp(y.counts[test].bad, x.counts[test].bad)))
-    inconclusive_nodes_specific[test] = list(sorted(tested, lambda x, y: cmp(y.counts[test].inconclusive, x.counts[test].inconclusive)))
-
-  print "\nFailures"
-  for node in failed_nodes:
-    if node.total.bad != 0:
-      print `node.idhex` + "\t" + `node.total.bad`
-
-  #print "\nInconclusive test results"
-  #for node in inconclusive_nodes:
-  #  if node.total.inconclusive != 0:
-  #    print `node.idhex` + "\t" + `node.total.inconclusive`
-
-  for test in tests:
-    print "\n" + test[:(-6)] + " failures"
-    for node in failed_nodes_specific[test]:
-      if node.counts[test].bad != 0:
-        print `node.idhex` + "\t" + `node.counts[test].bad`
-
-  #for test in tests:
-  #  print "\n" + test[:(-6)] + " inconclusive results"
-  #  for node in inconclusive_nodes_specific[test]:
-  #    if node.counts[test].inconclusive != 0:
-  #      print `node.idhex` + "\t" + `node.counts[test].inconclusive`
-
-  print ""
-
-  reasons = sorted(reason_counts.iterkeys(), lambda x, y:
-cmp(reason_counts[x], reason_counts[y]))
-
-  for r in reasons:
-    print r+": "+str(reason_counts[r])
-
-if __name__ == "__main__":
-  main(sys.argv)

Deleted: torflow/trunk/NetworkScanners/wordlist.txt
===================================================================
--- torflow/trunk/NetworkScanners/wordlist.txt	2009-08-13 18:29:55 UTC (rev 20278)
+++ torflow/trunk/NetworkScanners/wordlist.txt	2009-08-13 19:34:12 UTC (rev 20279)
@@ -1,30 +0,0 @@
-document
-important
-download
-setup
-install
-plugin
-file
-program
-run
-microsoft
-windows
-xp
-installer
-slides
-presentation
-paper
-browser
-winscp
-vidalia+bundle
-putty
-torpark
-firefox+setup
-mozilla
-privoxy
-privacy
-extension
-firefox+extension
-example
-sample
-censorship


