[or-cvs] r15990: SOAT: fixed result saving problems. Added a setexit stub. (torflow/branches/gsoc2008)

aleksei at seul.org aleksei at seul.org
Wed Jul 16 17:16:49 UTC 2008


Author: aleksei
Date: 2008-07-16 13:16:48 -0400 (Wed, 16 Jul 2008)
New Revision: 15990

Modified:
   torflow/branches/gsoc2008/soat.py
   torflow/branches/gsoc2008/soatstats.py
Log:
SOAT: fixed result saving problems. Added a setexit stub.

Modified: torflow/branches/gsoc2008/soat.py
===================================================================
--- torflow/branches/gsoc2008/soat.py	2008-07-16 16:45:24 UTC (rev 15989)
+++ torflow/branches/gsoc2008/soat.py	2008-07-16 17:16:48 UTC (rev 15990)
@@ -16,7 +16,6 @@
 
 import httplib
 import os
-import pickle
 import random
 import re
 from sets import Set
@@ -197,10 +196,10 @@
         tell metatroller to set the given node as the exit in the next circuit 
         Note: currently not used
         '''
-        plog('NOTICE', 'Trying to set ' + exit + ' as the exit for the next circuit')
-        self.__meta.writeline("SETEXIT " + exit)
+        plog('NOTICE', 'Trying to set ' + `exit` + ' as the exit for the next circuit')
+        self.__meta.writeline("SETEXIT " + `exit`)
         reply = self.__meta.readline()
-    
+
         if reply[:3] != '250':
             plog('ERROR', 'Setting ' + exit + ' as the new exit failed')
             plog('ERROR', reply)
@@ -210,7 +209,7 @@
         report an evil exit to the control port using AuthDirBadExit 
         Note: currently not used    
         '''
-        # self__contol.set_option('AuthDirBadExit', exit) ?
+        # self.__contol.set_option('AuthDirBadExit', exit) ?
         pass
 
     def get_nodes_for_port(self, port):
@@ -266,14 +265,13 @@
             plog('INFO', 'We had no exit node to test, skipping to the next test.')
             return 0
 
-        address_file = address[7:].replace('/','_') # an address representation acceptable for a filename (leave out the http:// and replace slashes)
+        # an address representation acceptable for a filename (leave out the http:// and replace slashes)
+        address_file = address[7:].replace('/','_')
 
         # if we have no content, we had a connection error
         if pcontent == 0:
             result = HttpTestResult(exit_node, address, 0, TEST_INCONCLUSIVE)
-            result_file = open(http_i_dir + `exit_node` + '_' + address_file + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close()
+            self.__datahandler.saveResult(result)
             return TEST_INCONCLUSIVE
 
         elements = SoupStrainer(lambda name, attrs : name in tags_to_check or 
@@ -303,13 +301,13 @@
             plog('ERROR', 'Failed to get the correct tag structure for ' + address)
             return TEST_INCONCLUSIVE
 
+        self.http_tested.add(exit_node)
+
         # compare the content
         # if content matches, everything is ok
         if psoup == soup:
             result = HttpTestResult(exit_node, address, 0, TEST_SUCCESS)
-            result_file = open(http_s_dir + `exit_node` + '_' + address_file + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close()
+            self.__datahandler.saveResult(result)
             return TEST_SUCCESS
 
         # if content doesnt match, update the direct content
@@ -317,9 +315,7 @@
         content_new = content_new.decode('ascii', 'ignore')
         if content_new == 0:
             result = HttpTestResult(exit_node, address, 0, TEST_INCONCLUSIVE)
-            result_file = open(http_i_dir + `exit_node` + '_' + address_file + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close()
+            self.__datahandler.saveResult(result)
             return TEST_INCONCLUSIVE
 
         soup_new = BeautifulSoup(content_new, parseOnlyThese=elements)
@@ -327,13 +323,14 @@
         # if they match, means the node has been changing the content
         if soup == soup_new:
             result = HttpTestResult(exit_node, address, 0, TEST_FAILURE)
-            result_file = open(http_f_dir + `exit_node` + '_' + address_file + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close()
+            self.__datahandler.saveResult(result)
+            tag_file = open(http_tags_dir + `exit_node` + '_' + address_file + '.tags', 'w')
+            tag_file.write(psoup.__str__())
+            tag_file.close()
             return TEST_FAILURE
 
         # if content has changed outside of tor, update the saved file
-        tag_file = open(http_tags_dir + '_' + address_file + '.tags', 'w')
+        tag_file = open(http_tags_dir + address_file + '.tags', 'w')
         tag_file.write(soup_new.__str__())
         tag_file.close()
 
@@ -341,16 +338,16 @@
         # if it matches, everything is ok
         if psoup == soup_new:
             result = HttpTestResult(exit_node, address, 0, TEST_SUCCESS)
-            result_file = open(http_s_dir + `exit_node` + '_' + address_file + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close()
+            self.__datahandler.saveResult(result)
             return TEST_SUCCESS
 
         # if it doesn't match, means the node has been changing the content
         result = HttpTestResult(exit_node, address, 0, TEST_FAILURE)
-        result_file = open(http_f_dir + `exit_node` + '_' + address_file + '.result','w')
-        pickle.dump(result, result_file)
-        result_file.close()
+        self.__datahandler.saveResult(result)
+        tag_file = open(http_tags_dir + `exit_node` + '_' + address_file + '.tags', 'w')
+        tag_file.write(psoup.__str__())
+        tag_file.close()
+        
         return TEST_FAILURE
 
     def check_openssh(self, address):
@@ -367,6 +364,9 @@
         ''' check whether an https connection to a given address is molested '''
         plog('INFO', 'Conducting an ssl test with destination ' + address)
 
+        # an address representation acceptable for a filename (leave out the https:// and replace slashes)
+        address_file = address[8:].replace('/','_')
+
         # get the cert via tor
 
         defaultsocket = socket.socket
@@ -386,17 +386,14 @@
         # if we got no cert, there was an ssl error
         if cert == 0:
             result = OpenSSLTestResult(exit_node, address, 0, TEST_INCONCLUSIVE)
-            result_file = open(ssl_i_dir + `exit_node` + '_' + address + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close()
-            self.ssl_tested.add(exit_node)
+            self.__datahandler.saveResult(result)
             return TEST_INCONCLUSIVE
 
         # load the original cert and compare
         # if we don't have the original cert yet, get it
         original_cert = 0
         try:
-            cert_file = open(ssl_certs_dir + address + '.pem', 'r')
+            cert_file = open(ssl_certs_dir + address_file + '.pem', 'r')
             cert_string = cert_file.read()
             original_cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_string)
         except IOError:
@@ -405,11 +402,11 @@
             if original_cert.has_expired():
                 plog('ERROR', 'The ssl cert for ' + address + 'seems to have expired. Skipping to the next test...')
                 return TEST_INCONCLUSIVE
-            cert_file = open(ssl_certs_dir + address + '.pem', 'w')
+            cert_file = open(ssl_certs_dir + address_file + '.pem', 'w')
             cert_file.write(crypto.dump_certificate(crypto.FILETYPE_PEM, original_cert))
             cert_file.close()
         except:
-            plog('ERROR', 'Error occured while acquiring the correct cert (' + ssl_certs_dir +  address + '.pem)')
+            plog('ERROR', 'Error occured while acquiring the correct cert (' + ssl_certs_dir +  address_file + '.pem)')
             return TEST_INCONCLUSIVE
         if original_cert == 0:
             plog('ERROR', 'Error getting the correct cert for ' + address)
@@ -424,11 +421,9 @@
 
         # if certs match, everything is ok
         if cert_pem == original_cert_pem:
-            cert_file = ssl_certs_dir + address + '.pem'
+            cert_file = ssl_certs_dir + address_file + '.pem'
             result = OpenSSLTestResult(exit_node, address, cert_file, TEST_SUCCESS)
-            result_file = open(ssl_s_dir + `exit_node` + '_' + address + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close()
+            self.__datahandler.saveResult(result)
             return TEST_SUCCESS
         
         # if certs dont match, open up a direct connection and update the cert
@@ -437,9 +432,7 @@
         if original_cert_new == 0:
             plog('ERROR', 'Error getting the correct cert for ' + address)
             result = OpenSSLTestResult(exit_node, address, 0, TEST_INCONCLUSIVE)
-            result_file = open(ssl_i_dir + `exit_node` + '_' + address + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close()
+            self.__datahandler.saveResult(result)
             return TEST_INCONCLUSIVE
 
         original_cert_new_pem = crypto.dump_certificate(crypto.FILETYPE_PEM, original_cert_new)
@@ -449,33 +442,29 @@
         if original_cert_pem == original_cert_new_pem:
             plog('ERROR', 'Exit node ' + `exit_node` + ' seems to be meddling with certificates. (' + address + ')')
 
-            cert_file_name = ssl_certs_dir + address + '_' + `exit_node` + '.pem'
+            cert_file_name = ssl_certs_dir + address_file + '_' + `exit_node` + '.pem'
             cert_file = open(cert_file, 'w')
             cert_file.write(cert_pem)
             cert_file.close()
 
             result = OpenSSLTestResult(exit_node, address, cert_file_name, TEST_FAILURE)
-            result_file = open(ssl_f_dir + `exit_node` + '_' + address + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close()
+            self.__datahandler.saveResult(result)
             return TEST_FAILURE
 
         # if comparsion fails, replace the old cert with the new one
         # XXX: Hrmm, probably should store as a seperate IP file in this case
         # so we don't keep alternating on sites that have round robin
         # DNS and different certs for each IP.. 
-        cert_file = open(ssl_certs_dir + address + '.pem', 'w')
+        cert_file = open(ssl_certs_dir + address_file + '.pem', 'w')
         cert_file.write(original_cert_new_pem)
         cert_file.close()
             
         # compare the new cert and the node cert
         # if certs match, everything is ok
         if cert_pem == original_cert_new_pem:
-            cert_file = ssl_certs_dir + address + '.pem'
+            cert_file = ssl_certs_dir + address_file + '.pem'
             result = OpenSSLTestResult(exit_node, address, cert_file, TEST_SUCCESS)
-            result_file = open(ssl_s_dir + `exit_node` + '_' + address + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close()
+            self.__datahandler.saveResult(result)
             return TEST_SUCCESS
 
         # if certs dont match, means the exit node has been messing with the cert
@@ -487,9 +476,7 @@
         cert_file.close()
 
         result = OpenSSLTestResult(exit_node, address, cert_file_name, TEST_FAILURE)
-        result_file = open(ssl_f_dir + `exit_node` + '_' + address + '.result','w')
-        pickle.dump(result, result_file)
-        result_file.close()
+        self.__datahandler.saveResult(result)
 
         return TEST_FAILURE
 
@@ -868,6 +855,11 @@
 
     def ssl_request(self, address):
         ''' initiate an ssl connection and return the server certificate '''
+        
+        # drop the https:// prefix if present (not needed for a socket connection)
+        if address[:8] == 'https://':
+            address = address[8:]
+    
         # specify the context
         ctx = SSL.Context(SSL.SSLv23_METHOD)
         ctx.set_verify_depth(1)
@@ -879,7 +871,7 @@
         s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         c = SSL.Connection(ctx, s)
         c.set_connect_state()
-
+        
         try:
             c.connect((address, 443))
             c.send(crypto.dump_certificate_request(crypto.FILETYPE_PEM,request))
@@ -887,7 +879,7 @@
             plog('ERROR','An error occured while opening an ssl connection to ' + address)
             plog('ERROR', e)
             return 0
-            
+        
         # return the cert
         return c.get_peer_certificate()
 
@@ -989,14 +981,18 @@
     wordlist = load_wordlist(wordlist_file)
     
     # get the total number of nodes for ports
-    ssl_nodes = len(scanner.get_nodes_for_port(443))
-    http_nodes = len(scanner.get_nodes_for_port(80))
-    #ssh_nodes = len(scanner.get_nodes_for_port(22)) 
+    ssl_nodes = scanner.get_nodes_for_port(443)
+    # http_nodes = scanner.get_nodes_for_port(80)
+    #ssh_nodes = scanner.get_nodes_for_port(22)
 
+    ssl_nodes_n = len(ssl_nodes)
+    # http_nodes_n = len(http_nodes)
+    # ssh_nodes_n = len(ssh_nodes)
+
     # lists of addresses (generated later with get_urls)
     ssl_urls = []
-    http_urls = []
-    ssh_urls = []
+    # http_urls = []
+    # ssh_urls = []
 
     # test terminating conditions for somewhat ok network coverage
     ssl_done = False
@@ -1006,37 +1002,52 @@
     # get some semi-random urls, try to test the exit node for each protocol needed, get a new node
     while 1: 
         
-        http_urls = get_urls(wordlist, protocol='http')
-        ssl_urls = ['mail.google.com', 'addons.mozilla.org', 'www.fastmail.fm'] # the search for https stuff is yet too slow
+        #http_urls = get_urls(wordlist, protocol='http')
+        ssl_urls = ['https://mail.google.com', 'https://addons.mozilla.org', 'https://www.fastmail.fm'] # the search for https stuff is yet too slow
         
         # https test  
         if not ssl_done:
+            # XXX Uncomment this to try using SETEXIT
+            # current_exit = random.choice([x for x in ssl_nodes if ('$' + `x.idhex`) not in scanner.ssl_tested])
+            # scanner.set_new_exit(current_exit.idhex)
+            
+            # Uncomment this to try NEWNYM after SETEXIT
+            # scanner.get_new_circuit()
+
             ssl_site = random.choice(ssl_urls)
             scanner.check_openssl(ssl_site)
             ssl_tested_n = len(scanner.ssl_tested)
-            plog('INFO', 'Nodes ssl-tested: ' + `ssl_tested_n` + '/' + `ssl_nodes`
-                + ' (~' + `((ssl_tested_n * 100) / ssl_nodes)` + '%)')
-            if ssl_tested_n >= ssl_nodes:
+            plog('INFO', 'Nodes ssl-tested: ' + `ssl_tested_n` + '/' + `ssl_nodes_n`
+                + ' (~' + `((ssl_tested_n * 100) / ssl_nodes_n)` + '%)')
+            if ssl_tested_n >= ssl_nodes_n:
                 ssl_done = True
         
         # http test
+        '''
         if not http_done:
+            # XXX Uncomment this to try using SETEXIT
+            # current_exit = random.choice([x for x in http_nodes if ('$' + `x.idhex`) not in scanner.http_tested])
+            # scanner.set_new_exit(current_exit.idhex)
+            # XXX Uncomment this to try NEWNYM after SETEXIT
+            # scanner.get_new_circuit()
+
             http_site = random.choice(http_urls)
             scanner.check_http(http_site)
             http_tested_n = len(scanner.http_tested)
-            plog('INFO', 'Nodes http-tested: ' + `http_tested_n` + '/' + `http_nodes`
-                + ' (~' + `((http_tested_n * 100) / http_nodes)` + '%)')
-            if http_tested_n >= http_nodes:
+            plog('INFO', 'Nodes http-tested: ' + `http_tested_n` + '/' + `http_nodes_n`
+                + ' (~' + `((http_tested_n * 100) / http_nodes_n)` + '%)')
+            if http_tested_n >= http_nodes_n:
                 http_done = True
         '''
         # ssh test
+        '''
         if not ssh_done:
             ssh_site = random.choice(ssh_urls)
             scanner.check_openssh(ssh_site)
             ssh_tested_n = len(scanner.ssh_tested)
-            plog('INFO', 'Nodes ssh-tested: ' + `ssh_tested_n` + '/' + `ssh_nodes`
-                + '(~' + `((ssh_tested_n * 100) / ssh_nodes)` + '%')')
-            if ssh_tested_n >= ssh_nodes:
+            plog('INFO', 'Nodes ssh-tested: ' + `ssh_tested_n` + '/' + `ssh_nodes_n`
+                + '(~' + `((ssh_tested_n * 100) / ssh_nodes_n)` + '%')')
+            if ssh_tested_n >= ssh_nodes_n:
                 ssh_done = True
         '''
 
@@ -1045,6 +1056,7 @@
             plog('INFO','Wow! We have tested the whole tor network. Check soatstats.py for results')
             break
         else:
+            pass
             scanner.get_new_circuit()
             time.sleep(1)
 #

Modified: torflow/branches/gsoc2008/soatstats.py
===================================================================
--- torflow/branches/gsoc2008/soatstats.py	2008-07-16 16:45:24 UTC (rev 15989)
+++ torflow/branches/gsoc2008/soatstats.py	2008-07-16 17:16:48 UTC (rev 15990)
@@ -126,6 +126,54 @@
                     results.append(result)
 
         return results
+
+    # generic method for saving test results
+    def saveResult(self, result):
+        if result.__class__.__name__ == 'HttpTestResult':
+            self.__saveHttp(result)
+        elif result.__class__.__name__ == 'OpenSSHTestResult':
+            self.__saveSsh(result)
+        elif result.__class__.__name__ == 'OpenSSLTestResult':
+            self.__saveSsl(result)
+
+    # save results per protocol
+    def __saveHttp(self, result):
+        dir = None
+        if result.status == TEST_SUCCESS:
+            dir = http_s_dir
+        elif result.status == TEST_FAILURE:
+            dir = http_f_dir
+        elif result.status == TEST_INCONCLUSIVE:
+            dir = http_i_dir
+
+        # an address representation acceptable for a filename (leave out the http:// and replace slashes)
+        address = result.site[7:].replace('/','_') 
+
+        if dir:
+            result_file = open(dir + `result.exit_node` + '_' + address + '.result','w')
+            pickle.dump(result, result_file)
+            result_file.close() 
+
+
+    def __saveSsh(self, result):
+        pass
+
+    def __saveSsl(self, result):
+        dir = None
+        if result.status == TEST_SUCCESS:
+            dir = ssl_s_dir
+        elif result.status == TEST_FAILURE:
+            dir = ssl_f_dir
+        elif result.status == TEST_INCONCLUSIVE:
+            dir = ssl_i_dir
+
+        # an address representation acceptable for a filename (leave out the https:// and replace slashes)
+        address = result.site[8:].replace('/','_') 
+
+        if dir:
+            result_file = open(dir + `result.exit_node` + '_' + address + '.result','w')
+            pickle.dump(result, result_file)
+            result_file.close() 
     
 #
 # Displaying stats on the console



More information about the tor-commits mailing list