[or-cvs] [torflow/master 60/92] Hold extra urls from get_search_urls in reserve. New pickle revision

mikeperry at torproject.org mikeperry at torproject.org
Sat Aug 21 05:14:00 UTC 2010


Author: John M. Schanck <john at anomos.info>
Date: Sat, 7 Aug 2010 16:13:45 -0400
Subject: Hold extra urls from get_search_urls in reserve. New pickle revision
Commit: 924541afc9448a2b8d46d8939b22179d5fcd6c21

---
 NetworkScanners/ExitAuthority/soat.py |   13 +++++++++++--
 1 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/NetworkScanners/ExitAuthority/soat.py b/NetworkScanners/ExitAuthority/soat.py
index 59055f7..7e583a5 100755
--- a/NetworkScanners/ExitAuthority/soat.py
+++ b/NetworkScanners/ExitAuthority/soat.py
@@ -488,8 +488,9 @@ class Test:
     self.scan_nodes = 0
     self.nodes_to_mark = 0
     self.tests_per_node = num_tests_per_node
+    self.url_reserve = {}
     self._reset()
-    self._pickle_revision = 7 # Will increment as fields are added
+    self._pickle_revision = 8 # Will increment as fields are added
 
   def run_test(self):
     raise NotImplementedError()
@@ -526,6 +527,9 @@ class Test:
       self.timeout_fails = {}
       self.dns_fails = {}
       self._pickle_revision = 7
+    if self._pickle_revision < 8:
+      self.url_reserve = {}
+      self._pickle_revision = 8
 
   def _is_useable_url(self, url, valid_schemes=None, filetypes=None):
     (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
@@ -1977,7 +1981,12 @@ class SearchBasedTest:
       type_urls = self.get_search_urls_for_filetype(filetype)
       # make sure we don't get more urls than needed
       if len(type_urls) > self.results_per_type:
-        type_urls = set(random.sample(type_urls, self.results_per_type))
+        chosen_urls = set(random.sample(type_urls, self.results_per_type))
+        if filetype in self.url_reserve:
+          self.url_reserve[filetype].extend(list(type_urls - chosen_urls))
+        else:
+          self.url_reserve[filetype] = list(type_urls - chosen_urls)
+        type_urls = chosen_urls
       urllist.update(type_urls)
 
     return list(urllist)
-- 
1.7.1




More information about the tor-commits mailing list