[or-cvs] [torflow/master] Make email handling a bit cleaner.

mikeperry at torproject.org mikeperry at torproject.org
Mon Sep 27 14:49:48 UTC 2010


Author: Mike Perry <mikeperry-git at fscked.org>
Date: Mon, 27 Sep 2010 07:39:36 -0700
Subject: Make email handling a bit cleaner.
Commit: 50d47d83148908b6619b7848dfdfb0efd0d262a7

Fix some bugs that caused it to mysteriously fail under bad option combos, and
use some result rate filters on the cron email output. Also, lower some
thresholds for false positive filters.
---
 NetworkScanners/ExitAuthority/README.ExitScanning |   10 +++++-----
 NetworkScanners/ExitAuthority/cron.sh             |   11 ++++++++++-
 NetworkScanners/ExitAuthority/snakeinspector.py   |   19 +++++++++++++------
 NetworkScanners/ExitAuthority/soat.py             |   10 ++++++++--
 NetworkScanners/ExitAuthority/soat_config.py      |   20 +++++++-------------
 5 files changed, 43 insertions(+), 27 deletions(-)
 mode change 100644 => 100755 NetworkScanners/ExitAuthority/cron.sh

diff --git a/NetworkScanners/ExitAuthority/README.ExitScanning b/NetworkScanners/ExitAuthority/README.ExitScanning
index 0a66f50..dfb5989 100644
--- a/NetworkScanners/ExitAuthority/README.ExitScanning
+++ b/NetworkScanners/ExitAuthority/README.ExitScanning
@@ -250,11 +250,11 @@ If you're wary of leaving your email password in plaintext in the
 soat_config, you can set mail_password = None, and you'll be
 prompted to provide it when snakeinspector is run.
 
-Also note you should either use the --after or --finishedafter flag
-to ensure you don't email results which you've already reported. Or,
-if you've automated the running snakeinspector, you should set the
-mail_interval variable in your soat_config.py to the length of time,
-in seconds, between your snakeinspector runs.
+In this current directory is a cron.sh script that calls snakeinspector to
+email results that completed in the last 24 hours, or since the last time
+you've run it. Add it to `crontab -e` like so:
+
+0 3 * * * ~/code/torflow.git/NetworkScanners/ExitAuthority/cron.sh
 
 Alright that covers the basics. Let's get those motherfuckin snakes off
 this motherfuckin Tor!
diff --git a/NetworkScanners/ExitAuthority/cron.sh b/NetworkScanners/ExitAuthority/cron.sh
old mode 100644
new mode 100755
index aa1c940..f768810
--- a/NetworkScanners/ExitAuthority/cron.sh
+++ b/NetworkScanners/ExitAuthority/cron.sh
@@ -1,2 +1,11 @@
+#!/bin/bash
 
-~/code/torflow-soat/NetworkScanners/ExitAuthority/snakeinspector.py --confirmed --email --noreason FailureTimeout
+SCANDIR=~/code/torflow.git/NetworkScanners/ExitAuthority/
+
+$SCANDIR/snakeinspector.py --confirmed --email --noreason FailureTimeout --croninterval 24 --siterate 3
+
+# Optionally, you can use these two lines to allow less regular cron
+# scheduling:
+
+#$SCANDIR/snakeinspector.py --confirmed --email --noreason FailureTimeout --siterate 3 --finishedafter "`cat $SCANDIR/lastmail.time`"
+#date +"%a %b %d %H:%M:%S %Y" > $SCANDIR/lastmail.time
diff --git a/NetworkScanners/ExitAuthority/snakeinspector.py b/NetworkScanners/ExitAuthority/snakeinspector.py
index 4269165..d085914 100755
--- a/NetworkScanners/ExitAuthority/snakeinspector.py
+++ b/NetworkScanners/ExitAuthority/snakeinspector.py
@@ -72,6 +72,7 @@ class SIConf(object):
     self.falsepositives=False
     self.send_email = False
     self.confirmed = False
+    self.cron_interval = 0
     if argv:
       self.getargs(argv)
 
@@ -81,7 +82,8 @@ class SIConf(object):
                ["dir=", "file=", "exit=", "reason=", "resultfilter=", "proto=",
                 "verbose", "statuscode=", "siterate=", "exitrate=", "sortby=",
                 "noreason=", "after=", "before=", "finishedafter=",
-                "finishedbefore=","falsepositives", "email", "confirmed"])
+                "finishedbefore=", "croninterval=", "falsepositives",
+                "email", "confirmed"])
     except getopt.GetoptError,err:
       print str(err)
       usage(argv)
@@ -106,6 +108,8 @@ class SIConf(object):
       elif o == '--finishedafter':
         self.finishedafter = time.mktime(time.strptime(a))
         self.finished = True
+      elif o == '--croninterval':
+        self.cron_interval = int(a)*3600
       elif o == '-t' or o == '--resultfilter':
         self.resultfilter = a
       elif o == '-p' or o == '--proto':
@@ -126,7 +130,7 @@ class SIConf(object):
         if a not in ["proto", "site", "exit", "reason"]:
           usage(argv)
         else:
-          sortby = a
+          self.sortby = a
       elif o == '-s' or o == '--statuscode':
         try:
           self.statuscode = int(a)
@@ -209,6 +213,12 @@ def main(argv):
       if conf.finishedbefore < r.finish_timestamp: continue
     if (conf.falsepositives) ^ r.false_positive: continue
     if conf.confirmed != r.confirmed: continue
+    if conf.confirmed:
+      if conf.cron_interval and r.finish_timestamp < now-conf.cron_interval-60:
+        continue
+    else:
+      if conf.cron_interval and r.timestamp < now-conf.cron_interval-60:
+        continue
     if r.site_result_rate[1] != 0 and \
         conf.siterate < (100.0*r.site_result_rate[0])/r.site_result_rate[1]:
       continue
@@ -219,10 +229,7 @@ def main(argv):
        (not conf.proto or r.proto == conf.proto) and \
        (not conf.resultfilter or r.__class__.__name__ == conf.resultfilter):
       if conf.send_email:
-        if mail_interval and r.timestamp > now - mail_interval - 60:
-          by_proto.setdefault(r.proto, []).append(r)
-        elif not mail_interval:
-          by_proto.setdefault(r.proto, []).append(r)
+        by_proto.setdefault(r.proto, []).append(r)
         continue
       try:
         print r
diff --git a/NetworkScanners/ExitAuthority/soat.py b/NetworkScanners/ExitAuthority/soat.py
index a33c8c3..100907d 100755
--- a/NetworkScanners/ExitAuthority/soat.py
+++ b/NetworkScanners/ExitAuthority/soat.py
@@ -642,9 +642,11 @@ class Test:
       self.rescan_nodes = set([])
       self.tests_per_node = num_tests_per_node
       self.update_nodes()
+      return 0
     else:
       plog("NOTICE", self.proto+" switching to recan mode.")
       self.load_rescan(TEST_FAILURE, self.run_start)
+      return 1
 
   def get_node(self):
     return random.choice(list(self.nodes))
@@ -3159,14 +3161,18 @@ def main(argv):
           test.remove_false_positives()
         else:
           plog("NOTICE", "Not removing false positives for fixed-exit scan")
-        test.timestamp_results(time.time())
         if not do_rescan and rescan_at_finish:
-          test.toggle_rescan()
+          if not test.toggle_rescan():
+            # Only timestamp as finished after the rescan
+            test.timestamp_results(time.time())
           test.rewind()
           all_finished = False
         elif restart_at_finish:
+          test.timestamp_results(time.time())
           test.rewind()
           all_finished = False
+        else:
+          test.timestamp_results(time.time())
     if all_finished:
       plog("NOTICE", "All tests have finished. Exiting\n")
       return
diff --git a/NetworkScanners/ExitAuthority/soat_config.py b/NetworkScanners/ExitAuthority/soat_config.py
index 3f4d713..32e12fd 100644
--- a/NetworkScanners/ExitAuthority/soat_config.py
+++ b/NetworkScanners/ExitAuthority/soat_config.py
@@ -45,13 +45,7 @@ refetch_ip = "4.4.4.4"
 
 # Email settings for email scans.
 from_email = "Tor Exit Scanner <noreply at torproject.org>"
-to_email = ["Tor Exit Scanner List <root at localhost>"]
-
-# If you're running snakeinspector.py with cron you'll want to set
-# mail_interval to the number of seconds between runs.
-# eg.
-# mail_interval = 24*60*60 # Daily email
-mail_interval = None
+to_email = ["Tor Exit Scanner List <flynn at torproject.org>"]
 
 mail_server = "127.0.0.1"
 # Email authentication
@@ -59,29 +53,29 @@ mail_server = "127.0.0.1"
 # mail_auth to True. In this case, one of mail_tls or
 # mail_starttls must also be set to True.
 mail_auth = False
-mail_user = "user at example.com"
+mail_user = "soat at fscked.org"
 mail_password = "password"
 mail_tls = False # Requires Python >= 2.6
 mail_starttls = False
 
 # What percentage of tested nodes must disagree with our local fetches before
 # we ignore the target site/url
-max_exit_fail_pct = 10
+max_exit_fail_pct = 5
 
 # What percentage of tested nodes must get a non 2xx response 
 # (including timeouts and DNS resolution failures) to a request
 # before we ignore the target site/url
 # XXX: current unused
-max_httpcode_fail_pct = 35
+max_httpcode_fail_pct = 10
 
 # What percentage of tested nodes must get a bad http response
 # or other connection issue (including timeouts and DNS resolution 
 # failures) to a request # before we ignore the target site/url
-max_connect_fail_pct = 35
+max_connect_fail_pct = 10
 
-# What percentage of tests can fail that diffet between all 3 fetches
+# What percentage of tests can fail that differ between all 3 fetches
 # fetches (Tor and two non-tor), AFTER applying HTML false positive filters
-max_dynamic_fail_pct = 10
+max_dynamic_fail_pct = 5
 
 # We fetch more target sites/urls if discarding them causes us to fall
 # below this many:
-- 
1.7.1



More information about the tor-commits mailing list