[tor-commits] [metrics-tasks/master] Accept generic search term and implement multi-threaded approach (#9889)

karsten at torproject.org karsten at torproject.org
Mon Mar 31 13:25:43 UTC 2014


commit b4ee6de5e33f844d562a11a1e5c7685bc549ea96
Author: Sreenatha Bhatlapenumarthi <sreenatha.dev at gmail.com>
Date:   Thu Mar 20 02:12:37 2014 +0530

    Accept generic search term and implement multi-threaded approach (#9889)
---
 task-9889/tshirt.py |  178 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 124 insertions(+), 54 deletions(-)

diff --git a/task-9889/tshirt.py b/task-9889/tshirt.py
index b04148b..1262323 100755
--- a/task-9889/tshirt.py
+++ b/task-9889/tshirt.py
@@ -2,46 +2,66 @@
 
 import urllib, urllib2
 import json
+import threading
+import _strptime
 from datetime import datetime
 
-def print_debug_info(exit_port_check, uptime_percent, avg_bandwidth):
+TWO_MONTHS = 2 * 30 * 86400
+
+# Global variables
+bandwidth_data = []
+uptime_data = []
+exit_policies = []
+thread_lock = threading.Lock()
+
+
+def print_debug_info(fingerprint, exit_port_check, uptime_percent, avg_bandwidth):
   """ Provides debugging information about relay operator's eligibility 
       for acquiring a t-shirt """
 
+  print("=================================================================")
   print("\nRelay details")
-  print("=============")
+  print("-------------")
+  print("Fingerprint : " + fingerprint)
   print("Exit to port 80 allowed : " + str(exit_port_check))
-  print("Uptime percentage in past 2 months : " + str(uptime_percent))
-  print("Average bandwidth in past 2 months : " + str(avg_bandwidth) + "KBytes/s")
+  if uptime_percent == -1:
+    print("Uptime percentage in past 2 months : Insufficient data")
+  else:
+    print("Uptime percentage in past 2 months : " + str(uptime_percent))
+  if avg_bandwidth == -1:
+    print("Average bandwidth in past 2 months : Insufficient data")
+  else:
+    print("Average bandwidth in past 2 months : " + str(avg_bandwidth) + "KBytes/s")
 
   print("\nElligibility")
-  print("============")
-  if avg_bandwidth >= 500:
-    print("Elligible for T-shirt")
-    print("Reason : Average bandwidth greater than 500KBytes/s")
+  print("------------")
+
+  if uptime_percent < 95:
+    print("Not elligible for T-shirt")
+    print("Reason : Insufficient relay up time")
   else:
     if exit_port_check is False:
-      print("Not elligible for T-shirt")
-      print("Reason : Average bandwidth less than 500KBytes/s and port 80 blocked")
+      if avg_bandwidth >= 500:
+        print("Elligible for T-shirt")
+        print("Reason : Average bandwidth greater than 500KBytes/s")
+      else:
+        print("Not elligible for T-shirt")
+        print("Reason : Average bandwidth less than 500KBytes/s and port 80 blocked")
     else:
-      if uptime_percent < 95:
+      if avg_bandwidth < 100:
         print("Not elligible for T-shirt")
-        print("Reason : Insufficient relay up time")
+        print("Reason : Average bandwidth less than 100KBytes/s")
       else:
-        if avg_bandwidth < 100:
-          print("Not elligible for T-shirt")
-          print("Reason : Average bandwidth less than 100KBytes/s")
-        else:
           print("Elligible for T-shirt")
-	  print("Reason : Average bandwidth greater than 100KBytes/s, relay uptime greater than 95% and port 80 unblocked")
+	  print("Reason : Average bandwidth greater than 100KBytes/s,"
+	         "relay uptime greater than 95% and port 80 unblocked")
   print("")
 
 
 def calculate_sum(relay_history):
   """ Calculates the sum of values in 2-month time frame """
 
-  two_months = 2 * 30 * 86400
-  two_months_values = two_months / relay_history['interval']
+  two_months_values = TWO_MONTHS / relay_history['interval']
   _sum = 0
   for i in relay_history['values'][-two_months_values:]:
     if i is not 'null' and i is not None:
@@ -49,6 +69,18 @@ def calculate_sum(relay_history):
   return _sum * relay_history['interval']
 
 
+def check_in_ports(ports):
+  """ Checks for port 80 is present in the ports list """
+
+  for entry in ports:
+    if entry == '80':
+      return True
+    if '-' in entry:
+      [x,y] = entry.split('-')
+      if 80 in range(int(x),int(y)):
+        return True
+  return False
+
 def fetch_data(doc_type, params):
   """ Fetches onionoo data and returns response formatted as a dictionary """
 
@@ -70,45 +102,35 @@ def fetch_data(doc_type, params):
   return response_dict
 
 
-def check_exit_port(fingerprint):
+def check_exit_port(response):
   """ Checks if relay allows network traffic to exit through port 80 """
 
-  params = {
-      'lookup' : fingerprint,
-      'fields' : 'exit_policy_summary'
-  }
-  response = fetch_data('details', params)
-  exit_policy = response['relays'][0]['exit_policy_summary']
+  exit_policy = response['exit_policy_summary']
   if 'accept' in exit_policy:
-    return '80' in exit_policy['accept']
+    return check_in_ports(exit_policy['accept'])
   elif 'reject' in exit_policy:
-    return '80' not in exit_policy['reject']
-  else:
-    return False
+    return check_in_ports(exit_policy['reject'])
+  return False
 
 
-def get_uptime_percent(fingerprint):
-  """ Calculates the relay's uptime from onionoo's uptime documents """
+def get_uptime_percent(response):
+  """ Calculates the relay's uptime from onionoo's uptime document """
 
-  params = {
-      'lookup' : fingerprint
-  }
-  response = fetch_data('uptime', params)
-  uptime = calculate_sum(response['relays'][0]['uptime']['3_months'])
+  if '3_months' not in response['uptime'].keys():
+    return -1
+  uptime = calculate_sum(response['uptime']['3_months'])
   uptime_percent = round(uptime/(2*30*864), 2)
   return uptime_percent
 
 
-def get_avg_bandwidth(fingerprint):
+def get_avg_bandwidth(response):
   """ Calculates average bandwidth of traffic through the relay """
 
-  params = {
-      'lookup' : fingerprint
-  }
-  response = fetch_data('bandwidth', params)
-  
+  if '3_months' not in response['write_history'].keys():
+    return -1
+
   # Calculate the sum of values in response
-  bandwidth_data = response['relays'][0]['write_history']['3_months']
+  bandwidth_data = response['write_history']['3_months']
   traffic_sum = calculate_sum(bandwidth_data)
   
   # Find number of values between last and today
@@ -118,24 +140,72 @@ def get_avg_bandwidth(fingerprint):
   last_today_values = time_interval/bandwidth_data['interval']
   
   # Calculate the result
-  two_months = 2 * 30 * 86400
-  two_months_values = two_months/bandwidth_data['interval']
+  two_months_values = TWO_MONTHS/bandwidth_data['interval']
   total_values = two_months_values + last_today_values
   result = (traffic_sum * bandwidth_data['factor'])/total_values
 
   return round(result/1000.0,2)
 
 
-def check_tshirt(fingerprint):
-  """ Checks if the relay satisfies qualification criteria for a t-shirt """
+def check_tshirt(search_query):
+  """ Fetches required onionoo documents and invokes threads """
+
+  global exit_policies
+  global uptime_data
+  global bandwidth_data
+  global thread_lock
 
-  exit_port_check = check_exit_port(fingerprint)
-  uptime_percent = get_uptime_percent(fingerprint)
-  avg_bandwidth = get_avg_bandwidth(fingerprint)
-  print_debug_info(exit_port_check, uptime_percent, avg_bandwidth)
+  # Fetch matching relays from summary document
+  params = {
+     'type' : 'relay',
+     'search' : search_query
+  }
+  matched_relays = fetch_data('summary', params)
+  print "Fetched summary document"
+  fingerprints = [i['f'] for i in matched_relays['relays']]
+  if fingerprints == []:
+    print 'No results found'
+    exit()
 
+  # Fetch the required documents from onionoo
+  params.pop('type')
+  bandwidth_data = fetch_data('bandwidth', params)['relays']
+  print "Fetched bandwidth document"
+  uptime_data = fetch_data('uptime', params)['relays']
+  print "Fetched uptime document"
+  params['fields'] = 'exit_policy_summary,fingerprint'
+  exit_policies = fetch_data('details', params)['relays']
+  print "Fetched details document"
+
+  # Create and start the threads
+  threads = []
+  for i in range(len(fingerprints)):
+    threads.append(relay_thread(i))
+    threads[-1].start()
+  # Wait for the threads to finish
+  for thread in threads:
+    thread.join()
+
+
+class relay_thread(threading.Thread):
+  """ A subclass of the Thread class that handles relay-specific data"""
+  def __init__(self, thread_id):
+    threading.Thread.__init__(self)
+    self.thread_id = thread_id
+  def run(self):
+    global exit_polices
+    global uptime_data
+    global bandwidth_data
+    fingerprint = exit_policies[self.thread_id]['fingerprint']
+    exit_port_check = check_exit_port(exit_policies[self.thread_id])
+    uptime_percent = get_uptime_percent(uptime_data[self.thread_id])
+    avg_bandwidth = get_avg_bandwidth(bandwidth_data[self.thread_id])
+    thread_lock.acquire()
+    print_debug_info(fingerprint, exit_port_check, uptime_percent, avg_bandwidth)
+    thread_lock.release()
+    
 
 if __name__ == "__main__":
-  fingerprint = raw_input('Enter relay fingerprint: ')
-  check_tshirt(fingerprint)
+  search_query = raw_input('Enter relay search-query : ')
+  check_tshirt(search_query)
 





More information about the tor-commits mailing list