[tor-commits] [depictor/master] Update the parseOldconsensuses script

tom at torproject.org tom at torproject.org
Wed Dec 22 02:26:09 UTC 2021


commit 97bea9d85284ef38b589cd75d599d39ef5cd64db
Author: Tom Ritter <tom at ritter.vg>
Date:   Tue Dec 21 21:25:46 2021 -0500

    Update the parseOldconsensuses script
    
    Make print statements python3-compatible
    DescriptorReader was removed from stem, replace it
    Add bastet to the known dirauths
    convert tabs to spaces and completely fuck up the diff
---
 parseOldConsensuses.py | 528 +++++++++++++++++++++++++------------------------
 1 file changed, 265 insertions(+), 263 deletions(-)

diff --git a/parseOldConsensuses.py b/parseOldConsensuses.py
index defa2f5..e72d7a9 100755
--- a/parseOldConsensuses.py
+++ b/parseOldConsensuses.py
@@ -18,293 +18,295 @@ import stem.util.conf
 import stem.util.enum
 
 from stem import Flag
-from stem.descriptor.reader import DescriptorReader
-from stem.util.lru_cache import lru_cache
 
 def get_dirauths_in_tables():
-	return "faravahar, gabelmoo, dizum, moria1, urras, maatuska, longclaw, tor26, dannenberg, turtles".split(", ")
+    return "faravahar, gabelmoo, dizum, moria1, urras, maatuska, longclaw, tor26, dannenberg, turtles, bastet".split(", ")
 
 
 def get_dirauth_from_filename(filename):
-	key = filename.split('-')
-	if len(key) < 9:
-		raise Exception("Strange filename: " + filename)
-
-	key = key[-2]
-	if key == "EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97":
-		return "faravahar"
-	elif key == "ED03BB616EB2F60BEC80151114BB25CEF515B226":
-		return "gabelmoo"
-	elif key == "E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58":
-		return "dizum"
-	elif key == "D586D18309DED4CD6D57C18FDB97EFA96D330566":
-		return "moria1"
-	elif key == "80550987E1D626E3EBA5E5E75A458DE0626D088C":
-		return "urras"
-	elif key == "49015F787433103580E3B66A1707A00E60F2D15B":
-		return "maatuska"
-	elif key == "23D15D965BC35114467363C165C4F724B64B4F66":
-		return "longclaw"
-	elif key == "14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4":
-		return "tor26"
-	elif key == "0232AF901C31A04EE9848595AF9BB7620D4C5B2E" or key == "585769C78764D58426B8B52B6651A5A71137189A":
-		return "dannenberg"
-	elif key == "27B6B5996C426270A5C95488AA5BCEB6BCC86956":
-		return "turtles"
-	else:
-		raise Exception("Unexpcected dirauth key: " + key + " " + filename)
+    key = filename.split('-')
+    if len(key) < 9:
+        raise Exception("Strange filename: " + filename)
+
+    key = key[-2]
+    if key == "EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97":
+        return "faravahar"
+    elif key == "ED03BB616EB2F60BEC80151114BB25CEF515B226":
+        return "gabelmoo"
+    elif key == "E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58":
+        return "dizum"
+    elif key == "D586D18309DED4CD6D57C18FDB97EFA96D330566":
+        return "moria1"
+    elif key == "80550987E1D626E3EBA5E5E75A458DE0626D088C":
+        return "urras"
+    elif key == "49015F787433103580E3B66A1707A00E60F2D15B":
+        return "maatuska"
+    elif key == "23D15D965BC35114467363C165C4F724B64B4F66":
+        return "longclaw"
+    elif key == "14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4":
+        return "tor26"
+    elif key == "0232AF901C31A04EE9848595AF9BB7620D4C5B2E" or key == "585769C78764D58426B8B52B6651A5A71137189A":
+        return "dannenberg"
+    elif key == "27B6B5996C426270A5C95488AA5BCEB6BCC86956":
+        return "turtles"
+    elif key == "27102BC123E7AF1D4741AE047E160C91ADC76B21":
+        return "bastet"
+    else:
+        raise Exception("Unexpcected dirauth key: " + key + " " + filename)
 
 def unix_time(dt):
     return (dt - datetime.datetime.utcfromtimestamp(0)).total_seconds() * 1000.0
 
 def ut_to_datetime(ut):
-	return datetime.datetime.utcfromtimestamp(ut / 1000)
+    return datetime.datetime.utcfromtimestamp(ut / 1000)
 
 def ut_to_datetime_format(ut):
-	return ut_to_datetime(ut).strftime("%Y-%m-%d-%H-%M-%S")
+    return ut_to_datetime(ut).strftime("%Y-%m-%d-%H-%M-%S")
 
 def get_time_from_filename(filename):
-	voteTime = filename.split('-')
-	if len(voteTime) < 7:
-		raise Exception("Strange filename: " + filename)
+    voteTime = filename.split('-')
+    if len(voteTime) < 7:
+        raise Exception("Strange filename: " + filename)
 
-	v = [int(x) for x in filename.split('-')[0:6]]
-	voteTime = datetime.datetime(v[0], v[1], v[2], v[3], v[4], v[5])
-	voteTime = unix_time(voteTime)
-	return voteTime
+    v = [int(x) for x in filename.split('-')[0:6]]
+    voteTime = datetime.datetime(v[0], v[1], v[2], v[3], v[4], v[5])
+    voteTime = unix_time(voteTime)
+    return voteTime
 
 def dirauth_relay_votes(directory, dirAuths, dbc):
-	dirauth_columns = ""
-	dirauth_columns_questions = ""
-	for d in dirAuths:
-		dirauth_columns += d + "_known integer, " + d + "_running integer, " + d + "_bwauth integer, "
-		dirauth_columns_questions += ",?,?,?"
-
-	dbc.execute("CREATE TABLE IF NOT EXISTS vote_data(date integer, " + dirauth_columns + "PRIMARY KEY(date ASC))")
-	dbc.commit()
-
-	votes = {}
-	for root, dirs, files in os.walk(directory):
-		for f in files:
-			filepath = os.path.join(root, f)
-			print filepath
-
-			if '"' in f:
-				raise Exception("Potentially malicious filename")
-			elif "votes-" in f and ".tar" in f:
-				continue
-			elif "consensuses-" in f and ".tar" in f:
-				continue
-			elif "-vote-" not in f:
-				continue
-
-			voteTime = get_time_from_filename(f)
-			if voteTime not in votes:
-				votes[voteTime] = {}
-
-			dirauth = get_dirauth_from_filename(f)
-			if dirauth not in dirAuths:
-				raise Exception("Found a dirauth I don't know about (probably spelling): " + dirauth)
-			elif dirauth not in votes[voteTime]:
-				votes[voteTime][dirauth] = {}
-			else:
-				print "Found two votes for dirauth " + dirauth + " and time " + filepath
-
-			votes[voteTime][dirauth]['present'] = 1
-			votes[voteTime][dirauth]['known'] = int(subprocess.check_output('egrep "^r " "' + filepath + '" | wc -l', shell=True))
-			votes[voteTime][dirauth]['running'] = int(subprocess.check_output('egrep "^s " "' + filepath + '" | grep " Running" | wc -l', shell=True))
-			votes[voteTime][dirauth]['bwlines'] = int(subprocess.check_output('grep Measured= "' + filepath + '" | wc -l', shell=True))
-
-	for t in votes:
-		print ut_to_datetime(t)
-		print "\t", len(votes[t])
-		for d in votes[t]:
-			print "\t", d, votes[t][d]['bwlines'], votes[t][d]['running']
-	
-		insertValues = [t]
-		for d in dirAuths:
-			if d in votes[t]:
-				insertValues.append(votes[t][d]['known'])
-				insertValues.append(votes[t][d]['running'])
-				insertValues.append(votes[t][d]['bwlines'])
-			else:
-				insertValues.append(None)
-				insertValues.append(None)
-				insertValues.append(None)
-
-		dbc.execute("INSERT OR REPLACE INTO vote_data VALUES (?" + dirauth_columns_questions + ")", insertValues)
-		dbc.commit()
+    dirauth_columns = ""
+    dirauth_columns_questions = ""
+    for d in dirAuths:
+        dirauth_columns += d + "_known integer, " + d + "_running integer, " + d + "_bwauth integer, "
+        dirauth_columns_questions += ",?,?,?"
+
+    dbc.execute("CREATE TABLE IF NOT EXISTS vote_data(date integer, " + dirauth_columns + "PRIMARY KEY(date ASC))")
+    dbc.commit()
+
+    votes = {}
+    for root, dirs, files in os.walk(directory):
+        for f in files:
+            filepath = os.path.join(root, f)
+            print(filepath)
+
+            if '"' in f:
+                raise Exception("Potentially malicious filename")
+            elif "votes-" in f and ".tar" in f:
+                continue
+            elif "consensuses-" in f and ".tar" in f:
+                continue
+            elif "-vote-" not in f:
+                continue
+
+            voteTime = get_time_from_filename(f)
+            if voteTime not in votes:
+                votes[voteTime] = {}
+
+            dirauth = get_dirauth_from_filename(f)
+            if dirauth not in dirAuths:
+                raise Exception("Found a dirauth I don't know about (probably spelling): " + dirauth)
+            elif dirauth not in votes[voteTime]:
+                votes[voteTime][dirauth] = {}
+            else:
+                print("Found two votes for dirauth " + dirauth + " and time " + filepath)
+
+            votes[voteTime][dirauth]['present'] = 1
+            votes[voteTime][dirauth]['known'] = int(subprocess.check_output('egrep "^r " "' + filepath + '" | wc -l', shell=True))
+            votes[voteTime][dirauth]['running'] = int(subprocess.check_output('egrep "^s " "' + filepath + '" | grep " Running" | wc -l', shell=True))
+            votes[voteTime][dirauth]['bwlines'] = int(subprocess.check_output('grep Measured= "' + filepath + '" | wc -l', shell=True))
+
+    for t in votes:
+        print(ut_to_datetime(t))
+        print("\t", len(votes[t]))
+        for d in votes[t]:
+            print("\t", d, votes[t][d]['bwlines'], votes[t][d]['running'])
+    
+        insertValues = [t]
+        for d in dirAuths:
+            if d in votes[t]:
+                insertValues.append(votes[t][d]['known'])
+                insertValues.append(votes[t][d]['running'])
+                insertValues.append(votes[t][d]['bwlines'])
+            else:
+                insertValues.append(None)
+                insertValues.append(None)
+                insertValues.append(None)
+
+        dbc.execute("INSERT OR REPLACE INTO vote_data VALUES (?" + dirauth_columns_questions + ")", insertValues)
+        dbc.commit()
 
 def bwauth_measurements(directory, dirAuths, dbc):
-	#Find all the consensuses and votesrm
-	votes = {}
-	consensuses = {}
-	for root, dirs, files in os.walk(directory):
-		for f in files:
-			filepath = os.path.join(root, f)
-
-			if '"' in f:
-				raise Exception("Potentially malicious filename")
-			elif "votes-" in f and ".tar" in f:
-				continue
-			elif "consensuses-" in f and ".tar" in f:
-				continue
-
-			if "-consensus" in f:
-				consensusTime = get_time_from_filename(f)
-				if consensusTime not in consensuses:
-					consensuses[consensusTime] = filepath
-				else:
-					print "Found two consensuses with the same time:", ut_to_datetime(consensusTime)
-
-				#print "Consensus:", filepath
-			elif "-vote-" in f:
-				voteTime = get_time_from_filename(f)
-
-				# Test to see if we already processed this one
-				cur = dbc.cursor()
-				cur.execute("SELECT * FROM bwauth_data WHERE date = ?", (voteTime,))
-				if cur.fetchone():
-					print "Skipping", f, "because we already processed it"
-					continue
-				elif voteTime not in votes:
-					votes[voteTime] = {}
-
-				dirauth = get_dirauth_from_filename(f)
-
-				if dirauth not in dirAuths:
-					raise Exception("Found a dirauth I don't know about (probably spelling): " + dirauth)
-				elif dirauth not in votes[voteTime]:
-					votes[voteTime][dirauth] = filepath
-				else:
-					print "Found two votes for dirauth " + dirauth + ":", filepath, "and", votes[voteTime][dirauth]
-
-				#print "Vote:", dirauth, filepath
-
-	#Make sure we have a consensus for each vote
-	to_del = []
-	for v in votes:
-		if v not in consensuses:
-			print "Have votes for time", ut_to_datetime(v), "but no consensus!"
-			to_del.append(v)
-			#sys.exit(1)
-	for i in to_del:
-		del votes[i]
-
-	#Make the table
-	bwauth_columns = ""
-	bwauth_columns_questions = ""
-	for d in dirAuths:
-		bwauth_columns += d + "_above integer, " + d + "_shared integer, " + d + "_exclusive integer, " + d + "_below integer, " + d + "_unmeasured integer, "
-		bwauth_columns_questions += ",?,?,?,?,?"
-
-	dbc.execute("CREATE TABLE IF NOT EXISTS bwauth_data(date integer, " + bwauth_columns + "PRIMARY KEY(date ASC))")
-	dbc.commit()
-
-	reviewed = 0
-	for v in votes:
-		reviewed += 1
-		print "Reviewing", consensuses[v], "(" + str(reviewed) + "/" + str(len(votes)) + ")"
-
-		#Get the consensus data
-		consensusRouters = {}
-		with DescriptorReader(consensuses[v]) as reader:
-			reader.register_skip_listener(my_listener)
-			for relay in reader:
-				consensusRouters[relay.fingerprint] = "Unmeasured" if relay.is_unmeasured else relay.bandwidth
-		
-		#The vote data
-		bwauthVotes = {}
-		for d in votes[v]:
-			if d not in bwauthVotes:
-				bwauthVotes[d] = {}
-
-			measured_something = False
-			with DescriptorReader(votes[v][d]) as reader:
-				reader.register_skip_listener(my_listener)
-				for relay in reader:
-					if relay.measured:
-						bwauthVotes[d][relay.fingerprint] = relay.measured
-						measured_something = True
-			if not measured_something:
-				del bwauthVotes[d]
-
-		#Now match them up and store the data
-		thisConsensusResults = {}
-		for r in consensusRouters:
-			for d in bwauthVotes:
-				had_any_value = False
-				if d not in thisConsensusResults:
-					thisConsensusResults[d] = {'unmeasured' : 0, 'above' : 0, 'below' : 0, 'exclusive' : 0 , 'shared' : 0}
-
-				if consensusRouters[r] == "Unmeasured":
-					continue
-				elif r not in bwauthVotes[d]:
-					had_any_value = True
-					thisConsensusResults[d]['unmeasured'] += 1
-				elif consensusRouters[r] < bwauthVotes[d][r]:
-					had_any_value = True
-					thisConsensusResults[d]['above'] += 1
-				elif consensusRouters[r] > bwauthVotes[d][r]:
-					had_any_value = True
-					thisConsensusResults[d]['below'] += 1
-				elif consensusRouters[r] == bwauthVotes[d][r] and \
-					 1 == len([1 for d_i in bwauthVotes if d_i in bwauthVotes and r in bwauthVotes[d_i] and bwauthVotes[d_i][r] == consensusRouters[r]]):
-					had_any_value = True
-				 	thisConsensusResults[d]['exclusive'] += 1
-			 	elif consensusRouters[r] == bwauthVotes[d][r] and \
-					 1 != len([1 for d_i in bwauthVotes if d_i in bwauthVotes and r in bwauthVotes[d_i] and bwauthVotes[d_i][r] == consensusRouters[r] ]):
-				 	had_any_value = True
-				 	thisConsensusResults[d]['shared'] += 1
-			 	else:
-			 		print "What case am I in???"
-			 		sys.exit(1)
-
-			 	if not had_any_value:
-		 			del thisConsensusResults[d]
-
- 		insertValues = [v]
- 		for d in dirAuths: 
- 			if d in thisConsensusResults:
-				insertValues.append(thisConsensusResults[d]['above'])
-				insertValues.append(thisConsensusResults[d]['shared'])
-				insertValues.append(thisConsensusResults[d]['exclusive'])
-				insertValues.append(thisConsensusResults[d]['below'])
-				insertValues.append(thisConsensusResults[d]['unmeasured'])
-			else:
-				insertValues.append(None)
-				insertValues.append(None)
-				insertValues.append(None)
-				insertValues.append(None)
-				insertValues.append(None)
- 		
-		dbc.execute("INSERT OR REPLACE INTO bwauth_data VALUES (?" + bwauth_columns_questions + ")", insertValues)
-		dbc.commit()
-		
+    #Find all the consensuses and votesrm
+    votes = {}
+    consensuses = {}
+    for root, dirs, files in os.walk(directory):
+        for f in files:
+            filepath = os.path.join(root, f)
+
+            if '"' in f:
+                raise Exception("Potentially malicious filename")
+            elif "votes-" in f and ".tar" in f:
+                continue
+            elif "consensuses-" in f and ".tar" in f:
+                continue
+
+            if "-consensus" in f:
+                consensusTime = get_time_from_filename(f)
+                if consensusTime not in consensuses:
+                    consensuses[consensusTime] = filepath
+                else:
+                    print("Found two consensuses with the same time:", ut_to_datetime(consensusTime))
+
+                #print "Consensus:", filepath
+            elif "-vote-" in f:
+                voteTime = get_time_from_filename(f)
+
+                # Test to see if we already processed this one
+                cur = dbc.cursor()
+                cur.execute("SELECT * FROM bwauth_data WHERE date = ? AND faravahar_above IS NOT NULL", (voteTime,))
+                if cur.fetchone():
+                    #print("Skipping", f, "because we already processed it")
+                    continue
+                elif voteTime not in votes:
+                    votes[voteTime] = {}
+
+                dirauth = get_dirauth_from_filename(f)
+
+                if dirauth not in dirAuths:
+                    raise Exception("Found a dirauth I don't know about (probably spelling): " + dirauth)
+                elif dirauth not in votes[voteTime]:
+                    votes[voteTime][dirauth] = filepath
+                else:
+                    print("Found two votes for dirauth " + dirauth + ":", filepath, "and", votes[voteTime][dirauth])
+
+                #print "Vote:", dirauth, filepath
+
+    print("Found %s consensuses" % len(consensuses))
+    print("Found %s votes" % len(votes))
+
+    #Make sure we have a consensus for each vote
+    to_del = []
+    for v in votes:
+        if v not in consensuses:
+            print("Have votes for time", ut_to_datetime(v), "but no consensus!")
+            to_del.append(v)
+            #sys.exit(1)
+    for i in to_del:
+        del votes[i]
+
+    #Make the table
+    bwauth_columns = ""
+    bwauth_columns_questions = ""
+    for d in dirAuths:
+        bwauth_columns += d + "_above integer, " + d + "_shared integer, " + d + "_exclusive integer, " + d + "_below integer, " + d + "_unmeasured integer, "
+        bwauth_columns_questions += ",?,?,?,?,?"
+
+    dbc.execute("CREATE TABLE IF NOT EXISTS bwauth_data(date integer, " + bwauth_columns + "PRIMARY KEY(date ASC))")
+    dbc.commit()
+
+    reviewed = 0
+    for v in votes:
+        reviewed += 1
+        print("Reviewing", consensuses[v], "(" + str(reviewed) + "/" + str(len(votes)) + ")")
+
+        #Get the consensus data
+        consensusRouters = {}
+        reader = stem.descriptor.parse_file(consensuses[v])
+        for relay in reader:
+            consensusRouters[relay.fingerprint] = "Unmeasured" if relay.is_unmeasured else relay.bandwidth
+        
+        #The vote data
+        bwauthVotes = {}
+        for d in votes[v]:
+            if d not in bwauthVotes:
+                bwauthVotes[d] = {}
+
+            measured_something = False
+            reader = stem.descriptor.parse_file(votes[v][d])
+            for relay in reader:
+                if relay.measured:
+                    bwauthVotes[d][relay.fingerprint] = relay.measured
+                    measured_something = True
+
+            if not measured_something:
+                del bwauthVotes[d]
+
+        #Now match them up and store the data
+        thisConsensusResults = {}
+        for r in consensusRouters:
+            for d in bwauthVotes:
+                had_any_value = False
+                if d not in thisConsensusResults:
+                    thisConsensusResults[d] = {'unmeasured' : 0, 'above' : 0, 'below' : 0, 'exclusive' : 0 , 'shared' : 0}
+
+                if consensusRouters[r] == "Unmeasured":
+                    continue
+                elif r not in bwauthVotes[d]:
+                    had_any_value = True
+                    thisConsensusResults[d]['unmeasured'] += 1
+                elif consensusRouters[r] < bwauthVotes[d][r]:
+                    had_any_value = True
+                    thisConsensusResults[d]['above'] += 1
+                elif consensusRouters[r] > bwauthVotes[d][r]:
+                    had_any_value = True
+                    thisConsensusResults[d]['below'] += 1
+                elif consensusRouters[r] == bwauthVotes[d][r] and \
+                    1 == len([1 for d_i in bwauthVotes if d_i in bwauthVotes and r in bwauthVotes[d_i] and bwauthVotes[d_i][r] == consensusRouters[r]]):
+                    had_any_value = True
+                    thisConsensusResults[d]['exclusive'] += 1
+                elif consensusRouters[r] == bwauthVotes[d][r] and \
+                    1 != len([1 for d_i in bwauthVotes if d_i in bwauthVotes and r in bwauthVotes[d_i] and bwauthVotes[d_i][r] == consensusRouters[r] ]):
+                    had_any_value = True
+                    thisConsensusResults[d]['shared'] += 1
+                else:
+                    print("What case am I in???")
+                    sys.exit(1)
+
+                if not had_any_value:
+                    del thisConsensusResults[d]
+
+        insertValues = [v]
+        for d in dirAuths: 
+            if d in thisConsensusResults:
+                insertValues.append(thisConsensusResults[d]['above'])
+                insertValues.append(thisConsensusResults[d]['shared'])
+                insertValues.append(thisConsensusResults[d]['exclusive'])
+                insertValues.append(thisConsensusResults[d]['below'])
+                insertValues.append(thisConsensusResults[d]['unmeasured'])
+            else:
+                insertValues.append(None)
+                insertValues.append(None)
+                insertValues.append(None)
+                insertValues.append(None)
+                insertValues.append(None)
+         
+        dbc.execute("INSERT OR REPLACE INTO bwauth_data VALUES (?" + bwauth_columns_questions + ")", insertValues)
+        dbc.commit()
+        
 def my_listener(path, exception):
-	print "Skipped!"
-	print path
-	print exception
+    print("Skipped!")
+    print(path)
+    print(exception)
 
 
 def main(itype, directory):
-	dirAuths = get_dirauths_in_tables()
-	dbc = sqlite3.connect(os.path.join('data', 'historical.db'))
+    dirAuths = get_dirauths_in_tables()
+    dbc = sqlite3.connect(os.path.join('data', 'historical.db'))
 
-	if itype == "dirauth_relay_votes":
-		dirauth_relay_votes(directory, dirAuths, dbc)
-	elif itype == "bwauth_measurements":
-		bwauth_measurements(directory, dirAuths, dbc)
-	else:
-		print "Unknown ingestion type"
+    if itype == "dirauth_relay_votes":
+        dirauth_relay_votes(directory, dirAuths, dbc)
+    elif itype == "bwauth_measurements":
+        bwauth_measurements(directory, dirAuths, dbc)
+    else:
+        print("Unknown ingestion type")
 
 if __name__ == '__main__':
-	try:
-		if len(sys.argv) != 3:
-			print "Usage: ", sys.argv[0], "ingestion-type vote-directory"
-		else:
-			main(sys.argv[1], sys.argv[2])
-	except:
-		msg = "%s failed with:\n\n%s" % (sys.argv[0], traceback.format_exc())
-		print "Error: %s" % msg
+    try:
+        if len(sys.argv) != 3:
+            print("Usage: ", sys.argv[0], "ingestion-type vote-directory")
+        else:
+            main(sys.argv[1], sys.argv[2])
+    except:
+        msg = "%s failed with:\n\n%s" % (sys.argv[0], traceback.format_exc())
+        print("Error: %s" % msg)
 



More information about the tor-commits mailing list