commit f752aa8a0e3245e53c84b38949515778cfbcd4cd
Author: Damian Johnson <atagar(a)torproject.org>
Date: Thu Oct 5 11:49:45 2017 -0700
Script to provide irc activity
---
scripts/irc_activity.py | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 104 insertions(+)
diff --git a/scripts/irc_activity.py b/scripts/irc_activity.py
new file mode 100644
index 0000000..4b405ba
--- /dev/null
+++ b/scripts/irc_activity.py
@@ -0,0 +1,104 @@
+import calendar
+import collections
+import datetime
+import os
+import re
+import sys
+
+try:
+ from dateutil import relativedelta
+except ImportError:
+ print("dateutil unavailable, please run 'sudo pip install python-dateutil'")
+ sys.exit(1)
+
+# Channels to report on, each is expected to have a '<channel>.log' in LOG_DIR.
+IRC_CHANNELS = ('#tor', '#tor-project', '#tor-dev', '#tor-internal')
+
+# Raw strings keep '\S' from being read as an (invalid) string escape.
+# AUTHOR captures the nick of a 'HH:MM <?nick>' message, skipping the single
+# character in front of it. DATE_LINE matches '--- Day changed ...' headers.
+AUTHOR = re.compile(r'^[0-9]{2}:[0-9]{2} <.(\S+)>')
+DATE_LINE = re.compile(r'^--- Day changed [\S]{3} [\S]{3} [0-9]{2} [0-9]{4}$')
+
+START_DATE = datetime.date.today() - relativedelta.relativedelta(months = 6)  # six months back
+LOG_DIR = sys.argv[1] if len(sys.argv) >= 2 else None  # first argument, if any
+
+# Stop right away unless we were handed a usable log directory.
+if not LOG_DIR:
+  print("Please provide the path of the irc log directory to read.")
+  sys.exit(1)
+
+if not os.path.isdir(LOG_DIR):
+  # isdir() is also False for a missing path, so report which case we hit
+  print(("%s isn't a directory" if os.path.exists(LOG_DIR) else "%s doesn't exist") % LOG_DIR)
+  sys.exit(1)
+
+# Every channel we report on needs a '<channel>.log' within the log directory.
+for log_path in (os.path.join(LOG_DIR, name) + '.log' for name in IRC_CHANNELS):
+  if os.path.exists(log_path):
+    continue
+  print("%s doesn't exist" % log_path)
+  sys.exit(1)
+
+
+def latest_date_line(log_path):
+  """
+  Finds the first 'Day changed' header in a log dated START_DATE or later.
+
+  :param str log_path: path of the irc log to scan
+  :returns: **str** of the stripped header, **None** if the log has no header
+    between START_DATE and the day after today
+  """
+  with open(log_path) as log_file:
+    headers = set(entry.strip() for entry in log_file if DATE_LINE.match(entry))
+
+  day = START_DATE
+  while True:
+    names = (calendar.day_name[day.weekday()][:3], day.strftime("%B")[:3])
+    header = day.strftime("--- Day changed %%s %%s %d %Y") % names
+
+    if header in headers:
+      return header
+    if day > datetime.date.today():
+      return None  # tomorrow's header was already tried, nothing is left
+    day += relativedelta.relativedelta(days = 1)
+
+
+# Count the messages each author sent from the start date onward, overall and
+# per channel, then print the authors from most to least active.
+
+total_messages = collections.Counter()  # {author => count}
+messages_for_channel = {}  # {author => {channel => count}}
+
+for channel in IRC_CHANNELS:
+  log_path = os.path.join(LOG_DIR, channel) + '.log'
+  date_line = latest_date_line(log_path)
+
+  if date_line is None:
+    print('unable to find a date line to start reading from for %s' % log_path)
+    sys.exit(1)
+
+  with open(log_path) as log_file:
+    # skip everything up to and including the date header we start from
+    for line in log_file:
+      if line.strip() == date_line:
+        break
+
+    # the file object resumes where the loop above stopped
+    for line in log_file:
+      author_match = AUTHOR.match(line)
+
+      if author_match:
+        author = author_match.group(1)
+        total_messages[author] += 1
+        channel_tally = messages_for_channel.setdefault(author, collections.Counter())
+        channel_tally[channel] += 1
+
+# most_common() sorts by count, highest first
+
+for author, count in total_messages.most_common():
+  channel_counts = ['%s %s' % (n, chan) for chan, n in messages_for_channel[author].most_common()]
+
+  print('%s %s' % (count, author))
+  print(' %s' % ', '.join(channel_counts))
+