commit f752aa8a0e3245e53c84b38949515778cfbcd4cd Author: Damian Johnson atagar@torproject.org Date: Thu Oct 5 11:49:45 2017 -0700
Script to provide irc activity --- scripts/irc_activity.py | 104 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+)
"""
Report who has been speaking in a set of irc channels (scripts/irc_activity.py).

For every channel in IRC_CHANNELS this reads '<log directory>/<channel>.log',
skips ahead to the first day header that is at most six months old, and counts
the messages each author wrote from there on. Authors are then printed busiest
first, each followed by a per-channel breakdown.

Usage: irc_activity.py <irc log directory>
"""

import calendar
import collections
import datetime
import io
import os
import re
import sys

try:
    from dateutil import relativedelta
except ImportError:
    # dateutil is optional, months_before() falls back to the standard library
    relativedelta = None

IRC_CHANNELS = (
    '#tor',
    '#tor-project',
    '#tor-dev',
    '#tor-internal',
)

# 'HH:MM <' + one arbitrary character + the nick + '>'. The single character
# is presumably the op/voice prefix column of the log format (TODO confirm).
AUTHOR = re.compile(r'^[0-9]{2}:[0-9]{2} <.(\S+)>')

# day headers, such as '--- Day changed Thu Oct 05 2017'
DATE_LINE = re.compile(r'^--- Day changed [\S]{3} [\S]{3} [0-9]{2} [0-9]{4}$')
DATE_LINE_FORMAT = '--- Day changed %a %b %d %Y'

ONE_DAY = datetime.timedelta(days=1)


class MissingDateLine(Exception):
    """Raised when a log has no day header within the reporting window."""


def months_before(date, months):
    """
    Provides the date a number of calendar months earlier. When the target
    month is shorter the day is clamped to its last day (Aug 31 => Feb 28).

    :param datetime.date date: date to count back from
    :param int months: number of months to go back

    :returns: **datetime.date** that many months earlier
    """

    if relativedelta is not None:
        return date - relativedelta.relativedelta(months=months)

    # count months since year zero so crossing a year boundary is a divmod

    year, month_index = divmod(date.year * 12 + date.month - 1 - months, 12)
    month = month_index + 1
    last_day = calendar.monthrange(year, month)[1]

    return date.replace(year=year, month=month, day=min(date.day, last_day))


START_DATE = months_before(datetime.date.today(), 6)
LOG_DIR = sys.argv[1] if len(sys.argv) >= 2 else None


def _open_log(log_path):
    # Logs can contain bytes that aren't valid utf-8. Substituting them rather
    # than raising keeps one odd message from aborting the whole report.

    return io.open(log_path, encoding='utf-8', errors='replace')


def latest_date_line(log_path, start_date=None, today=None):
    """
    Gets the day header in the log that is closest to, but not before, the
    date we want to start reading from.

    :param str log_path: path of the log to read
    :param datetime.date start_date: earliest acceptable day, START_DATE if
      **None**
    :param datetime.date today: day to stop searching after, the current date
      if **None**

    :returns: **str** with the stripped header line, or **None** if the log
      has no header between the start date and the day after 'today'
    """

    if start_date is None:
        start_date = START_DATE

    if today is None:
        today = datetime.date.today()

    with _open_log(log_path) as log_file:
        datelines = {line.strip() for line in log_file if DATE_LINE.match(line)}

    candidate = start_date

    while True:
        dateline = candidate.strftime(DATE_LINE_FORMAT)

        if dateline in datelines:
            return dateline
        elif candidate > today:
            return None  # the day after 'today' has been checked too, give up

        candidate += ONE_DAY


def _authors_since(log_path, date_line):
    # Provides the author of each message that follows the given day header,
    # in log order and with one entry per message.

    authors = []

    with _open_log(log_path) as log_file:
        reached_date = False

        for line in log_file:
            if not reached_date:
                # prior to what we should take into consideration
                reached_date = line.strip() == date_line
                continue

            match = AUTHOR.match(line)

            if match:
                authors.append(match.group(1))

    return authors


def collect_activity(log_dir, start_date=None, today=None):
    """
    Counts the messages of every author in each channel's log.

    :param str log_dir: directory containing a '<channel>.log' per channel
    :param datetime.date start_date: passed through to latest_date_line()
    :param datetime.date today: passed through to latest_date_line()

    :returns: **tuple** of the form (totals, per_channel) where totals is a
      Counter of {author => count} and per_channel is a dict of
      {author => {channel => count}}

    :raises: **MissingDateLine** if a log lacks a day header to start from
    """

    totals = collections.Counter()
    per_channel = {}  # {author => {channel => count}}

    for channel in IRC_CHANNELS:
        log_path = os.path.join(log_dir, channel) + '.log'
        date_line = latest_date_line(log_path, start_date, today)

        if not date_line:
            raise MissingDateLine('unable to find a date line to start reading from for %s' % log_path)

        channel_counts = collections.Counter(_authors_since(log_path, date_line))
        totals.update(channel_counts)

        for author, count in channel_counts.items():
            per_channel.setdefault(author, {})[channel] = count

    return totals, per_channel


def format_activity(totals, per_channel):
    """
    Renders the counts from collect_activity() as the lines of our report.

    :param collections.Counter totals: {author => count}
    :param dict per_channel: {author => {channel => count}}

    :returns: **list** with two **str** per author: '<count> <author>' then a
      comma separated '<count> <channel>' breakdown, both busiest first
    """

    lines = []

    for author, count in totals.most_common():
        breakdown = sorted(per_channel.get(author, {}).items(), key=lambda entry: entry[1], reverse=True)

        lines.append('%s %s' % (count, author))
        lines.append(' %s' % ', '.join('%s %s' % (num, channel) for (channel, num) in breakdown))

    return lines


def _log_dir_problem(log_dir):
    # Provides the message describing why we can't read from the directory,
    # or None if it and all of the channel logs are present.

    if not log_dir:
        return "Please provide the path of the irc log directory to read."
    elif not os.path.exists(log_dir):
        return "%s doesn't exist" % log_dir
    elif not os.path.isdir(log_dir):
        return "%s isn't a directory" % log_dir

    for channel in IRC_CHANNELS:
        log_path = os.path.join(log_dir, channel) + '.log'

        if not os.path.exists(log_path):
            return "%s doesn't exist" % log_path

    return None


def main(log_dir=None):
    """
    Prints the activity report for a log directory.

    :param str log_dir: directory to read, LOG_DIR (our first commandline
      argument) if **None**

    :returns: **int** exit status, zero on success and one if the directory or
      its logs are unusable
    """

    if log_dir is None:
        log_dir = LOG_DIR

    problem = _log_dir_problem(log_dir)

    if problem:
        print(problem)
        return 1

    try:
        totals, per_channel = collect_activity(log_dir)
    except MissingDateLine as exc:
        print(exc)
        return 1

    for line in format_activity(totals, per_channel):
        print(line)

    return 0


if __name__ == '__main__':
    sys.exit(main())