[tor-commits] [gettor/master] Updates to Twitter module, now it works. Still pending logging.

ilv at torproject.org ilv at torproject.org
Tue Nov 3 19:31:31 UTC 2015


commit 9f2a5194c126f431579099185d5e1a12cbcec878
Author: ilv <ilv at users.noreply.github.com>
Date:   Fri Oct 30 22:11:52 2015 -0300

    Updates to Twitter module, now it works. Still pending logging.
---
 gettor/twitter.py |  507 ++++++++++++-----------------------------------------
 process_tweets.py |   17 +-
 2 files changed, 110 insertions(+), 414 deletions(-)

diff --git a/gettor/twitter.py b/gettor/twitter.py
index 05320ef..a197ac1 100644
--- a/gettor/twitter.py
+++ b/gettor/twitter.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 #
-# This file is part of GetTor, a Tor Browser distribution system.
+# This file is part of GetTor.
 #
-# :authors: Israel Leiva <ilv at riseup.net>
+# :authors: Israel Leiva <ilv at torproject.org>
 #           Based on BridgeDB Twitter distributor (PoC) by wfn
 #           - https://github.com/wfn/twidibot
 #
@@ -11,22 +11,18 @@
 #
 # :license: This is Free Software. See LICENSE for license information.
 
-import sys
-import json
-import time
-import signal
+import os
+import re
 import tweepy
+import logging
 import gettext
-
-from tweepy.models import Status
+import ConfigParser
 
 import core
 import utils
 import blacklist
 
-"""Twitter module for processing requests. Forked from BridgeDB Twitter
-distributor by wfn (https://github.com/wfn/twidibot)"""
-
+"""Twitter channel for distributing links to download Tor Browser."""
 
 class ConfigError(Exception):
     pass
@@ -36,172 +32,24 @@ class InternalError(Exception):
     pass
 
 
-class TwitterBotStreamListener(tweepy.StreamListener):
-    """Listener for twitter's Streaming API."""
-
-    def __init__(self, bot, api=None):
+class GetTorStreamListener(tweepy.StreamListener):
+    """ Basic listener for Twitter's streaming API."""
+    def __init__(self, bot):
         self.bot = bot
-        self.processing_data = False
-
-        super(TwitterBotStreamListener, self).__init__(api)
-
-    def on_data(self, raw_data):
-        """Called when raw data is received from connection.
-
-        This is where all the data comes first. Normally we could use 
-        (inherit) the on_data() in tweepy.StreamListener, but it unnecessarily
-        and naively reports unknown event types as errors (to simple log); 
-        also, we might want to tweak it further later on.
-
-        But for now, this is basically taken from tweepy's on_data().
-
-        Return False to stop stream and close connection.
-
-        """
-
-        self.processing_data = True
-
-        data = json.loads(raw_data)
-
-        if 'in_reply_to_status_id' in data:
-            status = Status.parse(self.api, data)
-            if self.on_status(status) is False:
-                return False
-        elif 'delete' in data:
-            delete = data['delete']['status']
-            if self.on_delete(delete['id'], delete['user_id']) is False:
-                return False
-        elif 'event' in data:
-            status = Status.parse(self.api, data)
-            if self.on_event(status) is False:
-                return False
-        elif 'direct_message' in data:
-            status = Status.parse(self.api, data)
-            if self.on_direct_message(status) is False:
-                return False
-        elif 'limit' in data:
-            if self.on_limit(data['limit']['track']) is False:
-                return False
-        elif 'disconnect' in data:
-            if self.on_disconnect(data['disconnect']) is False:
-                return False
-        else:
-            # we really are ok to receive unknown stream/event types.
-            # log to debug?
-            log.debug('TwitterBotStreamListener::on_data(): got event/stream'
-                    ' data of unknown type. Raw data follows:\n%s', data)
-
-        self.processing_data = False
-
-    def on_status(self, status):
-        """Called when a new status arrives"""
-
-        #log.debug('Got status: %s', status)
-        return
-
-    def on_event(self, status):
-        """Called when a new event arrives"""
-
-        #log.debug('Got event: %s', status)
-
-        # XXX make sure tweepy's given 'status.event' unicode string can
-        # always be safely converted to ascii
-        
-        # now it seems one can reply to dm without following the account
-        # if str(status.event) == 'follow':  
-        #    self.bot.handleFollowEvent(status)
-
-        return
+        super(GetTorStreamListener, self).__init__(self.bot.api)
 
     def on_direct_message(self, status):
-        """Called when a new direct message arrives or is sent from us
-
-        TODO: make a pull request for tweepy or something, because they 
-        say it's only when a direct message is *received* (implying, 'by us')
-
-        """
-
-        # doing twitter user comparisons using id_str makes sense here - it's
-        # safe and id_str's are guaranteed to be unique (re: latter, just like
-        # id's.) maybe consider deciding how comparisons should be made for sure,
-        # and then extend tweepy.models.User to include __eq__?
+        """ Right now we only care about direct messages. """
         if status.direct_message['sender']['id_str'] != self.bot.bot_info.id_str:
-            self.bot.handleDirectMessage(status)
-        else:
-            # log.debug('Caught a direct message sent *from* us')
-            pass
-
-        return
-
-    def on_connect(self):
-        """Called once connected to streaming server.
-
-        This will be invoked once a successful response
-        is received from the server. Allows the listener
-        to perform some work prior to entering the read loop.
-
-        """
-        pass
-
-    def on_exception(self, exception):
-        """Called when an unhandled exception occurs."""
-        return
-
-    def on_delete(self, status_id, user_id):
-        """Called when a delete notice arrives for a status"""
-        return
-
-    def on_limit(self, track):
-        """Called when a limitation notice arrvies"""
-        return
-
-    def on_error(self, status_code):
-        """Called when a non-200 status code is returned"""
-        return False
-
-    def on_timeout(self):
-        """Called when stream connection times out"""
-        return
-
-    def on_disconnect(self, notice):
-        """Called when twitter sends a disconnect notice
+            self.bot.parse_request(status.direct_message)
 
-        Disconnect codes are listed here:
-        https://dev.twitter.com/docs/streaming-apis/messages
-            #Disconnect_messages_disconnect
-
-        """
-        return
-    
 
 class TwitterBot(object):
-    """Main interface between the stateful listener and Twitter APIs."""
-
-    # TODO: think about secure ways of storing twitter access config.
-    # For one, app itself should ideally not be able to have write access
-    # to it. For another, ideally it would request details from some other
-    # component, authenticate, and not be able to re-authenticate to twitter
-    
-    """
-    default_access_config = {
-        'api_key': config.API_KEY,
-        'api_secret': config.API_SECRET,
-        'access_token': config.ACCESS_TOKEN,
-        'token_secret': config.TOKEN_SECRET
-    }"""
-
-    def __init__(self, **kw):
-        """Constructor that accepts custom access config as named arguments
-
-        Easy to test things from interactive shell this way.
-        Probably won't be needed in production code.
+    """ Receive and reply to requests via Twitter. """
+    def __init__(self, cfg=None):
+        """ Create new object by reading a configuration file.
 
-        """
-
-        """
-        self.access_config = dict()
-        for key, default in self.default_access_config.iteritems():
-            self.access_config[key] = kw.get(key, default)
+        :param: cfg (string) the path of the configuration file.
         """
         
         default_cfg = 'twitter.cfg'
@@ -222,11 +70,7 @@ class TwitterBot(object):
             self.access_token = config.get('access_config', 'access_token')
             self.token_secret = config.get('access_config', 'token_secret')
 
-            self.async_streaming = config.get('api', 'async_streaming')
-            self.char_limit = config.get('api', 'char_limit')
-
             self.mirrors = config.get('general', 'mirrors')
-            self.max_words = config.get('general', 'max_words')
             self.i18ndir = config.get('i18n', 'dir')
 
             logdir = config.get('log', 'dir')
@@ -235,8 +79,8 @@ class TwitterBot(object):
 
             blacklist_cfg = config.get('blacklist', 'cfg')
             self.bl = blacklist.Blacklist(blacklist_cfg)
-            self.bl_max_req = config.get('blacklist', 'max_requests')
-            self.bl_max_req = int(self.bl_max_req)
+            self.bl_max_request = config.get('blacklist', 'max_requests')
+            self.bl_max_request = int(self.bl_max_request)
             self.bl_wait_time = config.get('blacklist', 'wait_time')
             self.bl_wait_time = int(self.bl_wait_time)
 
@@ -251,13 +95,14 @@ class TwitterBot(object):
             raise InternalError("Core error: %s" % str(e))
 
         # logging
+        """
         log = logging.getLogger(__name__)
 
         logging_format = utils.get_logging_format()
         date_format = utils.get_date_format()
         formatter = logging.Formatter(logging_format, date_format)
 
-        log.info('Redirecting SMTP logging to %s' % logfile)
+        log.info('Redirecting Twitter logging to %s' % logfile)
         logfileh = logging.FileHandler(logfile, mode='a+')
         logfileh.setFormatter(formatter)
         logfileh.setLevel(logging.getLevelName(loglevel))
@@ -265,31 +110,29 @@ class TwitterBot(object):
 
         # stop logging on stdout from now on
         log.propagate = False
-        self.log = log
-
-        self.setSignalHandlers()
-        self.msg = Messages()
-
-
-    def _is_blacklisted(self, account):
+        #self.log = log"""
+    
+    def _is_blacklisted(self, username):
         """Check if a user is blacklisted.
 
-        :param: addr (string) the hashed address of the user.
+        :param: username (string) the username; it is hashed before lookup.
 
-        :return: true is the address is blacklisted, false otherwise.
+        :return: true if the username is blacklisted, false otherwise.
 
         """
-        anon_acc = utils.get_sha256(account)
+        hashed_username = utils.get_sha256(username)
 
         try:
             self.bl.is_blacklisted(
-                anon_acc, 'Twitter', self.bl_max_req, self.bl_wait_time
+                hashed_username,
+                'Twitter',
+                self.bl_max_request,
+                self.bl_wait_time
             )
             return False
         except blacklist.BlacklistError as e:
             return True
 
-
     def _get_msg(self, msgid, lc):
         """Get message identified by msgid in a specific locale.
 
@@ -299,7 +142,7 @@ class TwitterBot(object):
         Return: a string containing the given message.
 
         """
-        self.log.debug("Getting message '%s' for locale %s" % (msgid, lc))
+        #self.log.debug("Getting message '%s' for locale %s" % (msgid, lc))
         try:
             t = gettext.translation(lc, self.i18ndir, languages=[lc])
             _ = t.ugettext
@@ -308,11 +151,17 @@ class TwitterBot(object):
             return msgstr
         except IOError as e:
             raise ConfigError("%s" % str(e))
-    
-    
-    def _parse_request(self, message):
-        """ """
-        self.log.debug("Parsing text.")
+
+    def parse_text(self, msg):
+        """ Parse the text part of a message.
+
+        Split the message in words and look for patterns for locale,
+        operating system and mirrors requests.
+
+        :param: msg (string) the message received.
+
+        :return: request (dict) with keys 'lc', 'os' and 'type' of request.
+        """
         
         # core knows what OS are supported
         supported_os = self.core.get_supported_os()
@@ -329,7 +178,8 @@ class TwitterBot(object):
         found_mirrors = False
 
         # analyze every word
-        for word in message.split(' '):
+        words = re.split('\s+', msg.strip())
+        for word in words:
             # look for lc and os
             if not found_lc:
                 for lc in supported_lc:
@@ -352,154 +202,49 @@ class TwitterBot(object):
             
         return req
 
+    def parse_request(self, dm):
+        """ Process the request received.
 
-    def setSignalHandlers(self):
-        """Set up relevant SIG* handlers for the bot.
+        Check that the user is not blacklisted and then check the body of
+        the message to find out what is being asked for.
 
-        Note: if we want to handle some specific signal and not exit after
-        catching it, we may need to store the original CPython handler
-        (signified by signal.SIG_DFL), and restore it after handling the
-        signal in question. For now, we'll only care about signals after
-        which the program does exit.
+        :param: dm (status.direct_message) the direct message object received
+                via Twitter API.
 
         """
 
-        # for now, we'll only handle SIGTERM. it might make sense to handle
-        # SIGINT as well, though.
-
-        signal.signal(signal.SIGTERM, self.handleSIGTERM)
-        self.log.debug("SIGTERM handler is set.")
-
-    def handleSIGTERM(self, sig_number, stack_frame):
-        """Callback function called upon SIGTERM"""
-
-        self.log.info("TwitterBot::handleSIGTERM(): caught SIGTERM signal.")
-
-        self.log.info("Stopping bot listener.")
-        self.listener.running = False
-        while self.listener.processing_data:
-            self.log.info(
-                "Waiting for TwitterBotStreamListener to finish processing"
-                " a data request/package"
-            )
-            time.sleep(0.5)
-
-        self.log.info("Closing down storage controller.")
-        self.storage_controller.closeAll()
-
-        self.log.info("Exiting program.")
-        sys.exit(0)
-
-    def authenticate(self, auth=None):
-        """Authenticate to Twitter API, get API handle, and remember it."""
-
-        if auth:
-            self.auth = auth
-        else:
-            self.auth = tweepy.OAuthHandler(
-                self.api_key,
-                self.api_secret
-            )
-
-            self.auth.set_access_token(
-                self.access_token,
-                self.token_secret
-            )
-
-        try:
-            self.api = tweepy.API(self.auth)
-        except Exception as e:
-            self.log.fatal('Exception while authenticating to Twitter and '
-                           'getting API handle: %s', e)
-            self.api = None
-        finally:
-            # del self.auth # ideally we'd be able to delete this, but 
-            # presently - no; anything?
-            pass
-
-        if self.api:
-            self.log.info('Authenticated and got the RESTful API handle')
-            self.bot_info = self.api.me()
-            #api.update_status('hello world!')
-
-    def subscribeToStreams(self):
-        """Subscribe to relevant streams in the Streaming API."""
-
-        self.listener = TwitterBotStreamListener(
-            bot=self,
-            api=self.api
-        )
-
-        self.stream = tweepy.Stream(self.auth, self.listener)
-
-        # user stream gives us direct messages and follow events
-        self.stream.userstream(async=self.async_streaming)
-        # stream.filter may be useful, but we don't need it for now
-
-        # the following will not be executed if we're not going async -
-        # userstream() blocks, its event handler loop takes over:
-        self.log.info('Subscribed to relevant streams via Streaming API')
-
-    def handleFollowEvent(self, event):
-        """
-        user_id = event.source['id']  # 'id' is unique big int
-
-        if user_id != self.bot_info.id:
-            user = self.api.get_user(id=user_id)
-            user.follow()
-
-        if config.RESPOND_AFTER_FOLLOW:
-            # the following line *blocks* the thread that we care about.
-            # we should not do this, ever. as long as we're just testing
-            # with a few cat accounts, it's ok.
-            
-            # TODO: use sched.scheduler, or threading.Timer (or sth)
-            time.sleep(config.WAIT_TIME_AFTER_FOLLOW) 
-
-        # for now, english by default
-        self.sendMessage(
-            user_id,
-            get_msg('welcome', 'en')
-        )
-        """
-        # it seems that we don't need to be followed to send dm
-        pass
-
-
-    def handleDirectMessage(self, status):
-        """ Handle direct messages received (i.e. parse request). """
-        sender_id = status.direct_message['sender_id']
-        message = status.direct_message['text'].strip().lower()
-
-        self.log.debug("Parsing request")
+        sender_id = dm['sender']['id_str']
+        msg = dm['text'].strip().lower()
+        bogus_request = False
+        request = None
+        status = ''
 
         try:
             if self._is_blacklisted(str(sender_id)):
-                self.log.info("Request from blacklisted account!")
+                #self.log.info("Request from blacklisted account!")
                 status = 'blacklisted'
                 bogus_request = True
 
-            # first let's find out how many words are in the message
-            # request shouldn't be longer than 3 words, but just in case
-            words = message.split(' ')
-            if len(words) > self.max_words:
-                bogus_request = True
-                self.log.info("Message way too long")
-                status = 'error'
-                reply = self._get_msg('message_error', 'en')
-
             if not bogus_request:
-                self.log.debug("Request seems legit, let's parse it")
+                #self.log.debug("Request seems legit, let's parse it")
                 # let's try to guess what the user is asking
-                req = self._parse_text(str(msg))
-
-                if req['type'] == 'help':
-                    self.log.debug("Type of request: help")
-                    status = 'success'
-                    reply = self._get_msg('help', 'en')
+                request = self.parse_text(str(msg))
+        
+                # possible options: links, mirrors, help
+                if request['type'] == 'links':
+                    #self.log.debug("Type of request: help")
+                    links = self.core.get_links(
+                        'Twitter',
+                        request['os'],
+                        request['lc']
+                    )
+                    
+                    reply = self._get_msg('links', 'en')
+                    reply = reply % (request['os'], request['lc'], links)
+                    
 
-                elif req['type'] == 'mirrors':
-                    self.log.debug("Type of request: mirrors")
+                elif request['type'] == 'mirrors':
+                    #self.log.debug("Type of request: mirrors")
                     status = 'success'
                     reply = self._get_msg('mirrors', 'en')
                     try:
@@ -510,86 +255,50 @@ class TwitterBot(object):
                     except IOError as e:
                         reply = self._get_msg('mirrors_unavailable', 'en')
 
-                elif req['type'] == 'links':
-                    self.log.debug("Type of request: help")
-                    links = self.core.get_links(
-                        "Twitter",
-                        req['os'],
-                        req['lc']
-                    )
-                    reply = self._get_msg('links', 'en')
-                    reply = reply % (req['os'], req['lc'], links)
-                    self.sendMessage(sender_id, reply)
-
+                else:
+                    #self.log.debug("Type of request: help")
                     status = 'success'
-            
-            # send whatever the reply is
-            self.sendMessage(sender_id, reply)
+                    reply = self._get_msg('help', 'en')
+                
+                self.api.send_direct_message(
+                    user_id=sender_id,
+                    text=reply
+                )
 
         except (core.ConfigError, core.InternalError) as e:
             # if core failes, send the user an error message, but keep going
-            self.log.error("Something went wrong internally: %s" % str(e))
+            #self.log.error("Something went wrong internally: %s" % str(e))
             status = 'core_error'
-            reply = self._get_msg('internal_error', req['lc'])
+            reply = self._get_msg('internal_error', 'en')
 
         finally:
             # keep stats
-            if req:
-                self.log.debug("Adding request to database... ")
+            if request:
+                #self.log.debug("Adding request to database... ")
                 self.core.add_request_to_db()    
 
-    def sendMessage(self, target_id, message):
-        # this is quick and ugly. primary splits (if needed) at newlines.
-        """
-        try:
-            cur_message = ''
-            for line in message.split('\n'):
-                if len(cur_message + ('\n' if cur_message else '') + line)\
-                    > config.CHARACTER_LIMIT:
-                    self._split_in_chunks_and_send(target_id, cur_message)
-                    cur_message = ''
-                else:
-                    cur_message += ('\n' if cur_message else '') + line
-            if cur_message:
-                self._split_in_chunks_and_send(target_id, cur_message)
-        except Exception as e:
-            # again, scrubbing 'target_id' should be an option, etc.
-            log.warning('Failed to send a direct message to %s. Exception:\n%s',
-                        str(target_id), e)
-            return False
-        return True
+    def start(self):
+        """ Start the bot for handling requests.
+
+        Start a new Twitter bot.
         """
-        # with new twitter limit of direct messages, we can send messages
-        # without any trouble
-        self.api.send_direct_message(
-            user_id=target_id,
-            text=message
+        self.auth = tweepy.OAuthHandler(
+            self.api_key, 
+            self.api_secret
+        )
+        
+        self.auth.set_access_token(
+            self.access_token,
+            self.token_secret
         )
 
-    """
-    def _split_in_chunks_and_send(self, target_id, message):
-        # assume any decent humane splitting has been done beforehand.
-        # we have to do with what we have here.
-        # exception handling at higher call stack.
-
-        while message:
-            self.api.send_direct_message(user_id=target_id,
-                text=message[:config.CHARACTER_LIMIT])
-            message = message[config.CHARACTER_LIMIT:]
-    """
-
-    """
-    def followAllFollowers(self):
-        # Start following everyone who is following us.
-
-        for follower in tweepy.Cursor(self.api.followers).items():
-            follower.follow()
-    """
-
-    """
-    def unfollowAllFollowers(self):
-        # Unfollow everyone who is following us.
-
-        for follower in tweepy.Cursor(self.api.followers).items():
-            follower.unfollow()
-    """
+        self.api = tweepy.API(self.auth)
+        self.bot_info = self.api.me()
+         
+        stream = tweepy.Stream(
+            auth = self.api.auth,
+            listener=GetTorStreamListener(self)
+        )
+        
+        stream.userstream()
+
diff --git a/process_tweets.py b/process_tweets.py
index 9e14f8b..484cb97 100644
--- a/process_tweets.py
+++ b/process_tweets.py
@@ -7,22 +7,9 @@ import logging
 import gettor.twitter
 
 def main():
-    """Quick way of running the thing.
-
-    Note that default right now is 'no async', which means, function won't
-    return; on the other hand everything "will just work."
-
-    If async is off, then we can:
-    >>> from quick_run import quick_run
-    >>> bot = quick_run() # authenticate, subscribe to streaming api, get handle
-    """
-
     try:
-        bot = TwitterBot()
-        bot.authenticate()
-        # bot.api.update_status('hello world!')
-        bot.subscribeToStreams()
-        return bot
+        bot = gettor.twitter.TwitterBot()
+        bot.start()
     except gettor.twitter.ConfigError as e:
         print "Configuration error: %s" % str(e)
     except gettor.twitter.InternalError as e:





More information about the tor-commits mailing list