[tor-commits] [bridgedb/master] 1836 - recaptcha support

arma at torproject.org arma at torproject.org
Mon Sep 19 01:10:01 UTC 2011


commit 2d670fe64c04b632c9bbc8f0d2ccddd6e2b0baaa
Author: aagbsn <aagbsn at extc.org>
Date:   Mon Jul 25 17:18:20 2011 -0700

    1836 - recaptcha support
    
    This set of changes adds recaptcha support to bridgedb.
    The implementation proxies responses from clients to
    preserve their privacy.
---
 README                  |    3 +
 bridgedb.conf           |    7 +++
 lib/bridgedb/I18n.py    |   10 ++++-
 lib/bridgedb/Main.py    |    5 ++-
 lib/bridgedb/Raptcha.py |   40 +++++++++++++++
 lib/bridgedb/Server.py  |  122 +++++++++++++++++++++++++++++++++++++++++++++-
 6 files changed, 182 insertions(+), 5 deletions(-)

diff --git a/README b/README
index c05317a..a21e273 100644
--- a/README
+++ b/README
@@ -14,6 +14,9 @@ To set up:
  - To install Maxmind GeoIP
    - Debian: apt-get install python-geoip
    - Others: http://www.maxmind.com/app/python 
+ - To enable recaptcha support, set the options RECAPTCHA_ENABLED,
+     RECAPTCHA_PUB_KEY, and RECAPTCHA_PRIV_KEY in bridgedb.conf. 
+     A recaptcha.net account is required. 
 
 To re-generate and update the i18n files (in case translated strings
 have changed in BridgeDB):
diff --git a/bridgedb.conf b/bridgedb.conf
index 11afc02..e5d7314 100644
--- a/bridgedb.conf
+++ b/bridgedb.conf
@@ -147,3 +147,10 @@ EMAIL_INCLUDE_FINGERPRINTS=False
 RESERVED_SHARE=2
 
 FILE_BUCKETS = {}
+
+# Options related to recaptcha support.
+# Enable/Disable recaptcha
+RECAPTCHA_ENABLED = False
+# Recaptcha API keys
+RECAPTCHA_PUB_KEY = ''
+RECAPTCHA_PRIV_KEY = '' 
diff --git a/lib/bridgedb/I18n.py b/lib/bridgedb/I18n.py
index 750ca4a..24629f1 100644
--- a/lib/bridgedb/I18n.py
+++ b/lib/bridgedb/I18n.py
@@ -58,5 +58,13 @@ emails is: """),
  # BRIDGEDB_TEXT[11]
  _("""hours"""),
  # BRIDGEDB_TEXT[12]
- _("""All further emails will be ignored.""")
+ _("""All further emails will be ignored."""),
+ # BRIDGEDB_TEXT[13]
+ _("""Type the two words"""),
+ # BRIDGEDB_TEXT[14]
+ _("""I am human"""),
+ # BRIDGEDB_TEXT[15]
+ _("""Upgrade your browser to Firefox"""),
+ # BRIDGEDB_TEXT[16]
+ _("""(Might be blocked)""") 
 ]
diff --git a/lib/bridgedb/Main.py b/lib/bridgedb/Main.py
index 42f78c3..2ef56bf 100644
--- a/lib/bridgedb/Main.py
+++ b/lib/bridgedb/Main.py
@@ -95,7 +95,10 @@ CONFIG = Conf(
 
     RESERVED_SHARE=2,
 
-    FILE_BUCKETS = {}
+    FILE_BUCKETS = {},
+    RECAPTCHA_ENABLED = False,
+    RECAPTCHA_PUB_KEY = '',
+    RECAPTCHA_PRIV_KEY = '', 
   )
 
 def configureLogging(cfg):
diff --git a/lib/bridgedb/Raptcha.py b/lib/bridgedb/Raptcha.py
new file mode 100644
index 0000000..1af8dd6
--- /dev/null
+++ b/lib/bridgedb/Raptcha.py
@@ -0,0 +1,40 @@
+# Raptcha
+# Copyright (c) 2010, The Tor Project, Inc.
+# See LICENSE for licensing information
+
+"""
+This module wraps the recaptcha api and proxies requests to protect privacy.
+"""
+import recaptcha.client.captcha as captcha
+from BeautifulSoup import BeautifulSoup
+import urllib2
+
+class Raptcha():
+    """A single reCAPTCHA challenge and the method used to fetch it."""
+
+    def __init__(self, pubkey=None, privkey=None):
+        self.pubkey = pubkey
+        self.privkey = privkey
+        self.image = None
+        self.challenge = None
+
+    def get(self):
+        """ gets a fresh captcha """
+
+        if (self.pubkey == '') or (self.privkey == ''):
+            raise RaptchaKeyError
+        urlbase = captcha.API_SERVER
+        form = "/noscript?k=%s" % self.pubkey
+
+        # extract and store image from captcha
+        html = urllib2.urlopen(urlbase+form).read()
+        soup = BeautifulSoup(html)
+        imgurl = urlbase+"/"+ soup.find('img')['src']
+        self.challenge = str(soup.find('input', {'name' : 'recaptcha_challenge_field'})['value'])
+        self.image = urllib2.urlopen(imgurl).read()
+
+class RaptchaKeyError(Exception):
+    """ Exception raised when recaptcha API keys are not supplied"""
+    def __init__(self):
+        msg = 'You must supply recaptcha API keys'
+        Exception.__init__(self, msg)
diff --git a/lib/bridgedb/Server.py b/lib/bridgedb/Server.py
index 9cbeb15..1089598 100644
--- a/lib/bridgedb/Server.py
+++ b/lib/bridgedb/Server.py
@@ -26,6 +26,12 @@ import twisted.mail.smtp
 import bridgedb.Dist
 import bridgedb.I18n as I18n
 
+import recaptcha.client.captcha as captcha
+from random import randint
+from bridgedb.Raptcha import Raptcha
+import base64
+import textwrap
+ 
 try:
     import GeoIP
     # GeoIP data object: choose database here
@@ -42,7 +48,9 @@ class WebResource(twisted.web.resource.Resource):
     isLeaf = True
 
     def __init__(self, distributor, schedule, N=1, useForwardedHeader=False,
-                 includeFingerprints=True, domains=[]):
+                 includeFingerprints=True,
+                 useRecaptcha=False,recaptchaPrivKey='', recaptchaPubKey='',
+                 domains=[]): 
         """Create a new WebResource.
              distributor -- an IPBasedDistributor object
              schedule -- an IntervalSchedule object
@@ -57,7 +65,57 @@ class WebResource(twisted.web.resource.Resource):
         self.includeFingerprints = includeFingerprints
         self.domains = domains
 
+        # recaptcha options
+        self.useRecaptcha = useRecaptcha
+        self.recaptchaPrivKey = recaptchaPrivKey
+        self.recaptchaPubKey = recaptchaPubKey
+
     def render_GET(self, request):
+        if self.useRecaptcha:
+            # get a captcha
+            c = Raptcha(self.recaptchaPubKey, self.recaptchaPrivKey)
+            c.get()
+
+            # TODO: this does not work for versions of IE < 8.0
+            imgstr = 'data:image/jpeg;base64,%s' % base64.b64encode(c.image)
+            HTML_CAPTCHA_TEMPLATE = self.buildHTMLMessageTemplateWithCaptcha(
+                    getLocaleFromRequest(request), c.challenge, imgstr)
+            return HTML_CAPTCHA_TEMPLATE
+        else:
+            return self.getBridgeRequestAnswer(request)
+
+
+    def render_POST(self, request):
+
+        # check captcha if recaptcha support is enabled
+        if self.useRecaptcha:
+            try:
+                challenge = request.args['recaptcha_challenge_field'][0]
+                response = request.args['recaptcha_response_field'][0]
+
+            except:
+                return self.render_GET(request)
+
+            # generate a random IP for the captcha submission
+            remote_ip = '%d.%d.%d.%d' % (randint(1,255),randint(1,255),
+                                         randint(1,255),randint(1,255))
+
+            recaptcha_response = captcha.submit(challenge, response,
+                                            self.recaptchaPrivKey, remote_ip)
+            if recaptcha_response.is_valid:
+                logging.info("Valid recaptcha from %s. Parameters were %r",
+                        remote_ip, request.args)
+            else:
+                logging.info("Invalid recaptcha from %s. Parameters were %r",
+                             remote_ip, request.args)
+                logging.info("Recaptcha error code: %s", recaptcha_response.error_code)
+                return self.render_GET(request) # redirect back to captcha
+
+        return self.getBridgeRequestAnswer(request)
+
+    def getBridgeRequestAnswer(self, request):
+        """ returns a response to a bridge request """
+ 
         interval = self.schedule.getInterval(time.time())
         bridges = ( )
         ip = None
@@ -131,6 +189,44 @@ class WebResource(twisted.web.resource.Resource):
 
         return html_msg
 
+    def buildHTMLMessageTemplateWithCaptcha(self, t, challenge, img):
+        """Builds a translated html response with recaptcha"""
+
+        recaptchaTemplate = textwrap.dedent("""\
+            <form action="" method="POST">
+              <input type="hidden" name="recaptcha_challenge_field"
+                id="recaptcha_challenge_field"\
+                        value="{recaptchaChallengeField}">
+              <img width="300" height="57" alt="{bridgeDBText14}"\
+                      src="{recaptchaImgSrc}">
+              <div class="recaptcha_input_area">
+                <label for="recaptcha_response_field">{bridgeDBText12}</label>
+              </div>
+              <div>
+                <input name="recaptcha_response_field"\
+                        id="recaptcha_response_field"
+                type="text" autocomplete="off">
+              </div>
+              <div>
+                <input type="submit" name="submit" value="{bridgeDBText13}">
+              </div>
+            </form>
+            """).strip()
+
+        recaptchaTemplate = recaptchaTemplate.format(
+                recaptchaChallengeField=challenge,
+                recaptchaImgSrc=img,
+                bridgeDBText12=t.gettext(I18n.BRIDGEDB_TEXT[13]),
+                bridgeDBText13=t.gettext(I18n.BRIDGEDB_TEXT[14]),
+                bridgeDBText14=t.gettext(I18n.BRIDGEDB_TEXT[15]))
+
+        html_msg = "<html><body>" \
+                   + "<p>" + t.gettext(I18n.BRIDGEDB_TEXT[1]) + "</p>" \
+                   + "<p>" + t.gettext(I18n.BRIDGEDB_TEXT[9]) + "</p>" \
+                   + "<p>" + recaptchaTemplate + "</p>" \
+                   + "<p>" + t.gettext(I18n.BRIDGEDB_TEXT[4]) + "</p>" \
+                   + "</body></html>"
+        return html_msg 
 
 def addWebServer(cfg, dist, sched):
     """Set up a web server.
@@ -142,6 +238,9 @@ def addWebServer(cfg, dist, sched):
                 HTTPS_PORT
                 HTTPS_BIND_IP
                 HTTPS_USE_IP_FROM_FORWARDED_HEADER
+                RECAPTCHA_ENABLED
+                RECAPTCHA_PUB_KEY
+                RECAPTCHA_PRIV_KEY 
          dist -- an IPBasedDistributor object.
          sched -- an IntervalSchedule object.
     """
@@ -152,7 +251,10 @@ def addWebServer(cfg, dist, sched):
         resource = WebResource(dist, sched, cfg.HTTPS_N_BRIDGES_PER_ANSWER,
                        cfg.HTTP_USE_IP_FROM_FORWARDED_HEADER,
                        includeFingerprints=cfg.HTTPS_INCLUDE_FINGERPRINTS,
-                       domains=cfg.EMAIL_DOMAINS)
+                       useRecaptcha=cfg.RECAPTCHA_ENABLED,
+                       domains=cfg.EMAIL_DOMAINS,
+                       recaptchaPrivKey=cfg.RECAPTCHA_PRIV_KEY,
+                       recaptchaPubKey=cfg.RECAPTCHA_PUB_KEY) 
         site = Site(resource)
         reactor.listenTCP(cfg.HTTP_UNENCRYPTED_PORT, site, interface=ip)
     if cfg.HTTPS_PORT:
@@ -164,7 +266,10 @@ def addWebServer(cfg, dist, sched):
         resource = WebResource(dist, sched, cfg.HTTPS_N_BRIDGES_PER_ANSWER,
                        cfg.HTTPS_USE_IP_FROM_FORWARDED_HEADER,
                        includeFingerprints=cfg.HTTPS_INCLUDE_FINGERPRINTS,
-                       domains=cfg.EMAIL_DOMAINS)
+                       domains=cfg.EMAIL_DOMAINS,
+                       useRecaptcha=cfg.RECAPTCHA_ENABLED,
+                       recaptchaPrivKey=cfg.RECAPTCHA_PRIV_KEY,
+                       recaptchaPubKey=cfg.RECAPTCHA_PUB_KEY) 
         site = Site(resource)
         reactor.listenSSL(cfg.HTTPS_PORT, site, factory, interface=ip)
     return site
@@ -381,6 +486,17 @@ def getLocaleFromPlusAddr(address):
 
     return replyLocale
 
+def getLocaleFromRequest(request):
+    # Determine the requested locale, falling back to 'en' if none is given:
+    # first try the URL path (/foo); if that still yields the default,
+    # check for a ?lang=foo query argument.
+    default_lang = lang = "en"
+    if len(request.path) > 1:
+        lang = request.path[1:]
+    if lang == default_lang:
+        lang = request.args.get("lang", [default_lang])
+        lang = lang[0]
+    return I18n.getLang(lang) 
 
 class MailContext:
     """Helper object that holds information used by email subsystem."""





More information about the tor-commits mailing list