commit 2d670fe64c04b632c9bbc8f0d2ccddd6e2b0baaa
Author: aagbsn aagbsn@extc.org
Date: Mon Jul 25 17:18:20 2011 -0700
1836 - recaptcha support
This set of changes adds recaptcha support to bridgedb. The implementation proxies responses from clients to preserve their privacy.
---
 README                  |    3 +
 bridgedb.conf           |    7 +++
 lib/bridgedb/I18n.py    |   10 ++++-
 lib/bridgedb/Main.py    |    5 ++-
 lib/bridgedb/Raptcha.py |   40 +++++++++++++++
 lib/bridgedb/Server.py  |  122 +++++++++++++++++++++++++++++++++++++++++++++-
 6 files changed, 182 insertions(+), 5 deletions(-)
diff --git a/README b/README index c05317a..a21e273 100644 --- a/README +++ b/README @@ -14,6 +14,9 @@ To set up: - To install Maxmind GeoIP - Debian: apt-get install python-geoip - Others: http://www.maxmind.com/app/python + - To enable recaptcha support, set the options RECAPTCHA_ENABLED, + RECAPTCHA_PUB_KEY, and RECAPTCHA_PRIV_KEY in bridgedb.conf. + A recaptcha.net account is required.
To re-generate and update the i18n files (in case translated strings have changed in BridgeDB): diff --git a/bridgedb.conf b/bridgedb.conf index 11afc02..e5d7314 100644 --- a/bridgedb.conf +++ b/bridgedb.conf @@ -147,3 +147,10 @@ EMAIL_INCLUDE_FINGERPRINTS=False RESERVED_SHARE=2
FILE_BUCKETS = {} + +# Options related to recaptcha support. +# Enable/Disable recaptcha +RECAPTCHA_ENABLED = False +# Recaptcha API keys +RECAPTCHA_PUB_KEY = '' +RECAPTCHA_PRIV_KEY = '' diff --git a/lib/bridgedb/I18n.py b/lib/bridgedb/I18n.py index 750ca4a..24629f1 100644 --- a/lib/bridgedb/I18n.py +++ b/lib/bridgedb/I18n.py @@ -58,5 +58,13 @@ emails is: """), # BRIDGEDB_TEXT[11] _("""hours"""), # BRIDGEDB_TEXT[12] - _("""All further emails will be ignored.""") + _("""All further emails will be ignored."""), + # BRIDGEDB_TEXT[13] + _("""Type the two words"""), + # BRIDGEDB_TEXT[14] + _("""I am human"""), + # BRIDGEDB_TEXT[15] + _("""Upgrade your browser to Firefox"""), + # BRIDGEDB_TEXT[16] + _("""(Might be blocked)""") ] diff --git a/lib/bridgedb/Main.py b/lib/bridgedb/Main.py index 42f78c3..2ef56bf 100644 --- a/lib/bridgedb/Main.py +++ b/lib/bridgedb/Main.py @@ -95,7 +95,10 @@ CONFIG = Conf(
RESERVED_SHARE=2,
- FILE_BUCKETS = {} + FILE_BUCKETS = {}, + RECAPTCHA_ENABLED = False, + RECAPTCHA_PUB_KEY = '', + RECAPTCHA_PRIV_KEY = '', )
def configureLogging(cfg): diff --git a/lib/bridgedb/Raptcha.py b/lib/bridgedb/Raptcha.py new file mode 100644 index 0000000..1af8dd6 --- /dev/null +++ b/lib/bridgedb/Raptcha.py @@ -0,0 +1,40 @@ +# Raptcha +# Copyright (c) 2010, The Tor Project, Inc. +# See LICENSE for licensing information + +""" +This module wraps the recaptcha api and proxies requests to protect privacy. +""" +import recaptcha.client.captcha as captcha +from BeautifulSoup import BeautifulSoup +import urllib2 + +class Raptcha(): + """ A recaptcha captcha and method to request them """ + + def __init__(self, pubkey=None, privkey=None): + self.pubkey = pubkey + self.privkey = privkey + self.image = None + self.challenge = None + + def get(self): + """ gets a fresh captcha """ + + if (self.pubkey == '') or (self.privkey == ''): + raise RaptchaKeyError + urlbase = captcha.API_SERVER + form = "/noscript?k=%s" % self.pubkey + + # extract and store image from captcha + html = urllib2.urlopen(urlbase+form).read() + soup = BeautifulSoup(html) + imgurl = urlbase+"/"+ soup.find('img')['src'] + self.challenge = str(soup.find('input', {'name' : 'recaptcha_challenge_field'})['value']) + self.image = urllib2.urlopen(imgurl).read() + +class RaptchaKeyError(Exception): + """ Exception raised when recaptcha API keys are not supplied""" + def __init__(self): + msg = 'You must supply recaptcha API keys' + Exception.__init__(self, msg) diff --git a/lib/bridgedb/Server.py b/lib/bridgedb/Server.py index 9cbeb15..1089598 100644 --- a/lib/bridgedb/Server.py +++ b/lib/bridgedb/Server.py @@ -26,6 +26,12 @@ import twisted.mail.smtp import bridgedb.Dist import bridgedb.I18n as I18n
+import recaptcha.client.captcha as captcha +from random import randint +from bridgedb.Raptcha import Raptcha +import base64 +import textwrap + try: import GeoIP # GeoIP data object: choose database here @@ -42,7 +48,9 @@ class WebResource(twisted.web.resource.Resource): isLeaf = True
def __init__(self, distributor, schedule, N=1, useForwardedHeader=False, - includeFingerprints=True, domains=[]): + includeFingerprints=True, + useRecaptcha=False,recaptchaPrivKey='', recaptchaPubKey='', + domains=[]): """Create a new WebResource. distributor -- an IPBasedDistributor object schedule -- an IntervalSchedule object @@ -57,7 +65,57 @@ class WebResource(twisted.web.resource.Resource): self.includeFingerprints = includeFingerprints self.domains = domains
+ # recaptcha options + self.useRecaptcha = useRecaptcha + self.recaptchaPrivKey = recaptchaPrivKey + self.recaptchaPubKey = recaptchaPubKey + def render_GET(self, request): + if self.useRecaptcha: + # get a captcha + c = Raptcha(self.recaptchaPubKey, self.recaptchaPrivKey) + c.get() + + # TODO: this does not work for versions of IE < 8.0 + imgstr = 'data:image/jpeg;base64,%s' % base64.b64encode(c.image) + HTML_CAPTCHA_TEMPLATE = self.buildHTMLMessageTemplateWithCaptcha( + getLocaleFromRequest(request), c.challenge, imgstr) + return HTML_CAPTCHA_TEMPLATE + else: + return self.getBridgeRequestAnswer(request) + + + def render_POST(self, request): + + # check captcha if recaptcha support is enabled + if self.useRecaptcha: + try: + challenge = request.args['recaptcha_challenge_field'][0] + response = request.args['recaptcha_response_field'][0] + + except: + return self.render_GET(request) + + # generate a random IP for the captcha submission + remote_ip = '%d.%d.%d.%d' % (randint(1,255),randint(1,255), + randint(1,255),randint(1,255)) + + recaptcha_response = captcha.submit(challenge, response, + self.recaptchaPrivKey, remote_ip) + if recaptcha_response.is_valid: + logging.info("Valid recaptcha from %s. Parameters were %r", + remote_ip, request.args) + else: + logging.info("Invalid recaptcha from %s. Parameters were %r", + remote_ip, request.args) + logging.info("Recaptcha error code: %s", recaptcha_response.error_code) + return self.render_GET(request) # redirect back to captcha + + return self.getBridgeRequestAnswer(request) + + def getBridgeRequestAnswer(self, request): + """ returns a response to a bridge request """ + interval = self.schedule.getInterval(time.time()) bridges = ( ) ip = None @@ -131,6 +189,44 @@ class WebResource(twisted.web.resource.Resource):
return html_msg
+ def buildHTMLMessageTemplateWithCaptcha(self, t, challenge, img): + """Builds a translated html response with recaptcha""" + + recaptchaTemplate = textwrap.dedent("""\ + <form action="" method="POST"> + <input type="hidden" name="recaptcha_challenge_field" + id="recaptcha_challenge_field"\ + value="{recaptchaChallengeField}"> + <img width="300" height="57" alt="{bridgeDBText14}"\ + src="{recaptchaImgSrc}"> + <div class="recaptcha_input_area"> + <label for="recaptcha_response_field">{bridgeDBText12}</label> + </div> + <div> + <input name="recaptcha_response_field"\ + id="recaptcha_response_field" + type="text" autocomplete="off"> + </div> + <div> + <input type="submit" name="submit" value="{bridgeDBText13}"> + </div> + </form> + """).strip() + + recaptchaTemplate = recaptchaTemplate.format( + recaptchaChallengeField=challenge, + recaptchaImgSrc=img, + bridgeDBText12=t.gettext(I18n.BRIDGEDB_TEXT[13]), + bridgeDBText13=t.gettext(I18n.BRIDGEDB_TEXT[14]), + bridgeDBText14=t.gettext(I18n.BRIDGEDB_TEXT[15])) + + html_msg = "<html><body>" \ + + "<p>" + t.gettext(I18n.BRIDGEDB_TEXT[1]) + "</p>" \ + + "<p>" + t.gettext(I18n.BRIDGEDB_TEXT[9]) + "</p>" \ + + "<p>" + recaptchaTemplate + "</p>" \ + + "<p>" + t.gettext(I18n.BRIDGEDB_TEXT[4]) + "</p>" \ + + "</body></html>" + return html_msg
def addWebServer(cfg, dist, sched): """Set up a web server. @@ -142,6 +238,9 @@ def addWebServer(cfg, dist, sched): HTTPS_PORT HTTPS_BIND_IP HTTPS_USE_IP_FROM_FORWARDED_HEADER + RECAPTCHA_ENABLED + RECAPTCHA_PUB_KEY + RECAPTCHA_PRIV_KEY dist -- an IPBasedDistributor object. sched -- an IntervalSchedule object. """ @@ -152,7 +251,10 @@ def addWebServer(cfg, dist, sched): resource = WebResource(dist, sched, cfg.HTTPS_N_BRIDGES_PER_ANSWER, cfg.HTTP_USE_IP_FROM_FORWARDED_HEADER, includeFingerprints=cfg.HTTPS_INCLUDE_FINGERPRINTS, - domains=cfg.EMAIL_DOMAINS) + useRecaptcha=cfg.RECAPTCHA_ENABLED, + domains=cfg.EMAIL_DOMAINS, + recaptchaPrivKey=cfg.RECAPTCHA_PRIV_KEY, + recaptchaPubKey=cfg.RECAPTCHA_PUB_KEY) site = Site(resource) reactor.listenTCP(cfg.HTTP_UNENCRYPTED_PORT, site, interface=ip) if cfg.HTTPS_PORT: @@ -164,7 +266,10 @@ def addWebServer(cfg, dist, sched): resource = WebResource(dist, sched, cfg.HTTPS_N_BRIDGES_PER_ANSWER, cfg.HTTPS_USE_IP_FROM_FORWARDED_HEADER, includeFingerprints=cfg.HTTPS_INCLUDE_FINGERPRINTS, - domains=cfg.EMAIL_DOMAINS) + domains=cfg.EMAIL_DOMAINS, + useRecaptcha=cfg.RECAPTCHA_ENABLED, + recaptchaPrivKey=cfg.RECAPTCHA_PRIV_KEY, + recaptchaPubKey=cfg.RECAPTCHA_PUB_KEY) site = Site(resource) reactor.listenSSL(cfg.HTTPS_PORT, site, factory, interface=ip) return site @@ -381,6 +486,17 @@ def getLocaleFromPlusAddr(address):
return replyLocale
+def getLocaleFromRequest(request): + # See if we did get a request for a certain locale, otherwise fall back + # to 'en': + # Try evaluating the path /foo first, then check if we got a ?lang=foo + default_lang = lang = "en" + if len(request.path) > 1: + lang = request.path[1:] + if lang == default_lang: + lang = request.args.get("lang", [default_lang]) + lang = lang[0] + return I18n.getLang(lang)
class MailContext: """Helper object that holds information used by email subsystem."""
tor-commits@lists.torproject.org