commit e1ae7a0ac0b5a8363fce07732d1b0e685547e582
Author: Isis Lovecruft <isis(a)torproject.org>
Date: Tue Oct 29 05:45:28 2013 +0000
Fix the parsing of Accept-Language to actually support fallbacks.
All right. Going through the most significant bugs in this function:
* The line from the original function:
``langs = request.getHeader('accept-language').split(',')``
getHeader() returns ``None`` if the header isn't present, so this results
in a TypeError on the split().
* The line from the original function:
``langs = filter(lambda x: re.match('^[a-z\-]{1,5}', x), langs)``
This chucks locales with capital letters, and doesn't do much at all to ensure
that we're actually getting a well-formed header, all at the expense of a
(rather expensive; they're slow in Python) regex call.
* These lines from the original:
# add fallback languages
langs_only = filter(lambda x: '-' in x, langs)
langs.extend(map(lambda x: x.split('-')[0], langs_only))
If my 'Accept-Language' header starts with 'en-GB,en-US;q=0.92[…]', then
this would add ['en','en'] to the end of my header, without even checking
if I already have 'en'. Instead, we should check if 'en' is already there,
and iff not, then add it *after the other English headers*. Not after
Mandarin, Japanese, Arabic, Russian, and the other slew of languages that I
half-assedly learned at some point.
* These lines from the original:
# gettext wants _, not -
map(lambda x: x.replace('-', '_'), langs)
Great. A pretty mapping. Good thing the returned values weren't saved as
anything; otherwise they might have been useful!
* Lastly, the way that languages, once parsed, were added to gettext, would
raise UnhandledErrors, *and* it didn't even add the fallbacks correctly,
meaning we could only get one language at a time.
---
lib/bridgedb/HTTPServer.py | 62 +++++++++++++++++++------------
lib/bridgedb/parse/headers.py | 82 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 120 insertions(+), 24 deletions(-)
diff --git a/lib/bridgedb/HTTPServer.py b/lib/bridgedb/HTTPServer.py
index ad76568..ca850d5 100644
--- a/lib/bridgedb/HTTPServer.py
+++ b/lib/bridgedb/HTTPServer.py
@@ -30,6 +30,7 @@ from bridgedb.Raptcha import Raptcha
from bridgedb.Filters import filterBridgesByIP6, filterBridgesByIP4
from bridgedb.Filters import filterBridgesByTransport
from bridgedb.Filters import filterBridgesByNotBlockedIn
+from bridgedb.parse import headers
from ipaddr import IPv4Address, IPv6Address
from random import randint
from mako.template import Template
@@ -328,29 +329,42 @@ def getAssumedChosenLang(langs):
return lang
def setLocaleFromRequestHeader(request):
+    """Retrieve the languages from the accept-language header and install them.
+
+    Parse the languages in the header, and attempt to load the translations
+    for them, in order of preference. If that fails, we receive a
+    :class:`gettext.NullTranslations` object; if it worked, then we have a
+    :class:`gettext.GNUTranslations` object. Whichever one we end up with,
+    get the translation for each requested language and add it as a
+    fallback to the first. Lastly, install this chain of translations.
+
+    If the client sent no 'Accept-Language' header, 'en,en-US' is assumed.
+
+    :type request: :class:`twisted.web.server.Request`
+    :param request: An incoming request from a client.
+    :rtype: list
+    :returns: All requested languages.
"""
- Retrieve the languages from the accept-language header and insall
-
- Parse the languages in the header, if any of them contain locales then
- add their languages to the list, also. Then install all of them using
- gettext, it will choose the best one.
-
- :param request twisted.web.server.Request: Incoming request
- :returns list: All requested languages
- """
- langs = request.getHeader('accept-language').split(',')
- logging.debug("Accept-Language: %s" % langs)
- localedir=os.path.join(os.path.dirname(__file__), 'i18n/')
-
- if langs:
- langs = filter(lambda x: re.match('^[a-z\-]{1,5}', x), langs)
- logging.debug("Languages: %s" % langs)
- # add fallback languages
- langs_only = filter(lambda x: '-' in x, langs)
- langs.extend(map(lambda x: x.split('-')[0], langs_only))
- # gettext wants _, not -
- map(lambda x: x.replace('-', '_'), langs)
- lang = gettext.translation("bridgedb", localedir=localedir,
- languages=langs, fallback=True)
- lang.install(True)
+    logging.debug("Getting client 'Accept-Language' header...")
+    header = request.getHeader('accept-language')
+
+    if header is None:
+        logging.debug("Client sent no 'Accept-Language' header. Using fallback.")
+        header = 'en,en-US'
+
+    localedir = os.path.join(os.path.dirname(__file__), 'i18n/')
+    langs = headers.parseAcceptLanguage(header)
+    ## XXX the 'Accept-Language' header is potentially identifying
+    logging.debug("Client Accept-Language (top 5): %s" % langs[:5])
+
+    # Start from a no-op translation so that ``language`` is always bound,
+    # even if reading a catalogue raises IOError below.
+    language = gettext.NullTranslations()
+    try:
+        language = gettext.translation("bridgedb", localedir=localedir,
+                                       languages=langs, fallback=True)
+        for lang in langs:
+            # Add one fallback per requested language, in preference order
+            # (not the whole list again on every iteration).
+            language.add_fallback(gettext.translation("bridgedb",
+                                                      localedir=localedir,
+                                                      languages=[lang],
+                                                      fallback=True))
+    except IOError as error:
+        # ``error.message`` is empty for errno-style IOErrors, so log the
+        # string form of the exception instead.
+        logging.error(str(error))
+
+    language.install(unicode=True)
return langs
diff --git a/lib/bridgedb/parse/headers.py b/lib/bridgedb/parse/headers.py
new file mode 100644
index 0000000..d9f1c4b
--- /dev/null
+++ b/lib/bridgedb/parse/headers.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of BridgeDB, a Tor bridge distribution system.
+#
+# :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis(a)torproject.org>
+# please also see AUTHORS file
+# :copyright: (c) 2013 Isis Lovecruft
+# (c) 2007-2013, The Tor Project, Inc.
+# (c) 2007-2013, all entities within the AUTHORS file
+# :license: 3-clause BSD, see included LICENSE for information
+
+"""bridgedb.parse.headers -- Parsers for HTTP and Email headers.
+
+** Module Overview: **
+
+::
+
+ parseAcceptLanguage - Parse the contents of a client 'Accept-Language' header
+
+"""
+
+import logging
+log = logging.getLogger()
+
+import re
+import os
+
+def parseAcceptLanguage(header):
+    """Parse the contents of a client 'Accept-Language' header.
+
+    Parse the header in the following manner:
+
+      0. If ``header`` is None or an empty string, return an empty list.
+      1. Split the ``header`` string on any commas.
+      2. Chop off the RFC2616 quality/level suffix (everything from the
+         first semicolon onwards) and strip surrounding whitespace. We
+         ignore these suffixes, and just use the order of the list as the
+         preference order, without any parsing of quality/level
+         assignments. Entries which are empty afterwards are dropped.
+      3. Add a fallback language of the same type if it is missing. For
+         example, if we only got ['es-ES', 'de-DE'], add 'es' after 'es-ES'
+         and add 'de' after 'de-DE'. The fallback is placed after the
+         *last* entry for that language, so ['en-GB', 'en-US'] becomes
+         ['en-GB', 'en-US', 'en'].
+      4. Change all hyphens to underscores.
+
+    :param string header: The contents of an 'Accept-Language' header, i.e. as
+        if taken from :func:`twisted.web.server.Request.getHeader`.
+    :rtype: list
+    :returns: A list of language codes (with and without locales), in order of
+        preference.
+    """
+    langs = []
+
+    if not header:
+        return langs
+
+    for item in header.split(','):
+        # Chop off the RFC2616 Accept `q=` and `level=` fields. Using
+        # partition() means any number of semicolons is tolerated.
+        code = item.partition(';')[0].strip()
+        if code:
+            langs.append(code)
+
+    # Add a fallback language of the same type if it is missing. The list
+    # of candidates is built before the loop, so inserting into ``langs``
+    # while looping is safe.
+    langsOnly = [x.split('-')[0] for x in langs if '-' in x]
+    for only in langsOnly:
+        if only in langs:
+            continue
+        # Add the fallback after the last of the other languages like it.
+        # ``only`` was derived from an entry of ``langs``, so there is
+        # always at least one match.
+        related = [index for index, x in enumerate(langs)
+                   if x.split('-')[0] == only]
+        langs.insert(related[-1] + 1, only)
+
+    # Gettext wants underscores, because that is how it creates the
+    # directories under i18n/, not hyphens:
+    return [x.replace('-', '_') for x in langs]