[tor-commits] [bridgedb/master] Make BridgeDB export usage metrics.

phw at torproject.org phw at torproject.org
Tue Aug 20 16:56:57 UTC 2019


commit 5cde59d9ccafdb248ca8aa9c1c9abbfe2edb5dc6
Author: Philipp Winter <phw at nymity.ch>
Date:   Mon Aug 12 14:05:48 2019 -0700

    Make BridgeDB export usage metrics.
    
    Until now, we had no insight into how BridgeDB is being used.  We don't
    know the relative popularity of our distribution methods; we don't know
    how many users BridgeDB sees; we don't know how many requests succeed or
    fail; and we don't know the relative popularity of transports that users
    request.
    
    This patch attempts to answer these questions by making BridgeDB export
    usage metrics.  At the end of each 24-hour measurement interval,
    BridgeDB will append usage metrics to the file METRICS_FILE, which is
    configured in bridgedb.conf.
    
    Our metrics keep track of the number of (un)successful requests per
    transport type per country code (or email provider) per distribution
    method.  This way, we get to learn that, say, over the last 24 hours
    there were 31-40 users in Iran who successfully requested an obfs4
    bridge over Moat.  The corresponding metrics line would look as follows:
    
      bridgedb-metric-count moat.obfs4.ir.success.none 40
    
    To make the metrics preserve user privacy, we don't collect
    user-identifying information and we introduce noise by rounding up
    metrics to our bin size which defaults to 10.
    
    This patch also extends the looping calls that BridgeDB spawns.  When
    BridgeDB first starts, it loads proxies from the files PROXY_LIST_FILES.
    It augments this list of proxies with Tor exit relays that we download
    every three hours.
---
 CHANGELOG                                    |  11 +-
 bridgedb.conf                                |  16 +-
 bridgedb/distributors/email/autoresponder.py |  16 +
 bridgedb/distributors/https/server.py        |  11 +
 bridgedb/distributors/moat/server.py         |  15 +
 bridgedb/main.py                             |  29 +-
 bridgedb/metrics.py                          | 461 +++++++++++++++++++++++++++
 bridgedb/test/test_metrics.py                | 204 ++++++++++++
 8 files changed, 755 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 1229578..ae0e651 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,13 @@
-Changes in version 0.7.2 - YYYY-MM-DD
+Changes in version 0.8.0 - YYYY-MM-DD
+
+        * FIXES #9316 https://bugs.torproject.org/9316
+        Make BridgeDB export usage metrics every 24 hours.  At the end of each
+        24-hour measurement interval, BridgeDB will append usage metrics to the
+        file METRICS_FILE, which is configured in bridgedb.conf.  Our metrics
+        keep track of the number of (un)successful requests per transport type
+        per country code (or email provider) per distribution method.  This way,
+        we get to learn that, say, over the last 24 hours there were 31-40 users
+        in Iran who successfully requested an obfs4 bridge over Moat.
 
         * FIXES #26542 https://bugs.torproject.org/26542
         Make BridgeDB distribute vanilla IPv6 bridges again.
diff --git a/bridgedb.conf b/bridgedb.conf
index 66b1983..ba43bb6 100644
--- a/bridgedb.conf
+++ b/bridgedb.conf
@@ -177,6 +177,9 @@ MASTER_KEY_FILE = "secret_key"
 # File to which we dump bridge pool assignments for statistics.
 ASSIGNMENTS_FILE = "assignments.log"
 
+# Name of the file that contains BridgeDB's metrics.
+METRICS_FILE = "bridgedb-metrics.log"
+
 #------------------
 # Logging Options  \
 #------------------------------------------------------------------------------
@@ -260,16 +263,19 @@ FORCE_FLAGS = [("Stable", 1)]
 # Only consider routers whose purpose matches this string.
 BRIDGE_PURPOSE = "bridge"
 
-# TASKS is a dictionary mapping the names of tasks to the frequency with which
-# they should be run (in seconds). If a task's value is set to 0, it will not
-# be scheduled to run.
+# TASKS is a dictionary mapping the names of tasks to a tuple consisting of the
+# frequency with which they should be run (in seconds) and a boolean value
+# expressing if the task should be run immediately after start up. If a task's
+# frequency is set to 0, it will not be scheduled to run.
 TASKS = {
     # Download a list of Tor exit relays once every three hours (by running
     # scripts/get-exit-list) and add those exit relays to the list of proxies
     # loaded from the PROXY_LIST_FILES:
-    'GET_TOR_EXIT_LIST': 3 * 60 * 60,
+    'GET_TOR_EXIT_LIST': (3 * 60 * 60, True),
     # Delete *.unparseable descriptor files which are more than 24 hours old:
-    'DELETE_UNPARSEABLE_DESCRIPTORS': 24 * 60 * 60,
+    'DELETE_UNPARSEABLE_DESCRIPTORS': (24 * 60 * 60, False),
+    # Export usage metrics every 24 hours:
+    'EXPORT_METRICS': (24 * 60 * 60, False),
 }
 
 # SUPPORTED_TRANSPORTS is a dictionary mapping Pluggable Transport methodnames
diff --git a/bridgedb/distributors/email/autoresponder.py b/bridgedb/distributors/email/autoresponder.py
index ff65a73..e69f78a 100644
--- a/bridgedb/distributors/email/autoresponder.py
+++ b/bridgedb/distributors/email/autoresponder.py
@@ -48,6 +48,8 @@ from twisted.internet import reactor
 from twisted.mail import smtp
 from twisted.python import failure
 
+from bridgedb import strings
+from bridgedb import metrics
 from bridgedb import safelog
 from bridgedb.crypto import NEW_BUFFER_INTERFACE
 from bridgedb.distributors.email import dkim
@@ -62,6 +64,10 @@ from bridgedb.parse.addr import canonicalizeEmailDomain
 from bridgedb.util import levenshteinDistance
 from bridgedb import translations
 
+# We use our metrics singleton to keep track of BridgeDB metrics such as
+# "number of failed email bridge requests."
+metrix = metrics.EmailMetrics()
+
 
 def createResponseBody(lines, context, client, lang='en'):
     """Parse the **lines** from an incoming email request and determine how to
@@ -424,6 +430,16 @@ class SMTPAutoresponder(smtp.SMTPClient):
         body = createResponseBody(self.incoming.lines,
                                   self.incoming.context,
                                   client, lang)
+
+        # The string EMAIL_MISC_TEXT[1] shows up in an email if BridgeDB
+        # responds with bridges.  Everything else we count as an invalid
+        # request.
+        translator = translations.installTranslations(lang)
+        if body is not None and translator.gettext(strings.EMAIL_MISC_TEXT[1]) in body:
+            metrix.recordValidEmailRequest(self)
+        else:
+            metrix.recordInvalidEmailRequest(self)
+
         if not body: return  # The client was already warned.
 
         messageID = self.incoming.message.getheader("Message-ID", None)
diff --git a/bridgedb/distributors/https/server.py b/bridgedb/distributors/https/server.py
index 8c50bc1..732f8bf 100644
--- a/bridgedb/distributors/https/server.py
+++ b/bridgedb/distributors/https/server.py
@@ -52,6 +52,7 @@ from bridgedb import crypto
 from bridgedb import strings
 from bridgedb import translations
 from bridgedb import txrecaptcha
+from bridgedb import metrics
 from bridgedb.distributors.common.http import setFQDN
 from bridgedb.distributors.common.http import getFQDN
 from bridgedb.distributors.common.http import getClientIP
@@ -85,6 +86,10 @@ logging.debug("Set template root to %s" % TEMPLATE_DIR)
 #: Localisations which BridgeDB supports which should be rendered right-to-left.
 rtl_langs = ('ar', 'he', 'fa', 'gu_IN', 'ku')
 
+# We use our metrics singleton to keep track of BridgeDB metrics such as
+# "number of failed HTTPS bridge requests."
+metrix = metrics.HTTPSMetrics()
+
 
 def replaceErrorPage(request, error, template_name=None, html=True):
     """Create a general error page for displaying in place of tracebacks.
@@ -495,6 +500,7 @@ class CaptchaProtectedResource(CustomErrorHandlingResource, CSPResource):
 
         try:
             if self.checkSolution(request) is True:
+                metrix.recordValidHTTPSRequest(request)
                 return self.resource.render(request)
         except ValueError as err:
             logging.debug(err.message)
@@ -504,11 +510,14 @@ class CaptchaProtectedResource(CustomErrorHandlingResource, CSPResource):
             # work of art" as pennance for their sins.
             d = task.deferLater(reactor, 1, lambda: request)
             d.addCallback(redirectMaliciousRequest)
+            metrix.recordInvalidHTTPSRequest(request)
             return NOT_DONE_YET
         except Exception as err:
             logging.debug(err.message)
+            metrix.recordInvalidHTTPSRequest(request)
             return replaceErrorPage(request, err)
 
+        metrix.recordInvalidHTTPSRequest(request)
         logging.debug("Client failed a CAPTCHA; returning redirect to %s"
                       % request.uri)
         return redirectTo(request.uri, request)
@@ -764,10 +773,12 @@ class ReCaptchaProtectedResource(CaptchaProtectedResource):
             # breaking). Hence, the 'no cover' pragma.
             if solution.is_valid:  # pragma: no cover
                 logging.info("Valid CAPTCHA solution from %r." % clientIP)
+                metrix.recordValidHTTPSRequest(request)
                 return (True, request)
             else:
                 logging.info("Invalid CAPTCHA solution from %r: %r"
                              % (clientIP, solution.error_code))
+                metrix.recordInvalidHTTPSRequest(request)
                 return (False, request)
 
         d = txrecaptcha.submit(challenge, response, self.secretKey,
diff --git a/bridgedb/distributors/moat/server.py b/bridgedb/distributors/moat/server.py
index 509d471..73d2423 100644
--- a/bridgedb/distributors/moat/server.py
+++ b/bridgedb/distributors/moat/server.py
@@ -38,6 +38,7 @@ from twisted.internet.error import CannotListenError
 from twisted.web import resource
 from twisted.web.server import Site
 
+from bridgedb import metrics
 from bridgedb import captcha
 from bridgedb import crypto
 from bridgedb.distributors.common.http import setFQDN
@@ -49,6 +50,10 @@ from bridgedb.schedule import Unscheduled
 from bridgedb.schedule import ScheduledInterval
 from bridgedb.util import replaceControlChars
 
+# We use our metrics singleton to keep track of BridgeDB metrics such as
+# "number of failed moat bridge requests."
+metrix = metrics.MoatMetrics()
+
 
 #: The current version of the moat JSON API that we speak
 MOAT_API_VERSION = '0.1.0'
@@ -681,6 +686,8 @@ class CaptchaCheckResource(CaptchaResource):
         error = self.checkRequestHeaders(request)
 
         if error:  # pragma: no cover
+            logging.debug("Error while checking moat request headers.")
+            metrix.recordInvalidMoatRequest(request)
             return error.render(request)
 
         data = {
@@ -694,7 +701,11 @@ class CaptchaCheckResource(CaptchaResource):
         }
 
         try:
+            pos = request.content.tell()
             encoded_client_data = request.content.read()
+            # We rewind the stream to its previous position to allow the
+            # metrix module to read the request's content too.
+            request.content.seek(pos)
             client_data = json.loads(encoded_client_data)["data"][0]
             clientIP = self.getClientIP(request)
 
@@ -704,16 +715,19 @@ class CaptchaCheckResource(CaptchaResource):
             valid = self.checkSolution(challenge, solution, clientIP)
         except captcha.CaptchaExpired:
             logging.debug("The challenge had timed out")
+            metrix.recordInvalidMoatRequest(request)
             return self.failureResponse(5, request)
         except Exception as impossible:
             logging.warn("Unhandled exception while processing a POST /fetch request!")
             logging.error(impossible)
+            metrix.recordInvalidMoatRequest(request)
             return self.failureResponse(4, request)
 
         if valid:
             qrcode = None
             bridgeRequest = self.createBridgeRequest(clientIP, client_data)
             bridgeLines = self.getBridgeLines(bridgeRequest)
+            metrix.recordValidMoatRequest(request)
 
             # If we can only return less than the configured
             # MOAT_BRIDGES_PER_ANSWER then log a warning.
@@ -736,6 +750,7 @@ class CaptchaCheckResource(CaptchaResource):
 
             return self.formatDataForResponse(data, request)
         else:
+            metrix.recordInvalidMoatRequest(request)
             return self.failureResponse(4, request)
 
 
diff --git a/bridgedb/main.py b/bridgedb/main.py
index 4d1d38a..6b99127 100644
--- a/bridgedb/main.py
+++ b/bridgedb/main.py
@@ -25,6 +25,7 @@ from bridgedb import persistent
 from bridgedb import proxy
 from bridgedb import runner
 from bridgedb import util
+from bridgedb import metrics
 from bridgedb.bridges import MalformedBridgeInfo
 from bridgedb.bridges import MissingServerDescriptorDigest
 from bridgedb.bridges import ServerDescriptorDigestMismatch
@@ -72,6 +73,22 @@ def writeAssignments(hashring, filename):
     except IOError:
         logging.info("I/O error while writing assignments to: '%s'" % filename)
 
+def writeMetrics(filename, measurementInterval):
+    """Dump usage metrics to disk.
+
+    :param str filename: The filename to write the metrics to.
+    :param int measurementInterval: The number of seconds after which we rotate
+        and dump our metrics.
+    """
+
+    logging.debug("Dumping metrics to file: '%s'" % filename)
+
+    try:
+        with open(filename, 'a') as fh:
+            metrics.export(fh, measurementInterval)
+    except IOError as err:
+        logging.error("Failed to write metrics to '%s': %s" % (filename, err))
+
 def load(state, hashring, clear=False):
     """Read and parse all descriptors, and load into a bridge hashring.
 
@@ -398,6 +415,7 @@ def run(options, reactor=reactor):
         for proxyfile in cfg.PROXY_LIST_FILES:
             logging.info("Loading proxies from: %s" % proxyfile)
             proxy.loadProxiesFromFile(proxyfile, proxies, removeStale=True)
+        metrics.setProxies(proxies)
 
         logging.info("Reparsing bridge descriptors...")
         (hashring,
@@ -463,6 +481,8 @@ def run(options, reactor=reactor):
         if config.EMAIL_DIST and config.EMAIL_SHARE:
             addSMTPServer(config, emailDistributor)
 
+        metrics.setSupportedTransports(config.SUPPORTED_TRANSPORTS)
+
         tasks = {}
 
         # Setup all our repeating tasks:
@@ -483,14 +503,19 @@ def run(options, reactor=reactor):
             runner.cleanupUnparseableDescriptors,
             os.path.dirname(config.STATUS_FILE), delUnparseableSecs)
 
+        measurementInterval, _ = config.TASKS['EXPORT_METRICS']
+        tasks['EXPORT_METRICS'] = task.LoopingCall(
+            writeMetrics, state.METRICS_FILE, measurementInterval)
+
         # Schedule all configured repeating tasks:
-        for name, seconds in config.TASKS.items():
+        for name, value in config.TASKS.items():
+            seconds, startNow = value
             if seconds:
                 try:
                     # Set now to False to get the servers up and running when
                     # first started, rather than spend a bunch of time in
                     # scheduled tasks.
-                    tasks[name].start(abs(seconds), now=False)
+                    tasks[name].start(abs(seconds), now=startNow)
                 except KeyError:
                     logging.info("Task %s is disabled and will not run." % name)
                 else:
diff --git a/bridgedb/metrics.py b/bridgedb/metrics.py
new file mode 100644
index 0000000..4e1c880
--- /dev/null
+++ b/bridgedb/metrics.py
@@ -0,0 +1,461 @@
+# -*- coding: utf-8 ; test-case-name: bridgedb.test.test_metrics ; -*-
+# _____________________________________________________________________________
+#
+# This file is part of BridgeDB, a Tor bridge distribution system.
+#
+# :authors: please see included AUTHORS file
+# :copyright: (c) 2019, The Tor Project, Inc.
+#             (c) 2019, Philipp Winter
+# :license: see LICENSE for licensing information
+# _____________________________________________________________________________
+
+"""API for keeping track of BridgeDB statistics, e.g., the demand for bridges
+over time.
+"""
+
+import logging
+import ipaddr
+import operator
+import json
+import datetime
+
+from bridgedb import geo
+from bridgedb.distributors.common.http import getClientIP
+from bridgedb.distributors.email import request
+from bridgedb.distributors.email.distributor import EmailRequestedHelp
+
+from twisted.mail.smtp import Address
+
+# Our data structure to keep track of exit relays.  The variable is of type
+# bridgedb.proxy.ProxySet.  We reserve a special country code (determined by
+# PROXY_CC below) for exit relays and other proxies.
+PROXIES = None
+
+# Our custom country code for IP addresses that we couldn't map to a country.
+# This can happen for private IP addresses or if our geo-location provider has
+# no mapping.
+UNKNOWN_CC = "??"
+
+# Our custom country code for IP addresses that are proxies, e.g., Tor exit
+# relays.  The code "zz" is free for assignment for user needs as specified
+# here: <https://en.wikipedia.org/w/index.php?title=ISO_3166-1_alpha-2&oldid=906611218#Decoding_table>
+PROXY_CC = "ZZ"
+
+# We use BIN_SIZE to reduce the granularity of our counters.  We round up
+# numbers to the next multiple of BIN_SIZE, e.g., 28 is rounded up to:
+# 10 * 3 = 30.
+BIN_SIZE = 10
+
+# The prefix length that we use to keep track of the number of unique subnets
+# we have seen HTTPS requests from.
+SUBNET_CTR_PREFIX_LEN = 20
+
+# All of the pluggable transports BridgeDB currently supports.
+SUPPORTED_TRANSPORTS = None
+
+# Major and minor version number for our statistics format.
+METRICS_MAJOR_VERSION = 1
+METRICS_MINOR_VERSION = 0
+
+
+def setProxies(proxies):
+    """Set the given proxies.
+
+    :type proxies: :class:`~bridgedb.proxy.ProxySet`
+    :param proxies: The container for the IP addresses of any currently
+        known open proxies.
+    """
+    logging.debug("Setting %d proxies." % len(proxies))
+    global PROXIES
+    PROXIES = proxies
+
+
+def setSupportedTransports(supportedTransports):
+    """Set the given supported transports.
+
+    :param dict supportedTransports: The transport types that BridgeDB
+        currently supports.
+    """
+
+    logging.debug("Setting %d supported transports." %
+                  len(supportedTransports))
+    global SUPPORTED_TRANSPORTS
+    SUPPORTED_TRANSPORTS = supportedTransports
+
+
+def isTransportSupported(transport):
+    """Return `True' if the given transport is supported or `False' otherwise.
+
+    :param str transport: The transport protocol.
+    """
+
+    if SUPPORTED_TRANSPORTS is None:
+        logging.error("Bug: Variable SUPPORTED_TRANSPORTS is None.")
+        return False
+
+    return transport in SUPPORTED_TRANSPORTS
+
+
+def export(fh, measurementInterval):
+    """Export metrics by writing them to the given file handle.
+
+    :param file fh: The file handle to which we're writing our metrics.
+    :param int measurementInterval: The number of seconds after which we rotate
+        and dump our metrics.
+    """
+
+    httpsMetrix = HTTPSMetrics()
+    emailMetrix = EmailMetrics()
+    moatMetrix = MoatMetrics()
+
+    # Rotate our metrics.
+    httpsMetrix.rotate()
+    emailMetrix.rotate()
+    moatMetrix.rotate()
+
+    numProxies = len(PROXIES) if PROXIES is not None else 0
+    if numProxies == 0:
+        logging.error("Metrics module doesn't have any proxies.")
+    else:
+        logging.debug("Metrics module knows about %d proxies." % numProxies)
+
+    now = datetime.datetime.utcnow()
+    fh.write("bridgedb-stats-end %s (%d s)\n" % (
+             now.strftime("%Y-%m-%d %H:%M:%S"),
+             measurementInterval))
+    fh.write("bridgedb-stats-version %d.%d\n" % (METRICS_MAJOR_VERSION,
+                                                 METRICS_MINOR_VERSION))
+
+    httpsLines = httpsMetrix.getMetrics()
+    for line in httpsLines:
+        fh.write("bridgedb-metric-count %s\n" % line)
+
+    moatLines = moatMetrix.getMetrics()
+    for line in moatLines:
+        fh.write("bridgedb-metric-count %s\n" % line)
+
+    emailLines = emailMetrix.getMetrics()
+    for line in emailLines:
+        fh.write("bridgedb-metric-count %s\n" % line)
+
+
+def resolveCountryCode(ipAddr):
+    """Return the country code of the given IP address.
+
+    :param str ipAddr: The IP address to resolve.
+
+    :rtype: str
+    :returns: A two-letter country code.
+    """
+
+    if ipAddr is None:
+        logging.warning("Given IP address was None.  Using %s as country "
+                        "code." % UNKNOWN_CC)
+        return UNKNOWN_CC
+
+    if PROXIES is None:
+        logging.warning("Proxies are not yet set.")
+    elif ipAddr in PROXIES:
+        return PROXY_CC
+
+    countryCode = geo.getCountryCode(ipaddr.IPAddress(ipAddr))
+
+    # countryCode may be None if GeoIP is unable to map an IP address to a
+    # country.
+    return UNKNOWN_CC if countryCode is None else countryCode
+
+
+class Singleton(type):
+    _instances = {}
+
+    def __call__(cls, *args, **kwargs):
+        if cls not in cls._instances:
+            cls._instances[cls] = super(Singleton, cls).__call__(*args,
+                                                                 **kwargs)
+        return cls._instances[cls]
+
+    def clear(cls):
+        """Drop the instance (necessary for unit tests)."""
+        try:
+            del cls._instances[cls]
+        except KeyError:
+            pass
+
+
+class Metrics(object):
+    """Base class representing metrics.
+
+    This class provides functionality that our three distribution mechanisms
+    share.
+    """
+
+    # We're using a meta class to implement a singleton for Metrics.
+    __metaclass__ = Singleton
+
+    def __init__(self, binSize=BIN_SIZE):
+        logging.debug("Instantiating metrics class.")
+        self.binSize = binSize
+
+        # Metrics cover a 24 hour period.  To that end, we're maintaining two
+        # data structures: our "hot" metrics are currently being populated
+        # while our "cold" metrics are finished, and valid for 24 hours.  After
+        # that, our hot metrics turn into cold metrics, and we start over.
+        self.hotMetrics = dict()
+        self.coldMetrics = dict()
+
+    def rotate(self):
+        """Rotate our metrics."""
+
+        self.coldMetrics = self.hotMetrics
+        self.hotMetrics = dict()
+
+    def findAnomaly(self, request):
+        anomaly = "none"
+
+        # TODO: Inspect email for traces of bots, Sherlock Holmes-style!
+        # See <https://bugs.torproject.org/9316#comment:19> for the rationale.
+        # All classes that inherit from Metrics() should implement this method.
+
+        return anomaly
+
+    def getMetrics(self):
+        """Get our sanitized current metrics, one per line.
+
+        Metrics are of the form:
+
+            [
+             "moat.obfs4.us.success.none 10",
+             "https.vanilla.de.success.none 30",
+             ...
+            ]
+
+        :rtype: list
+        :returns: A list of metric lines.
+        """
+        lines = []
+        for key, value in self.coldMetrics.iteritems():
+            # Round up our value to the nearest multiple of self.binSize to
+            # reduce the accuracy of our real values.
+            if (value % self.binSize) > 0:
+                value += self.binSize - (value % self.binSize)
+            lines.append("%s %d" % (key, value))
+        return lines
+
+    def set(self, key, value):
+        """Set the given key to the given value.
+
+        :param str key: The time series key.
+        :param int value: The time series value.
+        """
+        self.hotMetrics[key] = value
+
+    def inc(self, key):
+        """Increment the given key.
+
+        :param str key: The time series key.
+        """
+        if key in self.hotMetrics:
+            self.hotMetrics[key] += 1
+        else:
+            self.set(key, 1)
+
+    def createKey(self, distMechanism, bridgeType, countryOrProvider,
+                  success, anomaly):
+        """Create and return a time series key.
+
+        :param str distMechanism: A string representing our distribution
+            mechanism, e.g., "https".
+        :param str bridgeType: A string representing the requested bridge
+            type, e.g., "vanilla" or "obfs4".
+        :param str countryOrProvider: A string representing the client's
+            two-letter country code or email provider, e.g., "it" or
+            "yahoo.com".
+        :param bool success: ``True`` if the request was successful and
+            BridgeDB handed out a bridge; ``False`` otherwise.
+        :param str anomaly: ``None`` if the request was not anomalous and hence
+            believed to have come from a real user; otherwise a string
+            representing the type of anomaly.
+        :rtype: str
+        :returns: A key that uniquely identifies the given metrics
+            combinations.
+        """
+
+        countryOrProvider = countryOrProvider.lower()
+        bridgeType = bridgeType.lower()
+        success = "success" if success else "fail"
+
+        key = "%s.%s.%s.%s.%s" % (distMechanism, bridgeType,
+                                  countryOrProvider, success, anomaly)
+
+        return key
+
+
+class HTTPSMetrics(Metrics):
+
+    def __init__(self):
+        super(HTTPSMetrics, self).__init__()
+
+        # Maps subnets (keyed by their network address, e.g., "1.2.0.0") to
+        # the number of times we've seen requests from the given subnet.
+        self.subnetCounter = dict()
+        self.keyPrefix = "https"
+
+    def getTopNSubnets(self, n=10):
+
+        sortedByNum = sorted(self.subnetCounter.items(),
+                             key=operator.itemgetter(1),
+                             reverse=True)
+        return sortedByNum[:n]
+
+    def _recordHTTPSRequest(self, request, success):
+
+        logging.debug("HTTPS request has user agent: %s" %
+                      request.requestHeaders.getRawHeaders("User-Agent"))
+
+        # Pull the client's IP address out of the request and convert it to a
+        # two-letter country code.
+        ipAddr = getClientIP(request,
+                             useForwardedHeader=True,
+                             skipLoopback=False)
+        self.updateSubnetCounter(ipAddr)
+        countryCode = resolveCountryCode(ipAddr)
+
+        transports = request.args.get("transport", list())
+        if len(transports) > 1:
+            logging.warning("Expected a maximum of one transport but %d are "
+                            "given." % len(transports))
+
+        if len(transports) == 0:
+            bridgeType = "vanilla"
+        elif transports[0] == "" or transports[0] == "0":
+            bridgeType = "vanilla"
+        else:
+            bridgeType = transports[0]
+
+        # BridgeDB's HTTPS interface exposes transport types as a drop down
+        # menu but users can still request anything by manipulating HTTP
+        # parameters.
+        if not isTransportSupported(bridgeType):
+            logging.warning("User requested unsupported transport type %s "
+                            "over HTTPS." % bridgeType)
+            return
+
+        logging.debug("Recording %svalid HTTPS request for %s from %s (%s)." %
+                      ("" if success else "in",
+                       bridgeType, ipAddr, countryCode))
+
+        # Now update our metrics.
+        key = self.createKey(self.keyPrefix, bridgeType, countryCode,
+                             success, self.findAnomaly(request))
+        self.inc(key)
+
+    def recordValidHTTPSRequest(self, request):
+        self._recordHTTPSRequest(request, True)
+
+    def recordInvalidHTTPSRequest(self, request):
+        self._recordHTTPSRequest(request, False)
+
+    def updateSubnetCounter(self, ipAddr):
+
+        if ipAddr is None:
+            return
+
+        nw = ipaddr.IPNetwork(ipAddr + "/" + str(SUBNET_CTR_PREFIX_LEN),
+                              strict=False)
+        subnet = nw.network.compressed
+        logging.debug("Updating subnet counter with %s" % subnet)
+
+        num = self.subnetCounter.get(subnet, 0)
+        self.subnetCounter[subnet] = num + 1
+
+
+class EmailMetrics(Metrics):
+
+    def __init__(self):
+        super(EmailMetrics, self).__init__()
+        self.keyPrefix = "email"
+
+    def _recordEmailRequest(self, smtpAutoresp, success):
+
+        emailAddrs = smtpAutoresp.getMailTo()
+        if len(emailAddrs) == 0:
+            # This is just for unit tests.
+            emailAddr = Address("foo@gmail.com")
+        else:
+            emailAddr = emailAddrs[0]
+
+        # Get the requested transport protocol.
+        try:
+            br = request.determineBridgeRequestOptions(
+                    smtpAutoresp.incoming.lines)
+        except EmailRequestedHelp:
+            return
+        bridgeType = "vanilla" if not len(br.transports) else br.transports[0]
+
+        # Over email, transports are requested by typing them.  Typos happen
+        # and users can request anything, really.
+        if not isTransportSupported(bridgeType):
+            logging.warning("User requested unsupported transport type %s "
+                            "over email." % bridgeType)
+            return
+
+        logging.debug("Recording %svalid email request for %s from %s." %
+                      ("" if success else "in", bridgeType, emailAddr))
+        sld = emailAddr.domain.split(".")[0]
+
+        # Now update our metrics.
+        key = self.createKey(self.keyPrefix, bridgeType, sld, success,
+                             self.findAnomaly(request))
+        self.inc(key)
+
+    def recordValidEmailRequest(self, smtpAutoresp):
+        self._recordEmailRequest(smtpAutoresp, True)
+
+    def recordInvalidEmailRequest(self, smtpAutoresp):
+        self._recordEmailRequest(smtpAutoresp, False)
+
+
+class MoatMetrics(Metrics):
+
+    def __init__(self):
+        super(MoatMetrics, self).__init__()
+        self.keyPrefix = "moat"
+
+    def _recordMoatRequest(self, request, success):
+
+        logging.debug("Moat request has user agent: %s" %
+                      request.requestHeaders.getRawHeaders("User-Agent"))
+
+        ipAddr = getClientIP(request,
+                             useForwardedHeader=True,
+                             skipLoopback=False)
+        countryCode = resolveCountryCode(ipAddr)
+
+        try:
+            encodedClientData = request.content.read()
+            clientData = json.loads(encodedClientData)["data"][0]
+            transport = clientData["transport"]
+            bridgeType = "vanilla" if not len(transport) else transport
+        except Exception as err:
+            logging.warning("Could not decode request: %s" % err)
+            return
+
+        if not isTransportSupported(bridgeType):
+            logging.warning("User requested unsupported transport type %s "
+                            "over moat." % bridgeType)
+            return
+
+        logging.debug("Recording %svalid moat request for %s from %s (%s)." %
+                      ("" if success else "in",
+                       bridgeType, ipAddr, countryCode))
+
+        # Now update our metrics.
+        key = self.createKey(self.keyPrefix, bridgeType,
+                             countryCode, success, self.findAnomaly(request))
+        self.inc(key)
+
+    def recordValidMoatRequest(self, request):
+        self._recordMoatRequest(request, True)
+
+    def recordInvalidMoatRequest(self, request):
+        self._recordMoatRequest(request, False)
diff --git a/bridgedb/test/test_metrics.py b/bridgedb/test/test_metrics.py
new file mode 100644
index 0000000..a870fc2
--- /dev/null
+++ b/bridgedb/test/test_metrics.py
@@ -0,0 +1,204 @@
+# -*- coding: utf-8 ; test-case-name: bridgedb.test.test_metrics ; -*-
+# _____________________________________________________________________________
+#
+# This file is part of BridgeDB, a Tor bridge distribution system.
+#
+# :authors: please see included AUTHORS file
+# :copyright: (c) 2019, The Tor Project, Inc.
+#             (c) 2019, Philipp Winter
+# :license: see LICENSE for licensing information
+# _____________________________________________________________________________
+
+"""Unittests for the :mod:`bridgedb.metrics` module.
+
+These tests are meant to ensure that the :mod:`bridgedb.metrics` module is
+functioning as expected.
+"""
+
+import StringIO
+import json
+import os
+
+from bridgedb import metrics
+from bridgedb.test.https_helpers import DummyRequest
+from bridgedb.distributors.email.server import SMTPMessage
+from bridgedb.test.email_helpers import _createMailServerContext
+from bridgedb.test.email_helpers import _createConfig
+from bridgedb.distributors.moat import server
+
+from twisted.trial import unittest
+from twisted.test import proto_helpers
+
+
+class StateTest(unittest.TestCase):
+    """Tests for the metrics classes in :mod:`bridgedb.metrics`."""
+
+    def setUp(self):
+        """Reset all metrics singletons and prepare an HTTPS metrics key."""
+        # Remove the trial working directory name from the end of the path.
+        # str.rstrip() must not be used for this: it strips a *set of
+        # characters* rather than a suffix and could eat into the name of
+        # the parent directory.
+        cwd = os.getcwd()
+        trialDir = '_trial_temp'
+        if cwd.endswith(trialDir):
+            cwd = cwd[:-len(trialDir)]
+        self.topDir = cwd
+        self.captchaDir = os.path.join(self.topDir, 'captchas')
+
+        # Clear all singletons before each test to prevent cross-test
+        # interference.
+        type(metrics.HTTPSMetrics()).clear()
+        type(metrics.EmailMetrics()).clear()
+        type(metrics.MoatMetrics()).clear()
+
+        metrics.setSupportedTransports({
+            'obfs2': False,
+            'obfs3': True,
+            'obfs4': True,
+            'scramblesuit': True,
+            'fte': True,
+        })
+
+        self.metrix = metrics.HTTPSMetrics()
+        self.key = self.metrix.createKey("https", "obfs4", "de", True, None)
+
+    def test_binning(self):
+        """Exported values should be rounded up to the next multiple of 10."""
+
+        key = self.metrix.createKey("https", "obfs4", "de", True, None)
+        # Let the cold metrics alias the hot ones so that getMetrics() sees
+        # every update made below.
+        self.metrix.coldMetrics = self.metrix.hotMetrics
+
+        # A value of 1 should be rounded up to 10.
+        self.metrix.inc(key)
+        metrixLines = self.metrix.getMetrics()
+        key, value = metrixLines[0].split(" ")
+        self.assertEqual(int(value), 10)
+
+        # A value of 10 should remain 10.
+        self.metrix.set(key, 10)
+        metrixLines = self.metrix.getMetrics()
+        key, value = metrixLines[0].split(" ")
+        self.assertEqual(int(value), 10)
+
+        # A value of 11 should be rounded up to 20.
+        self.metrix.inc(key)
+        metrixLines = self.metrix.getMetrics()
+        key, value = metrixLines[0].split(" ")
+        self.assertEqual(int(value), 20)
+
+    def test_key_manipulation(self):
+        """createKey(), inc() and set() should behave as expected."""
+
+        self.metrix = metrics.HTTPSMetrics()
+        key = self.metrix.createKey("email", "obfs4", "de", True, "none")
+        self.assertEqual(key, "email.obfs4.de.success.none")
+
+        self.metrix.inc(key)
+        self.assertEqual(self.metrix.hotMetrics[key], 1)
+
+        self.metrix.set(key, 10)
+        self.assertEqual(self.metrix.hotMetrics[key], 10)
+
+    def test_rotation(self):
+        """rotate() should move the hot metrics to cold and empty the hot."""
+
+        key = self.metrix.createKey("moat", "obfs4", "de", True, "none")
+        self.metrix.inc(key)
+        oldHotMetrics = self.metrix.hotMetrics
+        self.metrix.rotate()
+
+        self.assertEqual(len(self.metrix.coldMetrics), 1)
+        self.assertEqual(len(self.metrix.hotMetrics), 0)
+        self.assertEqual(self.metrix.coldMetrics, oldHotMetrics)
+
+    def test_export(self):
+        """export() should write two header lines followed by the metrics."""
+
+        self.metrix.inc(self.key)
+
+        self.metrix.coldMetrics = self.metrix.hotMetrics
+        pseudo_fh = StringIO.StringIO()
+        metrics.export(pseudo_fh, 0)
+
+        self.assertTrue(len(pseudo_fh.getvalue()) > 0)
+
+        lines = pseudo_fh.getvalue().split("\n")
+        self.assertTrue(lines[0].startswith("bridgedb-stats-end"))
+        self.assertTrue(lines[1].startswith("bridgedb-stats-version"))
+        self.assertEqual(
+            lines[2],
+            "bridgedb-metric-count https.obfs4.de.success.None 10")
+
+    def test_https_metrics(self):
+        """Valid and invalid HTTPS requests should be counted separately."""
+
+        # Pin the country code lookup to a fixed answer.  self.patch()
+        # restores the original function when the test ends, even if one of
+        # the assertions below fails.
+        self.patch(metrics, "resolveCountryCode", lambda _: "US")
+
+        key1 = "https.obfs4.us.success.none"
+        req1 = DummyRequest([b"bridges?transport=obfs4"])
+        # We have to set the request args manually when using a DummyRequest.
+        req1.args.update({'transport': ['obfs4']})
+        req1.getClientIP = lambda: "3.3.3.3"
+
+        self.metrix.recordValidHTTPSRequest(req1)
+        self.assertEqual(self.metrix.hotMetrics[key1], 1)
+
+        key2 = "https.obfs4.us.fail.none"
+        req2 = DummyRequest([b"bridges?transport=obfs4"])
+        # We have to set the request args manually when using a DummyRequest.
+        req2.args.update({'transport': ['obfs4']})
+        req2.getClientIP = lambda: "3.3.3.3"
+        self.metrix.recordInvalidHTTPSRequest(req2)
+        self.assertEqual(self.metrix.hotMetrics[key2], 1)
+
+    def test_email_metrics(self):
+        """Valid and invalid email requests should be counted separately."""
+
+        config = _createConfig()
+        context = _createMailServerContext(config)
+        message = SMTPMessage(context)
+        message.lines = [
+            "From: foo@gmail.com",
+            "To: bridges@torproject.org",
+            "Subject: testing",
+            "",
+            "get transport obfs4",
+        ]
+
+        message.message = message.getIncomingMessage()
+        responder = message.responder
+        tr = proto_helpers.StringTransportWithDisconnection()
+        tr.protocol = responder
+        responder.makeConnection(tr)
+
+        email_metrix = metrics.EmailMetrics()
+
+        key1 = "email.obfs4.gmail.success.none"
+        email_metrix.recordValidEmailRequest(responder)
+        self.assertEqual(email_metrix.hotMetrics[key1], 1)
+
+        key2 = "email.obfs4.gmail.fail.none"
+        email_metrix.recordInvalidEmailRequest(responder)
+        self.assertEqual(email_metrix.hotMetrics[key2], 1)
+
+    def test_moat_metrics(self):
+        """Valid and invalid moat requests should be counted separately."""
+
+        def create_moat_request():
+            """Build a fresh moat POST request that asks for obfs4."""
+            encoded_data = json.dumps({
+                'data': [{
+                    'id': '2',
+                    'type': 'moat-solution',
+                    'version': server.MOAT_API_VERSION,
+                    'transport': 'obfs4',
+                    'solution': 'Tvx74PMy',
+                    'qrcode': False,
+                }]
+            })
+
+            request = DummyRequest(["fetch"])
+            request.requestHeaders.addRawHeader('Content-Type',
+                                                'application/vnd.api+json')
+            request.requestHeaders.addRawHeader('Accept',
+                                                'application/vnd.api+json')
+            request.requestHeaders.addRawHeader('X-Forwarded-For', '3.3.3.3')
+            request.headers['X-Forwarded-For'.lower()] = '3.3.3.3'
+            request.method = b'POST'
+            request.writeContent(encoded_data)
+
+            return request
+
+        metrix = metrics.MoatMetrics()
+        metrix.recordValidMoatRequest(create_moat_request())
+        metrix.recordInvalidMoatRequest(create_moat_request())
+
+        key1 = "moat.obfs4.us.success.none"
+        key2 = "moat.obfs4.us.fail.none"
+        self.assertEqual(metrix.hotMetrics[key1], 1)
+        self.assertEqual(metrix.hotMetrics[key2], 1)





More information about the tor-commits mailing list