commit ce32921680158e8d977f72c16f05de1f9f69cc85 Author: Philipp Winter phw@nymity.ch Date: Mon Dec 2 15:23:25 2019 -0800
Make BridgeDB export internal metrics.
BridgeDB already exports usage metrics that help us understand how our users interact with BridgeDB. This commit adds internal metrics, which shed light on BridgeDB's internals. In particular, we add the following internal metrics:
* Number of bridges per distributor subring.
* Number of empty responses per distributor.
* Number of IPv4/IPv6 handouts.
* Descriptive statistics on the number of bridge handouts (min, max, median, quartile 1, quartile 3, upper whisker, lower whisker).
* Number of unique bridges that were handed out.
This patch fixes tpo/anti-censorship/bridgedb#31422 and also bumps BridgeDB's metrics format to version 2. --- CHANGELOG | 4 + bridgedb/distributors/email/autoresponder.py | 12 +- bridgedb/distributors/https/server.py | 22 ++- bridgedb/distributors/moat/server.py | 47 ++++-- bridgedb/main.py | 17 +++ bridgedb/metrics.py | 199 ++++++++++++++++++++++--- bridgedb/test/test_distributors_moat_server.py | 8 +- bridgedb/test/test_metrics.py | 132 ++++++++++++++++ doc/bridgedb-metrics-spec.txt | 92 ++++++++---- requirements.txt | 1 + 10 files changed, 455 insertions(+), 79 deletions(-)
diff --git a/CHANGELOG b/CHANGELOG index 10012ce..8939ac4 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,7 @@ + * FIXES https://bugs.torproject.org/31422 + Make BridgeDB report internal metrics, like the median number of users that + bridges were handed out to. + * FIXES https://bugs.torproject.org/34260 Parse bridge blocking information from SQL database.
diff --git a/bridgedb/distributors/email/autoresponder.py b/bridgedb/distributors/email/autoresponder.py index 9b0ac53..94a8f75 100644 --- a/bridgedb/distributors/email/autoresponder.py +++ b/bridgedb/distributors/email/autoresponder.py @@ -62,9 +62,10 @@ from bridgedb.parse.addr import canonicalizeEmailDomain from bridgedb.util import levenshteinDistance from bridgedb import translations
-# We use our metrics singleton to keep track of BridgeDB metrics such as +# We use our metrics singletons to keep track of BridgeDB metrics such as # "number of failed HTTPS bridge requests." -metrix = metrics.EmailMetrics() +emailMetrix = metrics.EmailMetrics() +internalMetrix = metrics.InternalMetrics()
def createResponseBody(lines, context, client, lang='en'): @@ -113,6 +114,9 @@ def createResponseBody(lines, context, client, lang='en'): transport = bridgeRequest.justOnePTType() answer = "".join(" %s\r\n" % b.getBridgeLine( bridgeRequest, context.includeFingerprints) for b in bridges) + internalMetrix.recordHandoutsPerBridge(bridgeRequest, bridges) + else: + internalMetrix.recordEmptyEmailResponse() return templates.buildAnswerMessage(translator, client, answer)
def generateResponse(fromAddress, client, body, subject=None, @@ -396,9 +400,9 @@ class SMTPAutoresponder(smtp.SMTPClient): # request. translator = translations.installTranslations(lang) if body is not None and translator.gettext(strings.EMAIL_MISC_TEXT[1]) in body: - metrix.recordValidEmailRequest(self) + emailMetrix.recordValidEmailRequest(self) else: - metrix.recordInvalidEmailRequest(self) + emailMetrix.recordInvalidEmailRequest(self)
if not body: return # The client was already warned.
diff --git a/bridgedb/distributors/https/server.py b/bridgedb/distributors/https/server.py index 91757e7..fd3fc74 100644 --- a/bridgedb/distributors/https/server.py +++ b/bridgedb/distributors/https/server.py @@ -90,9 +90,10 @@ logging.debug("Set template root to %s" % TEMPLATE_DIR) #: A list of supported language tuples. Use getSortedLangList() to read this variable. supported_langs = []
-# We use our metrics singleton to keep track of BridgeDB metrics such as +# We use our metrics singletons to keep track of BridgeDB metrics such as # "number of failed HTTPS bridge requests." -metrix = metrics.HTTPSMetrics() +httpsMetrix = metrics.HTTPSMetrics() +internalMetrix = metrics.InternalMetrics()
def stringifyRequestArgs(args): @@ -574,7 +575,7 @@ class CaptchaProtectedResource(CustomErrorHandlingResource, CSPResource):
try: if self.checkSolution(request) is True: - metrix.recordValidHTTPSRequest(request) + httpsMetrix.recordValidHTTPSRequest(request) return self.resource.render(request) except ValueError as err: logging.debug(str(err)) @@ -584,14 +585,14 @@ class CaptchaProtectedResource(CustomErrorHandlingResource, CSPResource): # work of art" as pennance for their sins. d = task.deferLater(reactor, 1, lambda: request) d.addCallback(redirectMaliciousRequest) - metrix.recordInvalidHTTPSRequest(request) + httpsMetrix.recordInvalidHTTPSRequest(request) return NOT_DONE_YET except Exception as err: logging.debug(str(err)) - metrix.recordInvalidHTTPSRequest(request) + httpsMetrix.recordInvalidHTTPSRequest(request) return replaceErrorPage(request, err)
- metrix.recordInvalidHTTPSRequest(request) + httpsMetrix.recordInvalidHTTPSRequest(request) logging.debug("Client failed a CAPTCHA; returning redirect to %s" % request.uri) return redirectTo(request.uri, request) @@ -847,12 +848,12 @@ class ReCaptchaProtectedResource(CaptchaProtectedResource): # breaking). Hence, the 'no cover' pragma. if solution.is_valid: # pragma: no cover logging.info("Valid CAPTCHA solution from %r." % clientIP) - metrix.recordValidHTTPSRequest(request) + httpsMetrix.recordValidHTTPSRequest(request) return (True, request) else: logging.info("Invalid CAPTCHA solution from %r: %r" % (clientIP, solution.error_code)) - metrix.recordInvalidHTTPSRequest(request) + httpsMetrix.recordInvalidHTTPSRequest(request) return (False, request)
d = txrecaptcha.submit(challenge, response, self.secretKey, @@ -1000,6 +1001,8 @@ class BridgesResource(CustomErrorHandlingResource, CSPResource): bridgeLines = [replaceControlChars(bridge.getBridgeLine( bridgeRequest, self.includeFingerprints)) for bridge in bridges]
+ internalMetrix.recordHandoutsPerBridge(bridgeRequest, bridges) + if antibot.isRequestFromBot(request): transports = bridgeRequest.transports # Return either a decoy bridge or no bridge. @@ -1059,6 +1062,9 @@ class BridgesResource(CustomErrorHandlingResource, CSPResource): rtl = False format = self.getResponseFormat(request)
+ if not bridgeLines: + internalMetrix.recordEmptyHTTPSResponse() + if format == 'plain': request.setHeader("Content-Type", "text/plain") try: diff --git a/bridgedb/distributors/moat/server.py b/bridgedb/distributors/moat/server.py index cdf2022..7e83d94 100644 --- a/bridgedb/distributors/moat/server.py +++ b/bridgedb/distributors/moat/server.py @@ -51,9 +51,10 @@ from bridgedb.schedule import Unscheduled from bridgedb.schedule import ScheduledInterval from bridgedb.util import replaceControlChars
-# We use our metrics singleton to keep track of BridgeDB metrics such as +# We use our metrics singletons to keep track of BridgeDB metrics such as # "number of failed HTTPS bridge requests." -metrix = metrics.MoatMetrics() +moatMetrix = metrics.MoatMetrics() +internalMetrix = metrics.InternalMetrics()
#: The current version of the moat JSON API that we speak @@ -538,26 +539,37 @@ class CaptchaCheckResource(CaptchaResource):
return bridgeRequest
- def getBridgeLines(self, bridgeRequest): - """Get bridge lines for a client's HTTP request. + def getBridges(self, bridgeRequest): + """Get bridges for a client's HTTP request.
:type bridgeRequest: :class:`MoatBridgeRequest` :param bridgeRequest: A valid bridge request object with pre-generated filters (as returned by :meth:`createBridgeRequest`). :rtype: list - :returns: A list of bridge lines. + :return: A list of :class:`~bridgedb.bridges.Bridge`s. """ - bridgeLines = list() + bridges = list() interval = self.schedule.intervalStart(time.time())
logging.debug("Replying to JSON API request from %s." % bridgeRequest.client)
if bridgeRequest.isValid(): bridges = self.distributor.getBridges(bridgeRequest, interval) - bridgeLines = [replaceControlChars(bridge.getBridgeLine(bridgeRequest)) - for bridge in bridges]
- return bridgeLines + return bridges + + def getBridgeLines(self, bridgeRequest, bridges): + """ + :type bridgeRequest: :class:`MoatBridgeRequest` + :param bridgeRequest: A valid bridge request object with pre-generated + filters (as returned by :meth:`createBridgeRequest`). + :param list bridges: A list of :class:`~bridgedb.bridges.Bridge` + objects. + :rtype: list + :return: A list of bridge lines. + """ + return [replaceControlChars(bridge.getBridgeLine(bridgeRequest)) + for bridge in bridges]
def extractClientSolution(self, data): """Extract the client's CAPTCHA solution from a POST request. @@ -686,7 +698,7 @@ class CaptchaCheckResource(CaptchaResource):
if error: # pragma: no cover logging.debug("Error while checking moat request headers.") - metrix.recordInvalidMoatRequest(request) + moatMetrix.recordInvalidMoatRequest(request) return error.render(request)
data = { @@ -714,25 +726,30 @@ class CaptchaCheckResource(CaptchaResource): valid = self.checkSolution(challenge, solution, clientIP) except captcha.CaptchaExpired: logging.debug("The challenge had timed out") - metrix.recordInvalidMoatRequest(request) + moatMetrix.recordInvalidMoatRequest(request) return self.failureResponse(5, request) except Exception as impossible: logging.warn("Unhandled exception while processing a POST /fetch request!") logging.error(impossible) - metrix.recordInvalidMoatRequest(request) + moatMetrix.recordInvalidMoatRequest(request) return self.failureResponse(4, request)
if valid: qrcode = None bridgeRequest = self.createBridgeRequest(clientIP, client_data) - bridgeLines = self.getBridgeLines(bridgeRequest) - metrix.recordValidMoatRequest(request) + bridges = self.getBridges(bridgeRequest) + bridgeLines = self.getBridgeLines(bridgeRequest, bridges) + moatMetrix.recordValidMoatRequest(request)
# If we can only return less than the configured # MOAT_BRIDGES_PER_ANSWER then log a warning. if len(bridgeLines) < self.nBridgesToGive: logging.warn(("Not enough bridges of the type specified to " "fulfill the following request: %s") % bridgeRequest) + if not bridgeLines: + internalMetrix.recordEmptyMoatResponse() + else: + internalMetrix.recordHandoutsPerBridge(bridgeRequest, bridges)
if antibot.isRequestFromBot(request): ttype = transport or "vanilla" @@ -754,7 +771,7 @@ class CaptchaCheckResource(CaptchaResource):
return self.formatDataForResponse(data, request) else: - metrix.recordInvalidMoatRequest(request) + moatMetrix.recordInvalidMoatRequest(request) return self.failureResponse(4, request)
diff --git a/bridgedb/main.py b/bridgedb/main.py index 8b851a3..8fdec23 100644 --- a/bridgedb/main.py +++ b/bridgedb/main.py @@ -483,6 +483,23 @@ def run(options, reactor=reactor): else: logging.warn("No Moat distributor created!")
+ metrix = metrics.InternalMetrics() + logging.info("Logging bridge ring metrics for %d rings." % + len(hashring.ringsByName)) + for ringName, ring in hashring.ringsByName.items(): + # Ring is of type FilteredBridgeSplitter or UnallocatedHolder. + # FilteredBridgeSplitter splits bridges into subhashrings based on + # filters. + if hasattr(ring, "filterRings"): + for (ringname, (filterFn, subring)) in ring.filterRings.items(): + subRingName = "-".join(ring.extractFilterNames(ringname)) + metrix.recordBridgesInHashring(ringName, + subRingName, + len(subring)) + elif hasattr(ring, "fingerprints"): + metrix.recordBridgesInHashring(ringName, "unallocated", + len(ring.fingerprints)) + # Dump bridge pool assignments to disk. writeAssignments(hashring, state.ASSIGNMENTS_FILE) state.save() diff --git a/bridgedb/metrics.py b/bridgedb/metrics.py index 48713f0..14073ee 100644 --- a/bridgedb/metrics.py +++ b/bridgedb/metrics.py @@ -18,6 +18,8 @@ import ipaddr import operator import json import datetime +import statistics +import numpy
from bridgedb import geo from bridgedb.distributors.common.http import getClientIP @@ -54,7 +56,7 @@ SUPPORTED_TRANSPORTS = None
# Version number for our metrics format. We increment the version if our # format changes. -METRICS_VERSION = 1 +METRICS_VERSION = 2
def setProxies(proxies): @@ -106,14 +108,14 @@ def export(fh, measurementInterval): and dump our metrics. """
- httpsMetrix = HTTPSMetrics() - emailMetrix = EmailMetrics() - moatMetrix = MoatMetrics() + metrics = [HTTPSMetrics(), + EmailMetrics(), + MoatMetrics(), + InternalMetrics()]
# Rotate our metrics. - httpsMetrix.rotate() - emailMetrix.rotate() - moatMetrix.rotate() + for m in metrics: + m.rotate()
numProxies = len(PROXIES) if PROXIES is not None else 0 if numProxies == 0: @@ -127,17 +129,14 @@ def export(fh, measurementInterval): measurementInterval)) fh.write("bridgedb-metrics-version %d\n" % METRICS_VERSION)
- httpsLines = httpsMetrix.getMetrics() - for line in httpsLines: - fh.write("bridgedb-metric-count %s\n" % line) + for m in metrics: + distLines = m.getMetrics() + for line in distLines: + fh.write("bridgedb-metric-count %s\n" % line) + logging.debug("Writing metrics line to file: %s" % line)
- moatLines = moatMetrix.getMetrics() - for line in moatLines: - fh.write("bridgedb-metric-count %s\n" % line) - - emailLines = emailMetrix.getMetrics() - for line in emailLines: - fh.write("bridgedb-metric-count %s\n" % line) + for m in metrics: + m.reset()
def resolveCountryCode(ipAddr): @@ -200,6 +199,7 @@ class Metrics(metaclass=Singleton): # that, our hot metrics turn into cold metrics, and we start over. self.hotMetrics = dict() self.coldMetrics = dict() + self.unsanitisedSet = set()
def rotate(self): """Rotate our metrics.""" @@ -216,8 +216,15 @@ class Metrics(metaclass=Singleton):
return anomaly
- def getMetrics(self): - """Get our sanitized current metrics, one per line. + def doNotSanitise(self, key): + """ + :param str key: A key that will not be sanitised when exporting our + metrics. + """ + self.unsanitisedSet.add(key) + + def getMetrics(self, sanitized=True): + """Get our (sanitized) current metrics, one per line.
Metrics are of the form:
@@ -227,15 +234,19 @@ class Metrics(metaclass=Singleton): ... ]
+ :param bool sanitized: ``True`` if the metrics must be sanitized. :rtype: list :returns: A list of metric lines. """ lines = [] for key, value in self.coldMetrics.items(): - # Round up our value to the nearest multiple of self.binSize to - # reduce the accuracy of our real values. - if (value % self.binSize) > 0: - value += self.binSize - (value % self.binSize) + + # There's no need to sanitize internal metrics. + if sanitized and not key in self.unsanitisedSet: + # Round up our value to the nearest multiple of self.binSize to + # reduce the accuracy of our real values. + if (value % self.binSize) > 0: + value += self.binSize - (value % self.binSize) lines.append("%s %d" % (key, value)) return lines
@@ -290,6 +301,148 @@ class Metrics(metaclass=Singleton):
return key
+ def reset(self): + """Reset internal variables after a metrics interval.""" + pass + + +class InternalMetrics(Metrics): + + def __init__(self): + super(InternalMetrics, self).__init__() + self.keyPrefix = "internal" + # Maps bridges to the number of time they have been handed out. + self.bridgeHandouts = {} + + # There's no reason for the following metrics to be sanitised. + handoutsPrefix = "{}.handouts".format(self.keyPrefix) + self.doNotSanitise("{}.unique-bridges".format(handoutsPrefix)) + self.doNotSanitise("{}.median".format(handoutsPrefix)) + self.doNotSanitise("{}.min".format(handoutsPrefix)) + self.doNotSanitise("{}.max".format(handoutsPrefix)) + self.doNotSanitise("{}.quartile1".format(handoutsPrefix)) + self.doNotSanitise("{}.quartile3".format(handoutsPrefix)) + self.doNotSanitise("{}.lower-whisker".format(handoutsPrefix)) + self.doNotSanitise("{}.upper-whisker".format(handoutsPrefix)) + + def reset(self): + """Reset bridge handouts after each interval.""" + + # Log the bridge that has seen the most handouts. This helps us + # understand BridgeDB better. + items = self.bridgeHandouts.items() + if len(items): + bridgeLine, num = sorted(items, key=lambda x: x[1], + reverse=True)[0] + logging.debug("Bridge line with most handouts (%d): %s" % + (num, bridgeLine)) + + self.bridgeHandouts = {} + + def _recordEmptyResponse(self, distributor): + """ + Record an empty bridge request response for the given distributor. + + :param str distributor: A bridge distributor, e.g., "https". + """ + self.inc("{}.{}.empty-response".format(self.keyPrefix, distributor)) + + def recordEmptyEmailResponse(self): + self._recordEmptyResponse("email") + + def recordEmptyHTTPSResponse(self): + self._recordEmptyResponse("https") + + def recordEmptyMoatResponse(self): + self._recordEmptyResponse("moat") + + def recordHandoutsPerBridge(self, bridgeRequest, bridges): + """ + Record how often a given bridge was handed out. + + Note that bridges that were not handed out will not be part of these + metrics. + + :type bridgeRequest: :api:`bridgerequest.BridgeRequestBase` + :param bridgeRequest: A bridge request for either one of our + distributors. + :param list bridges: A list of :class:`~bridgedb.Bridges.Bridge`s. + """ + + handoutsPrefix = "{}.handouts".format(self.keyPrefix) + + if bridgeRequest is None or bridges is None: + logging.warning("Given bridgeRequest and bridges cannot be None.") + return + + # Keep track of how many IPv4 and IPv6 requests we are seeing. + ipVersion = bridgeRequest.ipVersion + if ipVersion not in [4, 6]: + logging.warning("Got bridge request for unsupported IP version " + "{}.".format(ipVersion)) + return + else: + self.inc("{}.ipv{}".format(handoutsPrefix, ipVersion)) + + # Keep track of how many times we're handing out a given bridge. + for bridge in bridges: + # Use bridge lines as dictionary key. We cannot use the bridge + # objects because BridgeDB reloads its descriptors every 30 + # minutes, at which points the bridge objects change. + key = bridge.getBridgeLine(bridgeRequest) + num = self.bridgeHandouts.get(key, None) + if num is None: + self.bridgeHandouts[key] = 1 + else: + self.bridgeHandouts[key] = num + 1 + + # We need more than two handouts to calculate our statistics. + values = self.bridgeHandouts.values() + if len(values) <= 2: + return + + # Update our statistics. + self.set("{}.median".format(handoutsPrefix), + statistics.median(values)) + self.set("{}.min".format(handoutsPrefix), min(values)) + self.set("{}.max".format(handoutsPrefix), max(values)) + self.set("{}.unique-bridges".format(handoutsPrefix), + len(self.bridgeHandouts)) + # Python 3.8 comes with a statistics.quantiles function, which we + # should use instead of numpy once 3.8 is available in Debian stable. + q1, q3 = numpy.quantile(numpy.array(list(values)), [0.25, 0.75]) + self.set("{}.quartile1".format(handoutsPrefix), q1) + self.set("{}.quartile3".format(handoutsPrefix), q3) + # Determine our inter-quartile range (the difference between quartile 3 + # and quartile 1) and use it to calculate the upper and lower whiskers + # as you would see them in a boxplot. + iqr = q3 - q1 + lowerWhisker = min([x for x in values if x >= q1 - (1.5 * iqr)]) + upperWhisker = max([x for x in values if x <= q3 + (1.5 * iqr)]) + self.set("{}.lower-whisker".format(handoutsPrefix), lowerWhisker) + self.set("{}.upper-whisker".format(handoutsPrefix), upperWhisker) + + def recordBridgesInHashring(self, ringName, subRingName, numBridges): + """ + Record the number of bridges per hashring. + + :param str ringName: The name of the ring, e.g., "https". + :param str subRingName: The name of the subring, e.g., + "byIPv6-bySubring1of4". + :param int numBridges: The number of bridges in the given subring. + """ + + if not ringName or not subRingName: + logging.warning("Ring name ({}) and subring name ({}) cannot be " + "empty.".format(ringName, subRingName)) + return + + logging.info("Recording metrics for bridge (sub)rings: %s/%s/%d." % + (ringName, subRingName, numBridges)) + # E.g, concatenate "https" with "byipv6-bysubring1of4". + key = "{}.{}.{}".format(self.keyPrefix, ringName, subRingName.lower()) + self.set(key, numBridges) +
class HTTPSMetrics(Metrics):
diff --git a/bridgedb/test/test_distributors_moat_server.py b/bridgedb/test/test_distributors_moat_server.py index 695116b..3d33823 100644 --- a/bridgedb/test/test_distributors_moat_server.py +++ b/bridgedb/test/test_distributors_moat_server.py @@ -586,7 +586,7 @@ class MockCaptchaCheckResource(server.CaptchaCheckResource): """A mocked :class:`server.CaptchaCheckResource` whose ``getBridgeLines`` method returns no bridges. """ - def getBridgeLines(self, bridgeRequest): + def getBridgeLines(self, bridgeRequest, bridges): return list()
@@ -860,7 +860,8 @@ class CaptchaCheckResourceTests(unittest.TestCase): content = json.loads(encoded_content)['data'][0]
bridgeRequest = self.resource.createBridgeRequest('3.3.3.3', content) - bridgelines = self.resource.getBridgeLines(bridgeRequest) + bridges = self.resource.getBridges(bridgeRequest) + bridgelines = self.resource.getBridgeLines(bridgeRequest, bridges)
self.assertTrue(bridgelines)
@@ -869,7 +870,8 @@ class CaptchaCheckResourceTests(unittest.TestCase): request.client = requesthelper.IPv4Address('TCP', '3.3.3.3', 443)
bridgeRequest = self.resource.createBridgeRequest('3.3.3.3', None) - bridgelines = self.resource.getBridgeLines(bridgeRequest) + bridges = self.resource.getBridges(bridgeRequest) + bridgelines = self.resource.getBridgeLines(bridgeRequest, bridges)
self.assertFalse(bridgeRequest.isValid()) self.assertEqual(len(bridgelines), 0) diff --git a/bridgedb/test/test_metrics.py b/bridgedb/test/test_metrics.py index fcc9e4a..12b3ee1 100644 --- a/bridgedb/test/test_metrics.py +++ b/bridgedb/test/test_metrics.py @@ -18,13 +18,17 @@ functioning as expected. import io import json import os +import copy
from bridgedb import metrics +from bridgedb.test import util from bridgedb.test.https_helpers import DummyRequest from bridgedb.distributors.email.server import SMTPMessage +from bridgedb.distributors.https.server import HTTPSBridgeRequest from bridgedb.test.email_helpers import _createMailServerContext from bridgedb.test.email_helpers import _createConfig from bridgedb.distributors.moat import server +from bridgedb.bridges import Bridge
from twisted.trial import unittest from twisted.test import proto_helpers @@ -41,6 +45,7 @@ class StateTest(unittest.TestCase): type(metrics.HTTPSMetrics()).clear() type(metrics.EmailMetrics()).clear() type(metrics.MoatMetrics()).clear() + type(metrics.InternalMetrics()).clear()
metrics.setSupportedTransports({ 'obfs2': False, @@ -213,3 +218,130 @@ class StateTest(unittest.TestCase): self.assertTrue(metrics.isBridgeTypeSupported("obfs4")) self.assertTrue(metrics.isBridgeTypeSupported("vanilla")) self.assertFalse(metrics.isBridgeTypeSupported("xxx")) + + def test_bridge_handouts(self): + + metrix = metrics.InternalMetrics() + bridges = copy.deepcopy(util.generateFakeBridges()) + bridge1, bridge2, bridge3 = bridges[0:3] + m = metrix.hotMetrics + + br = HTTPSBridgeRequest() + br.withIPversion({"ipv6": "4"}) + br.isValid(True) + + # Record a number of distribution events for three separate bridges. + for i in range(10): + metrix.recordHandoutsPerBridge(br, [bridge1]) + for i in range(5): + metrix.recordHandoutsPerBridge(br, [bridge2]) + metrix.recordHandoutsPerBridge(br, [bridge3]) + + self.assertEqual(m["internal.handouts.unique-bridges"], 3) + self.assertEqual(m["internal.handouts.min"], 1) + self.assertEqual(m["internal.handouts.max"], 10) + self.assertEqual(m["internal.handouts.median"], 5) + + # Internal metrics must not be sanitized. + metrix.rotate() + lines = metrix.getMetrics() + self.assertIn("internal.handouts.unique-bridges 3", lines) + self.assertIn("internal.handouts.median 5", lines) + self.assertIn("internal.handouts.min 1", lines) + self.assertIn("internal.handouts.max 10", lines) + + def test_empty_responses(self): + + metrix = metrics.InternalMetrics() + + # Unlike all other internal metrics, empty responses are sanitized. + for i in range(10): + metrix.recordEmptyEmailResponse() + for i in range(11): + metrix.recordEmptyMoatResponse() + metrix.recordEmptyHTTPSResponse() + + metrix.rotate() + lines = metrix.getMetrics() + + self.assertEqual(len(lines), 3) + self.assertIn("internal.email.empty-response 10", lines) + self.assertIn("internal.moat.empty-response 20", lines) + self.assertIn("internal.https.empty-response 10", lines) + + def test_rings(self): + + metrix = metrics.InternalMetrics() + + # Empty parameters must not be recorded. + metrix.recordBridgesInHashring("", "", 20) + self.assertEqual(len(metrix.hotMetrics), 0) + + metrix.recordBridgesInHashring("https", "byIPv6-bySubring1of4", 20) + self.assertEqual(len(metrix.hotMetrics), 1) + self.assertEqual(list(metrix.hotMetrics.keys()), + ["internal.https.byipv6-bysubring1of4"]) + + def test_ipv4_ipv6_requests(self): + + metrix = metrics.InternalMetrics() + v6Req = HTTPSBridgeRequest() + v6Req.withIPversion({"ipv6": "4"}) + v4Req = HTTPSBridgeRequest() + v4Req.withIPversion({}) + + bridges = copy.deepcopy(util.generateFakeBridges()) + + for i in range(9): + metrix.recordHandoutsPerBridge(v6Req, [bridges[0]]) + metrix.recordHandoutsPerBridge(v6Req, [bridges[1]]) + + for i in range(11): + metrix.recordHandoutsPerBridge(v4Req, [bridges[0]]) + + metrix.rotate() + lines = metrix.getMetrics() + + self.assertIn("internal.handouts.ipv6 10", lines) + self.assertIn("internal.handouts.ipv4 20", lines) + + def test_handouts(self): + + metrix = metrics.InternalMetrics() + metrix.recordHandoutsPerBridge(None, None) + self.assertEqual(len(metrix.hotMetrics), 0) + + req = HTTPSBridgeRequest() + req.withIPversion({}) + req.isValid(True) + bridges = copy.deepcopy(util.generateFakeBridges()) + + metrix.recordHandoutsPerBridge(req, [bridges[0]]) + self.assertNotIn("internal.handouts.median", metrix.hotMetrics.keys()) + metrix.recordHandoutsPerBridge(req, [bridges[1]]) + self.assertNotIn("internal.handouts.median", metrix.hotMetrics.keys()) + metrix.recordHandoutsPerBridge(req, [bridges[2]]) + self.assertEqual(metrix.hotMetrics["internal.handouts.median"], 1) + + metrix.recordHandoutsPerBridge(req, [bridges[1]]) + metrix.recordHandoutsPerBridge(req, [bridges[2]]) + metrix.recordHandoutsPerBridge(req, [bridges[2]]) + self.assertEqual(metrix.hotMetrics["internal.handouts.min"], 1) + self.assertEqual(metrix.hotMetrics["internal.handouts.median"], 2) + self.assertEqual(metrix.hotMetrics["internal.handouts.max"], 3) + self.assertEqual(metrix.hotMetrics["internal.handouts.unique-bridges"], 3) + self.assertEqual(metrix.hotMetrics["internal.handouts.quartile1"], 1.5) + self.assertEqual(metrix.hotMetrics["internal.handouts.quartile3"], 2.5) + self.assertEqual(metrix.hotMetrics["internal.handouts.lower-whisker"], 1) + self.assertEqual(metrix.hotMetrics["internal.handouts.upper-whisker"], 3) + + def test_metrics_reset(self): + metrix = metrics.InternalMetrics() + req = HTTPSBridgeRequest() + req.withIPversion({}) + bridges = copy.deepcopy(util.generateFakeBridges()) + metrix.recordHandoutsPerBridge(req, [bridges[0]]) + + self.assertTrue(len(metrix.bridgeHandouts) > 0) + metrix.reset() + self.assertTrue(len(metrix.bridgeHandouts) == 0) diff --git a/doc/bridgedb-metrics-spec.txt b/doc/bridgedb-metrics-spec.txt index 14c38f9..aac838a 100644 --- a/doc/bridgedb-metrics-spec.txt +++ b/doc/bridgedb-metrics-spec.txt @@ -1,4 +1,4 @@ - BridgeDB metrics (version 1) + BridgeDB metrics (version 2)
BridgeDB exports usage metrics once every 24 hours. These metrics encode how many approximate successful/failed requests BridgeDB has seen @@ -33,42 +33,82 @@ file is formatted as follows: "bridgedb-metric-count" METRIC_KEY COUNT NL [Any number.]
- METRIC_KEY determines a metrics key, which consists of several - fields, separated by a period: - - DISTRIBUTION "." TRANSPORT "." CC/EMAIL "." "success" | "fail" "." RESERVED - - DISTRIBUTION is BridgeDB's distribution mechanism, which includes - "https", "email", and "moat". These distribution mechanisms may - change in the future. + METRIC_KEY specifies a metrics key, which consists of several fields, + separated by a period. These fields form a hierarchy. At the root of + the hierarchy are our three distribution mechanisms ("https", "email", + and "moat") and "internal", which represents BridgeDB-internal metrics. + The hierarchy is of the following form: + + * "https" + └─* TRANSPORT + └─* CC + └─* "success" | "fail" + └─* RESERVED + + * "email" + └─* TRANSPORT + └─* EMAIL + └─* "success" | "fail" + └─* RESERVED + + * "moat" + └─* TRANSPORT + └─* CC + └─* "success" | "fail" + └─* RESERVED + + * "internal" + ├─* "handouts" + │ ├─* "min" + │ ├─* "max" + │ ├─* "median" + │ ├─* "quartile1" + │ ├─* "quartile3" + │ ├─* "lower-whisker" + │ ├─* "upper-whisker" + │ ├─* "ipv4" + │ └─* "ipv6" + │ + └─* TRANSPORT + ├─* "empty-response" + └─* SUBRING + + Strings in between quotes (e.g., "handouts") are literals and show up in + the hierarchy as is. Upper-case strings (e.g., TRANSPORT) are + placeholders, which are explained below.
TRANSPORT refers to a pluggable transport protocol. This includes "obfs2", "obfs3", "obfs4", "scramblesuit", and "fte". These pluggable transports will change in the future.
- CC/EMAIL refers to a two-letter country code of the user's IP - address iff DISTRIBUTION is "moat" or "https"; or to an email - provider iff DISTRIBUTION is "email". We use two reserved country - codes, "??" and "zz". "??" denotes that we couldn't map an IP - address to its country, e.g., because our geolocation API was - unable to. "zz" denotes a proxy IP address, e.g., Tor exit - relays. The two allowed email providers are "gmail" and "riseup". + CC refers to a two-letter country code of the user's IP address. We use + two reserved country codes, "??" and "zz". "??" denotes that we couldn't + map an IP address to its country, e.g., because our geolocation API was + unable to. "zz" denotes a proxy IP address, e.g., Tor exit relays.
- The next field is either "success" or "fail", depending on if the - BridgeDB request was successful or not. A request is successful - if BridgeDB attempts to provide the user with bridges, even if - BridgeDB currently has no bridges available. A request has failed - if BridgeDB won't provide the user with bridges, for example, if - the user could not solve the CAPTCHA. + EMAIL refers to an email provider. The two currently-supported email + providers are "gmail" and "riseup".
- The field RESERVED is reserved for an anomaly score. It is - currently set to "none" and should be ignored by implementations. + The field RESERVED is reserved for an anomaly score. It is currently set + to "none" and should be ignored by implementations.
COUNT is the approximate number of user requests for the given METRIC_KEY. We round up the number of requests to the next - multiple of 10 to preserve some user privacy. + multiple of 10 to preserve some user privacy. Some metrics key are not + rounded up to the next multiple of ten because they are not sensitive. + + One label either takes on the value "success" or "fail", depending on if + the BridgeDB request was successful or not. A request is successful if + BridgeDB attempts to provide the user with bridges, even if BridgeDB + currently has no bridges available. A request has failed if BridgeDB + won't provide the user with bridges, for example, if the user could not + solve the CAPTCHA.
- Examples: + Here are several examples: bridgedb-metric-count https.scramblesuit.zz.fail.none 100 bridgedb-metric-count moat.obfs4.??.success.none 3550 bridgedb-metric-count email.fte.gmail.fail.none 10 + bridgedb-metric-count internal.handouts.ipv4 20 + bridgedb-metric-count internal.moat.empty-response 10 + bridgedb-metric-count internal.handouts.min 23 + bridgedb-metric-count internal.https.byipv6-bysubring1of4 40 diff --git a/requirements.txt b/requirements.txt index fdf1043..08edf9e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ qrcode==6.1 service_identity==18.1.0 stem==1.8.0 zope.interface==5.1.0 +numpy==1.18.5
tor-commits@lists.torproject.org