commit 39b6285b28e2826af09bce6a3563f0b1138eac7e Author: Philipp Winter phw@nymity.ch Date: Wed Sep 18 13:42:47 2019 -0700
Add specification for BridgeDB's metrics format.
We implemented BridgeDB's metrics in https://bugs.torproject.org/9316 but haven't specified its format until this patch.
This patch also makes our implementation consistent with our (slightly updated) specification. In particular:
* For naming consistency, we changed "bridgedb-stats-version" to "bridgedb-metrics-version" and "bridgedb-stats-end" to "bridgedb-metrics-end".
* For simplicity, we also changed our version from a major and minor number to a single number.
* Instead of appending to our metrics file, we now overwrite the file because our specification requires "bridgedb-metrics-end" and "bridgedb-metrics-version" to be there exactly once. --- CHANGELOG | 8 +++++ bridgedb/main.py | 2 +- bridgedb/metrics.py | 13 ++++---- bridgedb/test/test_metrics.py | 4 +-- doc/bridgedb-metrics-spec.txt | 74 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 91 insertions(+), 10 deletions(-)
diff --git a/CHANGELOG b/CHANGELOG index 06968d2..c2fca89 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +Changes in version A.B.C - YYYY-MM-DD + + * FIXES https://bugs.torproject.org/31780 + We implemented BridgeDB's metrics in #9316 but haven't specified its + format until now. In addition to adding a specification, this patch + also makes our implementation consistent with our (slightly updated) + specification. + Changes in version 0.8.2 - 2019-09-20
Updated translations for the following languages: diff --git a/bridgedb/main.py b/bridgedb/main.py index 94f4921..7c2df6d 100644 --- a/bridgedb/main.py +++ b/bridgedb/main.py @@ -85,7 +85,7 @@ def writeMetrics(filename, measurementInterval): logging.debug("Dumping metrics to file: '%s'" % filename)
try: - with open(filename, 'a') as fh: + with open(filename, 'w') as fh: metrics.export(fh, measurementInterval) except IOError as err: logging.error("Failed to write metrics to '%s': %s" % (filename, err)) diff --git a/bridgedb/metrics.py b/bridgedb/metrics.py index 4e1c880..5e14146 100644 --- a/bridgedb/metrics.py +++ b/bridgedb/metrics.py @@ -9,7 +9,7 @@ # :license: see LICENSE for licensing information # _____________________________________________________________________________
-"""API for keeping track of BridgeDB statistics, e.g., the demand for bridges +"""API for keeping track of BridgeDB metrics, e.g., the demand for bridges over time. """
@@ -53,9 +53,9 @@ SUBNET_CTR_PREFIX_LEN = 20 # All of the pluggable transports BridgeDB currently supports. SUPPORTED_TRANSPORTS = None
-# Major and minor version number for our statistics format. -METRICS_MAJOR_VERSION = 1 -METRICS_MINOR_VERSION = 0 +# Version number for our metrics format. We increment the version if our +# format changes. +METRICS_VERSION = 1
def setProxies(proxies): @@ -120,11 +120,10 @@ def export(fh, measurementInterval): logging.debug("Metrics module knows about %d proxies." % numProxies)
now = datetime.datetime.utcnow() - fh.write("bridgedb-stats-end %s (%d s)\n" % ( + fh.write("bridgedb-metrics-end %s (%d s)\n" % ( now.strftime("%Y-%m-%d %H:%M:%S"), measurementInterval)) - fh.write("bridgedb-stats-version %d.%d\n" % (METRICS_MAJOR_VERSION, - METRICS_MINOR_VERSION)) + fh.write("bridgedb-metrics-version %d\n" % METRICS_VERSION)
httpsLines = httpsMetrix.getMetrics() for line in httpsLines: diff --git a/bridgedb/test/test_metrics.py b/bridgedb/test/test_metrics.py index a870fc2..a27431c 100644 --- a/bridgedb/test/test_metrics.py +++ b/bridgedb/test/test_metrics.py @@ -110,8 +110,8 @@ class StateTest(unittest.TestCase): self.assertTrue(len(pseudo_fh.getvalue()) > 0)
lines = pseudo_fh.getvalue().split("\n") - self.assertTrue(lines[0].startswith("bridgedb-stats-end")) - self.assertTrue(lines[1].startswith("bridgedb-stats-version")) + self.assertTrue(lines[0].startswith("bridgedb-metrics-end")) + self.assertTrue(lines[1].startswith("bridgedb-metrics-version")) self.assertTrue(lines[2] == "bridgedb-metric-count https.obfs4.de.success.None 10")
diff --git a/doc/bridgedb-metrics-spec.txt b/doc/bridgedb-metrics-spec.txt new file mode 100644 index 0000000..14c38f9 --- /dev/null +++ b/doc/bridgedb-metrics-spec.txt @@ -0,0 +1,74 @@ + BridgeDB metrics (version 1) + +BridgeDB exports usage metrics once every 24 hours. These metrics +encode approximately how many successful/failed requests BridgeDB has seen +per distribution mechanism, per pluggable transport, per country code or +email provider. For example, one of these metrics lines can tell us +that over the last 24 hours, BridgeDB has seen between 21 and 30 +successful requests for obfs4 over moat from Zimbabwe. + +This section specifies the format of BridgeDB's metrics. Each metrics +file is formatted as follows: + + "bridgedb-metrics-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL + [At start, exactly once.] + + YYYY-MM-DD HH:MM:SS defines the end (in UTC) of the included + measurement interval of length NSEC seconds (86400 seconds by + default). + + Example: + bridgedb-metrics-end 2019-09-18 00:33:44 (86400 s) + + "bridgedb-metrics-version" VERSION NL + [Exactly once.] + + VERSION determines the version of the metrics format. As the + format changes over time, we will increment VERSION. The latest + version is 1 -- the first iteration of the metrics format. + + Example: + bridgedb-metrics-version 1 + + "bridgedb-metric-count" METRIC_KEY COUNT NL + [Any number.] + + METRIC_KEY determines a metrics key, which consists of several + fields, separated by a period: + + DISTRIBUTION "." TRANSPORT "." CC/EMAIL "." ("success" | "fail") "." RESERVED + + DISTRIBUTION is BridgeDB's distribution mechanism, which includes + "https", "email", and "moat". These distribution mechanisms may + change in the future. + + TRANSPORT refers to a pluggable transport protocol. This includes + "obfs2", "obfs3", "obfs4", "scramblesuit", and "fte". These + pluggable transports will change in the future.
+ + CC/EMAIL refers to a two-letter country code of the user's IP + address iff DISTRIBUTION is "moat" or "https"; or to an email + provider iff DISTRIBUTION is "email". We use two reserved country + codes, "??" and "zz". "??" denotes that we couldn't map an IP + address to its country, e.g., because our geolocation API was + unable to do so. "zz" denotes a proxy IP address, e.g., Tor exit + relays. The two allowed email providers are "gmail" and "riseup". + + The next field is either "success" or "fail", depending on whether the + BridgeDB request was successful or not. A request is successful + if BridgeDB attempts to provide the user with bridges, even if + BridgeDB currently has no bridges available. A request has failed + if BridgeDB won't provide the user with bridges, for example, if + the user could not solve the CAPTCHA. + + The field RESERVED is reserved for an anomaly score. It is + currently set to "none" and should be ignored by implementations. + + COUNT is the approximate number of user requests for the given + METRIC_KEY. We round up the number of requests to the next + multiple of 10 to preserve some user privacy. + + Examples: + bridgedb-metric-count https.scramblesuit.zz.fail.none 100 + bridgedb-metric-count moat.obfs4.??.success.none 3550 + bridgedb-metric-count email.fte.gmail.fail.none 10