[metrics-web/master] Add bridge descriptors specification.

commit adba0625df154720be6e36c699b336ff588a5179 Author: Karsten Loesing <karsten.loesing@gmx.net> Date: Sat Jul 22 14:48:31 2017 +0200 Add bridge descriptors specification. Implements #22827. --- .../torproject/metrics/web/DataSourceServlet.java | 43 + website/src/main/resources/etc/web.xml | 11 + website/src/main/resources/spec/README.txt | 15 + .../src/main/resources/spec/bridge-descriptors.xml | 700 ++++++++++++++++ website/src/main/resources/spec/convert.awk | 46 ++ website/src/main/resources/spec/convert.sh | 6 + .../resources/web/WEB-INF/bridge-descriptors.jsp | 913 +++++++++++++++++++++ 7 files changed, 1734 insertions(+) diff --git a/website/src/main/java/org/torproject/metrics/web/DataSourceServlet.java b/website/src/main/java/org/torproject/metrics/web/DataSourceServlet.java new file mode 100644 index 0000000..f6605c1 --- /dev/null +++ b/website/src/main/java/org/torproject/metrics/web/DataSourceServlet.java @@ -0,0 +1,43 @@ +/* Copyright 2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.metrics.web; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +public class DataSourceServlet extends AnyServlet { + + private static final long serialVersionUID = -8220752089313109128L; + + private Map<String, String[]> specFiles = new HashMap<>(); + + @Override + public void init() throws ServletException { + super.init(); + this.specFiles.put("/bridge-descriptors.html", + new String[] { "/bridge-descriptors.jsp", "Tor Bridge Descriptors" }); + } + + @Override + public void doGet(HttpServletRequest request, + HttpServletResponse response) throws IOException, ServletException { + String requestedPage = request.getRequestURI(); + for (Map.Entry<String, String[]> specFile : this.specFiles.entrySet()) { + if (requestedPage.endsWith(specFile.getKey())) { + 
request.setAttribute("categories", this.categories); + request.setAttribute("breadcrumb", specFile.getValue()[1]); + request.getRequestDispatcher("WEB-INF" + specFile.getValue()[0]) + .forward(request, response); + return; + } + } + response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + } +} + diff --git a/website/src/main/resources/etc/web.xml b/website/src/main/resources/etc/web.xml index d19777f..38a30bb 100644 --- a/website/src/main/resources/etc/web.xml +++ b/website/src/main/resources/etc/web.xml @@ -284,6 +284,17 @@ </servlet-mapping> <servlet> + <servlet-name>DataSourceServlet</servlet-name> + <servlet-class> + org.torproject.metrics.web.DataSourceServlet + </servlet-class> + </servlet> + <servlet-mapping> + <servlet-name>DataSourceServlet</servlet-name> + <url-pattern>/bridge-descriptors.html</url-pattern> + </servlet-mapping> + + <servlet> <servlet-name>CollecTorServlet</servlet-name> <servlet-class> org.torproject.metrics.web.CollecTorServlet diff --git a/website/src/main/resources/spec/README.txt b/website/src/main/resources/spec/README.txt new file mode 100644 index 0000000..652eb7b --- /dev/null +++ b/website/src/main/resources/spec/README.txt @@ -0,0 +1,15 @@ +This directory contains data source specifications. + +The notation used in these documents is still less formal than it could be. +Maybe we can use ABNF which is even supported to some extent by xml2rfc. But +even then we may want to introduce a set of (standard) operations for +binary/string/crypto operations. + +It might be useful to focus on formats that can easily be processed by grammar +based parser generators like ANTLR. We tried that and spent a day or two on +ANTLR, and then gave up, figuring there are lower-hanging fruit on this +specification tree. Maybe later. + +We might even be able to reuse that set in other specifications like +dir-spec.txt or the yet-to-be-written original bridge descriptors specification. 
+ diff --git a/website/src/main/resources/spec/bridge-descriptors.xml b/website/src/main/resources/spec/bridge-descriptors.xml new file mode 100644 index 0000000..f30056f --- /dev/null +++ b/website/src/main/resources/spec/bridge-descriptors.xml @@ -0,0 +1,700 @@ +<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?> +<!-- Make this a private "Internet Draft". --> +<?rfc private="sanitized-bridge-descriptors"?> +<!-- Use compact format without horizontal rules between sections. --> +<?rfc compact="yes"?> +<!-- Remove authorship information. --> +<?rfc authorship="no"?> +<!-- Remove index. --> +<?rfc-ext include-index="no" ?> +<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [ + <!ENTITY nbsp " "> + <!ENTITY thinsp " "> + <!ENTITY nbhy "‑"> + <!ENTITY ndash "–"> + <!ENTITY mdash "—"> +]> +<rfc xmlns:x="http://purl.org/net/xml2rfc/ext"> + <front> + <title>Tor bridge descriptors</title> + </front> + <middle> + <section title="Purpose of this document"> + <t>Bridges and the bridge authority publish bridge descriptors that are + used by censored clients to connect to the network. + We aim for publishing all network directory data for informational and + statistical purposes. + We cannot, however, make bridge descriptors publicly available in the same + way as we publish relay descriptors, because that would defeat the purpose + of making bridges hard to enumerate for censors. + We therefore sanitize bridge descriptors by removing all potentially + identifying information and then publish sanitized versions.</t> + <t>The purpose of this document is to specify the document format of + sanitized bridge descriptors. + These descriptors are based on original, non-sanitized bridge descriptors + after sanitizing any parts that would make it easier to enumerate + bridges. 
+ Unless stated otherwise, the + <eref target="https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt#n193"> + document meta-format</eref> of the Tor directory protocol, version 3 + is used.</t> + </section> + <section title="Sanitizing potentially sensitive descriptor items"> + <t>The following sanitizing steps are applied to original, non-sanitized + bridge descriptors.</t> + <section anchor="type-annotation" title="Prefix with @type annotation"> + <t>"@type" SP DescriptorType SP Version + <list> + <t>DescriptorType is a fixed string that identifies the sanitized + bridge descriptor type. + Known strings are listed in the sections below.</t> + <t>Version is set by the sanitizer to indicate changes in the + sanitizing process. + The version string consists of a major version number for + backward-incompatible changes and a minor version number for + backward-compatible changes.</t> + </list> + </t> + </section> + <section anchor="fingerprint" title="Replace RSA fingerprints"> + <t>HashedFingerprint = SHA1(Fingerprint) + <list> + <t>Fingerprint is the decoded binary representation of the SHA-1 + hash of an ASN.1 encoded RSA public key.</t> + <t>The (non-sanitized) Fingerprint of a bridge could, in theory, be + used quite easily to uniquely identify the bridge. + However, clients can request a bridge's current descriptor by + sending its Fingerprint to the bridge authority. + This is a feature to make bridges on dynamic IP addresses more + useful, but it would also allow for trivial enumeration of bridges. + Therefore, the original Fingerprint (and anything that could be used + to derive it) is removed from descriptors and replaced with + something else that can be used to uniquely identify the bridge. 
+ The approach taken here is to replace the Fingerprint with its SHA-1 + hash.</t> + </list> + </t> + </section> + <section anchor="master-key-ed25519" title="Replace ed25519 master keys"> + <t>HashedMasterKeyEd25519 = SHA256(MasterKeyEd25519) + <list> + <t>MasterKeyEd25519 is the decoded binary representation of an + ed25519 master key.</t> + <t>Similar to (non-sanitized) <xref target="fingerprint">RSA + fingerprints</xref>, ed25519 master keys could be used to uniquely + identify bridges and to request a current descriptor from the + bridge authority. + That is why they are replaced with their SHA-256 hashes. + In cases where a descriptor only contains an ed25519 certificate and + no ed25519 master key, the (non-sanitized) master key is first + extracted from the certificate before sanitizing it.</t> + </list> + </t> + </section> + <section anchor="crypto" + title="Remove public keys, certificates, and signatures"> + <t>[Removed.] + <list> + <t>Some of the public keys and certificates could be used to derive + key fingerprints, hence they need to be replaced or removed. + However, replacing them seemed unnecessary and overly complex with + respect to keeping state on the sanitizing host. + That is why most public keys, certificates, and signatures are + simply removed in the sanitizing process.</t> + </list> + </t> + </section> + <section anchor="ipv4-address" title="Replace IPv4 addresses"> + <t>SanitizedIpv4Address = "10." 
| KeyedHash</t> + <t>KeyedHash = SHA256(Ipv4Address | Fingerprint | Secret)[:3] + <list> + <t>Ipv4Address is the 4-byte long binary representation of the + (non-sanitized) IPv4 address.</t> + <t>Fingerprint is the 20-byte long binary representation of the + (non-sanitized) long-term identity fingerprint.</t> + <t>Secret is a 31-byte long secure random string that changes once + per month for all descriptors and statuses published in that + month.</t> + <t>The [:3] operator picks the first three bytes from the left of the + result and encodes it as three dot-separated decimal numbers. + </t> + <t>Sanitizing IPv4 addresses is obviously required to prevent + enumeration of bridges. + The approach taken here is to replace IPv4 addresses with + syntactically valid addresses in the private IPv4 address space 10/8 + based on a keyed hash function that produces the same sanitized + address for a given bridge, IPv4 address, and month.</t> + </list> + </t> + </section> + <section anchor="ipv6-address" title="Replace IPv6 addresses"> + <t>SanitizedIpv6Address = "[fd9f:2e19:3bcf::" | KeyedHash | "]"</t> + <t>KeyedHash = SHA256(Ipv6Address | Fingerprint | Secret)[:3] + <list> + <t>Ipv6Address is the 16-byte long binary representation of the + (non-sanitized) IPv6 address.</t> + <t>Fingerprint is the 20-byte long binary representation of the + (non-sanitized) long-term identity fingerprint.</t> + <t>Secret is a 19-byte long secure random string that changes once + per month for all descriptors and statuses published in that + month.</t> + <t>The [:3] operator picks the first three bytes from the left of the + result and encodes it as two lower-case hexadecimal digits, a + colon, and another four lower-case hexadecimal digits. 
+ </t> + <t>Similar to <xref target="ipv4-address">IPv4 addresses</xref>, + IPv6 addresses are replaced with syntactically valid addresses in the + address range [fd9f:2e19:3bcf::/116] based on a keyed hash function + that produces the same sanitized address for a given bridge, IPv6 + address, and month.</t> + </list> + </t> + </section> + <section anchor="tcp-port" title="Replace TCP ports"> + <t>SanitizedPort = KeyedHash / 2^2 + 2^15 + 2^14</t> + <t>KeyedHash = SHA256(Port | Fingerprint | Secret)[:2] + <list> + <t>Port is the 2-byte long binary representation of the TCP + port.</t> + <t>Fingerprint is the 20-byte long binary representation of the + bridge's long-term identity fingerprint.</t> + <t>Secret is a 33-byte long secure random string that changes once + per month for all descriptors and statuses published in that + month.</t> + <t>The [:2] operator means that we pick the first two bytes from the + left of the result, and the /, ^, and + operators are all integer + operators. + </t> + <t>TCP ports that are 0 in the original are left unchanged.</t> + <t>It may be less obvious that TCP ports need to be sanitized, but an + unusual TCP port used by a high-value bridge might still stand out and + provide yet another way to locate and block the bridge. + Therefore, each non-zero TCP port is replaced with a port number in + the range from 49152 to 65535, which is reserved for private services, + based on a keyed hash function that produces the same sanitized port + for a given bridge, TCP port, and month.</t> + </list> + </t> + </section> + <section anchor="contact" title="Remove contact information"> + <t>SanitizedContact = "somebody" + <list> + <t>If there is contact information in a descriptor, it is replaced by + the constant string "somebody". 
+ (Note that this sanitizing step is subject to change and may be + changed in a future version towards retaining the original contact + information.)</t> + </list> + </t> + </section> + <section anchor="transport" + title="Remove extraneous transport information"> + <t>[Removed.] + <list> + <t>Bridges may provide transports in addition to the OR protocol and + include information about these transports for the bridge + distribution service. + In that case, any IP addresses, TCP ports, or additional arguments + are removed, only leaving in the supported transport names.</t> + </list> + </t> + </section> + <section anchor="replaced-digest" + title="Replace digests in referencing descriptors"> + <t>SanitizedSha1Digest = SHA1(Sha1Digest)</t> + <t>SanitizedSha256Digest = SHA256(Sha256Digest) + <list> + <t>Sha1Digest is the 20-byte long binary representation of a + descriptor's SHA-1 digest.</t> + <t>Sha256Digest is the 32-byte long binary representation of a + descriptor's SHA-256 digest.</t> + <t>Some descriptors reference other descriptors by their digest. + However, these digests are also somewhat sensitive, because it + shouldn't be possible to reconstruct the original descriptor with + help of these digests. + That is why digests in referencing descriptors are replaced with + either the hex-encoded SHA-1 hash or the base64-encoded SHA-256 hash + of the original digest, depending on which hash algorithm was used + to generate the original digest. 
+ The resulting digest string in a referencing descriptor can then be + matched to an <xref target="appended-digest">appended digest</xref> + in a referenced descriptor.</t> + </list> + </t> + </section> + <section anchor="appended-digest" + title="Append digests to referenced descriptors"> + <t>SanitizedSha1Digest = SHA1(Sha1Digest)</t> + <t>SanitizedSha256Digest = SHA256(Sha256Digest) + <list> + <t>Sha1Digest is the 20-byte long binary representation of a + descriptor's SHA-1 digest.</t> + <t>Sha256Digest is the 32-byte long binary representation of a + descriptor's SHA-256 digest.</t> + <t>As stated <xref target="replaced-digest">above</xref>, some + descriptors are referenced by others by their digest. + But in contrast to non-sanitized descriptors, it's neither possible + to compute the digest of a sanitized descriptor nor is it desirable + to include the original digest. + The reason is that it shouldn't be possible to reconstruct the + original descriptor with help of the original digest. + That is why descriptors that are typically referenced from others + may contain additional lines with the hex-encoded SHA-1 hash or the + base64-encoded SHA-256 hash of the original digest, depending on + which hash algorithm would have been used to generate the original + digest. + The resulting digest string can then be matched to a + <xref target="replaced-digest">sanitized digest</xref> in a + referencing descriptor.</t> + </list> + </t> + </section> + </section> + <section title="Server descriptor document format"> + <t>The document format of sanitized bridge server descriptors resembles + the document format of (non-sanitized) server descriptors as much as + possible. 
+ Also refer to the + <eref target="https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt"> + Tor directory protocol, version 3 specification</eref>, as the following + sections only specify items that differ from their non-sanitized counterparts.</t> + <section title="Annotations"> + <t>The bridge authority may prefix descriptors with one or more + annotation lines containing metadata, and the sanitizer may add + annotation lines with metadata about the sanitizing process.</t> + <t>"@purpose" SP Purpose NL + <list> + <t>[Removed.]</t> + </list> + </t> + <t>"@type" SP "bridge-server-descriptor" SP Version + <list> + <t>[Exactly once.]</t> + <t>Version can be one of the following numbers: + <list style="symbols"> + <t>"1.0" was the first version. + There was supposed to be a newer version indicating added + "ntor-onion-key" lines, but due to a mistake only the version + number of sanitized bridge extra-info descriptors was raised. + As a result, there may be sanitized bridge server descriptors + with version 1.0 with and without those lines.</t> + <t>"1.1" added "master-key-ed25519" and "router-digest-sha256" + lines to server descriptors published by bridges using an + ed25519 master key.</t> + <t>"1.2" introduced sanitized TCP ports.</t> + </list> + </t> + </list> + </t> + </section> + <section title="Descriptor body"> + <t>The body of a sanitized bridge server descriptor contains several + sanitized items as specified in the following:</t> + <t>"router" SP Nickname SP SanitizedAddress SP SanitizedORPort SP + SOCKSPort SP SanitizedDirPort NL + <list> + <t>[At start, exactly once.]</t> + <t>Nickname is the bridge's original, unchanged nickname.</t> + <t>SanitizedAddress is the bridge's + <xref target="ipv4-address">sanitized IP address</xref>.</t> + <t>SanitizedORPort is the bridge's + <xref target="tcp-port">sanitized OR port</xref> (since version 1.2) + or the original OR port (until version 1.1).</t> + <t>SOCKSPort is deprecated and always 0, as in the 
original, + non-sanitized server descriptor.</t> + <t>SanitizedDirPort is the bridge's + <xref target="tcp-port">sanitized directory port</xref> (since + version 1.2) or the original directory port (until version 1.1).</t> + </list> + </t> + <t>"or-address" SP SanitizedAddress ":" SanitizedPort NL + <list> + <t>[Any number.]</t> + <t>SanitizedAddress is either an additional + <xref target="ipv4-address">sanitized IPv4 address</xref> or + <xref target="ipv6-address">sanitized IPv6 address</xref>.</t> + <t>SanitizedPort is an additional + <xref target="tcp-port">sanitized OR port</xref> (since version 1.2) + or original OR port (until version 1.1).</t> + </list> + </t> + <t>"identity-ed25519" NL CertificateBlock NL + <list> + <t>[Removed.]</t> + </list> + </t> + <t>"master-key-ed25519" SP SanitizedMasterKey NL + <list> + <t>[At most once.]</t> + <t>SanitizedMasterKey is the bridge's + <xref target="master-key-ed25519">sanitized ed25519 master + key</xref>. + (Introduced in version 1.1, not present in earlier versions.)</t> + </list> + </t> + <t>"fingerprint" SP SanitizedFingerprint NL + <list> + <t>[At most once.]</t> + <t>SanitizedFingerprint is the bridge's + <xref target="fingerprint">sanitized fingerprint</xref>, formatted + with a single SP after every 4 characters.</t> + </list> + </t> + <t>"contact" SP SanitizedContact NL + <list> + <t>[At most once.]</t> + <t>SanitizedContact is the bridge's <xref target="contact">sanitized + contact information</xref>.</t> + </list> + </t> + <t>"reject" SP ExitPattern NL + <list> + <t>[Any number.]</t> + <t>ExitPattern contains the bridge's + <xref target="ipv4-address">sanitized IPv4 address</xref>, if the + original line contained the bridge's primary IP address, and is + otherwise unchanged. 
+ (Note that "accept" lines are exempt from this sanitizing step, + which doesn't really make sense and which might change in the + future.)</t> + </list> + </t> + <t>"extra-info-digest" SP SanitizedSha1Digest [SP + SanitizedSha256Digest] NL + <list> + <t>[At most once.]</t> + <t>SanitizedSha1Digest is the + <xref target="replaced-digest">sanitized SHA-1 digest</xref> of the + corresponding extra-info descriptor.</t> + <t>SanitizedSha256Digest is the + <xref target="replaced-digest">sanitized SHA-256 digest</xref> of the + corresponding extra-info descriptor and is only included if the + original line contained a SHA-256 digest.</t> + </list> + </t> + <t>"family" (SP Name)* NL + <list> + <t>[At most once.]</t> + <t>Name is either the <xref target="fingerprint">sanitized + fingerprint</xref> or unchanged nickname of another relay or bridge + that is purportedly in the same family as this bridge.</t> + </list> + </t> + <t>"onion-key" NL PublicKeyBlock NL + <list> + <t>[Removed.]</t> + <t>The bridge's medium-term RSA key is removed together with most other + <xref target="crypto">public keys, certificates, and + signatures</xref>.</t> + </list> + </t> + <t>"signing-key" NL PublicKeyBlock NL + <list> + <t>[Removed.]</t> + <t>The bridge's long-term RSA key is removed together with most other + <xref target="crypto">public keys, certificates, and + signatures</xref>.</t> + </list> + </t> + <t>"onion-key-crosscert" NL SignatureBlock NL + <list> + <t>[Removed.]</t> + <t>This cross signature created with the onion-key is removed together + with most other <xref target="crypto">public keys, certificates, and + signatures</xref>.</t> + </list> + </t> + <t>"ntor-onion-key-crosscert" SP PublicKey NL + <list> + <t>[Removed.]</t> + <t>This cross signature created with the ntor-onion-key is removed + together with most other <xref target="crypto">public keys, + certificates, and signatures</xref>.</t> + </list> + </t> + <t>"ntor-onion-key" SP NtorOnionKey NL + <list> + <t>[At most 
once.]</t> + <t>The curve25519 public key used for the ntor circuit extended + handshake is included without modification in most sanitized + descriptors. + This key was originally missing in version 1.0, and there was + supposed to be a newer version indicating added "ntor-onion-key" + lines. + But due to a mistake only the version number of sanitized bridge + extra-info descriptors was raised. + As a result, there are sanitized bridge server descriptors with + version 1.0 with and without this line. + All subsequent versions contain this line.</t> + </list> + </t> + <t>"router-sig-ed25519" SP Signature NL + <list> + <t>[Removed.]</t> + <t>The ed25519 signature is removed together with most other + <xref target="crypto">public keys, certificates, and + signatures</xref>.</t> + </list> + </t> + <t>"router-signature" NL SignatureBlock NL + <list> + <t>[Removed.]</t> + <t>The RSA signature is removed together with most other + <xref target="crypto">public keys, certificates, and + signatures</xref>.</t> + </list> + </t> + <t>"router-digest-sha256" SP SanitizedSha256Digest NL + <list> + <t>[At most once.]</t> + <t>SanitizedSha256Digest is the + <xref target="appended-digest">sanitized SHA-256 digest</xref> of + this descriptor and is only included if the original descriptor + contained an ed25519 signature of the descriptor's SHA-256 digest. + (Introduced in version 1.1, not present in earlier versions.)</t> + </list> + </t> + <t>"router-digest" SP SanitizedSha1Digest NL + <list> + <t>[At end, exactly once.]</t> + <t>SanitizedSha1Digest is the + <xref target="appended-digest">sanitized SHA-1 digest</xref> of + this descriptor.</t> + </list> + </t> + </section> + </section> + <section title="Extra-info descriptor document format"> + <t>The document format of sanitized extra-info descriptors follows the + same approach as sanitized server descriptors by changing as few items as + possible in their original, non-sanitized counterpart. 
+ The original format is specified in the + <eref target="https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt">Tor + directory protocol, version 3</eref>. + Only the changes to that specification are listed below.</t> + <section title="Annotations"> + <t>"@type" SP "bridge-extra-info" SP Version + <list> + <t>[Exactly once.]</t> + <t>Version can be one of the following numbers: + <list style="symbols"> + <t>"1.0" was the first version.</t> + <t>"1.1" added sanitized "transport" lines.</t> + <t>"1.2" was supposed to indicate added "ntor-onion-key" lines, + but those changes only affected bridge server descriptors, not + extra-info descriptors. + So, nothing has changed as compared to version 1.1.</t> + <t>"1.3" added "master-key-ed25519" and "router-digest-sha256" + lines to extra-info descriptors published by bridges using an + ed25519 master key.</t> + </list> + </t> + </list> + </t> + </section> + <section title="Descriptor body"> + <t>Several items in the extra-info descriptor body are changed or + removed as compared to original, non-sanitized descriptors:</t> + <t>"extra-info" SP Nickname SP SanitizedFingerprint NL + <list> + <t>[At start, exactly once.]</t> + <t>Nickname is the bridge's original, unchanged nickname.</t> + <t>SanitizedFingerprint is the bridge's + <xref target="fingerprint">sanitized fingerprint</xref>.</t> + </list> + </t> + <t>"transport" SP TransportName NL + <list> + <t>[Any number.]</t> + <t>TransportName is the transport name as found in the original + line.</t> + <t>Any further details about this + <xref target="transport">transport</xref>, including any IP + addresses, TCP ports, or additional arguments are removed, only + leaving in the supported transport names. + (Introduced in version 1.1, not present in earlier versions.)</t> + </list> + </t> + <t>"transport-info" SP TransportInfo NL + <list> + <t>[Removed.]</t> + <t>Any lines containing <xref target="transport">extraneous transport + information</xref> are removed. 
+ (Note that these lines are not even specified for original, + non-sanitized descriptors.)</t> + </list> + </t> + <t>"identity-ed25519" NL CertificateBlock NL + <list> + <t>[Removed.]</t> + <t>The ed25519 certificate is removed together with most other + <xref target="crypto">public keys, certificates, and + signatures</xref>.</t> + </list> + </t> + <t>"master-key-ed25519" SP SanitizedMasterKey NL + <list> + <t>[At most once.]</t> + <t>SanitizedMasterKey is the bridge's + <xref target="master-key-ed25519">sanitized ed25519 master + key</xref>. + (Introduced in version 1.3, not present in earlier versions.)</t> + </list> + </t> + <t>"router-sig-ed25519" SP Signature NL + <list> + <t>[Removed.]</t> + <t>The ed25519 signature is removed together with most other + <xref target="crypto">public keys, certificates, and + signatures</xref>.</t> + </list> + </t> + <t>"router-signature" NL SignatureBlock NL + <list> + <t>[Removed.]</t> + <t>The RSA signature is removed together with most other + <xref target="crypto">public keys, certificates, and + signatures</xref>.</t> + </list> + </t> + <t>"router-digest-sha256" SP SanitizedSha256Digest NL + <list> + <t>[At most once.]</t> + <t>SanitizedSha256Digest is the + <xref target="appended-digest">sanitized SHA-256 digest</xref> of + this descriptor and is only included if the original descriptor + contained an ed25519 signature of the descriptor's SHA-256 digest. 
+ (Introduced in version 1.3, not present in earlier versions.)</t> + </list> + </t> + <t>"router-digest" SP SanitizedSha1Digest NL + <list> + <t>[At end, exactly once.]</t> + <t>SanitizedSha1Digest is the + <xref target="appended-digest">sanitized SHA-1 digest</xref> of + this descriptor.</t> + </list> + </t> + </section> + </section> + <section title="Network status document format"> + <t>The document format of bridge network statuses is loosely based on the + network status format specified in the + <eref target="https://gitweb.torproject.org/torspec.git/tree/attic/dir-spec-v2.txt">Tor + directory protocol, version 2</eref>. + However, the preamble of bridge network statuses contains far fewer items + than that of (relay) network statuses, and the ones that are similar + differ in some of the details. + That's why all preamble lines that exist in sanitized bridge network + statuses are specified below, not just the ones that differ.</t> + <section title="Annotations"> + <t>Sanitized bridge network statuses start with one or more + annotations:</t> + <t>"@type" SP "bridge-network-status" SP Version NL + <list> + <t>[Exactly once.]</t> + <t>Version can be one of the following numbers: + <list style="symbols"> + <t>"1.0" was the first version.</t> + <t>"1.1" introduced sanitized TCP ports.</t> + <t>"1.2" introduced the "fingerprint" line, containing the + fingerprint of the bridge authority which produced the document, + to the header.</t> + </list> + </t> + </list> + </t> + </section> + <section title="Preamble"> + <t>The preamble contains zero or more of the following items in no + predefined order:</t> + <t>"published" SP Publication NL + <list> + <t>[Exactly once.]</t> + <t>Publication is the publication time for this document, which is + left unchanged in the sanitizing process.</t> + </list> + </t> + <t>"flag-thresholds" SP Thresholds NL + <list> + <t>[At most once.]</t> + <t>Thresholds are internal performance thresholds that the bridge + directory authority 
had at the moment it was forming a status, which + are left unchanged in the sanitizing process. + This item was first introduced in + <eref target="https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt">Tor + directory protocol, version 3</eref>.</t> + </list> + </t> + <t>"fingerprint" SP Fingerprint NL + <list> + <t>[At most once.]</t> + <t>Fingerprint is the (non-sanitized) SHA-1 hash of the bridge + authority's long-term signing key, encoded as 40 upper-case + hexadecimal characters, which is either added or left unchanged in + the sanitizing process. + (Introduced in version 1.2, not present in earlier versions.)</t> + </list> + </t> + </section> + <section title="Router entries"> + <t>For each bridge, there is one router entry containing one or more + items. + Similar to the preamble specification, the following specification + lists all lines known in sanitized bridge network statuses, including + those that are left unchanged in the sanitizing process.</t> + <t>"r" SP Nickname SP SanitizedFingerprint SP SanitizedSha1Digest SP + Publication SP SanitizedAddress SP SanitizedORPort SP SanitizedDirPort + NL + <list> + <t>[At start, exactly once.]</t> + <t>Nickname is the bridge's original, unchanged nickname.</t> + <t>SanitizedFingerprint is the bridge's + <xref target="fingerprint">sanitized fingerprint</xref>.</t> + <t>SanitizedSha1Digest is the + <xref target="replaced-digest">sanitized SHA-1 digest</xref> of + the corresponding server descriptor.</t> + <t>Publication is the publication time for the corresponding + server descriptor, which is left unchanged in the sanitizing + process.</t> + <t>SanitizedAddress is the bridge's + <xref target="ipv4-address">sanitized IP address</xref>.</t> + <t>SanitizedORPort is the bridge's + <xref target="tcp-port">sanitized OR port</xref> (since version 1.1) + or the original OR port (until version 1.0).</t> + <t>SanitizedDirPort is the bridge's + <xref target="tcp-port">sanitized directory port</xref> (since + 
version 1.1) or the original directory port (until version 1.0).</t> + </list> + </t> + <t>"a" SP SanitizedAddress ":" SanitizedPort NL + <list> + <t>[Any number.]</t> + <t>SanitizedAddress is either an additional + <xref target="ipv4-address">sanitized IPv4 address</xref> or + <xref target="ipv6-address">sanitized IPv6 address</xref>.</t> + <t>SanitizedPort is an additional <xref target="tcp-port">sanitized + OR port</xref>.</t> + </list> + </t> + <t>"s" ... NL + <list> + <t>[Unchanged.]</t> + </list> + </t> + <t>"w" ... NL + <list> + <t>[Unchanged.]</t> + </list> + </t> + <t>"p" ... NL + <list> + <t>[Unchanged.]</t> + </list> + </t> + </section> + </section> + </middle> + <back/> +</rfc> + diff --git a/website/src/main/resources/spec/convert.awk b/website/src/main/resources/spec/convert.awk new file mode 100644 index 0000000..ab26121 --- /dev/null +++ b/website/src/main/resources/spec/convert.awk @@ -0,0 +1,46 @@ +#!/usr/bin/awk -f + +# Skip everything before <body> including generated header, script, and style. +BEGIN { + insidebody = 0; +} + +# Skip any lines after </body>, and include our footer. +/<\/body>/ { + insidebody = 2 + printf("<jsp:include page=\"bottom.jsp\"/>\n") +} + +# Prepend <header> and <section> with <div class="container">. +/<(header|section).*>/ { + printf("<div class=\"container\">\n") +} + +# Copy over all lines between <body> and </body> (exclusive). +{ + if (insidebody == 1) { + print + } +} + +# Append </div> to </header> and </section>. +/<\/(header|section)>/ { + printf("</div> <!-- container -->\n") +} + +# Start copying at <body>, but first include our header. 
+/<body>/ { + insidebody = 1; + printf("<jsp:include page=\"top.jsp\">\n") + printf("<jsp:param name=\"pageTitle\" value=\"Sources – Tor Metrics\"/>\n") + printf("<jsp:param name=\"navActive\" value=\"Sources\"/>\n") + printf("</jsp:include>\n") + printf("<div class=\"container\">\n") + printf("<ul class=\"breadcrumb\">\n") + printf("<li><a href=\"/\">Home</a></li>\n") + printf("<li><a href=\"sources.html\">Sources</a></li>\n") + printf("<li class=\"active\">${breadcrumb}</li>\n") + printf("</ul>\n") + printf("</div>\n") +} + diff --git a/website/src/main/resources/spec/convert.sh b/website/src/main/resources/spec/convert.sh new file mode 100755 index 0000000..5b32f9a --- /dev/null +++ b/website/src/main/resources/spec/convert.sh @@ -0,0 +1,6 @@ +#!/bin/bash +for specfile in "bridge-descriptors"; do + saxon-xslt $specfile.xml rfc2629.xslt xml2rfc-topblock=no | \ + tidy -q | awk -f convert.awk > ../web/WEB-INF/$specfile.jsp +done + diff --git a/website/src/main/resources/web/WEB-INF/bridge-descriptors.jsp b/website/src/main/resources/web/WEB-INF/bridge-descriptors.jsp new file mode 100644 index 0000000..db7aaa2 --- /dev/null +++ b/website/src/main/resources/web/WEB-INF/bridge-descriptors.jsp @@ -0,0 +1,913 @@ +<jsp:include page="top.jsp"> +<jsp:param name="pageTitle" value="Sources – Tor Metrics"/> +<jsp:param name="navActive" value="Sources"/> +</jsp:include> +<div class="container"> +<ul class="breadcrumb"> +<li><a href="/">Home</a></li> +<li><a href="sources.html">Sources</a></li> +<li class="active">${breadcrumb}</li> +</ul> +</div> +<div class="container"> +<header> +<div id="rfc.title"> +<h1>Tor bridge descriptors</h1> +</div> +</header> +</div> <!-- container --> +<div class="container"> +<section id="n-purpose-of-this-document"> +<h2 id="rfc.section.1" class="np"><a href= +"#rfc.section.1">1.</a> <a href= +"#n-purpose-of-this-document">Purpose of this document</a></h2> +<div id="rfc.section.1.p.1"> +<p>Bridges and the bridge authority publish bridge 
descriptors that +are used by censored clients to connect to the network. We aim for +publishing all network directory data for informational and +statistical purposes. We cannot, however, make bridge descriptors +publicly available in the same way as we publish relay descriptors, +because that would defeat the purpose of making bridges hard to +enumerate for censors. We therefore sanitize bridge descriptors by +removing all potentially identifying information and then publish +sanitized versions.</p> +</div> +<div id="rfc.section.1.p.2"> +<p>The purpose of this document is to specify the document format +of sanitized bridge descriptors. These descriptors are based on +original, non-sanitized bridge descriptors after sanitizing any +parts that would make it easier to enumerate bridges. Unless stated +otherwise, the <a href= +"https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt#n193">document +meta-format</a> of the Tor directory protocol, version 3 is +used.</p> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="n-sanitizing-potentially-sensitive-descriptor-items"> +<h2 id="rfc.section.2"><a href= +"#rfc.section.2">2.</a> <a href="#n-sanitizing-potentially-sensitive-descriptor-items">Sanitizing +potentially sensitive descriptor items</a></h2> +<div id="rfc.section.2.p.1"> +<p>The following sanitizing steps are applied to original, +non-sanitized bridge descriptors.</p> +</div> +<div class="container"> +<section id="type-annotation"> +<h3 id="rfc.section.2.1"><a href= +"#rfc.section.2.1">2.1.</a> <a href="#type-annotation">Prefix +with @type annotation</a></h3> +<div id="rfc.section.2.1.p.1"> +<p>"@type" SP DescriptorType SP Version</p> +<ul class="empty"> +<li>DescriptorType is a fixed string that identifies the sanitized +bridge descriptor type. Known strings are listed in the sections +below.</li> +<li>Version is set by the sanitizer to indicate changes in the +sanitizing process. 
The version string consists of a major version +number for backward-incompatible changes and a minor version number +for backward-compatible changes.</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="fingerprint"> +<h3 id="rfc.section.2.2"><a href= +"#rfc.section.2.2">2.2.</a> <a href="#fingerprint">Replace RSA +fingerprints</a></h3> +<div id="rfc.section.2.2.p.1"> +<p>HashedFingerprint = SHA1(Fingerprint)</p> +<ul class="empty"> +<li>Fingerprint is the decoded binary representation of the SHA-1 +hash of an ASN.1 encoded RSA public key.</li> +<li>The (non-sanitized) Fingerprint of a bridge could, in theory, +be used quite easily to uniquely identify the bridge. However, +clients can request a bridge's current descriptor by sending its +Fingerprint to the bridge authority. This is a feature to make +bridges on dynamic IP addresses more useful, but it would also +allow for trivial enumeration of bridges. Therefore, the original +Fingerprint (and anything that could be used to derive it) is +removed from descriptors and replaced with something else that can +be used to uniquely identify the bridge. The approach taken here is +to replace the Fingerprint with its SHA-1 hash.</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="master-key-ed25519"> +<h3 id="rfc.section.2.3"><a href= +"#rfc.section.2.3">2.3.</a> <a href= +"#master-key-ed25519">Replace ed25519 master keys</a></h3> +<div id="rfc.section.2.3.p.1"> +<p>HashedMasterKeyEd25519 = SHA256(MasterKeyEd25519)</p> +<ul class="empty"> +<li>MasterKeyEd25519 is the decoded binary representation of an +ed25519 master key.</li> +<li>Similar to (non-sanitized) RSA fingerprints (<a href= +"#fingerprint" title= +"Replace RSA fingerprints">Section 2.2</a>), ed25519 master +keys could be used to uniquely identify bridges and to request a +current descriptor from the bridge authority.
That is why they are +replaced with their SHA-256 hashes. In cases where a descriptor +only contains an ed25519 certificate and no ed25519 master key, the +(non-sanitized) master key is first extracted from the certificate +before sanitizing it.</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="crypto"> +<h3 id="rfc.section.2.4"><a href= +"#rfc.section.2.4">2.4.</a> <a href="#crypto">Remove public +keys, certificates, and signatures</a></h3> +<div id="rfc.section.2.4.p.1"> +<p>[Removed.]</p> +<ul class="empty"> +<li>Some of the public keys and certificates could be used to +derive key fingerprints, hence they need to be replaced or removed. +However, replacing them seemed unnecessary and overly complex with +respect to keeping state on the sanitizing host. That is why most +public keys, certificates, and signatures are simply removed in the +sanitizing process.</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="ipv4-address"> +<h3 id="rfc.section.2.5"><a href= +"#rfc.section.2.5">2.5.</a> <a href="#ipv4-address">Replace +IPv4 addresses</a></h3> +<div id="rfc.section.2.5.p.1"> +<p>SanitizedIpv4Address = "10." | KeyedHash</p> +</div> +<div id="rfc.section.2.5.p.2"> +<p>KeyedHash = SHA256(Ipv4Address | Fingerprint | Secret)[:3]</p> +<ul class="empty"> +<li>Address is the 4-byte long binary representation of the +(non-sanitized) IPv4 address.</li> +<li>Fingerprint is the 20-byte long binary representation of the +(non-sanitized) long-term identity fingerprint.</li> +<li>Secret is a 31-byte long secure random string that changes once +per month for all descriptors and statuses published in that +month.</li> +<li>The [:3] operator picks the first three bytes from the left of +the result and encodes it as three dot-separated decimal +numbers.</li> +<li>Sanitizing IPv4 addresses is obviously required to prevent +enumeration of bridges. 
The approach taken here is to replace +IPv4 addresses with syntactically valid addresses in the private IPv4 +address space 10/8 based on a keyed hash function that produces the +same sanitized address for a given bridge, IPv4 address, and +month.</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="ipv6-address"> +<h3 id="rfc.section.2.6"><a href= +"#rfc.section.2.6">2.6.</a> <a href="#ipv6-address">Replace +IPv6 addresses</a></h3> +<div id="rfc.section.2.6.p.1"> +<p>SanitizedIpv6Address = "[fd9f:2e19:3bcf::" | KeyedHash | "]"</p> +</div> +<div id="rfc.section.2.6.p.2"> +<p>KeyedHash = SHA256(Ipv6Address | Fingerprint | Secret)[:3]</p> +<ul class="empty"> +<li>Address is the 16-byte long binary representation of the +(non-sanitized) IPv6 address.</li> +<li>Fingerprint is the 20-byte long binary representation of the +(non-sanitized) long-term identity fingerprint.</li> +<li>Secret is a 19-byte long secure random string that changes once +per month for all descriptors and statuses published in that +month.</li> +<li>The [:3] operator picks the first three bytes from the left of +the result and encodes it as two lower-case hexadecimal numbers, a +colon, and another four lower-case hexadecimal numbers.</li> +<li>Similar to IPv4 addresses (<a href="#ipv4-address" title= +"Replace IPv4 addresses">Section 2.5</a>), IPv6 addresses are +replaced with syntactically valid addresses in the address range +[fd9f:2e19:3bcf::/116] based on a keyed hash function that produces +the same sanitized address for a given bridge, IPv6 address, and +month.</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="tcp-port"> +<h3 id="rfc.section.2.7"><a href= +"#rfc.section.2.7">2.7.</a> <a href="#tcp-port">Replace TCP +ports</a></h3> +<div id="rfc.section.2.7.p.1"> +<p>SanitizedPort = KeyedHash / 2^2 + 2^15 + 2^14</p> +</div> +<div id="rfc.section.2.7.p.2"> +<p>KeyedHash = SHA256(Port | Fingerprint |
Secret)[:2]</p> +<ul class="empty"> +<li>Port is the 2-byte long binary representation of the TCP +port.</li> +<li>Fingerprint is the 20-byte long binary representation of the +bridge's long-term identity fingerprint.</li> +<li>Secret is a 33-byte long secure random string that changes once +per month for all descriptors and statuses published in that +month.</li> +<li>The [:2] operator means that we pick the first two bytes from +the left of the result, and the /, ^, and + operators are all +integer operators.</li> +<li>TCP ports that are 0 in the original are left unchanged.</li> +<li>It may be less obvious that TCP ports need to be sanitized, but +an unusual TCP port used by a high-value bridge might still stand +out and provide yet another way to locate and block the bridge. +Therefore, each non-zero TCP port is replaced with a port number in +the range from 49152 to 65535, which is reserved for private +services, based on a keyed hash function that produces the same +sanitized port for a given bridge, TCP port, and month.</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="contact"> +<h3 id="rfc.section.2.8"><a href= +"#rfc.section.2.8">2.8.</a> <a href="#contact">Remove contact +information</a></h3> +<div id="rfc.section.2.8.p.1"> +<p>SanitizedContact = "somebody"</p> +<ul class="empty"> +<li>If there is contact information in a descriptor, it is replaced +by the constant string "somebody". 
(Note that this sanitizing step +is subject to change and may be changed in a future version +towards retaining the original contact information.)</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="transport"> +<h3 id="rfc.section.2.9"><a href= +"#rfc.section.2.9">2.9.</a> <a href="#transport">Remove +extraneous transport information</a></h3> +<div id="rfc.section.2.9.p.1"> +<p>[Removed.]</p> +<ul class="empty"> +<li>Bridges may provide transports in addition to the OR protocol +and include information about these transports for the bridge +distribution service. In that case, any IP addresses, TCP ports, or +additional arguments are removed, only leaving in the supported +transport names.</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="replaced-digest"> +<h3 id="rfc.section.2.10"><a href= +"#rfc.section.2.10">2.10.</a> <a href= +"#replaced-digest">Replace digests in referencing +descriptors</a></h3> +<div id="rfc.section.2.10.p.1"> +<p>SanitizedSha1Digest = SHA1(Sha1Digest)</p> +</div> +<div id="rfc.section.2.10.p.2"> +<p>SanitizedSha256Digest = SHA256(Sha256Digest)</p> +<ul class="empty"> +<li>Sha1Digest is the 20-byte long binary representation of a +descriptor's SHA-1 digest.</li> +<li>Sha256Digest is the 32-byte long binary representation of a +descriptor's SHA-256 digest.</li> +<li>Some descriptors reference other descriptors by their digest. +However, these digests are also somewhat sensitive, because it +shouldn't be possible to reconstruct the original descriptor with +help of these digests. That is why digests in referencing +descriptors are replaced with either the hex-encoded SHA-1 hash or +the base64-encoded SHA-256 hash of the original digest, depending +on which hash algorithm was used to generate the original digest.
+The resulting digest string in a referencing descriptor can then be +matched to an appended digest (<a href="#appended-digest" title= +"Append digests to referenced descriptors">Section 2.11</a>) +in a referenced descriptor.</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="appended-digest"> +<h3 id="rfc.section.2.11"><a href= +"#rfc.section.2.11">2.11.</a> <a href= +"#appended-digest">Append digests to referenced +descriptors</a></h3> +<div id="rfc.section.2.11.p.1"> +<p>SanitizedSha1Digest = SHA1(Sha1Digest)</p> +</div> +<div id="rfc.section.2.11.p.2"> +<p>SanitizedSha256Digest = SHA256(Sha256Digest)</p> +<ul class="empty"> +<li>Sha1Digest is the 20-byte long binary representation of a +descriptor's SHA-1 digest.</li> +<li>Sha256Digest is the 32-byte long binary representation of a +descriptor's SHA-256 digest.</li> +<li>As stated above (<a href="#replaced-digest" title= +"Replace digests in referencing descriptors">Section 2.10</a>), +some descriptors are referenced by others by their digest. But in +contrast to non-sanitized descriptors, it's neither possible to +compute the digest of a sanitized descriptor nor is it desirable to +include the original digest. The reason is that it shouldn't be +possible to reconstruct the original descriptor with help of the +original digest. That is why descriptors that are typically +referenced from others may contain additional lines with the +hex-encoded SHA-1 hash or the base64-encoded SHA-256 hash of the +original digest, depending on which hash algorithm would have been +used to generate the original digest. 
The resulting digest string +can then be matched to a sanitized digest (<a href= +"#replaced-digest" title= +"Replace digests in referencing descriptors">Section 2.10</a>) +in a referencing descriptor.</li> +</ul> +</div> +</section> +</div> <!-- container --> +</section> +</div> <!-- container --> +<div class="container"> +<section id="n-server-descriptor-document-format"> +<h2 id="rfc.section.3"><a href= +"#rfc.section.3">3.</a> <a href="#n-server-descriptor-document-format">Server +descriptor document format</a></h2> +<div id="rfc.section.3.p.1"> +<p>The document format of sanitized bridge server descriptors +resembles the document format of (non-sanitized) server descriptors +as much as possible. Also refer to the <a href= +"https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt">Tor +directory protocol, version 3 specification</a>, as the following +sections only specify items that differ from their non-sanitized +counterparts.</p> +</div> +<div class="container"> +<section id="n-annotations_1"> +<h3 id="rfc.section.3.1"><a href= +"#rfc.section.3.1">3.1.</a> <a href= +"#n-annotations_1">Annotations</a></h3> +<div id="rfc.section.3.1.p.1"> +<p>The bridge authority may prefix descriptors with one or more +annotation lines containing metadata, and the sanitizer may add +annotation lines with metadata about the sanitizing process.</p> +</div> +<div id="rfc.section.3.1.p.2"> +<p>"@purpose" SP Purpose NL</p> +<ul class="empty"> +<li>[Removed.]</li> +</ul> +</div> +<div id="rfc.section.3.1.p.3"> +<p>"@type" SP "bridge-server-descriptor" SP Version</p> +<ul class="empty"> +<li>[Exactly once.]</li> +<li>Version can be one of the following numbers: +<ul> +<li>"1.0" was the first version. There was supposed to be a newer +version indicating added "ntor-onion-key" lines, but due to a +mistake only the version number of sanitized bridge extra-info +descriptors was raised. 
As a result, there may be sanitized bridge +server descriptors with version 1.0 with and without those +lines.</li> +<li>"1.1" added "master-key-ed25519" and "router-digest-sha256" +lines to server descriptors published by bridges using an ed25519 +master key.</li> +<li>"1.2" introduced sanitized TCP ports.</li> +</ul> +</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="n-descriptor-body_1"> +<h3 id="rfc.section.3.2"><a href= +"#rfc.section.3.2">3.2.</a> <a href= +"#n-descriptor-body_1">Descriptor body</a></h3> +<div id="rfc.section.3.2.p.1" class="avoidbreakafter"> +<p>The body of a sanitized bridge server descriptor contains +several sanitized items as specified in the following:</p> +</div> +<div id="rfc.section.3.2.p.2"> +<p>"router" SP Nickname SP SanitizedAddress SP SanitizedORPort SP +SOCKSPort SP SanitizedDirPort NL</p> +<ul class="empty"> +<li>[At start, exactly once.]</li> +<li>Nickname is the bridge's original, unchanged nickname.</li> +<li>SanitizedAddress is the bridge's sanitized IP address (<a href= +"#ipv4-address" title= +"Replace IPv4 addresses">Section 2.5</a>).</li> +<li>SanitizedORPort is the bridge's sanitized OR port (<a href= +"#tcp-port" title="Replace TCP ports">Section 2.7</a>) (since +version 1.2) or the original OR port (until version 1.1).</li> +<li>SOCKSPort is deprecated and always 0, as in the original, +non-sanitized server descriptor.</li> +<li>SanitizedDirPort is the bridge's sanitized directory port +(<a href="#tcp-port" title= +"Replace TCP ports">Section 2.7</a>) (since version 1.2) or +the original directory port (until version 1.1).</li> +</ul> +</div> +<div id="rfc.section.3.2.p.3"> +<p>"or-address" SP SanitizedAddress ":" SanitizedPort NL</p> +<ul class="empty"> +<li>[Any number.]</li> +<li>SanitizedAddress is either an additional sanitized IPv4 address +(<a href="#ipv4-address" title= +"Replace IPv4 addresses">Section 2.5</a>) or sanitized IPv6 +address (<a 
href="#ipv6-address" title= +"Replace IPv6 addresses">Section 2.6</a>).</li> +<li>SanitizedPort is an additional sanitized OR port (<a href= +"#tcp-port" title="Replace TCP ports">Section 2.7</a>) (since +version 1.2) or original OR port (until version 1.1).</li> +</ul> +</div> +<div id="rfc.section.3.2.p.4"> +<p>"identity-ed25519" NL CertificateBlock NL</p> +<ul class="empty"> +<li>[Removed.]</li> +</ul> +</div> +<div id="rfc.section.3.2.p.5"> +<p>"master-key-ed25519" SP SanitizedMasterKey NL</p> +<ul class="empty"> +<li>[At most once.]</li> +<li>SanitizedMasterKey is the bridge's sanitized ed25519 master key +(<a href="#master-key-ed25519" title= +"Replace ed25519 master keys">Section 2.3</a>). (Introduced in +version 1.1, not present in earlier versions.)</li> +</ul> +</div> +<div id="rfc.section.3.2.p.6"> +<p>"fingerprint" SP SanitizedFingerprint NL</p> +<ul class="empty"> +<li>[At most once.]</li> +<li>SanitizedFingerprint is the bridge's sanitized fingerprint +(<a href="#fingerprint" title= +"Replace RSA fingerprints">Section 2.2</a>), formatted with a +single SP after every 4 characters.</li> +</ul> +</div> +<div id="rfc.section.3.2.p.7"> +<p>"contact" SP SanitizedContact NL</p> +<ul class="empty"> +<li>[At most once.]</li> +<li>SanitizedContact is the bridge's sanitized contact information +(<a href="#contact" title= +"Remove contact information">Section 2.8</a>).</li> +</ul> +</div> +<div id="rfc.section.3.2.p.8"> +<p>"reject" SP ExitPattern NL</p> +<ul class="empty"> +<li>[Any number.]</li> +<li>ExitPattern contains the bridge's sanitized IPv4 address +(<a href="#ipv4-address" title= +"Replace IPv4 addresses">Section 2.5</a>), if the original +line contained the bridge's primary IP address, and is otherwise +unchanged. 
(Note that "accept" lines are exempt from this +sanitizing step, which doesn't really make sense and which might +change in the future.)</li> +</ul> +</div> +<div id="rfc.section.3.2.p.9"> +<p>"extra-info-digest" SP SanitizedSha1Digest [SP +SanitizedSha256Digest] NL</p> +<ul class="empty"> +<li>[At most once.]</li> +<li>SanitizedSha1Digest is the sanitized SHA-1 digest (<a href= +"#replaced-digest" title= +"Replace digests in referencing descriptors">Section 2.10</a>) +of the corresponding extra-info descriptor.</li> +<li>SanitizedSha256Digest is the sanitized SHA-256 digest (<a href= +"#replaced-digest" title= +"Replace digests in referencing descriptors">Section 2.10</a>) +of corresponding extra-info descriptor and is only included if the +original line contained a SHA-256 digest.</li> +</ul> +</div> +<div id="rfc.section.3.2.p.10"> +<p>"family" (SP Name)* NL</p> +<ul class="empty"> +<li>[At most once.]</li> +<li>Name is either the sanitized fingerprint (<a href= +"#fingerprint" title= +"Replace RSA fingerprints">Section 2.2</a>) or unchanged +nickname of another relay or bridge that is purportedly in the same +family as this bridge.</li> +</ul> +</div> +<div id="rfc.section.3.2.p.11"> +<p>"onion-key" NL PublicKeyBlock NL</p> +<ul class="empty"> +<li>[Removed.]</li> +<li>The bridge's medium-term RSA key is removed together with most +other public keys, certificates, and signatures (<a href="#crypto" +title= +"Remove public keys, certificates, and signatures">Section 2.4</a>).</li> +</ul> +</div> +<div id="rfc.section.3.2.p.12"> +<p>"signing-key" NL PublicKeyBlock NL</p> +<ul class="empty"> +<li>[Removed.]</li> +<li>The bridge's long-term RSA key is removed together with most +other public keys, certificates, and signatures (<a href="#crypto" +title= +"Remove public keys, certificates, and signatures">Section 2.4</a>).</li> +</ul> +</div> +<div id="rfc.section.3.2.p.13"> +<p>"onion-key-crosscert" NL SignatureBlock NL</p> +<ul class="empty"> +<li>[Removed.]</li> 
+<li>This cross signature created with the onion-key is removed +together with most other public keys, certificates, and signatures +(<a href="#crypto" title= +"Remove public keys, certificates, and signatures">Section 2.4</a>).</li> +</ul> +</div> +<div id="rfc.section.3.2.p.14"> +<p>"ntor-onion-key-crosscert" SP PublicKey NL</p> +<ul class="empty"> +<li>[Removed.]</li> +<li>This cross signature created with the ntor-onion-key is removed +together with most other public keys, certificates, and signatures +(<a href="#crypto" title= +"Remove public keys, certificates, and signatures">Section 2.4</a>).</li> +</ul> +</div> +<div id="rfc.section.3.2.p.15"> +<p>"ntor-onion-key" SP NtorOnionKey NL</p> +<ul class="empty"> +<li>[At most once.]</li> +<li>The curve25519 public key used for the ntor circuit extended +handshake is included without modification in most sanitized +descriptors. This key was originally missing in version 1.0, and +there was supposed to be a newer version indicating added +"ntor-onion-key" lines. But due to a mistake only the version +number of sanitized bridge extra-info descriptors was raised. As a +result, there are sanitized bridge server descriptors with version +1.0 with and without this line. 
All subsequent versions contain +this line.</li> +</ul> +</div> +<div id="rfc.section.3.2.p.16"> +<p>"router-sig-ed25519" SP Signature NL</p> +<ul class="empty"> +<li>[Removed.]</li> +<li>The ed25519 signature is removed together with most other +public keys, certificates, and signatures (<a href="#crypto" title= +"Remove public keys, certificates, and signatures">Section 2.4</a>).</li> +</ul> +</div> +<div id="rfc.section.3.2.p.17"> +<p>"router-signature" NL SignatureBlock NL</p> +<ul class="empty"> +<li>[Removed.]</li> +<li>The RSA signature is removed together with most other public +keys, certificates, and signatures (<a href="#crypto" title= +"Remove public keys, certificates, and signatures">Section 2.4</a>).</li> +</ul> +</div> +<div id="rfc.section.3.2.p.18"> +<p>"router-digest-sha256" SP SanitizedSha256Digest NL</p> +<ul class="empty"> +<li>[At most once.]</li> +<li>SanitizedSha256Digest is the sanitized SHA-256 digest (<a href= +"#appended-digest" title= +"Append digests to referenced descriptors">Section 2.11</a>) +of this descriptor and is only included if the original descriptor +contained an ed25519 signature of the descriptor's SHA-256 digest.
+(Introduced in version 1.1, not present in earlier versions.)</li> +</ul> +</div> +<div id="rfc.section.3.2.p.19"> +<p>"router-digest" SP SanitizedSha1Digest NL</p> +<ul class="empty"> +<li>[At end, exactly once.]</li> +<li>SanitizedSha1Digest is the sanitized SHA-1 digest (<a href= +"#appended-digest" title= +"Append digests to referenced descriptors">Section 2.11</a>) +of this descriptor.</li> +</ul> +</div> +</section> +</div> <!-- container --> +</section> +</div> <!-- container --> +<div class="container"> +<section id="n-extra-info-descriptor-document-format"> +<h2 id="rfc.section.4"><a href= +"#rfc.section.4">4.</a> <a href="#n-extra-info-descriptor-document-format">Extra-info +descriptor document format</a></h2> +<div id="rfc.section.4.p.1"> +<p>The document format of sanitized extra-info descriptors follows +the same approach as sanitized server descriptors by changing as +few items as possible in their original, non-sanitized counterpart. +The original format is specified in the <a href= +"https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt">Tor +directory protocol, version 3</a>. Only the changes to that +specification are listed below.</p> +</div> +<div class="container"> +<section id="n-annotations_2"> +<h3 id="rfc.section.4.1"><a href= +"#rfc.section.4.1">4.1.</a> <a href= +"#n-annotations_2">Annotations</a></h3> +<div id="rfc.section.4.1.p.1"> +<p>"@type" SP "bridge-extra-info" SP Version</p> +<ul class="empty"> +<li>[Exactly once.]</li> +<li>Version can be one of the following numbers: +<ul> +<li>"1.0" was the first version.</li> +<li>"1.1" added sanitized "transport" lines.</li> +<li>"1.2" was supposed to indicate added "ntor-onion-key" lines, +but those changes only affected bridge server descriptors, not +extra-info descriptors. 
So, nothing has changed as compared to +version 1.1.</li> +<li>"1.3" added "master-key-ed25519" and "router-digest-sha256" +lines to extra-info descriptors published by bridges using an +ed25519 master key.</li> +</ul> +</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="n-descriptor-body_2"> +<h3 id="rfc.section.4.2"><a href= +"#rfc.section.4.2">4.2.</a> <a href= +"#n-descriptor-body_2">Descriptor body</a></h3> +<div id="rfc.section.4.2.p.1" class="avoidbreakafter"> +<p>Several items in the extra-info descriptor body are changed or +removed as compared to original, non-sanitized descriptors:</p> +</div> +<div id="rfc.section.4.2.p.2"> +<p>"extra-info" SP Nickname SP SanitizedFingerprint NL</p> +<ul class="empty"> +<li>[At start, exactly once.]</li> +<li>Nickname is the bridge's original, unchanged nickname.</li> +<li>SanitizedFingerprint is the bridge's sanitized fingerprint +(<a href="#fingerprint" title= +"Replace RSA fingerprints">Section 2.2</a>).</li> +</ul> +</div> +<div id="rfc.section.4.2.p.3"> +<p>"transport" SP TransportName NL</p> +<ul class="empty"> +<li>[Any number.]</li> +<li>TransportName is the transport name as found in the original +line.</li> +<li>Any further details about this transport (<a href="#transport" +title= +"Remove extraneous transport information">Section 2.9</a>), +including any IP addresses, TCP ports, or additional arguments are +removed, only leaving in the supported transport names. (Introduced +in version 1.1, not present in earlier versions.)</li> +</ul> +</div> +<div id="rfc.section.4.2.p.4"> +<p>"transport-info" SP TransportInfo NL</p> +<ul class="empty"> +<li>[Removed.]</li> +<li>Any lines containing extraneous transport information (<a href= +"#transport" title= +"Remove extraneous transport information">Section 2.9</a>) are +removed.
(Note that these lines are not even specified for +original, non-sanitized descriptors.)</li> +</ul> +</div> +<div id="rfc.section.4.2.p.5"> +<p>"identity-ed25519" NL CertificateBlock NL</p> +<ul class="empty"> +<li>[Removed.]</li> +<li>The ed25519 certificate is removed together with most other public +keys, certificates, and signatures (<a href="#crypto" title= +"Remove public keys, certificates, and signatures">Section 2.4</a>).</li> +</ul> +</div> +<div id="rfc.section.4.2.p.6"> +<p>"master-key-ed25519" SP SanitizedMasterKey NL</p> +<ul class="empty"> +<li>[At most once.]</li> +<li>SanitizedMasterKey is the bridge's sanitized ed25519 master key +(<a href="#master-key-ed25519" title= +"Replace ed25519 master keys">Section 2.3</a>). (Introduced in +version 1.3, not present in earlier versions.)</li> +</ul> +</div> +<div id="rfc.section.4.2.p.7"> +<p>"router-sig-ed25519" SP Signature NL</p> +<ul class="empty"> +<li>[Removed.]</li> +<li>The ed25519 signature is removed together with most other +public keys, certificates, and signatures (<a href="#crypto" title= +"Remove public keys, certificates, and signatures">Section 2.4</a>).</li> +</ul> +</div> +<div id="rfc.section.4.2.p.8"> +<p>"router-signature" NL SignatureBlock NL</p> +<ul class="empty"> +<li>[Removed.]</li> +<li>The RSA signature is removed together with most other public +keys, certificates, and signatures (<a href="#crypto" title= +"Remove public keys, certificates, and signatures">Section 2.4</a>).</li> +</ul> +</div> +<div id="rfc.section.4.2.p.9"> +<p>"router-digest-sha256" SP SanitizedSha256Digest NL</p> +<ul class="empty"> +<li>[At most once.]</li> +<li>SanitizedSha256Digest is the sanitized SHA-256 digest (<a href= +"#appended-digest" title= +"Append digests to referenced descriptors">Section 2.11</a>) +of this descriptor and is only included if the original descriptor +contained an ed25519 signature of the descriptor's SHA-256 digest.
+(Introduced in version 1.3, not present in earlier versions.)</li> +</ul> +</div> +<div id="rfc.section.4.2.p.10"> +<p>"router-digest" SP SanitizedSha1Digest NL</p> +<ul class="empty"> +<li>[At end, exactly once.]</li> +<li>SanitizedSha1Digest is the sanitized SHA-1 digest (<a href= +"#appended-digest" title= +"Append digests to referenced descriptors">Section 2.11</a>) +of this descriptor.</li> +</ul> +</div> +</section> +</div> <!-- container --> +</section> +</div> <!-- container --> +<div class="container"> +<section id="n-network-status-document-format"> +<h2 id="rfc.section.5"><a href= +"#rfc.section.5">5.</a> <a href="#n-network-status-document-format">Network +status document format</a></h2> +<div id="rfc.section.5.p.1"> +<p>The document format of bridge network statuses is loosely based +on the network status format specified in the <a href= +"https://gitweb.torproject.org/torspec.git/tree/attic/dir-spec-v2.txt"> +Tor directory protocol, version 2</a>. However, the preamble of +bridge network statuses contains far fewer items than that of +(relay) network statuses, and the ones that are similar differ in +some of the details. 
That's why all preamble lines that exist in +sanitized bridge network statuses are specified below, not just the +ones that differ.</p> +</div> +<div class="container"> +<section id="n-annotations_3"> +<h3 id="rfc.section.5.1"><a href= +"#rfc.section.5.1">5.1.</a> <a href= +"#n-annotations_3">Annotations</a></h3> +<div id="rfc.section.5.1.p.1" class="avoidbreakafter"> +<p>Sanitized bridge network statuses start with one or more +annotations:</p> +</div> +<div id="rfc.section.5.1.p.2"> +<p>"@type" SP "bridge-network-status" SP Version NL</p> +<ul class="empty"> +<li>[Exactly once.]</li> +<li>Version can be one of the following numbers: +<ul> +<li>"1.0" was the first version.</li> +<li>"1.1" introduced sanitized TCP ports.</li> +<li>"1.2" introduced the "fingerprint" line, containing the +fingerprint of the bridge authority which produced the document, to +the header.</li> +</ul> +</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="n-preamble"> +<h3 id="rfc.section.5.2"><a href= +"#rfc.section.5.2">5.2.</a> <a href= +"#n-preamble">Preamble</a></h3> +<div id="rfc.section.5.2.p.1" class="avoidbreakafter"> +<p>The preamble contains zero or more of the following items in no +predefined order:</p> +</div> +<div id="rfc.section.5.2.p.2"> +<p>"published" SP Publication NL</p> +<ul class="empty"> +<li>[Exactly once.]</li> +<li>Publication is the publication time for this document, which is +left unchanged in the sanitizing process.</li> +</ul> +</div> +<div id="rfc.section.5.2.p.3"> +<p>"flag-thresholds" SP Thresholds NL</p> +<ul class="empty"> +<li>[At most once.]</li> +<li>Thresholds are internal performance thresholds that the bridge +directory authority had at the moment it was forming a status, +which are left unchanged in the sanitizing process. 
This item was +first introduced in <a href= +"https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt">Tor +directory protocol, version 3</a>.</li> +</ul> +</div> +<div id="rfc.section.5.2.p.4"> +<p>"fingerprint" SP Fingerprint NL</p> +<ul class="empty"> +<li>[At most once.]</li> +<li>Fingerprint is the (non-sanitized) SHA-1 hash of the bridge +authority's long-term signing key, encoded as 40 upper-case +hexadecimal characters, which is either added or left unchanged in +the sanitizing process. (Introduced in version 1.2, not present in +earlier versions.)</li> +</ul> +</div> +</section> +</div> <!-- container --> +<div class="container"> +<section id="n-router-entries"> +<h3 id="rfc.section.5.3"><a href= +"#rfc.section.5.3">5.3.</a> <a href="#n-router-entries">Router +entries</a></h3> +<div id="rfc.section.5.3.p.1"> +<p>For each bridge, there is one router entry containing one or +more items. Similar to the preamble specification, the following +specification lists all lines known in sanitized bridge network +statuses, including those that are left unchanged in the sanitizing +process.</p> +</div> +<div id="rfc.section.5.3.p.2"> +<p>"r" SP Nickname SP SanitizedFingerprint SP SanitizedSha1Digest +SP Publication SP SanitizedAddress SP SanitizedORPort SP +SanitizedDirPort NL</p> +<ul class="empty"> +<li>[At start, exactly once.]</li> +<li>Nickname is the bridge's original, unchanged nickname.</li> +<li>SanitizedFingerprint is the bridge's sanitized fingerprint +(<a href="#fingerprint" title= +"Replace RSA fingerprints">Section 2.2</a>).</li> +<li>SanitizedSha1Digest is the sanitized SHA-1 digest (<a href= +"#replaced-digest" title= +"Replace digests in referencing descriptors">Section 2.10</a>) +of the corresponding server descriptor.</li> +<li>Publication is the publication time for the corresponding +server descriptor, which is left unchanged in the sanitizing +process.</li> +<li>SanitizedAddress is the bridge's sanitized IP address (<a href= +"#ipv4-address" 
title= +"Replace IPv4 addresses">Section 2.5</a>).</li> +<li>SanitizedORPort is the bridge's sanitized OR port (<a href= +"#tcp-port" title="Replace TCP ports">Section 2.7</a>) (since +version 1.1) or the original OR port (until version 1.0).</li> +<li>SanitizedDirPort is the bridge's sanitized directory port +(<a href="#tcp-port" title= +"Replace TCP ports">Section 2.7</a>) (since version 1.1) or +the original directory port (until version 1.0).</li> +</ul> +</div> +<div id="rfc.section.5.3.p.3"> +<p>"a" SP SanitizedAddress ":" SanitizedPort NL</p> +<ul class="empty"> +<li>[Any number.]</li> +<li>SanitizedAddress is either an additional sanitized IPv4 address +(<a href="#ipv4-address" title= +"Replace IPv4 addresses">Section 2.5</a>) or sanitized IPv6 +address (<a href="#ipv6-address" title= +"Replace IPv6 addresses">Section 2.6</a>).</li> +<li>SanitizedPort is an additional sanitized OR port (<a href= +"#tcp-port" title="Replace TCP ports">Section 2.7</a>).</li> +</ul> +</div> +<div id="rfc.section.5.3.p.4"> +<p>"s" ... NL</p> +<ul class="empty"> +<li>[Unchanged.]</li> +</ul> +</div> +<div id="rfc.section.5.3.p.5"> +<p>"w" ... NL</p> +<ul class="empty"> +<li>[Unchanged.]</li> +</ul> +</div> +<div id="rfc.section.5.3.p.6"> +<p>"p" ... NL</p> +<ul class="empty"> +<li>[Unchanged.]</li> +</ul> +</div> +</section> +</div> <!-- container --> +</section> +</div> <!-- container --> +<jsp:include page="bottom.jsp"/>
participants (1)
-
karsten@torproject.org