[tor-commits] [tech-reports/master] Sources for tr-2017-04-001.

karsten at torproject.org karsten at torproject.org
Fri Apr 28 08:31:28 UTC 2017


commit e8d6663c6ea45deb4e8e3ceabc0be053480e78a3
Author: iwakeh <iwakeh at torproject.org>
Date:   Thu Apr 27 20:51:00 2017 +0100

    Sources for tr-2017-04-001.
---
 ...ts-bridge-combined-aq-2016-01-01-2017-01-01.png |  Bin 0 -> 22445 bytes
 ...ts-bridge-combined-va-2016-01-01-2017-01-01.png |  Bin 0 -> 46376 bytes
 ...ats-bridge-country-aq-2016-01-01-2017-01-01.png |  Bin 0 -> 10695 bytes
 ...ats-bridge-country-va-2016-01-01-2017-01-01.png |  Bin 0 -> 24626 bytes
 ...-relay-country-aq-2016-01-01-2017-01-01-off.png |  Bin 0 -> 22730 bytes
 ...-relay-country-va-2016-01-01-2017-01-01-off.png |  Bin 0 -> 23626 bytes
 2017/metrics-privacy/privacy-in-memory.tex         | 1560 ++++++++++++++++++++
 2017/metrics-privacy/references.bib                |  153 ++
 2017/metrics-privacy/tortechrep.cls                |    1 +
 9 files changed, 1714 insertions(+)

diff --git a/2017/metrics-privacy/images/userstats-bridge-combined-aq-2016-01-01-2017-01-01.png b/2017/metrics-privacy/images/userstats-bridge-combined-aq-2016-01-01-2017-01-01.png
new file mode 100644
index 0000000..a01faf7
Binary files /dev/null and b/2017/metrics-privacy/images/userstats-bridge-combined-aq-2016-01-01-2017-01-01.png differ
diff --git a/2017/metrics-privacy/images/userstats-bridge-combined-va-2016-01-01-2017-01-01.png b/2017/metrics-privacy/images/userstats-bridge-combined-va-2016-01-01-2017-01-01.png
new file mode 100644
index 0000000..d61cf9c
Binary files /dev/null and b/2017/metrics-privacy/images/userstats-bridge-combined-va-2016-01-01-2017-01-01.png differ
diff --git a/2017/metrics-privacy/images/userstats-bridge-country-aq-2016-01-01-2017-01-01.png b/2017/metrics-privacy/images/userstats-bridge-country-aq-2016-01-01-2017-01-01.png
new file mode 100644
index 0000000..572482b
Binary files /dev/null and b/2017/metrics-privacy/images/userstats-bridge-country-aq-2016-01-01-2017-01-01.png differ
diff --git a/2017/metrics-privacy/images/userstats-bridge-country-va-2016-01-01-2017-01-01.png b/2017/metrics-privacy/images/userstats-bridge-country-va-2016-01-01-2017-01-01.png
new file mode 100644
index 0000000..b3eea7e
Binary files /dev/null and b/2017/metrics-privacy/images/userstats-bridge-country-va-2016-01-01-2017-01-01.png differ
diff --git a/2017/metrics-privacy/images/userstats-relay-country-aq-2016-01-01-2017-01-01-off.png b/2017/metrics-privacy/images/userstats-relay-country-aq-2016-01-01-2017-01-01-off.png
new file mode 100644
index 0000000..b808134
Binary files /dev/null and b/2017/metrics-privacy/images/userstats-relay-country-aq-2016-01-01-2017-01-01-off.png differ
diff --git a/2017/metrics-privacy/images/userstats-relay-country-va-2016-01-01-2017-01-01-off.png b/2017/metrics-privacy/images/userstats-relay-country-va-2016-01-01-2017-01-01-off.png
new file mode 100644
index 0000000..a6e966b
Binary files /dev/null and b/2017/metrics-privacy/images/userstats-relay-country-va-2016-01-01-2017-01-01-off.png differ
diff --git a/2017/metrics-privacy/privacy-in-memory.tex b/2017/metrics-privacy/privacy-in-memory.tex
new file mode 100644
index 0000000..72cdf2a
--- /dev/null
+++ b/2017/metrics-privacy/privacy-in-memory.tex
@@ -0,0 +1,1560 @@
+\documentclass{tortechrep}
+\usepackage{url}
+\usepackage{amsthm}
+\usepackage{thmtools}
+\usepackage{mathtools}
+\usepackage{hyperref}
+\usepackage{comment}
+\usepackage{fancyvrb}
+\usepackage{fancyhdr}
+\usepackage[Q=yes]{examplep}
+\usepackage{marginnote}
+\usepackage[light]{draftcopy}
+\usepackage{graphicx}
+\usepackage{caption}
+\usepackage{subcaption}
+
+%%% useful definitions and settings
+\setcounter{tocdepth}{1}
+
+%%% no break in verbatim/code.
+\newcommand{\Qx}[1]{\mbox{\Q{#1}}}
+
+%%%
+
+\begin{document}
+%%%% Settings inside document-env.
+% general setting for Verbatim environment
+\fvset{frame=leftline,numbers=left,numbersep=2pt,gobble=4,stepnumber=1}
+\VerbatimFootnotes
+\graphicspath{{./images/}}
+
+%%%%
+
+\title{Privacy analysis of Tor's in-memory statistics}
+\author{Karin Herm\\The Tor Project\\ iwakeh$\bowtie$torproject.org}% avoiding spam
+
+\reportid{2017-04-001}
+\date{April 2017}
+\maketitle
+\tableofcontents
+
+\begin{abstract}
+  This report analyzes which possibly sensitive, potentially
+  personally identifying data is stored in memory of Tor relays and
+  bridges or reported to the directory authorities and makes
+  suggestions to reduce the collection and temporary storage of such
+  data. %
+\end{abstract}
+\pagebreak
+\section{Introduction}\label{intro} %
+Tor network metrics and the underlying data have been available for
+many years by now and have proven to be a valuable source for analyzing
+and improving the network as well as for censorship detection.%
+\footnote{Network analysis estimation of cell traffic, estimation of
+  onion services induced traffic and user count estimation as well as
+  censorship detection \cite{tr200908001,tr201210001,tr201504001} and
+  all data and visualizations on
+  \href{https://metrics.torproject.org}{MetricsWeb}.}%
+
+Tor Metrics' data collecting and processing chain handles various types
+of data ranging from raw data as measured by running Tor servers%
+\footnote{%
+  Here and in the following {\em Tor server} refers to relays and
+  bridges and other parts of the Tor network fulfilling a server
+  role. %
+  The term {\em client} is used for Tor instances simply connecting to
+  the network. %
+  Tor servers report different statistics depending on their
+  configuration. %
+  A bridge, a normal relay, an entry guard relay, etc., they all have
+  access to different data and report different statistics. %
+} %
+to preprocessed and aggregated data ready for further statistical work
+and as a basis for visualizations. %
+
+This report aims at improving privacy protection {\em before} any data
+is reported. %
+Of primary interest is the identification of possibly harmful data
+that is not a necessary part of the running Tor server, e.g.~data held
+in-memory or written to files for providing network/router metrics
+reports or data written to logs for informative purposes. %
+
+Section \ref{overview} provides an overview of the Tor Metrics system,
+its privacy goals, and a more detailed explanation of the metrics
+collection process as well as the associated data. %
+In-memory data with possibly negative impact to client privacy is
+identified in section \ref{privacy-im}. %
+Section \ref{mitigate} surveys several measures to reduce privacy
+impact. %
+Building on this, section \ref{detail} details the changes necessary
+for an implementation of the favored
+solution. %
+The suggestions made in this report for reducing privacy impact go
+beyond the scope of a single project and some will need further work
+to reach the implementation stage. %
+The summary in section \ref{summary} takes account of this and also
+sketches possible next steps. %
+
+\section{Background} \label{overview}%
+
+This report assumes the reader to be familiar with the Tor software
+and Tor network and to some extent with the functionality and data
+offered by Tor Metrics. %
+
+The following sections first present an overview of the data
+processing chain in Tor Metrics and summarize the privacy goals behind
+Tor Metrics data collection. %
+Sub-section~\ref{mc} provides a description of the processes of
+measuring and counting implemented in Tor servers for metrics purposes
+and shows where in the code the processing of the data takes place. %
+
+\subsection{Tor Metrics}  \label{overview-metrics}%
+Tor servers running as relays or bridges publish their presence and
+capabilities to the directory authorities in form of simple
+files, the descriptors. %
+
+The current system of collecting data about the Tor network is built
+on descriptors, which are mainly produced and distributed for the
+operation of the network except for extra-info descriptors, which also
+provide metrics about the network. %
+This way of measuring the network generates minimal overhead for the
+network's operation and the data produced is freely available%
+\footnote{See \cite[dir-spec.txt]{torspec} about how to retrieve
+  descriptors.} %
+to anyone who cares to collect it. %
+The descriptors are machine and human readable and the knowledge
+required to make use of them is published in Tor's
+specification~\cite{torspec}. %
+
+All descriptors available at a fixed point in time give a good picture
+of the current status of the network. %
+In order to collect these {\em pictures} and combine them to a {\em
+  history} Tor Metrics introduced CollecTor,%
+\footnote{%
+  The main instance is \url{https://collector.torproject.org}. %
+  Since $2016$ there are also several mirror instances sharing their
+  data to gather even more of the ephemeral descriptors and other Tor
+  network related data.} %
+which gathers and archives the \emph{raw facts} in form of
+descriptors%
+\footnote{Actually, some data, e.g., bridge descriptors, are
+  pre-processed in order to remove possibly privacy critical
+  information, but for the current report they can be still considered
+  raw data.} %
+about the Tor network. %
+
+A descriptor document only carries information about a certain point
+in time, more exactly a time interval, as for example, a measurement
+interval for extra-info descriptors or the consensus, which applies to
+the entire network for its valid time interval. %
+Tor Metrics also provides machine and human centered services that
+create aggregated and enriched data from the descriptor collection. %
+The central services by Tor Metrics building on CollecTor are Onionoo%
+\footnote{\url{https://onionoo.torproject.org}} %
+and MetricsWeb.%
+\footnote{\url{https://metrics.torproject.org}} %
+Onionoo aggregates the descriptors available at CollecTor and provides
+current and historic data about {\em currently running} Tor servers. %
+Based on Onionoo there is an ecosystem of clients building
+visualizations and other results helping users to find the piece of
+information they need.%
+\footnote{See \url{https://metrics.torproject.org/development.html}
+  for development tools and
+  \url{https://metrics.torproject.org/operation.html} for user
+  centered services.} %
+
+MetricsWeb uses CollecTor's data for providing {\em the history} of
+the entire Tor network in form of aggregated and enriched data sets,
+which serve as the basis for the numerous visualizations on MetricsWeb
+and can be freely downloaded for further use. %
+
+\subsection{Privacy goals} \label{overview-privacy}%
+
+The goals of a privacy and anonymity network like Tor are not easily
+combined with extensive data gathering, but at the same time data is
+needed for monitoring and improving the network and detecting possible
+censorship events or attacks against the network. %
+Safety and privacy concerns regarding data collection by Tor Metrics
+are guided by the Safety Board's guidelines.\footnote{%
+  See
+  \url{https://research.torproject.org/safetyboard.html\#guidelines}.
+} %
+Safety and privacy assessment is usually done informally by discussion
+during the proposal process\footnote{%
+  The proposal process is defined in
+  \cite[proposals/001-process.txt]{torspec} and security and anonymity
+  implications should be part of any proposal (cf.~\cite[line 114 of
+  proposals/001-process.txt]{torspec}). } %
+for changes to the Tor source, and/or supported by closer analysis in
+form of Tor Tech Reports, for example, the introduction of onion
+service statistics was backed by a Tor Tech Report \cite{tr201504001},
+which substantiated the privacy standards implemented and the
+statistical accuracy of the data to be collected.%
+\footnote{See also the related blog post
+  \url{https://blog.torproject.org/blog/some-statistics-about-onions}.} %
+
+It is out of scope of this report and will be future work to provide
+such an assessment for both privacy and statistical accuracy
+throughout the data-verse of Tor Metrics. %
+Until such background is available security and privacy assessment
+will be based on the guidelines, best practices, and heuristic
+arguments. %
+The current report focuses on in-memory data and considers the
+scenario that an attacker gains access to in-memory storage. %
+Thus, any run-time data for normal processing as well as the in- and
+outgoing traffic are also available to the intruder. %
+Hence, at most events/data that occurred and were finalized {\em
+  before} the breach can potentially be protected.%
+\footnote{ %
+  The goal that an adversary cannot learn the state of the measurement
+  before the time of compromise is usually referred to as \emph{forward
+    privacy.}  } %
+Another goal is to reduce reporting of potentially privacy problematic
+data. %
+
+\subsection{Measuring and counting} \label{mc} %
+
+Tor instances keep data in-memory and on disk for normal operation,
+for facilitation of local administration of the Tor server, and for
+reporting metrics data. %
+The latter is mainly accomplished by uploading extra-info descriptors
+to authorities. %
+For a quick orientation about the structure of these descriptors two
+examples of extra-info descriptors can be found in the appendix on
+page~\pageref{descriptors}. %
+
+\subsubsection{Server internal processing} \label{server-proc}
+
+Servers write their measurements and counting results to separate
+files, the ``stats files'', which are located in sub-folder \Qx{stats}
+of a configurable path. %
+These files are parsed and their content is assembled to form an
+extra-info descriptor, which will be uploaded to an authority. %
+The upload of extra-info descriptors happens together with the upload
+of the server descriptor. %
+
+The callback \Qx{check_descriptor} runs every minute and checks whether
+descriptors have to be uploaded; if necessary, it creates the server
+descriptor and the extra-info descriptor, which is populated from
+previously prepared stats files: dirreq-stats, hidserv-stats,
+entry-stats, buffer-stats, exit-stats, conn-stats. %
+
+Writing of stats files is triggered by two callbacks,
+\Qx{write_stats_file_callback}%
+\footnote{As defined in \cite[\Qx{src/or/main.c:1702-1747}]{torgit}.}
+and \Qx{record_bridge_stats_callback}.%
+\footnote{As defined in
+  \cite[\Qx{src/or/main.c:1752-1777}]{torgit}.} %
+These callbacks are registered to be run regularly after their first
+start after one second.%
+\footnote{Scheduled by the code in
+  \cite[\Qx{src/or/main.c:1265-1275}]{torgit} and the callback's
+  return values.} %
+Afterwards, the corresponding tasks are run in their own intervals,
+i.e., after running for the first time the next interval is currently
+limited to at most one hour and the actual interval will be the
+smallest demanded by the respective sub-tasks. %
+Given that all configuration options for statistics are enabled the
+following functions are called from \Qx{write_stats_file_callback}: %
+\begin{description}
+\item [\Qx{rep_hist_buffer_stats_write}:] statistics about cell
+  processing for monitoring relay performance (cf.~\ref{circ})
+\item [\Qx{geoip_dirreq_stats_write}:] directory statistics
+  (cf.~\ref{dirreq})
+\item [\Qx{geoip_entry_stats_write}:] entry contact statistics
+  (cf.~\ref{clienthist})
+\item [\Qx{rep_hist_hs_stats_write}:] onion services statistics
+  (cf.~\ref{hidserv})
+\item [\Qx{rep_hist_exit_stats_write}:] exit traffic statistics
+  (cf.~\ref{exit})
+\item [\Qx{rep_hist_conn_stats_write}:] traffic statistics between
+  relays (cf.~\ref{conn}).
+\item [\Qx{rep_hist_desc_stats_write}:] statistics about served
+  descriptors (only for bridge authorities).
+\end{description}
+The \Qx{record_bridge_stats_callback} only triggers one function:
+\Qx{geoip_bridge_stats_write}, which writes bridge connection
+statistics (see \ref{clienthist}). %
+
+All of these functions verify whether their individual measurement
+interval has elapsed. %
+If so, they assemble their respective data, reset the data collecting
+structures, and write the data to files in the configured statistics
+directory. %
+This process is similar for all stats-files, but not identical. %
+Some of the concerned functions handle the reset of the measurement
+structures in-memory immediately after assembling the data to be
+written and others only reset after a successful write. %
+For example, \Qx{geoip_entry_stats_write} only resets the data
+structure when writing succeeds, which can cause data retention for
+more than the intended $24$ hour interval and
+\Qx{geoip_bridge_stats_write} doesn't remove client IPs from memory
+until the {\em next} interval's statistics are going to be written,
+which leads to a usual retention time of up to 48 hours.%
+\footnote{Some of these differences were introduced on purpose, e.g.,
+  the 48 hour interval seems to be due to a technical choice for
+  bridge metrics, as it is already mentioned in the introduction of
+  the extra-info proposal
+  cf.~\cite[proposals/166-statistics-extra-info-docs.txt]{torspec}. %
+} %
+In \Qx{geoip_entry_stats_write} the removal of older client data is
+only performed, if the interval for the next reporting is reached,%
+\footnote{See \cite[\Qx{src/or/geoip.c}:1627-1654]{torgit}.} %
+and \Qx{geoip_remove_old_clients} removes clients older than the
+current report interval of 24 hours, which is the argument
+\Qx{start_of_dirreq_stats_interval} and then removes the data after
+computing and writing statistics.%
+\footnote{ \cite[\Qx{src/or/geoip.c}:1648]{torgit}} %
+Thus, if writing fails 
+there could be up to 48 hours of client data available in-memory.%
+\footnote{Cf.~\cite[\Qx{src/or/geoip.c}:1644,1645]{torgit}} %
+For bridge clients' IP connections the retention time is usually more
+than 24 hours, because the old clients are removed%
+\footnote{In function \Qx{geoip_bridge_stats_write}
+  \cite[\Qx{src/or/geoip.c}:1492-1530]{torgit}.} %
+{\em before} statistics computation and here only those from {\em
+  before} the current reporting interval. %
+
+\subsection{Data structures}
+
+This section describes in-memory storage structures for all data
+collected for metrics purposes and explains how these structures are
+maintained during a measurement interval. %
+
+The following assumes some familiarity with the data fields of extra-info
+descriptors.%
+\footnote{%
+  Two example descriptors are printed in appendix \ref{descriptors}. %
+} %
+The descriptions are grouped by the extra-info descriptor target field
+and exclude fields that are not in the focus of this analysis,
+e.g.~identity, digests, statistic interval end times. %
+
+\subsubsection{Directory requests counts}\label{dirreq}
+In order to derive usage by country Tor servers keep track of the
+originating country of directory requests. %
+The resulting data is written to extra-info field \Qx{dirreq-v3-reqs}
+as a list of mappings from two-letter country codes%
+\footnote{ GeoIp codes usually refer to countries, but in some cases
+  to other kinds of jurisdiction. %
+  For the topic treated in this report it does no harm to simply refer
+  to countries in all cases.  } %
+to the number of requests for v3 network statuses from that country,
+rounded up to the nearest multiple of 8. %
+
+During run-time the counts are stored in a list of
+\mbox{\Qx{geoip_country_t}} structures%
+\footnote{ As defined in \cite[\Qx{src/or/geoip.c:55-59}]{torgit}.
+} %
+without binning or obfuscation. %
+The count \Qx{n_v3_ns_requests} is increased when a client is
+recorded.%
+\footnote{\label{noteclient}%
+  This happens by calling function \Qx{geoip_note_client_seen} in
+  \cite[\Qx{src/or/geoip.c}:560-613]{torgit}.} %
+The map of \Qx{geoip_country_t} structures is reset%
+\footnote{See function \Qx{geoip_dirreq_stats_write} in
+  \cite[\Qx{src/or/geoip.c}:1284-1312]{torgit}.  } %
+after writing the derived values to the stats file. %
+
+\subsubsection{Connecting client counts}\label{clienthist}
+
+Connecting clients use the Tor network and their count is tracked in
+regard to originating country and in case of bridges also the
+transport used and the IP version. %
+The resulting data is written to the fields \Qx{bridge-ips},
+\Qx{bridge-ip-transports} and \Qx{bridge-ip-versions} as well as
+\Qx{dirreq-v3-ips} and \Qx{entry-ips}, of which the latter two are
+currently not used in Tor Metrics. %
+
+In order to avoid repeated counting of the same client IP connecting
+the client IPs are stored in-memory in maps of \Qx{clientmap_entry_t}%
+\footnote{Defined in \cite[\Qx{src/or/geoip.c}:475-491]{torgit}.} %
+without binning or obfuscation. %
+
+The data reported in \Qx{bridge-ips} is used for all MetricsWeb graphs
+about bridge user counts and together with \Qx{bridge-ip-transports},
+which is a list of mappings from pluggable transport names to the
+number of unique IP addresses that have connected using that pluggable
+transport, for MetricsWeb's
+\href{https://metrics.torproject.org/userstats-bridge-transport.html}%
+{\emph{Bridge users by transport}} and
+\href{https://metrics.torproject.org/userstats-bridge-combined.html}%
+{\emph{Bridge users by country and transports}} graphs. %
+The values from \Qx{bridge-ip-versions}, which is a list of unique IP
+addresses that have connected to the bridge per protocol family, are
+used for MetricsWeb's
+\href{https://metrics.torproject.org/userstats-bridge-version.html}%
+{\emph{Bridge users by IP version}} graph. %
+
+All the values above are reported rounded to the next multiple of eight. %
+The counts are taken from the clientmap, binned, and written to the
+file \Qx{stats/bridge-stats}. %
+All countries with at least one count are reported. %
+
+
+\subsubsection{Directory response counts}
+Another field used to derive client contacts is \Qx{dirreq-v3-resp},
+from which the success count of responses made by the Tor server is
+currently used to determine the client count of bridges. %
+Field \Qx{dirreq-v3-resp} reports a list of mappings from response
+statuses to the number of requests for v3 network statuses that were
+answered with that response status, rounded up to the nearest multiple
+of eight. %
+All response statuses with at least one response are reported. %
+
+Counts by response status are stored in a simple array without
+obfuscation\footnote{%
+  Array definition \cite[\Qx{src/or/geoip.c:640}]{torgit} and array
+  processing \cite[\Qx{src/or/geoip.c:644-656}]{torgit}.} %
+and the binned values are computed just before writing statistics to
+file \Qx{stats/dirreq-stats}, the array is reset after writing the
+statistics file successfully.%
+\footnote{\label{reset-dirreq}%
+  In function \Qx{geoip_reset_dirreq_stats}
+  cf.~\cite[\Qx{src/or/geoip.c:1179-1208}]{torgit}.}
+
+\subsubsection{Server bandwidth metrics}\label{bw} 
+The fields \Qx{write-history} and \Qx{read-history} declare how much
+bandwidth the Tor server has used recently. Usage is divided into
+intervals of currently four hours. %
+The end of the most recent interval of the measurements is given. %
+Values are the number of bytes used in the last intervals, ordered
+from oldest to newest. %
+Stored in struct \Qx{bw_array_t} using circular arrays for maxima and
+totals.%
+\footnote{Cf.~\cite[\Qx{src/or/rephist.c:1209-1236}]{torgit}} %
+
+Similarly the extra-info descriptor fields \Qx{dirreq-write-history}
+and \Qx{dirreq-read-history}%
+\footnote{Assembled in \cite[\Qx{src/or/rephist.c:1497-1550}]{torgit}} %
+declare how much bandwidth the Tor server has spent on answering
+directory requests. %
+These values are cut at the value of the configured max bandwidth for
+reporting.  They are also stored in struct \Qx{bw_array_t} (as
+\Qx{write-history} and \Qx{read-history}).
+
+All four \Qx{*-history} values are stored without obfuscation or
+binning and are only cut off and rounded down to 1KB before they are
+reported.%
+\footnote{The in-memory values are not changed, cf.~function
+  \Qx{rep_hist_fill_bandwidth_history} in
+  \cite[\Qx{src/or/rephist.c:1448-1491}]{torgit}.} %
+
+\subsubsection{Directory download metrics}
+\Qx{dirreq-v3-direct-dl} and \Qx{dirreq-v3-tunneled-dl} provide
+statistics about possible failures in the download process of v3
+network statuses. %
+The list currently contains values for \Qx{complete}, \Qx{timeout},
+and \Qx{running}.  Values are stored in a map of
+\Qx{dirreq_map_entry_t} types.%
+\footnote{\cite[\Qx{src/or/geoip.c:700-714}]{torgit} } %
+
+The values are rounded to the next multiple of 4 before printing
+statistics and only printed when the rounded value of \Qx{complete} is
+bigger than 16.  After writing stats the values are
+cleared.\footnote{%
+  Cf.~footnote \ref{reset-dirreq}, page \pageref{reset-dirreq}.  }
+
+\subsubsection{Circuit metrics}\label{circ}
+
+\Qx{cell-*} data is derived from circuits%
+\footnote{See \Qx{circuit_t} in
+  \cite[\Qx{src/or/or.h}:2943-3084]{torgit}.} %
+held in-memory for normal operation.  The values are derived at report
+time and statistics for disposed circuits are stored at the time of
+their disposal. %
+After assembling the data, which will be written to the
+\Qx{buffer-stats} file, the data structure used is reset. %
+
+\subsubsection{Onion services metrics}\label{hidserv}
+
+Onion services metrics are reported mainly in two fields:
+\Qx{hidserv-rend-relayed-cells} and \Qx{hidserv-dir-onions-seen}. %
+\Qx{hidserv-rend-relayed-cells} reports the approximate number of
+relay cells seen in either direction on a circuit after receiving and
+successfully processing a rendezvous cell. %
+The original measurement value is obfuscated only for reporting%
+\footnote{\label{dirspec212}%
+  Cf.~\cite[section 2.1.2 of dir-spec.txt]{torspec}} %
+and stored in-memory as part of the \Qx{hs_stat_t} structure without
+binning or obfuscation.%
+\footnote{Cf.~\cite[\Qx{src/or/rephist.c}:3002-3009]{torgit}} %
+\Qx{hidserv-dir-onions-seen} reports the approximate number of unique
+onion-service identities seen in descriptors published to and accepted
+by this onion-service directory. %
+The original measurement value is obfuscated only for reporting,%
+\footnote{See footnote \ref{dirspec212}, page \pageref{dirspec212}.} %
+whereas this value is derived from the \Qx{hs_stat_t} structure,
+which contains a clear list of digests of the onion services' public
+keys. %
+The in-memory struct is reset after creating the report string for the
+statistics file \Qx{hidserv-stats}.%
+\footnote{%
+  Reported metrics of onion services are binned and obfuscated using
+  the Laplace distribution.  The exact parameters are defined in
+  \cite[\Qx{src/or/rephist.c}:3112-3133]{torgit}.} %
+
+\subsubsection{Exit traffic metrics}\label{exit}
+The fields \Qx{exit-streams-opened}, \Qx{exit-kibibytes-written},
+\Qx{exit-kibibytes-read} contain information about exit traffic. %
+Data for all three fields are kept in arrays.%
+\footnote{Cf.~\cite[\Qx{src/or/rephist.c}:2067-2072]{torgit}} %
+The exact values for all ports are stored in-memory. %
+The reported number of opened exit streams to a port is rounded up to
+the nearest multiple of four, the other two values are rounded to the
+next $1024$ bytes.%
+\footnote{%
+  The calculation of the reported values is performed in
+  \Qx{rep_hist_format_exit_stats}
+  \cite[\Qx{src/or/rephist.c}:2120-2269]{torgit} } %
+All in-memory counters are erased after computing the metrics.%
+\footnote{ Cf.~\cite[\Qx{src/or/rephist.c:2291}]{torgit}.} %
+
+\subsubsection{Connection metrics} \label{conn} %
+The \Qx{conn-bi-direct} line is filled from simple counters.%
+\footnote{Cf.~function \Qx{rep_hist_format_conn_stats}
+  \cite[\Qx{src/or/rephist.c:2903-2922}]{torgit}.  } %
+The data reported is used for network and relay related statistics,
+which are provided by MetricsWeb as one of the
+\href{https://metrics.torproject.org/connbidirect.html}%
+{performance related graphs}. %
+The counters are reset immediately after statistics computation
+independent of write success.%
+\footnote{Cf.~function \Qx{rep_hist_conn_stats_write} line $2942$
+  \cite[\Qx{src/or/rephist.c:2928-2952}]{torgit}.  } %
+
+\subsubsection{Unused extra-info descriptor fields} \label{unused} %
+
+The data of the following extra-info descriptor fields are currently
+not used anywhere in Tor Metrics: %
+\begin{itemize}
+\item all \Qx{cell-*} fields,
+\item all \Qx{exit-*} fields,
+\item \Qx{dirreq-v3-direct-dl} and \Qx{dirreq-v3-tunneled-dl}.
+\end{itemize}
+
+It might be a premature decision to simply stop reporting these
+unused values in extra-info descriptors, because the reason for not
+using them could be lack of awareness that they are reported or a lack
+of resources to put them to use. %
+For example, the values from exit-node related fields, i.e.,
+\Qx{exit-*}, could be used to address questions related to exit data,
+which are asked in research (e.g.~in \cite{privcount} cf.~\ref{priv}),
+and to introduce new statistics and graphs in MetricsWeb as well as
+making aggregate data sets available. %
+On the other hand, concerns were raised that providing the
+\Qx{exit-*} statistics would enable attacks that could uniquely
+identify the applications used by clients or fingerprint unusual
+ports etc.%
+\footnote{These were brought to Tor Metrics attention by Rob Jansen
+  who addressed the topic in
+  \href{https://lists.torproject.org/pipermail/metrics-team/2016-January/000057.html}%
+  {Tor Metrics' mailing list}: %
+  ``Tor is classifying its traffic into ports, which could uniquely
+  identify the application being used by the client. They also track
+  bandwidth usage per port (and per exit); again, this is bad for
+  those using a random or unique looking ports (that a given exit does
+  not see very often) because it could be used to create a
+  fingerprint. Intersection attacks become easier with this
+  information.'' %
+
+  \Qx{cell-*} statistics are perceived as less critical, but still: %
+  ``This provides queue timings and number of cells being processed at
+  a relay. The number of cells can be used to compute bandwidth of
+  circuits. It may be possible to launch some attacks that create
+  several circuits with the intent of moving which decile buckets some
+  legitimate circuits get placed into, but this is less worrisome of
+  an attack than the others.'' %
+} %
+In general, ending the collection of currently unused data should be
+considered carefully and not hastened. %
+The future assessment of all Metrics' data will be the right project
+to address the question of whether to keep or drop the collection of
+currently unused metrics. %
+
+\subsubsection{Other data}
+This section concentrates on data gathered or written for other
+purposes than filling an extra-info descriptor. %
+
+\subsubsection*{Stats Heartbeat}
+
+The function \Qx{log_heartbeat}%
+\footnote{\cite[src/or/status.c:91-165]{torgit}} performs some checks
+to determine the state of the running relay/bridge, but also logs some
+statistics about client connections. %
+\Qx{log_heartbeat} is one of the periodic event callbacks.%
+\footnote{\cite[\Qx{src/or/main.c:1193-1220}]{torgit}} %
+Function \Qx{format_client_stats_heartbeat}%
+\footnote{\cite[\Qx{src/or/geoip.c:1457-1488}]{torgit}} computes the
+exact number of different client connections for the last six hours
+using \Qx{client_history} unless turned off or set to a different
+interval in property \Qx{HeartbeatPeriod}. %
+In addition, the number of bytes written and read by the Tor server
+process is logged.%
+\footnote{Cf.~\cite[\Qx{src/or/main.c:159-162}]{torgit}.} %
+
+\subsubsection*{Logging}\label{debuglog}
+The debug level logs client data in addition to operational data. 
+\begin{description}
+\item[\Qx{geoip_note_client_seen}] logs the client seen with the
+  transport used in debug mode.%
+  \footnote{See line 582 in \Qx{geoip_note_client_seen} (cf.~footnote
+    \ref{noteclient}, page \pageref{noteclient}).}
+\item[\Qx{geoip_get_transport_history}] logs the true total number of
+  clients and the true numbers for each transport type in debug
+  level.\footnote{Cf.~\cite[\Qx{src/or/geoip.c:863,885,900}]{torgit}.}
+\item[\Qx{rep_hist_note_exit_bytes}] logs for each port the true number
+  of bytes read and written in debug mode.%
+  \footnote{Cf.~\cite[\Qx{src/or/rephist.c:2313}]{torgit}.}
+\item[\Qx{rep_hist_note_exit_stream_opened}] logs the port to which an
+  exit stream was opened in debug mode.%
+  \footnote{Cf.~\cite[\Qx{src/or/rephist.c:2325}]{torgit}.}
+\end{description}
+
+
+\section{Possible privacy issues} \label{privacy-im}%
+
+Tor servers configured to keep statistics and report extra-info
+descriptors%
+\footnote{Reporting of extra-info descriptors can be turned off or
+  limited via configuration. %
+  It is assumed that reporting and logging options are enabled, i.e.,
+  Tor server options like \Qx{BridgeRecordUsageByCountry}, the various
+  \Qx{*Statistics} etc.~are set to $1$.} %
+have a reporting interval of 24 hours. %
+The following types of data are held in-memory up to this interval or
+even longer depending on the type of data and time of collection. %
+\begin{itemize}
+\item client IPs from various types of contacts to a server, i.e.,
+  contacts to bridges, to entry relays, to directory
+  mirrors; %
+\item public key digests of onion services and cell counts
+  (cf.~\ref{hidserv});
+\item bandwidth used generally and bandwidth consumed for serving
+  directories (cf.~\ref{bw});
+\item exit traffic stream count as well as exit bytes written and read
+  (cf.~\ref{exit}).
+\end{itemize}
+
+The most critical data in the above list are client IPs and related
+information.%
+\footnote{\label{jansen}%
+  These were also mentioned as most critical by Rob Jansen in
+  his mail to
+  \href{https://lists.torproject.org/pipermail/metrics-team/2016-January/000057.html}%
+  {Tor Metrics' mailing list}: %
+
+  ``[unique ips per country code]
+  *-ips (there are many of these, e.g. "entry-ips")
+  Usually this involves storing individual user IP addresses in
+  memory (in order to track uniqueness) over some period of time
+  (usually 24 hours), sometimes for longer than the user would have
+  otherwise been known to Tor (if a user's session is 1 hour, Tor
+  could remember the IP for at most 23 additional hours). This is
+  reported, e.g., per entry; there are many cases in the data where it
+  is very likely that only one user is connecting to a guard from a
+  given country (because it is rounded up to 8). Users in small
+  countries have the greatest risk (intersection attacks become really
+  easy).'' %
+} %
+
+The following extra-info fields depend on code and in-memory
+structures used for storing the client IPs: %
+\begin{itemize}
+\item Provided by bridges:
+  \begin{itemize}
+  \item unique client count by country of origin for every contact
+    in field \Qx{bridge-ips},
+  \item IP version in \Qx{bridge-ip-versions}, and
+  \item transport used in \Qx{bridge-ip-transports}.
+  \end{itemize}
+\item Relays and bridges report: %
+  \begin{itemize}
+  \item unique client count by country of origin for directory
+    requests \Qx{dirreq-v3-ips} for successful responses.
+  \end{itemize}
+\item Entry guards report \Qx{entry-ips}, i.e., the unique client count by
+  country of origin for every contact. %
+\end{itemize}
+
+Some of these fields, namely \Qx{dirreq-v3-ips} and \Qx{entry-ips},
+are currently not used further up in the Tor Metrics data processing
+chain, but others support vital client statistics about the Tor
+network.%
+\footnote{%
+  The bridge client count estimates are built on \Qx{bridge-ips},
+  \Qx{bridge-ip-versions}, \Qx{bridge-ip-transports}.} %
+Section \ref{mitigate} explores the options for keeping these
+statistics and reducing or even avoiding the in-memory storage of
+lists of IP addresses of Tor clients. %
+
+The client data sets of MetricsWeb and the visualizations based on
+them occasionally cause questions about privacy implications of small
+client counts per country or per country and transport. %
+Section \ref{small} gives some examples and provides information about
+client counts per country and other parameters. %
+These concerns are raised for tables and graphs at the aggregated data
+level, but the underlying data is tightly connected to the IP
+addresses collected in-memory. %
+Thus, it makes sense to also address this privacy issue in the current
+report, which is done in section \ref{obfuscation}. %
+
+\subsection{Small client counts} \label{small}%
+Small countries usually have very tiny Tor client counts; examples for
+Antarctica and Vatican City are shown in figures \ref{antarctica} (page
+\pageref{antarctica}) and \ref{vatican} (page \pageref{vatican}). %
+
+\begin{figure}[!h]
+  \centering
+  \begin{subfigure}[b]{0.45\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{userstats-relay-country-aq-2016-01-01-2017-01-01-off.png}
+    \caption{Users connecting to relays.}
+    \label{antarctica-relay}
+  \end{subfigure}
+  %  
+  \begin{subfigure}[b]{0.45\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{userstats-bridge-country-aq-2016-01-01-2017-01-01.png}
+    \caption{User(s) connecting to bridge(s).}
+    \label{antarctica-bridge}
+  \end{subfigure}
+  % 
+  \begin{subfigure}[b]{0.45\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{userstats-bridge-combined-aq-2016-01-01-2017-01-01.png}
+    \caption{User(s) by transport.}
+    \label{antarctica-combi}
+  \end{subfigure}
+  \caption{Antarctica Tor usage 2016.  MetricsWeb is the source of
+    all graphs (see table \ref{urls}).  }
+  \label{antarctica}
+\end{figure}
+
+\begin{figure}[!h]
+  \centering
+  \begin{subfigure}[b]{0.45\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{userstats-relay-country-va-2016-01-01-2017-01-01-off.png}
+    \caption{Users connecting to relays.}
+    \label{vatican-relay}
+  \end{subfigure}
+%  
+  \begin{subfigure}[b]{0.45\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{userstats-bridge-country-va-2016-01-01-2017-01-01.png}
+    \caption{User(s) connecting to bridge(s).}
+    \label{vatican-bridge}
+  \end{subfigure}
+  % 
+  \begin{subfigure}[b]{0.45\textwidth}
+    \centering
+    \includegraphics[width=\textwidth]{userstats-bridge-combined-va-2016-01-01-2017-01-01.png}
+    \caption{User(s) by transport.}
+    \label{vatican-combi}
+  \end{subfigure}
+  \caption{Vatican City Tor usage 2016.  MetricsWeb is the source of
+    all graphs (see table \ref{urls}).}
+  \label{vatican}
+\end{figure}
+Counts of clients from Antarctica directly connecting to the Tor
+network during the year $2016$ are graphed in figure
+\ref{antarctica-relay}, the even smaller count of bridge users from
+Antarctica in 2016 in figure \ref{antarctica-bridge}, and figure
+\ref{antarctica-combi} breaks the bridge connection down into the type
+of transport used. %
+The Tor client counts during $2016$ for Vatican City are shown
+analogously in figures \ref{vatican-relay}, \ref{vatican-bridge}, and
+\ref{vatican-combi}. %
+Most notably, bridge users from Antarctica (see figure
+\ref{antarctica-combi}) and Vatican City (see figure
+\ref{vatican-combi}) seem to be all distinguished by the type of
+transport they use. %
+
+These two are not even the most extreme examples in terms of client
+counts: Vatican City has a median of 13 users in $2016$ and
+Antarctica a median of 8. %
+For $2016$ there are 25 countries with a median user number less than
+ten. %
+Table \ref{median-low} (page \pageref{median-low}) shows the count of
+countries with less than $m$ median users per day in $2016$, and as
+contrast table \ref{median-high} lists the count of countries with
+median user numbers starting at $1000$. %
+
+These small counts of distinguishable subsets of Tor clients look
+problematic concerning privacy.%
+\footnote{%
+Also cf.~footnote \ref{jansen}, page \pageref{jansen}. %
+} %
+
+\section{Mitigate privacy impact}\label{mitigate} %
+
+The following sections take a look at various
+techniques/mechanisms/systems to reduce privacy impact, ranging from
+privacy aware counting in \ref{elaborate} through using Tor external
+data gathering systems in \ref{priv} to exploring the options of simply
+avoiding the collection of problematic data in~\ref{implchange}. %
+
+Many of the techniques and measurements listed in the following
+sections are far from being implemented and would need extensive work
+to be useful in practice. %
+Hence, the following should be read as a description of what might be
+possible and not as what will be implemented in the near future. %
+A more concrete list of what could be implemented in the nearer future
+is given in sections \ref{conclusion} and \ref{summary}. %
+
+\subsection{Counting, surveys, sketches}\label{elaborate}%
+
+Counting of unique items is na\"{\i}vely done by keeping a unique list
+of these items. %
+For finding an approximate count of unique items this could be avoided
+trading in accuracy of the resulting metrics. %
+The following sections discuss mechanisms for counting without keeping
+all items in-memory. %
+
+\subsubsection{Probabilistic counting}\label{count} 
+Estimating the count of unique items, e.g., connecting clients,
+without storing all items registered during the measurement interval
+could be solved by probabilistic counting as proposed in
+\cite{fm85}. %
+Without any additional randomization this would give a part of the
+clients additional privacy by plausible deniability depending on the
+used hash function\footnote{But it cannot prevent the identification
+  of certain IPs with high probability (for example, cf.~\cite[section
+  4.1.1]{hambolu14} or \cite[section 2.2]{ts11}).}  and certainly
+provide another barrier for an attacker to determine client IPs from
+the data held in-memory. %
+Compared to the current scenario this could provide a gain in privacy
+for the IP counting task. %
+In addition, error estimates and efficiency of the probabilistic
+counting method are known (see \cite{fm85}) and would provide a basis
+for computing the aggregate statistics from the individual reports. %
+
+The steps necessary for deploying such a solution require extensive
+effort: for the actual implementation the hash function used and size
+of sketches as well as the accuracy of the count estimate need to be
+chosen. %
+The intended accuracy leads to the decision between using the simple
+algorithm or the algorithm with stochastic averaging. %
+The metrics derived might need to be adjusted depending on the now
+available error estimates. %
+
+\subsubsection{Privacy preserving surveys}\label{survey}
+Clients connecting to a Tor server could be viewed as entities taking
+a survey. %
+A recent approach with even differential privacy guarantees\footnote{
+  See \cite[section 3]{epk14} for definition and proofs of their
+  differential privacy claims for RAPPOR.  } is the method proposed in
+\cite[RAPPOR -- Randomized Aggregatable Privacy-Preserving Ordinal
+Response]{epk14}. %
+RAPPOR is based on client side generation of noisy sketches and a
+machine learning approach for evaluating these sketches to calculate
+estimates for the statistics of interest. %
+Clients need to compute an initial noisy sketch from their data, which
+is called permanent randomized response, and use this permanent
+response to produce an again obfuscated sketch, the instantaneous
+response, as actual report. %
+The instantaneous response sketch would have to be part of all those
+connections made by the client that are used for statistics, e.g., it
+would need to be added to a directory request. %
+
+In total, the changes necessary for implementing a protocol like
+RAPPOR are extensive: changes to the client code, the Tor server code,
+the communication protocol, and the final processing for deriving the
+wanted estimates. %
+A survey setting trusting client generated data sketches would also
+open room for spam or manipulation of the metrics taken. %
+
+\subsection{Metrics systems proposed by Tor related research} 
+\label{priv} %
+With the progress of privacy research during the last years metrics
+systems for collecting network data in a privacy conscious manner were
+proposed. %
+Two systems explicitly targeting metrics collection from the Tor
+network, which not only provide the design and privacy assessment of
+their system, but also make the code-base from their respective proof
+of concept and reference implementations freely available, are
+PrivEx~\cite{privex} and PrivCount~\cite{privcount}. %
+It is out of the scope of this report to suggest or discuss any
+replacements or additional metrics systems for the current file based
+Tor Metrics system. %
+Still, looking closely at PrivEx and PrivCount provides valuable
+insight about what they deem potentially privacy endangering data and
+what data of interest might not yet be available through Tor
+Metrics. %
+
+PrivEx \cite{privex} proposes a metrics system running separately from
+Tor instances and introduces its own network of various types of
+server instances. %
+The data processed is retrieved from adapted Tor server instances via
+the controller protocol, which is extended for PrivEx purposes. %
+
+PrivCount builds on one collection scheme introduced by PrivEx and
+extends its collection ability as well as some operational
+properties. %
+The data collecting instances of the PrivCount network also use the
+controller protocol, i.e., an extended version of the currently
+implemented protocol, to retrieve the data of interest from the Tor
+server they are collecting from. %
+
+The main purpose of PrivEx' reference implementation is the
+combination of in- and out-going traffic of the Tor network. %
+In particular, it identifies the number of connections made from Tor
+clients to possibly censored web-addresses, which gives an estimate
+about Tor usage for censorship circumvention. %
+
+PrivCount focusses on entry and exit statistics. %
+This comprises client counts at the entry nodes, which are collected
+via the extended controller protocol and not based on the Tor server
+internal client IP list, and various metrics for traffic exiting the
+network. %
+PrivCount's exit statistics are concerned with streams exiting via
+certain ports and the influence of exit policies on exit traffic.%
+\footnote{%
+  The authors of \cite{privcount} don't address why the data provided
+  in the various extra-info descriptor fields \Qx{exit-*} is
+  insufficient or how the data overlaps. %
+} %
+
+In general, an externally operated metrics system is quite expensive
+to maintain compared to the current Tor Metrics system. %
+Furthermore, newly implemented controller events for retrieving data
+could be also a data source for an attacker, if not properly secured
+by the server operator. %
+It would require additional operation of metrics server instances,
+additional maintenance of the code-base, and additional processes to
+integrate the new data sources into the existing ones. %
+In addition, the privacy properties of such a system and the security of
+its implementation would be more difficult for external parties to
+assess than those of the current descriptor based Tor Metrics system. %
+
+\subsection{Mitigating implementation changes}%
+\label{implchange} %
+
+The following measures are directly derived from source code analyses
+of both the metrics related Tor server code and the core Tor Metrics
+code for data aggregation and client count estimation. %
+They are generally concerned with avoiding data gathering and reducing
+the availability of sensitive data via other channels like logging or
+controller events. %
+
+\subsubsection{Reduce duration of in-memory data retention}%
+\label{retention} %
+
+Tor servers configured to report statistics keep client IP addresses
+and associated information in-memory for at least one measurement
+interval of 24 hours. %
+Unfortunately, the current code retains these IPs and related
+information for even up to two such measurement intervals (in case of
+bridges), because the old data originating from the previous interval
+is only released before writing statistics about the current
+measurement interval. %
+Erasing data immediately after computing statistics would more than
+halve the retention time. %
+
+\subsubsection{Avoid problematic logging and controller events}%
+\label{problemlog} %
+
+Some of the possibly harmful data held in-memory for providing metrics
+is currently also used for logging and responding to controller
+clients. %
+
+The controller protocol is defined in \cite[control-spec.txt]{torspec}
+and allows triggering of the heartbeat log message (\cite[section 3.7,
+control-spec.txt]{torspec}). %
+Another request defined in \cite[sections 3.9 and 4.1.14,
+control-spec.txt]{torspec} allows receiving information from bridges
+about recent client connections.%
+\footnote{In particular this is the clients-seen event, which is used 
+by nyx \url{arm.torproject.org}.}
+The replies contain complete counts by country and transport (also see
+\Qx{geoip_bridge_stats_write}). %
+
+Using the option \Qx{HeartbeatPeriod} a Tor server can be configured
+to write a recurring log message, which serves the purpose of
+informing the operator that the server is still running and
+working.  %
+The minimal reporting interval is $30$ minutes and the statement
+logged contains the exact number of different client connections for
+the last six hours. %
+In addition, the heartbeat log message can be triggered (without any
+time constraints) by a controller client signal. %
+
+Additional logging of collected data, e.g., client counts per
+transport, exit port opened and exit bytes read/written, takes place
+in debug mode (cf.~\ref{debuglog}). %
+
+In order to improve Tor client privacy these functionalities ought to
+be changed to only report data unrelated to client IPs and only about
+time intervals equal to or longer than the chosen reporting intervals for
+extra-info descriptors. %
+
+\subsubsection{Replace problematic data sources} \label{avoid} %
+Client IPs are currently only kept in-memory for deriving estimates of
+bridge client counts where at the same time the estimates for direct
+Tor client counts are derived from counts of successful directory
+requests taking multiple requests into account as these occur usually
+as a constant factor for each client. %
+There is no reason why the estimations should differ, and the IP lists
+in-memory would become obsolete if the same estimation method were
+applied to bridge client counts.%
+\footnote{%
+  The question about removing the map and corresponding measurements
+  from the code that hold client IP addresses was raised a while ago,
+  for details see Tor Bugtracker \cite[ticket \#15469]{tortrac}.  } %
+This would cause the estimates to be even more comparable and also
+reduce configuration and simplify the metrics related code of
+Tor. %
+
+Such a removal would affect the following extra-info descriptor
+fields: %
+\begin{itemize}
+\item \Qx{dirreq-v3-ips},
+\item \Qx{entry-ips},
+\item \Qx{bridge-ips},
+\item \Qx{bridge-ip-versions}, and
+\item \Qx{bridge-ip-transports}.
+\end{itemize} %
+As all extra-info descriptor fields regarding entries and bridges are
+concerned, the two fields \Qx{bridge-stats-end} and
+\Qx{entry-stats-end} would lose their meaning and could also be
+omitted. %
+
+The fields \Qx{dirreq-v3-ips} and \Qx{entry-ips} are currently not
+used for any statistics or data sets provided by Tor Metrics and could
+be dropped. %
+
+All other fields from above are the basis for bridge client count
+estimates.%
+\footnote{For details see section \ref{cstat}.  %
+  The directly connecting client count is entirely based on
+  \Qx{dirreq-v3-reqs}, which is not derived from a clientmap
+  structure.  } %
+The field \Qx{dirreq-v3-reqs} is also available in extra-info
+descriptors uploaded by bridges%
+\footnote{%
+  In $2016$ roughly $80\%$ of bridge extra-info descriptors that
+  provided \Qx{bridge-ips} also contained \Qx{dirreq-v3-reqs}. %
+} %
+and could be used for clients by country count for replacing
+\Qx{bridge-ips}. %
+The fields \Qx{bridge-ip-versions} and \Qx{bridge-ip-transports} are
+used to estimate fractions of the client counts that have their origin
+in a certain country or use a certain IP version. %
+These could be filled by counting countries and versions of the
+occurring requests registered in \Qx{dirreq-v3-reqs}, of course the
+corresponding aggregated statistics and estimates need to be
+adapted. %
+All fields mentioned above could be dropped and two new fields for
+both relay and bridge extra-info descriptors need to be added; in
+particular, \Qx{dirreq-v3-versions} and \Qx{dirreq-v3-transports}.%
+\footnote{For consistent naming it might be useful to change the field
+  name \Qx{dirreq-v3-reqs} to \Qx{dirreq-v3-countries}.} %
+This would lead to fewer fields in extra-info descriptors, increased
+privacy, and provide more comparable estimates for relays and
+bridges. %
+A more detailed description and analysis of the included processing
+changes for generating estimates is given in section \ref{detail}. %
+
+\subsubsection{Obfuscate stored and reported data}%
+\label{obfuscation} %
+
+Client counts per country can be very low on a server basis, e.g.,
+roughly $80\%$ of counts reported in extra-info descriptors for the
+three biggest Tor user groups (de, ru, us) only report the lowest
+count possible. %
+Raising the available threshold constants for reporting total client
+counts and client counts by country%
+\footnote{ As defined in
+  \cite[\Qx{src/or/geoip.c:658-667}]{torgit}.} %
+cannot be used as a mitigation measure as it also would render most of
+the client count estimates useless. %
+Instead of using thresholds a white list could be introduced that
+lists all countries for which the count should be recorded. %
+Only countries on the white list would be added to the counting array
+and all others would be obfuscated by summing them under \Qx{other}. %
+The list itself could be provided in an easily parsable text format
+added to Tor server source code. %
+
+There are two ways to choose countries for the white list: either by
+population size or by Tor usage based on Tor client count
+statistics. %
+A choice by population count at a threshold of $2,000,000$ would lead
+to a list of $147$ white listed countries.%
+\footnote{%
+  According to the World Factbook
+  \cite[\href{https://www.cia.gov/library/publications/the-world-factbook/rankorder/2119rank.html}%
+  {countries by population size} and
+  \href{https://www.cia.gov/library/publications/the-world-factbook/rankorder/rawdata_2119.txt}%
+  {raw data}]{cia}. %
+} %
+Using the Tor usage approach a cut-off at a daily mean of $1000$ Tor
+clients would generate a list of $97$ countries based on data from
+$2016$ (cf.~table \ref{median-high}). %
+
+Either choice of generating the white-list would need to be
+re-adjusted yearly or more often, which would also cause additional
+maintenance work. %
+The second approach would be more difficult to adjust, because once a
+white-listing mechanism is introduced the data for adjusting won't be
+available anymore from Tor Metrics statistics and would need to be
+generated by other means. %
+
+It should also be evaluated, if client directory responses (field
+\Qx{dirreq-v3-resp}) and the client count related fields proposed in
+section \ref{implchange} (client counts by country
+\Qx{dirreq-v3-countries}, version \Qx{dirreq-v3-versions} and
+transport \Qx{dirreq-v3-transports}) even when not based on in-memory
+client IP lists should be obfuscated. %
+In order to obtain obfuscation for both the in-memory counts and the
+reported results noise addition at counter initialization seems to be
+an efficient measure at first glance. %
+For onion service statistics Tor Metrics implemented the generation of
+Laplace noise,%
+\footnote{\cite[proposals/238-hs-relay-stats.txt]{torspec} and
+  \cite{tr201501001,tr201504001}} %
+which could be applied in the current scenario and fosters code reuse
+of critical parts like the Laplace noise generation. %
+
+But, a simulation applying noise to collected data and processing the
+resulting data further for use in MetricsWeb showed that the
+additional noise would render the existing statistics very
+inaccurate.%
+\footnote{A closer look at the involved statistics: %
+  The current estimations for user counts by country rely on a sum of
+  reported data. %
+  In the sketched obfuscation scenario this sum would also contain a
+  sum $W_n$ of Laplace random values, where $n$ is the number of
+  reported values for the particular country. %
+  The standard deviation of $W_n$ depends on the obfuscation
+  parameters and on $\sqrt n$. %
+  A daily median (mean) of reports from relays is around $1300$ (mean:
+  $2200$) and $550$ (mean $150$) for bridges. %
+  Such values are not tolerable in the current estimation process and
+  obfuscation should only be introduced with additional measures to
+  keep the existing accuracy.  } %
+Thus, it is advisable to conduct further research and wait for the
+already planned assessment for both privacy and statistical accuracy
+throughout the data-verse of Tor Metrics. %
+
+\subsection{Conclusion} \label{conclusion}
+
+Integrating counting systems or parts thereof as discussed in section
+\ref{elaborate} would require extensive design and implementation work
+for changes of the current Tor source code and also for the
+aggregating and estimation code further up in the Tor Metrics
+processing chain. %
+
+Applying the measurement systems outlined in section \ref{priv} in Tor
+Metrics would mean a step toward using a second totally different and
+more costly manner of measuring Tor. %
+If introduced in addition to the current file based system
+(cf.~\ref{overview}) the cost of operation would be very high and the
+actual problem of in-memory retention is not addressed as the Priv*
+systems use Tor's internal accounting of client connections and other
+measured data. %
+
+The changes necessary for the third approach in \ref{avoid} affect
+both the Tor server and Tor Metrics code bases in very clear ways,
+which consist mostly in code reduction, streamlining, and using
+different fields of already parsed extra-info descriptors. %
+Thus, it seems to be the most feasible answer for improving privacy
+in the current Tor server code. %
+The other measures listed in section \ref{implchange} would easily fit
+into the changes necessary for applying \ref{avoid} or be obsolete
+with the introduction of these changes. %
+
+The details and various steps of avoiding and reducing data collection
+are given in section~\ref{detail}.%
+
+\section{Impact of implementation changes} \label{detail} %
+
+Section \ref{avoid} sketches a solution for avoiding the in-memory
+storage of client IP for client count metrics by replacing the source
+of vital estimates. %
+First the actual methods for client count estimation are discussed in
+\ref{cstat}. %
+Based on this the changes necessary are detailed in
+\ref{metrics-change} before identifying the changes to the Tor server
+code and the possible side effects in \ref{changes}. %
+
+\subsection{Client related estimates} \label{cstat} %
+
+The current method of estimating client numbers was introduced in
+$2013$ for both bridge and relay clients%
+\footnote{See ticket \cite[\#8462]{tortrac} and related code
+  \url{https://gitweb.torproject.org/metrics-web.git/log/?qt=grep\&q=8462}.
+  The code was integrated into Tor Metrics code during $2015$.}  %
+to replace an estimation method based on the number of unique IP
+addresses making connections to Tor servers. %
+The daily estimate uses values taken from extra-info descriptors, in
+particular the count of daily directory responses (respectively
+requests) and the number of bytes written delivering the directory
+data.%
+\footnote{Values taken from extra-info descriptor fields
+  \Qx{dirreq-v3-reqs}, \Qx{dirreq-v3-resp} and
+  \Qx{dirreq-write-history}.} %
+According to \cite{tr201210001} it suffices to estimate the total
+number of directory requests to bridges and relays, from which the
+client count is calculated directly. %
+
+The data from extra-info descriptors used for bridge related estimates
+is also available for relays. %
+Thus, it seems natural to apply the same formula for estimating relay
+client numbers. %
+Looking at the code the implementations differ for bridge and relay
+clients. %
+For relay clients the code deviates from the estimation method
+explained in \cite{tr201210001} and uses request counts per country.%
+\footnote{Listed in descriptor field \Qx{dirreq-v3-reqs}. %
+  The relevant code can be found in \cite[from line 91 of
+  \Qx{modules/clients/src/org/torproject/metrics/clients/Main.java}]{mwgit}. %
+} %
+The raw data is taken from extra-info descriptor field
+\Qx{dirreq-v3-reqs} and used to fill clients by country counts as well
+as the entire count of clients for this relay. %
+
+Bridge client counts are implemented as suggested in
+\cite{tr201210001} and are estimated from directory request responses
+as well as contact IP counts,%
+\footnote{\cite[starting at line 230 of
+  \Qx{modules/clients/src/org/torproject/metrics/clients/Main.java}]{mwgit}
+} %
+which are derived from \Qx{dirreq-v3-resp}. %
+The total value of client contacts is taken from descriptor field
+\Qx{dirreq-v3-resp} (the successful responses) and counts of
+connections from different countries are derived from the field
+\Qx{bridge-ips}. %
+The fractions for \Qx{version} use descriptor field
+\Qx{bridge-ip-versions} and \Qx{transport} is derived from
+\Qx{bridge-ip-transports}. %
+The bridge client number estimates per country build on the estimate
+for the number of total clients and derive the client numbers by
+applying the fraction per country estimated from the number of
+connections made by country, i.e., \Qx{bridge-ips}. %
+The current method of estimating caused unlikely results for the
+number of bridge clients by country. %
+The discussion and analysis of these problematic results%
+\footnote{A discussion via Tor Bugtracker \cite[ticket
+  \#18167]{tortrac} began a year ago considering the usage of various
+  fields for bridge client per country estimation, i.e.,
+  \Qx{bridge-ips} vs.~\Qx{dirreq-v3-reqs}. %
+
+  Most bridges report \Qx{dirreq-v3-reqs} already, for $2016$ almost
+  $90\%$ of bridges reporting \Qx{bridge-ips} also provided the field
+  \Qx{dirreq-v3-reqs} in their extra-info descriptor. %
+} %
+suggests that switching to a calculation of bridge client count
+estimates that uses the same extra-info descriptor fields as direct
+client count estimates would even improve the estimate. %
+
+\subsection{Data changes} \label{data-change} %
+
+The change proposed in \ref{avoid} would result in dropping the fields
+\Qx{bridge-stats-end}, \Qx{bridge-ips}, \Qx{bridge-ip-versions},
+\Qx{bridge-ip-transports}, \Qx{entry-ips}, \Qx{entry-stats-end}, and
+\Qx{dirreq-v3-ips} from extra-info descriptors. %
+Two additional fields \Qx{dirreq-v3-transports} and
+\Qx{dirreq-v3-versions} need to be added in order to keep the current
+Tor Metrics statistics about client counts.%
+\footnote{%
+  The naming is chosen along the current naming scheme that includes
+  the string \Qx{v3}. %
+  It might be useful to drop this string from all of the \Qx{dirreq-*}
+  descriptor fields. %
+} %
+
+
+\subsection{Metrics changes} \label{metrics-change} %
+
+As explained in \ref{cstat} the client count estimates for relays are
+already independent of descriptor fields that are to be dropped. %
+Using the same estimation approach for bridges would lead to more
+comparable and even more accurate results (cf.~\ref{cstat}). %
+The necessary code changes for MetricsWeb would result in unified
+processing of the two extra-info descriptor types. %
+The changes necessary for metrics-lib/DescripTor would in general
+result in providing the two new methods for the additional fields, but
+are free of changes to the parsing logic. %
+
+Another affected code base of Tor Metrics would be Onionoo, which uses
+the fields \Qx{bridge-ips}, \Qx{bridge-ip-versions}, and
+\Qx{bridge-ip-transports} for providing additional information in
+bridge \emph{client documents}.%
+\footnote{%
+  See section \emph{Bridge clients objects} of the Onionoo protocol
+  definition \url{https://onionoo.torproject.org/#clients} and Onionoo
+  source code \cite[class
+  \Qx{org.torproject.onionoo.updater.ClientStatusUpdater}]{ogit}. %
+} %
+The relevant Onionoo protocol fields depending on \Qx{bridge-*}
+descriptor fields are still in \emph{beta} stage and could either be
+removed or simply be filled from the new fields available, which is a
+minor code change.%
+\footnote{%
+  This has to take into account that the new fields would contain multiple
+  counts per day and client and would therefore need to be adjusted by the
+  factor used for the total client count estimation. } %
+
+\subsection{Tor server changes and side effects} \label{changes} %
+
+Describing the code changes requires making choices; the choice made
+here is to describe the maximal code reduction possible. %
+Of course, there is room to alter the proposed changes and still reach
+the intended goal. %
+
+The following gives a terse overview of the code changes necessary for
+Tor server according to section \ref{data-change}. %
+Also provided are possible Tor server configuration simplifications,
+and side effects or changes regarding logging and controller
+functionality. %
+
+\subsubsection{Server changes}
+With the changes to extra-info descriptors proposed in
+section~\ref{data-change}, the Tor server options
+\begin{itemize}
+\item \Qx{BridgeRecordUsageByCountry} and 
+\item \Qx{EntryStatistics}
+\end{itemize}
+could be omitted and replaced by option \Qx{DirReqStatistics}, which
+could be used for all types of servers alike. %
+
+The alterations to the metrics-providing code in Tor servers would
+mostly be code removal. %
+The description follows the process of collection as described in
+section~\ref{server-proc} in order to cover all affected places in the
+server code. %
+
+The \Qx{record_bridge_stats_callback}%
+\footnote{ \cite[\Qx{src/or/main.c}:1752-1777]{torgit} } %
+could be omitted entirely together with the following functions:
+\begin{itemize}
+\item \Qx{geoip_bridge_stats_init}, %
+\item \Qx{geoip_bridge_stats_write}, %
+\item \Qx{geoip_get_transport_history}, %
+\item \Qx{geoip_get_client_history}, and %
+\item \Qx{geoip_format_bridge_stats} (also see \ref{control-change} for
+  controller related changes). %
+\end{itemize}
+The second main metrics callback \Qx{write_stats_file_callback} would
+be kept, but shortened to not provide entry statistics anymore. %
+The affected functions would be: %
+\begin{itemize}
+\item \Qx{geoip_entry_stats_write},
+\item \Qx{geoip_format_entry_stats},
+\item \Qx{geoip_reset_entry_stats}.
+\end{itemize}
+Further affected functions are those for handling \Qx{clientmap}s:
+\begin{itemize}
+\item \Qx{geoip_remove_old_clients}, 
+\item \Qx{remove_old_client_helper_},
+\item \Qx{geoip_get_client_history}.
+\end{itemize}
+In order to record versions and transports for bridges, lists of new
+structs \Qx{geoip_version_t} and \Qx{geoip_transport_t}, similar to
+\Qx{geoip_country_t}, would need to be defined.%
+\footnote{\cite[\Qx{src/or/geoip.c}:55-59]{torgit}} %
+The function \Qx{geoip_note_client_seen} would need to be adapted to
+fill the new structures for recording client data. %
+In addition, the code for handling client IP statistics would need to
+be removed and the code for filling the new lists of
+\Qx{geoip_version_t} and \Qx{geoip_transport_t} would need to be
+added. %
+
+Changes would also be necessary for \Qx{geoip_dirreq_stats_write},
+which is called from \Qx{write_stats_file_callback}. %
+This function would need to be adapted to omit writing the dropped
+descriptor fields and add the new descriptor fields derived from the
+above mentioned structures. %
+Any calls to \Qx{geoip_note_client_seen} with action
+\Qx{GEOIP_CLIENT_CONNECT} could also be removed. %
+
+\subsubsection{Affected controller events} \label{control-change}
+
+Once \Qx{clientmap} structures and related code are removed from the
+Tor server code, the controller code also needs to be changed. %
+The functions \Qx{format_bridge_stats_controller} and
+\Qx{control_event_clients_seen} would either need to be removed or
+adapted to the new structures for recording the counts. %
+
+Another affected controller function is
+\Qx{format_client_stats_heartbeat}, which would need to be adapted to
+not report the client counts by country anymore. %
+
+\section{Summary} \label{summary} %
+
+The previous sections of this report describe the Tor Metrics
+processing chain and the data provided by Tor Metrics with the aim to
+identify several ways to improve privacy regarding data held in-memory
+for clients of the Tor network. %
+Possible mitigation measures are surveyed, and the most feasible
+approach is detailed in section~\ref{detail}. %
+
+Many of the discussed improvements generate a workload for several
+future projects and some also need further research. %
+Nevertheless, a recommendation for a list of first changes can be
+derived: %
+
+\begin{itemize}
+\item Replace the current server internal counting mechanism in order
+  to avoid holding client IPs in-memory. %
+  This leads to the immediate privacy improvement of not keeping
+  client IPs in-memory for statistical purposes. 
+\item Use \Qx{dirreq-v3-reqs} for client count estimation (for both
+  bridges and relays, as suggested in \ref{avoid}). %
+  This would keep the statistics on client count as accurate as before
+  without relying on client IP lists. %
+\item Base the new fields \Qx{dirreq-v3-versions} and
+  \Qx{dirreq-v3-transports} on the current counting mechanism used for
+  \Qx{dirreq-v3-reqs}. %
+  This would also keep the statistics based on client count as
+  accurate as before without relying on client IP lists. %
+\item Remove controller protocol parts that rely on the old client
+  count mechanism. %
+  This would avoid reporting privacy impacting data to the control
+  port.
+\item Remove unnecessary logging of vital data or tie the logging to
+  test-mode for avoiding privacy impacting data in logs. 
+\end{itemize}
+
+These changes have a clearly defined scope and would result in privacy
+improvement. %
+Identifying immediate changes for implementation and defining future
+changes for metrics collection is based on the following steps: %
+\begin{itemize}
+\item Distill a change proposal for the Tor server changes chosen to
+  be implemented.
+\item Provide several Tor server patches for the changes identified
+  above.
+\item Provide patches for the necessary adaptations in the Tor Metrics
+  processing chain.
+\item Assess privacy questions as raised in this report and
+  statistical accuracy throughout the data-verse of Tor Metrics. %
+  Also assess the introduction of more obfuscation measures for
+  various client counts without impacting estimation accuracy. %
+  In addition, the removal of unused data fields from extra-info
+  descriptors (as identified in section \ref{unused}) should be
+  addressed and evaluated. %
+\end{itemize}
+
+The assessment listed in the last item above, which is planned to
+start in the second half of $2017$, is in part a consequence of this
+report. %
+
+\pagebreak
+
+\appendix
+\section{Appendix} %
+\subsection{Tables}
+\begin{table}[!h]
+\begin{subtable}{0.45 \textwidth }
+  \begin{tabular}{|c||c|c|c|c|c|
+}\hline
+    $m$ &  10 & 50 & 130 & 210 & 340 \\\hline
+    $C$ &  25 & 52 &  87 & 103 & 116 \\\hline
+  \end{tabular}
+  \caption{Count $C=|\{\mathrm{median}(c) < m\}|$ of countries with median
+    of daily users in $2016$ less than the given limit $m$.}
+  \label{median-low}
+\end{subtable}\quad
+\begin{subtable}{0.45 \textwidth }
+  \begin{tabular}{|c||c|c|c|c|c|c|c|}\hline
+    $m$ in $10^3$ &  1 &  5 & 10 & 50 & 100 & 200 & 300 \\\hline
+    $C$           & 97 & 51 & 26 & 5 &   4 &   2 &   1 \\\hline
+  \end{tabular}
+  \caption{Count $C=|\{\mathrm{median}(c) > m\}|$ of countries with median
+    of daily users in $2016$ higher than the given limit $m$ (in thousands).}
+  \label{median-high}
+\end{subtable}
+\caption{Count of countries with median of daily users in $2016$. 
+ There are roughly $250$ countries, and $433.5$ is the median of the median
+  daily client count of all countries in $2016$.}
+\end{table}
+\begin{table}[!h]
+  \begin{tabular}{|r|l|}\hline
+    Figure & Source \\\hline
+    \ref{antarctica-relay} & {\tiny \url{https://metrics.torproject.org/userstats-relay-country.html?start=2016-01-01&end=2016-12-31&country=aq}}\\
+    \ref{antarctica-bridge} & {\tiny \url{https://metrics.torproject.org/userstats-bridge-country.html?start=2016-01-01&end=2016-12-31&country=aq}}\\
+    \ref{antarctica-combi} & {\tiny \url{https://metrics.torproject.org/userstats-bridge-combined.html?start=2016-01-01&end=2016-12-31&country=aq}}\\\hline
+    \ref{vatican-relay} & {\tiny \url{https://metrics.torproject.org/userstats-relay-country.html?start=2016-01-01&end=2016-12-31&country=va}}\\
+    \ref{vatican-bridge} & {\tiny \url{https://metrics.torproject.org/userstats-bridge-country.html?start=2016-01-01&end=2016-12-31&country=va}}\\
+    \ref{vatican-combi} & {\tiny \url{https://metrics.torproject.org/userstats-bridge-combined.html?start=2016-01-01&end=2016-12-31&country=va}}\\\hline
+  \end{tabular}
+  \caption{Graph source URLs.}
+  \label{urls}
+\end{table}
+\pagebreak
+\subsection{Extra-info descriptor examples}\label{descriptors} %
+\subsubsection{Bridge extra-info descriptor}\label{bei} %
+\begin{Verbatim}[gobble=0,fontsize=\fontsize{3mm}{1mm}]
+@type bridge-extra-info 1.3
+extra-info Unnamed EF93668E48BD4F8DB9DF6D4CFCBF1A7BB5EC7CC2
+master-key-ed25519 a3febLYkK9UmKf4PDhrw/cTefN1l5X0LsAt7BqdcrLM
+published 2017-03-01 17:14:17
+write-history 2017-03-01 14:10:28 (14400 s) 2253824,1248256,1308672,489472,592896,300560384
+read-history 2017-03-01 14:10:28 (14400 s) 6366208,5633024,7112704,4847616,6200320,306330624
+dirreq-write-history 2017-03-01 14:10:28 (14400 s) 1581056,683008,673792,36864,33792,662528
+dirreq-read-history 2017-03-01 14:10:28 (14400 s) 56320,9216,4096,4096,2048,2048
+geoip-db-digest C14DF5AE94101562DEACDD296278B0EFA3EA26E5
+geoip6-db-digest A88A828020A558D37F97CF683D4521270F0511A2
+dirreq-stats-end 2017-03-01 15:01:19 (86400 s)
+dirreq-v3-ips in=8,mx=8,ru=8
+dirreq-v3-reqs in=8,mx=8,ru=8
+dirreq-v3-resp ok=8,not-enough-sigs=0,unavailable=0,not-found=0,not-modified=0,busy=0
+dirreq-v3-direct-dl complete=0,timeout=0,running=0
+dirreq-v3-tunneled-dl complete=8,timeout=0,running=0
+transport scramblesuit
+transport obfs3
+transport obfs4
+transport fte
+bridge-stats-end 2017-03-01 15:03:06 (86400 s)
+bridge-ips in=8,ir=8,mx=8,ru=8
+bridge-ip-versions v4=8,v6=0
+bridge-ip-transports obfs3=8,obfs4=8,scramblesuit=8
+router-digest-sha256 50hLT2H4vDO42C/fRWIgV5j3CTldi+ZMPyY3V0IYQSE
+router-digest 76BC2C857FDBED685085B16E3852799EF81A7B86
+\end{Verbatim}
+
+\subsubsection{Relay extra-info descriptor}\label{rei} %
+
+\begin{Verbatim}[gobble=0,fontsize=\fontsize{3mm}{1mm}]
+@type extra-info 1.0
+extra-info Pounet27TorRelay EFE68EB2D54E657B5BBF4EB18627646F8DCF66C9
+published 2016-12-04 13:01:45
+write-history 2016-12-04 10:18:03 (14400 s) 57720832,70514688,199539712,...
+read-history 2016-12-04 10:18:03 (14400 s) 64663552,74992640,199556096,498191360,...
+dirreq-write-history 2016-12-04 10:18:03 (14400 s) 2048,652288,1426432,937984,...
+dirreq-read-history 2016-12-04 10:18:03 (14400 s) 4096,13312,23552,263168,24576,7168
+geoip-db-digest C1EB5237F2FBAF63381D8551157F13D12EFCCA25
+geoip6-db-digest 1F99B6B0EC78E9DB34D61AE7E0FC261D558E8E5D
+dirreq-stats-end 2016-12-03 13:24:35 (86400 s)
+dirreq-v3-ips de=8,ua=8
+dirreq-v3-reqs de=8,ua=8
+dirreq-v3-resp ok=8,not-enough-sigs=0,unavailable=0,not-found=0,not-modified=0,busy=0
+dirreq-v3-direct-dl complete=0,timeout=0,running=0
+dirreq-v3-tunneled-dl complete=4,timeout=8,running=0
+hidserv-stats-end 2016-12-03 18:35:50 (86400 s)
+hidserv-rend-relayed-cells 2876020 delta_f=2048 epsilon=0.30 bin_size=1024
+hidserv-dir-onions-seen 254 delta_f=8 epsilon=0.30 bin_size=8
+entry-stats-end 2016-12-03 18:35:50 (86400 s)
+entry-ips us=1064,it=504,fr=472,de=456,es=408,br=224,ru=216,jp=208,pl=192,gb=128,ar=120,th=104,ua=104,nl=88,ca=80,in=80,bg=72,se=72,at=56,mx=56,gr=48,tw=48,au=40,be=40,ch=40,cz=40,id=40,ro=40,sa=40,co=32,pt=32,ve=32,ae=24,cl=24,eg=24,hu=24,il=24,ma=24,my=24,ng=24,pe=24,za=24,dk=16,dz=16,ec=16,hk=16,hr=16,ie=16,lt=16,lv=16,ph=16,pk=16,rs=16,sg=16,sk=16,sn=16,tn=16,tr=16,vn=16,??=8,al=8,am=8,ao=8,az=8,ba=8,bd=8,bf=8,bh=8,bj=8,bn=8,bo=8,by=8,cd=8,ci=8,cm=8,cn=8,cr=8,cy=8,do=8,ee=8,fi=8,ge=8,gh=8,gp=8,gt=8,gu=8,hn=8,iq=8,ir=8,is=8,jm=8,jo=8,ke=8,kh=8,kr=8,kz=8,la=8,lb=8,lk=8,lr=8,lu=8,ly=8,md=8,mg=8,mk=8,mr=8,mt=8,mu=8,ni=8,no=8,np=8,nz=8,om=8,pa=8,pf=8,pr=8,ps=8,py=8,qa=8,re=8,sc=8,si=8,sv=8,sy=8,tg=8,tt=8,uy=8,xk=8,ye=8
+cell-stats-end 2017-01-30 18:35:50 (86400 s)
+cell-processed-cells 5430,23,10,8,7,4,4,3,2,1
+cell-queued-cells 0.38,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
+cell-time-in-queue 56,0,0,0,0,0,0,0,0,3
+cell-circuits-per-decile 15573
+conn-bi-direct 2016-12-03 18:35:50 (86400 s) 1417304,46267,48669,100569
+router-sig-ed25519 pTwQjRcWzRYJyyhIdfcLia2vhVpn0GgRth7+IpNbyvnATzs5UjQv6v72WSNg8mwg9RzdOpDd+zMQrf5clUnEDA
+router-signature
+-----BEGIN SIGNATURE-----
+M7Ru2Lfaul9AUcmfZ6VFeOkc5kfOmlkQmbescB0aBAYFr0YaC+qbVZKhPEEvNB8d
+s6TBjpW5zWmqnDyLNI8klOFtt1Nm0k76Vfb/0Cx5jfiTx0ViyXC0zC0VBG1jmUkX
+FxMvXwC049xv2JVXvUupe83xt/13OIgDV0Z8kWYR64g=
+-----END SIGNATURE-----
+\end{Verbatim}
+\pagebreak
+\bibliography{references}%
+\cite{fm85,epk14,hambolu14,ts11} were recommended by Nick Mathewson 
+  (cf.~\href{https://trac.torproject.org/projects/tor/ticket/15469}
+  {Tor bug tracker ticket \#15469}, last accessed 2017-04-05).
+
+\end{document}
diff --git a/2017/metrics-privacy/references.bib b/2017/metrics-privacy/references.bib
new file mode 100644
index 0000000..71a4eba
--- /dev/null
+++ b/2017/metrics-privacy/references.bib
@@ -0,0 +1,153 @@
+@misc{torspec,
+  author = {Roger Dingledine and Nick Mathewson},
+  title = {Tor Protocol Specification},
+  howpublished = {\url{https://gitweb.torproject.org/torspec.git/}},
+  note = {{\small Commit 8eee5024f66d4816d63b341550c01ba4ab059bfc}}
+}
+
+@misc{mwgit,
+  author = {The Tor Project},
+  title = {Metrics Web Source Code},
+  howpublished = {\url{https://gitweb.torproject.org/metrics-web.git/tree}},
+  note = {{\small Commit 8bf149b0a89227c56e97a228b2558cacfcecc158}}
+}
+
+@misc{ogit,
+  author = {The Tor Project},
+  title = {Onionoo source code},
+  howpublished = {\url{https://gitweb.torproject.org/onionoo.git/tree}},
+  note = {{\small Commit 5b219203b8781b27518133ad7d76e636e82d7fe5}}
+}
+
+@misc{torgit,
+  author = {The Tor Project},
+  title = {Tor Source Code},
+  howpublished = {\url{https://gitweb.torproject.org/tor.git/tree}},
+  note = {{\small Commit a3ce303432f35a6f06f63f0679b9bb577f88dc3c}}
+}
+
+@misc{cia,
+ author= {{Washington, DC: Central Intelligence Agency}},
+ title={The World Factbook 2013-14},
+ year={2013},
+ howpublished={\url{https://www.cia.gov/library/publications/the-world-factbook/index.html}},
+ note = {Accessed 2017-04-24}
+}
+
+@article{fm85,
+ author = {Flajolet, Philippe and Martin, G. Nigel},
+ title = {Probabilistic Counting Algorithms for Data Base Applications},
+ journal = {J. Comput. Syst. Sci.},
+ issue_date = {September 1985},
+ volume = {31},
+ number = {2},
+ year = {1985},
+ issn = {0022-0000},
+ pages = {182--209},
+ numpages = {28},
+ url = {http://dx.doi.org/10.1016/0022-0000(85)90041-8},
+ doi = {10.1016/0022-0000(85)90041-8},
+ acmid = {5215},
+ publisher = {Academic Press},
+ address = {Orlando, FL, USA},
+}
+
+@inproceedings{epk14,
+ title={RAPPOR: Randomized Aggregatable Privacy-Preserving Ordinal Response},
+ author={Erlingsson, {\'U}lfar and Pihur, Vasyl and Korolova, Aleksandra},
+ booktitle={Proceedings of the 2014 ACM SIGSAC Conference on Computer and Communications Security},
+ pages={1054--1067},
+ year={2014},
+ organization={ACM}
+}
+
+@mastersthesis{hambolu14,
+ title={Privacy Preserving Statistics},
+ author={Oluwakemi Hambolu},
+ year={2014},
+ school={Clemson University, South Carolina, USA},
+ type={{Master of Science in Computer Engineering}}
+}
+
+@conference{ts11,
+ author = "Florian Tschorsch and Bj{\"o}rn Scheuermann",
+ title = "Distributed Privacy-Aware User Counting",
+ year = 2011,
+ booktitle = "HotPETs '11: 4th Workshop on Hot Topics in Privacy Enhancing Technologies",
+ url = "https://petsymposium.org/2011/papers/hotpets11-final5Tschorsch.pdf"
+}
+
+@techreport{tr201501001,
+  author = {David Goulet and Aaron Johnson and George Kadianakis and Karsten Loesing},
+  title = {Extrapolating network totals from hidden-service statistics},
+  institution = {The Tor Project},
+  number = {2015-01-001},
+  year = {2015},
+  url = {https://research.torproject.org/techreports/extrapolating-hidserv-stats-2015-01-31.pdf}
+}
+
+@techreport{tr201210001,
+  author = {Karsten Loesing},
+  title = {Counting daily bridge users},
+  institution = {The Tor Project},
+  number = {2012-10-001},
+  year = {2012},
+  url = {https://research.torproject.org/techreports/counting-daily-bridge-users-2012-10-24.pdf}
+}
+
+@techreport{tr201504001,
+  author = {George Kadianakis and Karsten Loesing},
+  title = {Hidden-service statistics reported by relays},
+  institution = {The Tor Project},
+  number = {2015-04-001},
+  year = {2015},
+  url = {https://research.torproject.org/techreports/hidden-service-stats-2015-04-28.pdf},
+}
+
+@techreport{tr200908001,
+  author = {Karsten Loesing},
+  title = {Analysis of Circuit Queues in Tor},
+  institution = {The Tor Project},
+  number = {2009-08-001},
+  year = {2009},
+  url = {https://research.torproject.org/techreports/bufferstats-2009-08-25.pdf},
+}
+
+@techreport{tr201109001,
+ author={George Danezis},
+ number={2011-09-001},
+ institution = {The Tor Project},
+ title={An anomaly-based censorship-detection system for Tor},
+ year={2011},
+ url={https://research.torproject.org/techreports/detector-2011-09-09.pdf},
+}
+
+@misc{tortrac,
+  author = {{The Tor Project}},
+  title = {{Tor Bugtracker}},
+  howpublished = {\url{https://trac.torproject.org/}},
+}
+
+@inproceedings{privcount,
+  title = {Safely Measuring Tor}, 
+  author = {Rob Jansen and Aaron Johnson}, 
+  booktitle = {Proceedings of the 23rd ACM Conference on Computer and Communications Security (CCS '16)}, 
+  pages= {1553--1567},
+  doi = {10.1145/2976749.2978310},
+  year = {2016}, 
+  month = {October}
+}
+
+@inproceedings{privex,
+ author = {Elahi, Tariq and Danezis, George and Goldberg, Ian},
+ title = {{PrivEx:} Private Collection of Traffic Statistics for Anonymous Communication Networks},
+ booktitle = {Proceedings of the 2014 ACM SIGSAC Conference on Computer and Communications Security},
+ series = {CCS '14},
+ year = {2014},
+ isbn = {978-1-4503-2957-6},
+ location = {Scottsdale, Arizona, USA},
+ pages = {1068--1079},
+ numpages = {12},
+ url = {http://doi.acm.org/10.1145/2660267.2660280},
+ doi = {10.1145/2660267.2660280},
+}
diff --git a/2017/metrics-privacy/tortechrep.cls b/2017/metrics-privacy/tortechrep.cls
new file mode 120000
index 0000000..4c24db2
--- /dev/null
+++ b/2017/metrics-privacy/tortechrep.cls
@@ -0,0 +1 @@
+../../tortechrep.cls
\ No newline at end of file





More information about the tor-commits mailing list