[tor-commits] [onionoo/master] Add new document with per-bridge usage statistics.

karsten at torproject.org karsten at torproject.org
Tue Mar 11 07:53:28 UTC 2014


commit 985f9bf7a5b1301ef1db6217906e2808f01c0dc7
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Thu Mar 6 16:46:10 2014 +0100

    Add new document with per-bridge usage statistics.
    
    Implements #10331.
---
 etc/web.xml.template                              |    4 +
 src/org/torproject/onionoo/ClientsDataWriter.java |  633 +++++++++++++++++++++
 src/org/torproject/onionoo/ClientsDocument.java   |    8 +
 src/org/torproject/onionoo/ClientsStatus.java     |    5 +
 src/org/torproject/onionoo/DocumentStore.java     |   20 +-
 src/org/torproject/onionoo/Main.java              |    4 +-
 src/org/torproject/onionoo/ResourceServlet.java   |    2 +
 src/org/torproject/onionoo/ResponseBuilder.java   |   38 +-
 web/index.html                                    |  236 ++++++++
 9 files changed, 946 insertions(+), 4 deletions(-)

diff --git a/etc/web.xml.template b/etc/web.xml.template
index 25314aa..53a1878 100644
--- a/etc/web.xml.template
+++ b/etc/web.xml.template
@@ -36,6 +36,10 @@
     <servlet-name>Resource</servlet-name>
     <url-pattern>/weights</url-pattern>
   </servlet-mapping>
+  <servlet-mapping>
+    <servlet-name>Resource</servlet-name>
+    <url-pattern>/clients</url-pattern>
+  </servlet-mapping>
 
 </web-app>
 
diff --git a/src/org/torproject/onionoo/ClientsDataWriter.java b/src/org/torproject/onionoo/ClientsDataWriter.java
new file mode 100644
index 0000000..9e868a4
--- /dev/null
+++ b/src/org/torproject/onionoo/ClientsDataWriter.java
@@ -0,0 +1,633 @@
+/* Copyright 2014 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.onionoo;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+
+/*
+ * Example extra-info descriptor used as input:
+ *
+ * extra-info ndnop2 DE6397A047ABE5F78B4C87AF725047831B221AAB
+ * dirreq-stats-end 2014-02-16 16:42:11 (86400 s)
+ * dirreq-v3-resp ok=856,not-enough-sigs=0,unavailable=0,not-found=0,
+ *   not-modified=40,busy=0
+ * bridge-stats-end 2014-02-16 16:42:17 (86400 s)
+ * bridge-ips ??=8,in=8,se=8
+ * bridge-ip-versions v4=8,v6=0
+ *
+ * Clients status file produced as intermediate output:
+ *
+ * 2014-02-15 16:42:11 2014-02-16 00:00:00
+ *   259.042 in=86.347,se=86.347  v4=259.042
+ * 2014-02-16 00:00:00 2014-02-16 16:42:11
+ *   592.958 in=197.653,se=197.653  v4=592.958
+ *
+ * Clients document file produced as output:
+ *
+ * "1_month":{
+ *   "first":"2014-02-03 12:00:00",
+ *   "last":"2014-02-28 12:00:00",
+ *   "interval":86400,
+ *   "factor":0.139049349,
+ *   "count":26,
+ *   "values":[371,354,349,374,432,null,485,458,493,536,null,null,524,576,
+ *             607,622,null,635,null,566,774,999,945,690,656,681],
+ *   "countries":{"cn":0.0192,"in":0.1768,"ir":0.2487,"ru":0.0104,
+ *                "se":0.1698,"sy":0.0325,"us":0.0406},
+ *   "transports":{"obfs2":0.4581},
+ *   "versions":{"v4":1.0000}}
+ */
+public class ClientsDataWriter implements DataWriter, DescriptorListener {
+
+  private static class ResponseHistory
+      implements Comparable<ResponseHistory> {
+    private long startMillis;
+    private long endMillis;
+    private double totalResponses;
+    private SortedMap<String, Double> responsesByCountry;
+    private SortedMap<String, Double> responsesByTransport;
+    private SortedMap<String, Double> responsesByVersion;
+    /* Creates a history entry from the given interval bounds, total
+     * response count, and per-category response maps.  The maps are
+     * stored by reference, not copied. */
+    private ResponseHistory(long startMillis, long endMillis,
+        double totalResponses,
+        SortedMap<String, Double> responsesByCountry,
+        SortedMap<String, Double> responsesByTransport,
+        SortedMap<String, Double> responsesByVersion) {
+      this.startMillis = startMillis;
+      this.endMillis = endMillis;
+      this.totalResponses = totalResponses;
+      this.responsesByCountry = responsesByCountry;
+      this.responsesByTransport = responsesByTransport;
+      this.responsesByVersion = responsesByVersion;
+    }
+    /* Parses one status-file line consisting of eight space-separated
+     * fields: start date, start time, end date, end time, total
+     * responses, and the per-country, per-transport, and per-version
+     * response lists.  Returns null if the line does not have eight
+     * fields, if a timestamp or number cannot be parsed, or if the
+     * interval does not start before it ends. */
+    public static ResponseHistory fromString(
+        String responseHistoryString) {
+      String[] fields = responseHistoryString.split(" ", 8);
+      if (fields.length != 8) {
+        return null;
+      }
+      SimpleDateFormat parser = new SimpleDateFormat(
+          "yyyy-MM-dd HH:mm:ss");
+      parser.setLenient(false);
+      parser.setTimeZone(TimeZone.getTimeZone("UTC"));
+      long intervalStart, intervalEnd;
+      try {
+        intervalStart = parser.parse(fields[0] + " " + fields[1]).
+            getTime();
+        intervalEnd = parser.parse(fields[2] + " " + fields[3]).
+            getTime();
+      } catch (ParseException e) {
+        return null;
+      }
+      if (intervalStart >= intervalEnd) {
+        return null;
+      }
+      double total;
+      try {
+        total = Double.parseDouble(fields[4]);
+      } catch (NumberFormatException e) {
+        return null;
+      }
+      SortedMap<String, Double> byCountry = parseResponses(fields[5]);
+      SortedMap<String, Double> byTransport = parseResponses(fields[6]);
+      SortedMap<String, Double> byVersion = parseResponses(fields[7]);
+      boolean allParsed = byCountry != null && byTransport != null
+          && byVersion != null;
+      return allParsed ? new ResponseHistory(intervalStart, intervalEnd,
+          total, byCountry, byTransport, byVersion) : null;
+    }
+    /* Parses a comma-separated list of key=value pairs with decimal
+     * values into a sorted map.  An empty string yields an empty map;
+     * a pair without exactly one "=" or with an unparseable value
+     * makes the whole result null. */
+    private static SortedMap<String, Double> parseResponses(
+        String responsesString) {
+      SortedMap<String, Double> parsed = new TreeMap<String, Double>();
+      if (responsesString.length() == 0) {
+        return parsed;
+      }
+      String[] pairs = responsesString.split(",");
+      for (int i = 0; i < pairs.length; i++) {
+        String[] keyAndValue = pairs[i].split("=");
+        if (keyAndValue.length != 2) {
+          return null;
+        }
+        double parsedValue;
+        try {
+          parsedValue = Double.parseDouble(keyAndValue[1]);
+        } catch (NumberFormatException e) {
+          return null;
+        }
+        parsed.put(keyAndValue[0], parsedValue);
+      }
+      return parsed;
+    }
+    /* Formats this entry as one status-file line that fromString() can
+     * read back: start and end timestamps in UTC, total responses, and
+     * the three per-category lists, separated by single spaces.
+     * Numbers are formatted with Locale.US regardless of the default
+     * locale, because a locale with a decimal comma would produce
+     * values like "259,042" that collide with the "," pair separator
+     * and cannot be parsed by Double.parseDouble(). */
+    public String toString() {
+      StringBuilder sb = new StringBuilder();
+      SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+          "yyyy-MM-dd HH:mm:ss");
+      dateTimeFormat.setLenient(false);
+      dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+      sb.append(dateTimeFormat.format(startMillis));
+      sb.append(" " + dateTimeFormat.format(endMillis));
+      sb.append(" " + String.format(Locale.US, "%.3f",
+          this.totalResponses));
+      this.appendResponses(sb, this.responsesByCountry);
+      this.appendResponses(sb, this.responsesByTransport);
+      this.appendResponses(sb, this.responsesByVersion);
+      return sb.toString();
+    }
+    /* Appends a space followed by the given responses as comma-separated
+     * key=value pairs with three decimal places.  An empty map appends
+     * only the space, which keeps the field count of the line fixed. */
+    private void appendResponses(StringBuilder sb,
+        SortedMap<String, Double> responses) {
+      sb.append(" ");
+      int written = 0;
+      for (Map.Entry<String, Double> e : responses.entrySet()) {
+        sb.append((written++ > 0 ? "," : "") + e.getKey() + "="
+            + String.format(Locale.US, "%.3f", e.getValue()));
+      }
+    }
+    /* Merges another entry into this one: the interval is widened to
+     * cover both entries, and the other entry's total and per-category
+     * responses are added to this entry's. */
+    public void addResponses(ResponseHistory other) {
+      this.startMillis = Math.min(this.startMillis, other.startMillis);
+      this.endMillis = Math.max(this.endMillis, other.endMillis);
+      this.totalResponses += other.totalResponses;
+      this.addResponsesByCategory(this.responsesByCountry,
+          other.responsesByCountry);
+      this.addResponsesByCategory(this.responsesByTransport,
+          other.responsesByTransport);
+      this.addResponsesByCategory(this.responsesByVersion,
+          other.responsesByVersion);
+    }
+    /* Adds every value in otherResponses to the value stored under the
+     * same key in thisResponses, inserting keys that are not yet
+     * present. */
+    private void addResponsesByCategory(
+        SortedMap<String, Double> thisResponses,
+        SortedMap<String, Double> otherResponses) {
+      for (Map.Entry<String, Double> added : otherResponses.entrySet()) {
+        String category = added.getKey();
+        Double merged = added.getValue();
+        if (thisResponses.containsKey(category)) {
+          merged = thisResponses.get(category) + merged;
+        }
+        thisResponses.put(category, merged);
+      }
+    }
+    /* Orders history entries by the start of their interval only. */
+    public int compareTo(ResponseHistory other) {
+      if (this.startMillis < other.startMillis) {
+        return -1;
+      }
+      if (this.startMillis > other.startMillis) {
+        return 1;
+      }
+      return 0;
+    }
+    /* Two entries are equal if their intervals start at the same time,
+     * which matches the ordering defined by compareTo(). */
+    public boolean equals(Object other) {
+      return other instanceof ResponseHistory &&
+          this.startMillis == ((ResponseHistory) other).startMillis;
+    }
+    /* Hashes the interval start, the only field equals() looks at, so
+     * that equal entries have equal hash codes.  The start can change
+     * in addResponses(), so an entry must not be merged while it is
+     * held in a hash-based collection. */
+    public int hashCode() {
+      return (int) (this.startMillis ^ (this.startMillis >>> 32));
+    }
+  }
+
+  private DescriptorSource descriptorSource;
+
+  private DocumentStore documentStore;
+
+  private long now;
+
+  /* Stores the descriptor source and document store, reads the current
+   * time once from the given Time instance, and registers this writer
+   * as a descriptor listener. */
+  public ClientsDataWriter(DescriptorSource descriptorSource,
+      DocumentStore documentStore, Time time) {
+    this.descriptorSource = descriptorSource;
+    this.documentStore = documentStore;
+    this.now = time.currentTimeMillis();
+    this.registerDescriptorListeners();
+  }
+
+  /* Registers this writer with the descriptor source for bridge
+   * extra-info descriptors, the only descriptor type it processes. */
+  private void registerDescriptorListeners() {
+    this.descriptorSource.registerListener(this,
+        DescriptorType.BRIDGE_EXTRA_INFOS);
+  }
+
+  /* Descriptor callback: forwards extra-info descriptors that are not
+   * flagged as relay descriptors to the bridge statistics processing
+   * and ignores everything else. */
+  public void processDescriptor(Descriptor descriptor, boolean relay) {
+    if (relay || !(descriptor instanceof ExtraInfoDescriptor)) {
+      return;
+    }
+    ExtraInfoDescriptor extraInfo = (ExtraInfoDescriptor) descriptor;
+    this.processBridgeExtraInfoDescriptor(extraInfo);
+  }
+
+  private static final long ONE_HOUR_MILLIS = 60L * 60L * 1000L,
+      ONE_DAY_MILLIS = 24L * ONE_HOUR_MILLIS;
+
+  private SortedMap<String, SortedSet<ResponseHistory>> newResponses =
+      new TreeMap<String, SortedSet<ResponseHistory>>();
+
+  /* Turns the directory-request statistics of one bridge extra-info
+   * descriptor into up to two history entries, split at the start of
+   * the UTC day in which the statistics interval ends, and queues them
+   * under the descriptor's upper-cased fingerprint.  Each part gets a
+   * share of the "ok" responses (minus 4) proportional to its length,
+   * broken down by country, transport, and IP version.  Descriptors
+   * without a 24-hour dirreq-stats interval, without an "ok" response
+   * count, or with fewer than 4 "ok" responses are ignored. */
+  private void processBridgeExtraInfoDescriptor(
+      ExtraInfoDescriptor descriptor) {
+    long statsEndMillis = descriptor.getDirreqStatsEndMillis();
+    long statsLengthMillis =
+        descriptor.getDirreqStatsIntervalLength() * 1000L;
+    SortedMap<String, Integer> v3Responses = descriptor.getDirreqV3Resp();
+    if (statsEndMillis < 0L || statsLengthMillis != ONE_DAY_MILLIS) {
+      return;
+    }
+    if (v3Responses == null || !v3Responses.containsKey("ok")) {
+      return;
+    }
+    double okResponses = (double) (v3Responses.get("ok") - 4);
+    if (okResponses < 0.0) {
+      return;
+    }
+    String hashedFingerprint = descriptor.getFingerprint().toUpperCase();
+    long utcBreakMillis = (statsEndMillis / ONE_DAY_MILLIS)
+        * ONE_DAY_MILLIS;
+    /* First part ends at the UTC day break, second part starts there. */
+    long[] partStarts = new long[] {
+        statsEndMillis - statsLengthMillis, utcBreakMillis };
+    long[] partEnds = new long[] { utcBreakMillis, statsEndMillis };
+    for (int part = 0; part < partStarts.length; part++) {
+      long startMillis = partStarts[part];
+      long endMillis = partEnds[part];
+      if (startMillis >= endMillis) {
+        continue;
+      }
+      double totalResponses = okResponses
+          * ((double) (endMillis - startMillis))
+          / ((double) ONE_DAY_MILLIS);
+      ResponseHistory entry = new ResponseHistory(
+          startMillis, endMillis, totalResponses,
+          this.weightResponsesWithUniqueIps(totalResponses,
+          descriptor.getBridgeIps(), "??"),
+          this.weightResponsesWithUniqueIps(totalResponses,
+          descriptor.getBridgeIpTransports(), "<??>"),
+          this.weightResponsesWithUniqueIps(totalResponses,
+          descriptor.getBridgeIpVersions(), ""));
+      SortedSet<ResponseHistory> queued =
+          this.newResponses.get(hashedFingerprint);
+      if (queued == null) {
+        queued = new TreeSet<ResponseHistory>();
+        this.newResponses.put(hashedFingerprint, queued);
+      }
+      queued.add(entry);
+    }
+  }
+
+  /* Distributes totalResponses over the keys of uniqueIps in proportion
+   * to each key's unique IP count minus 4.  Keys with a count of 4 or
+   * less are left out; the key equal to omitString still counts towards
+   * the denominator but gets no entry of its own.  Returns an empty map
+   * if uniqueIps is null or no count exceeds 4.
+   * NOTE(review): subtracting 4 presumably compensates for reported
+   * counts being rounded up -- confirm against the descriptor spec. */
+  private SortedMap<String, Double> weightResponsesWithUniqueIps(
+      double totalResponses, SortedMap<String, Integer> uniqueIps,
+      String omitString) {
+    SortedMap<String, Double> weighted = new TreeMap<String, Double>();
+    if (uniqueIps == null) {
+      return weighted;
+    }
+    int adjustedIpsSum = 0;
+    for (int ips : uniqueIps.values()) {
+      if (ips > 4) {
+        adjustedIpsSum += ips - 4;
+      }
+    }
+    if (adjustedIpsSum <= 0) {
+      return weighted;
+    }
+    for (Map.Entry<String, Integer> counted : uniqueIps.entrySet()) {
+      int ips = counted.getValue();
+      if (ips <= 4 || counted.getKey().equals(omitString)) {
+        continue;
+      }
+      weighted.put(counted.getKey(),
+          (((double) (ips - 4)) * totalResponses)
+          / ((double) adjustedIpsSum));
+    }
+    return weighted;
+  }
+
+  /* For every bridge with newly processed responses: reads the stored
+   * history, adds the new intervals that fit, compresses the result,
+   * and writes it back as clients status file. */
+  public void updateStatuses() {
+    for (String hashedFingerprint : this.newResponses.keySet()) {
+      SortedSet<ResponseHistory> stored =
+          this.readHistory(hashedFingerprint);
+      this.addToHistory(stored,
+          this.newResponses.get(hashedFingerprint));
+      this.writeHistory(hashedFingerprint,
+          this.compressHistory(stored));
+    }
+    Logger.printStatusTime("Updated clients status files");
+  }
+
+  /* Reads the clients status file of the given bridge and returns its
+   * lines as history entries sorted by interval start.  Lines that
+   * cannot be parsed are reported on stderr and skipped; a missing
+   * status file yields an empty history. */
+  private SortedSet<ResponseHistory> readHistory(
+      String hashedFingerprint) {
+    SortedSet<ResponseHistory> entries = new TreeSet<ResponseHistory>();
+    ClientsStatus stored = this.documentStore.retrieve(
+        ClientsStatus.class, false, hashedFingerprint);
+    if (stored == null) {
+      return entries;
+    }
+    Scanner lines = new Scanner(stored.documentString);
+    while (lines.hasNextLine()) {
+      String line = lines.nextLine();
+      ResponseHistory entry = ResponseHistory.fromString(line);
+      if (entry == null) {
+        System.err.println("Could not parse clients history line '"
+            + line + "' for fingerprint '" + hashedFingerprint
+            + "'.  Skipping.");
+        continue;
+      }
+      entries.add(entry);
+    }
+    lines.close();
+    return entries;
+  }
+
+  /* Adds each new interval to the history unless it overlaps with an
+   * entry that is already there: the closest entry starting earlier
+   * must end no later than the new interval starts, and the closest
+   * entry starting at the same time or later must start no earlier
+   * than the new interval ends. */
+  private void addToHistory(SortedSet<ResponseHistory> history,
+      SortedSet<ResponseHistory> newIntervals) {
+    for (ResponseHistory candidate : newIntervals) {
+      SortedSet<ResponseHistory> startingEarlier =
+          history.headSet(candidate);
+      SortedSet<ResponseHistory> startingSameOrLater =
+          history.tailSet(candidate);
+      boolean freeBefore = startingEarlier.isEmpty()
+          || startingEarlier.last().endMillis <= candidate.startMillis;
+      boolean freeAfter = startingSameOrLater.isEmpty()
+          || startingSameOrLater.first().startMillis
+          >= candidate.endMillis;
+      if (freeBefore && freeAfter) {
+        history.add(candidate);
+      }
+    }
+  }
+
+  /* Merges directly adjacent history entries that fall into the same
+   * aggregation interval and returns the merged history.  Entries that
+   * ended at most 92 days ago are aggregated per day, entries that
+   * ended at most 366 days ago per 2 days, and older entries per 10
+   * days.  Entries are only merged if one ends exactly where the next
+   * starts.  Merging modifies the entries of the given history. */
+  private SortedSet<ResponseHistory> compressHistory(
+      SortedSet<ResponseHistory> history) {
+    SortedSet<ResponseHistory> compressed =
+        new TreeSet<ResponseHistory>();
+    ResponseHistory pending = null;
+    for (ResponseHistory current : history) {
+      long ageMillis = this.now - current.endMillis;
+      long aggregationMillis;
+      if (ageMillis <= 92L * ONE_DAY_MILLIS) {
+        aggregationMillis = ONE_DAY_MILLIS;
+      } else if (ageMillis <= 366L * ONE_DAY_MILLIS) {
+        aggregationMillis = 2L * ONE_DAY_MILLIS;
+      } else {
+        aggregationMillis = 10L * ONE_DAY_MILLIS;
+      }
+      boolean mergeable = pending != null
+          && pending.endMillis == current.startMillis
+          && ((pending.endMillis - 1L) / aggregationMillis)
+          == ((current.endMillis - 1L) / aggregationMillis);
+      if (mergeable) {
+        pending.addResponses(current);
+        continue;
+      }
+      if (pending != null) {
+        compressed.add(pending);
+      }
+      pending = current;
+    }
+    if (pending != null) {
+      compressed.add(pending);
+    }
+    return compressed;
+  }
+
+  /* Serializes the history, one entry per line, and stores it as the
+   * clients status file of the given bridge. */
+  private void writeHistory(String hashedFingerprint,
+      SortedSet<ResponseHistory> history) {
+    StringBuilder lines = new StringBuilder();
+    for (ResponseHistory entry : history) {
+      lines.append(entry.toString()).append("\n");
+    }
+    ClientsStatus status = new ClientsStatus();
+    status.documentString = lines.toString();
+    this.documentStore.store(status, hashedFingerprint);
+  }
+
+  /* Rewrites the clients document of every bridge with newly processed
+   * responses from that bridge's stored history. */
+  public void updateDocuments() {
+    for (String hashedFingerprint : this.newResponses.keySet()) {
+      SortedSet<ResponseHistory> stored =
+          this.readHistory(hashedFingerprint);
+      ClientsDocument document = new ClientsDocument();
+      String formatted = this.formatHistoryString(hashedFingerprint,
+          stored);
+      document.documentString = formatted;
+      this.documentStore.store(document, hashedFingerprint);
+    }
+    Logger.printStatusTime("Wrote clients document files");
+  }
+
+  private String[] graphNames = new String[] {
+      "1_week",
+      "1_month",
+      "3_months",
+      "1_year",
+      "5_years" };
+
+  private long[] graphIntervals = new long[] {
+      7L * 24L * 60L * 60L * 1000L,
+      31L * 24L * 60L * 60L * 1000L,
+      92L * 24L * 60L * 60L * 1000L,
+      366L * 24L * 60L * 60L * 1000L,
+      5L * 366L * 24L * 60L * 60L * 1000L };
+
+  private long[] dataPointIntervals = new long[] {
+      24L * 60L * 60L * 1000L,
+      24L * 60L * 60L * 1000L,
+      24L * 60L * 60L * 1000L,
+      2L * 24L * 60L * 60L * 1000L,
+      10L * 24L * 60L * 60L * 1000L };
+
+  /* Builds the JSON clients document of one bridge: its fingerprint
+   * and an "average_clients" object containing one member per graph
+   * interval for which formatTimeline() produces a timeline. */
+  private String formatHistoryString(String hashedFingerprint,
+      SortedSet<ResponseHistory> history) {
+    StringBuilder json = new StringBuilder("{\"fingerprint\":\"");
+    json.append(hashedFingerprint).append("\"");
+    json.append(",\n\"average_clients\":{");
+    boolean firstTimeline = true;
+    for (int index = 0; index < this.graphIntervals.length; index++) {
+      String timeline = this.formatTimeline(index, history);
+      if (timeline == null) {
+        continue;
+      }
+      if (!firstTimeline) {
+        json.append(",");
+      }
+      json.append("\n").append(timeline);
+      firstTimeline = false;
+    }
+    json.append("}").append("\n}\n");
+    return json.toString();
+  }
+
+  /* Formats the graph selected by graphIntervalIndex as a JSON member
+   * of the form "<graph name>":{...} containing the scaled data points
+   * and the fractions of responses by country, transport, and IP
+   * version.  Returns null if there is nothing worth writing: no data
+   * point with sufficient coverage, no two adjacent non-null data
+   * points, or (for all but the shortest graph) a first data point
+   * that already lies within the next-shorter graph interval. */
+  private String formatTimeline(int graphIntervalIndex,
+      SortedSet<ResponseHistory> history) {
+    String graphName = this.graphNames[graphIntervalIndex];
+    long graphInterval = this.graphIntervals[graphIntervalIndex];
+    long dataPointInterval =
+        this.dataPointIntervals[graphIntervalIndex];
+    List<Double> dataPoints = new ArrayList<Double>();
+    /* Start of the graph, rounded down to a multiple of the data point
+     * interval. */
+    long intervalStartMillis = ((this.now - graphInterval)
+        / dataPointInterval) * dataPointInterval;
+    /* millis and responses accumulate per data point; the total* values
+     * accumulate over the whole graph. */
+    long millis = 0L;
+    double responses = 0.0, totalResponses = 0.0;
+    SortedMap<String, Double>
+        totalResponsesByCountry = new TreeMap<String, Double>(),
+        totalResponsesByTransport = new TreeMap<String, Double>(),
+        totalResponsesByVersion = new TreeMap<String, Double>();
+    for (ResponseHistory hist : history) {
+      /* Skip entries that end before the graph starts. */
+      if (hist.endMillis < intervalStartMillis) {
+        continue;
+      }
+      /* Close data points until reaching the one this entry ends in.
+       * A data point covered by less than half of its interval is
+       * recorded as -1.0 and later written as null; otherwise the
+       * responses are normalized to one day and divided by 10.
+       * NOTE(review): the division by 10 presumably converts responses
+       * to an estimated number of clients -- confirm. */
+      while ((intervalStartMillis / dataPointInterval) !=
+          (hist.endMillis / dataPointInterval)) {
+        dataPoints.add(millis * 2L < dataPointInterval
+            ? -1.0 : responses * ((double) ONE_DAY_MILLIS)
+            / (((double) millis) * 10.0));
+        responses = 0.0;
+        millis = 0L;
+        intervalStartMillis += dataPointInterval;
+      }
+      responses += hist.totalResponses;
+      totalResponses += hist.totalResponses;
+      for (Map.Entry<String, Double> e :
+          hist.responsesByCountry.entrySet()) {
+        if (!totalResponsesByCountry.containsKey(e.getKey())) {
+          totalResponsesByCountry.put(e.getKey(), 0.0);
+        }
+        totalResponsesByCountry.put(e.getKey(), e.getValue()
+            + totalResponsesByCountry.get(e.getKey()));
+      }
+      for (Map.Entry<String, Double> e :
+          hist.responsesByTransport.entrySet()) {
+        if (!totalResponsesByTransport.containsKey(e.getKey())) {
+          totalResponsesByTransport.put(e.getKey(), 0.0);
+        }
+        totalResponsesByTransport.put(e.getKey(), e.getValue()
+            + totalResponsesByTransport.get(e.getKey()));
+      }
+      for (Map.Entry<String, Double> e :
+          hist.responsesByVersion.entrySet()) {
+        if (!totalResponsesByVersion.containsKey(e.getKey())) {
+          totalResponsesByVersion.put(e.getKey(), 0.0);
+        }
+        totalResponsesByVersion.put(e.getKey(), e.getValue()
+            + totalResponsesByVersion.get(e.getKey()));
+      }
+      millis += (hist.endMillis - hist.startMillis);
+    }
+    /* Close the last data point using the same rule as above. */
+    dataPoints.add(millis * 2L < dataPointInterval
+        ? -1.0 : responses * ((double) ONE_DAY_MILLIS)
+        / (((double) millis) * 10.0));
+    /* Find the range of non-null data points and the maximum value,
+     * which determines the scaling factor. */
+    double maxValue = 0.0;
+    int firstNonNullIndex = -1, lastNonNullIndex = -1;
+    for (int dataPointIndex = 0; dataPointIndex < dataPoints.size();
+        dataPointIndex++) {
+      double dataPoint = dataPoints.get(dataPointIndex);
+      if (dataPoint >= 0.0) {
+        if (firstNonNullIndex < 0) {
+          firstNonNullIndex = dataPointIndex;
+        }
+        lastNonNullIndex = dataPointIndex;
+        if (dataPoint > maxValue) {
+          maxValue = dataPoint;
+        }
+      }
+    }
+    if (firstNonNullIndex < 0) {
+      return null;
+    }
+    /* Data points are timestamped with the middle of their interval. */
+    long firstDataPointMillis = (((this.now - graphInterval)
+        / dataPointInterval) + firstNonNullIndex) * dataPointInterval
+        + dataPointInterval / 2L;
+    if (graphIntervalIndex > 0 && firstDataPointMillis >=
+        this.now - graphIntervals[graphIntervalIndex - 1]) {
+      /* Skip clients history object, because it doesn't contain
+       * anything new that wasn't already contained in the last
+       * clients history object(s). */
+      return null;
+    }
+    long lastDataPointMillis = firstDataPointMillis
+        + (lastNonNullIndex - firstNonNullIndex) * dataPointInterval;
+    /* Values are written as integers from 0 to 999; multiplying a
+     * written value by factor gives back the original value. */
+    double factor = ((double) maxValue) / 999.0;
+    int count = lastNonNullIndex - firstNonNullIndex + 1;
+    StringBuilder sb = new StringBuilder();
+    SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
+        "yyyy-MM-dd HH:mm:ss");
+    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    sb.append("\"" + graphName + "\":{"
+        + "\"first\":\"" + dateTimeFormat.format(firstDataPointMillis)
+        + "\",\"last\":\"" + dateTimeFormat.format(lastDataPointMillis)
+        + "\",\"interval\":" + String.valueOf(dataPointInterval / 1000L)
+        + ",\"factor\":" + String.format(Locale.US, "%.9f", factor)
+        + ",\"count\":" + String.valueOf(count) + ",\"values\":[");
+    /* previousNonNullIndex starts at -2 so that index 0 is never
+     * considered adjacent to a previous data point. */
+    int dataPointsWritten = 0, previousNonNullIndex = -2;
+    boolean foundTwoAdjacentDataPoints = false;
+    for (int dataPointIndex = firstNonNullIndex; dataPointIndex <=
+        lastNonNullIndex; dataPointIndex++) {
+      double dataPoint = dataPoints.get(dataPointIndex);
+      if (dataPoint >= 0.0) {
+        if (dataPointIndex - previousNonNullIndex == 1) {
+          foundTwoAdjacentDataPoints = true;
+        }
+        previousNonNullIndex = dataPointIndex;
+      }
+      sb.append((dataPointsWritten++ > 0 ? "," : "")
+          + (dataPoint < 0.0 ? "null" :
+          String.valueOf((long) ((dataPoint * 999.0) / maxValue))));
+    }
+    sb.append("]");
+    /* For each category, write the fraction of all responses in this
+     * graph, leaving out keys with a share of 1 percent or less. */
+    if (!totalResponsesByCountry.isEmpty()) {
+      sb.append(",\"countries\":{");
+      int written = 0;
+      for (Map.Entry<String, Double> e :
+          totalResponsesByCountry.entrySet()) {
+        if (e.getValue() > totalResponses / 100.0) {
+          sb.append((written++ > 0 ? "," : "") + "\"" + e.getKey()
+              + "\":" + String.format(Locale.US, "%.4f",
+              e.getValue() / totalResponses));
+        }
+      }
+      sb.append("}");
+    }
+    if (!totalResponsesByTransport.isEmpty()) {
+      sb.append(",\"transports\":{");
+      int written = 0;
+      for (Map.Entry<String, Double> e :
+          totalResponsesByTransport.entrySet()) {
+        if (e.getValue() > totalResponses / 100.0) {
+          sb.append((written++ > 0 ? "," : "") + "\"" + e.getKey()
+              + "\":" + String.format(Locale.US, "%.4f",
+              e.getValue() / totalResponses));
+        }
+      }
+      sb.append("}");
+    }
+    if (!totalResponsesByVersion.isEmpty()) {
+      sb.append(",\"versions\":{");
+      int written = 0;
+      for (Map.Entry<String, Double> e :
+          totalResponsesByVersion.entrySet()) {
+        if (e.getValue() > totalResponses / 100.0) {
+          sb.append((written++ > 0 ? "," : "") + "\"" + e.getKey()
+              + "\":" + String.format(Locale.US, "%.4f",
+              e.getValue() / totalResponses));
+        }
+      }
+      sb.append("}");
+    }
+    sb.append("}");
+    /* A graph needs at least two adjacent data points to be drawn. */
+    if (foundTwoAdjacentDataPoints) {
+      return sb.toString();
+    } else {
+      return null;
+    }
+  }
+
+  /* Returns three statistics lines: the number of processed client
+   * statistics (half the number of queued intervals) and the number of
+   * updated status and document files (one each per bridge with new
+   * responses). */
+  public String getStatsString() {
+    int queuedIntervals = 0;
+    for (SortedSet<ResponseHistory> queued :
+        this.newResponses.values()) {
+      queuedIntervals += queued.size();
+    }
+    int bridges = this.newResponses.size();
+    return "    " + Logger.formatDecimalNumber(queuedIntervals / 2)
+        + " client statistics processed from extra-info descriptors\n"
+        + "    " + Logger.formatDecimalNumber(bridges)
+        + " client status files updated\n"
+        + "    " + Logger.formatDecimalNumber(bridges)
+        + " client document files updated\n";
+  }
+}
+
diff --git a/src/org/torproject/onionoo/ClientsDocument.java b/src/org/torproject/onionoo/ClientsDocument.java
new file mode 100644
index 0000000..c8679fc
--- /dev/null
+++ b/src/org/torproject/onionoo/ClientsDocument.java
@@ -0,0 +1,8 @@
+/* Copyright 2014 The Tor Project
+ * See LICENSE for licensing information */
+package org.torproject.onionoo;
+
+/* Document type of the per-bridge clients documents that
+ * ClientsDataWriter produces as output.  Adds nothing to Document; the
+ * type itself tells DocumentStore to use the "clients" subdirectory of
+ * its output directory. */
+class ClientsDocument extends Document {
+
+}
+
diff --git a/src/org/torproject/onionoo/ClientsStatus.java b/src/org/torproject/onionoo/ClientsStatus.java
new file mode 100644
index 0000000..65fb341
--- /dev/null
+++ b/src/org/torproject/onionoo/ClientsStatus.java
@@ -0,0 +1,5 @@
+package org.torproject.onionoo;
+
+/* Document type of the per-bridge clients status files that
+ * ClientsDataWriter keeps as intermediate output.  Adds nothing to
+ * Document; the type itself tells DocumentStore to use the "clients"
+ * subdirectory of its status directory. */
+class ClientsStatus extends Document {
+}
+
diff --git a/src/org/torproject/onionoo/DocumentStore.java b/src/org/torproject/onionoo/DocumentStore.java
index b78f5ff..1092e25 100644
--- a/src/org/torproject/onionoo/DocumentStore.java
+++ b/src/org/torproject/onionoo/DocumentStore.java
@@ -121,6 +121,9 @@ public class DocumentStore {
     } else if (documentType.equals(WeightsStatus.class)) {
       directory = this.statusDir;
       subdirectory = "weights";
+    } else if (documentType.equals(ClientsStatus.class)) {
+      directory = this.statusDir;
+      subdirectory = "clients";
     } else if (documentType.equals(DetailsDocument.class)) {
       directory = this.outDir;
       subdirectory = "details";
@@ -130,6 +133,9 @@ public class DocumentStore {
     } else if (documentType.equals(WeightsDocument.class)) {
       directory = this.outDir;
       subdirectory = "weights";
+    } else if (documentType.equals(ClientsDocument.class)) {
+      directory = this.outDir;
+      subdirectory = "clients";
     }
     if (directory != null && subdirectory != null) {
       Stack<File> files = new Stack<File>();
@@ -179,7 +185,8 @@ public class DocumentStore {
       documentString = document.documentString;
     } else if (document instanceof DetailsDocument ||
           document instanceof BandwidthDocument ||
-          document instanceof WeightsDocument) {
+          document instanceof WeightsDocument ||
+          document instanceof ClientsDocument) {
       Gson gson = new Gson();
       documentString = gson.toJson(this);
     } else {
@@ -267,7 +274,8 @@ public class DocumentStore {
           documentString);
     } else if (documentType.equals(DetailsDocument.class) ||
         documentType.equals(BandwidthDocument.class) ||
-        documentType.equals(WeightsDocument.class)) {
+        documentType.equals(WeightsDocument.class) ||
+        documentType.equals(ClientsDocument.class)) {
       return this.retrieveParsedDocumentFile(documentType,
           documentString);
     } else {
@@ -368,6 +376,11 @@ public class DocumentStore {
       fileName = String.format("weights/%s/%s/%s",
           fingerprint.substring(0, 1), fingerprint.substring(1, 2),
           fingerprint);
+    } else if (documentType.equals(ClientsStatus.class)) {
+      directory = this.statusDir;
+      fileName = String.format("clients/%s/%s/%s",
+          fingerprint.substring(0, 1), fingerprint.substring(1, 2),
+          fingerprint);
     } else if (documentType.equals(UpdateStatus.class)) {
       directory = this.outDir;
       fileName = "update";
@@ -385,6 +398,9 @@ public class DocumentStore {
     } else if (documentType.equals(WeightsDocument.class)) {
       directory = this.outDir;
       fileName = String.format("weights/%s", fingerprint);
+    } else if (documentType.equals(ClientsDocument.class)) {
+      directory = this.outDir;
+      fileName = String.format("clients/%s", fingerprint);
     }
     if (directory != null && fileName != null) {
       documentFile = new File(directory, fileName);
diff --git a/src/org/torproject/onionoo/Main.java b/src/org/torproject/onionoo/Main.java
index a7e9461..87e7c9a 100644
--- a/src/org/torproject/onionoo/Main.java
+++ b/src/org/torproject/onionoo/Main.java
@@ -36,7 +36,9 @@ public class Main {
     Logger.printStatusTime("Initialized bandwidth data writer");
     WeightsDataWriter wdw = new WeightsDataWriter(dso, ds, t);
     Logger.printStatusTime("Initialized weights data writer");
-    DataWriter[] dws = new DataWriter[] { ndw, bdw, wdw };
+    ClientsDataWriter cdw = new ClientsDataWriter(dso, ds, t);
+    Logger.printStatusTime("Initialized clients data writer");
+    DataWriter[] dws = new DataWriter[] { ndw, bdw, wdw, cdw };
 
     Logger.printStatus("Reading descriptors.");
     dso.readRelayNetworkConsensuses();
diff --git a/src/org/torproject/onionoo/ResourceServlet.java b/src/org/torproject/onionoo/ResourceServlet.java
index 165bc01..d04828f 100644
--- a/src/org/torproject/onionoo/ResourceServlet.java
+++ b/src/org/torproject/onionoo/ResourceServlet.java
@@ -128,6 +128,8 @@ public class ResourceServlet extends HttpServlet {
       resourceType = "bandwidth";
     } else if (uri.startsWith("/weights")) {
       resourceType = "weights";
+    } else if (uri.startsWith("/clients")) {
+      resourceType = "clients";
     } else {
       response.sendError(HttpServletResponse.SC_BAD_REQUEST);
       return;
diff --git a/src/org/torproject/onionoo/ResponseBuilder.java b/src/org/torproject/onionoo/ResponseBuilder.java
index 502b928..b7231db 100644
--- a/src/org/torproject/onionoo/ResponseBuilder.java
+++ b/src/org/torproject/onionoo/ResponseBuilder.java
@@ -348,6 +348,7 @@ public class ResponseBuilder {
         relayFingerprintSummaryLines);
     Map<String, String> filteredBridges = new HashMap<String, String>(
         bridgeFingerprintSummaryLines);
+    filterByResourceType(filteredRelays, filteredBridges);
     filterByType(filteredRelays, filteredBridges);
     filterByRunning(filteredRelays, filteredBridges);
     filterBySearchTerms(filteredRelays, filteredBridges);
@@ -371,6 +372,16 @@ public class ResponseBuilder {
     writeBridges(orderedBridges, pw);
   }
 
+  private void filterByResourceType(Map<String, String> filteredRelays,
+      Map<String, String> filteredBridges) {
+    if (this.resourceType.equals("clients")) {
+      filteredRelays.clear();
+    }
+    if (this.resourceType.equals("weights")) {
+      filteredBridges.clear();
+    }
+  }
+
   private void filterByType(Map<String, String> filteredRelays,
       Map<String, String> filteredBridges) {
     if (this.type == null) {
@@ -580,7 +591,7 @@ public class ResponseBuilder {
         filteredRelays.remove(fingerprint);
       }
     }
-    if (!this.bridgesByFlag.containsKey(flag)) {
+    if (!bridgesByFlag.containsKey(flag)) {
       filteredBridges.clear();
     } else {
       Set<String> bridgesWithFlag = bridgesByFlag.get(flag);
@@ -763,6 +774,8 @@ public class ResponseBuilder {
       return this.writeBandwidthLines(summaryLine);
     } else if (this.resourceType.equals("weights")) {
       return this.writeWeightsLines(summaryLine);
+    } else if (this.resourceType.equals("clients")) {
+      return this.writeClientsLines(summaryLine);
     } else {
       return "";
     }
@@ -892,4 +905,27 @@ public class ResponseBuilder {
       return "";
     }
   }
+
+  private String writeClientsLines(String summaryLine) {
+    String fingerprint = null;
+    if (summaryLine.contains("\"h\":\"")) {
+      fingerprint = summaryLine.substring(summaryLine.indexOf(
+         "\"h\":\"") + "\"h\":\"".length());
+    } else {
+      return "";
+    }
+    fingerprint = fingerprint.substring(0, 40);
+    ClientsDocument clientsDocument = documentStore.retrieve(
+        ClientsDocument.class, false, fingerprint);
+    if (clientsDocument != null &&
+        clientsDocument.documentString != null) {
+      String clientsLines = clientsDocument.documentString;
+      clientsLines = clientsLines.substring(0, clientsLines.length() - 1);
+      return clientsLines;
+    } else {
+      // TODO We should probably log that we didn't find a clients
+      // document that we expected to exist.
+      return "";
+    }
+  }
 }
diff --git a/web/index.html b/web/index.html
index 88745af..c07ca86 100644
--- a/web/index.html
+++ b/web/index.html
@@ -57,6 +57,7 @@ h3 .request-response { padding: 0 !important; }
         <li><a href="#details">Details documents</a></li>
         <li><a href="#bandwidth">Bandwidth documents</a></li>
         <li><a href="#weights">Weights documents</a></li>
+        <li><a href="#clients">Clients documents</a></li>
     </ul>
 
 </div>
@@ -205,6 +206,13 @@ document</a></span>
 document</a></span>
 </li>
 
+<li class="api-request">
+<span class="request-type">GET</span>
+<span class="request-url">https://onionoo.torproject.org/clients</span>
+<span class="request-response">returns a <a href="#clients">clients
+document</a></span>
+</li>
+
 </ul>
 
 <h4>Parameters</h4>
@@ -1719,6 +1727,234 @@ Only included for compatibility reasons with the other document types.
 
 </div> <!-- box -->
 
+<div class="box">
+<a name="clients"></a>
+<h3>Clients documents <a href="#clients">#</a>
+<span class="request-response">
+<a href="clients?limit=4">example request</a>
+</span>
+</h3>
+
+<p>
+<font color="blue">Added on March 10, 2014.</font>
+Clients documents contain estimates of the average number of clients
+connecting to a bridge every day.
+There are no clients documents available for relays, just for bridges.
+Clients documents contain different time intervals and are available for
+all bridges that have been running in the past week.
+Clients documents contain the following fields:
+</p>
+
+<ul class="properties">
+
+<li>
+<b>relays_published</b>
+<code class="typeof">string</code>
+<span class="required-true">required</span>
+<p>
+UTC timestamp (YYYY-MM-DD hh:mm:ss) when
+the last known relay network status consensus started being valid.
+Only included for compatibility reasons with the other document types.
+</p>
+</li>
+
+<li>
+<b>relays</b>
+<code class="typeof">array of objects</code>
+<span class="required-true">required</span>
+<p>
+Empty array of objects that would represent relay clients documents.
+Only included for compatibility reasons with the other document types.
+</p>
+</li>
+
+<li>
+<b>bridges_published</b>
+<code class="typeof">string</code>
+<span class="required-true">required</span>
+<p>
+UTC timestamp (YYYY-MM-DD hh:mm:ss) when
+the last known bridge network status was published.
+Indicates how recent the bridge clients documents in this document are.
+</p>
+</li>
+
+<li>
+<b>bridges</b>
+<code class="typeof">array of objects</code>
+<span class="required-true">required</span>
+<p>
+Array of objects representing bridge clients documents.
+Each array object contains the following key-value pairs:
+</p>
+
+<ul class="properties">
+
+<li>
+<b>fingerprint</b>
+<code class="typeof">string</code>
+<span class="required-true">required</span>
+<p>
+SHA-1 hash of the bridge fingerprint consisting
+of 40 upper-case hexadecimal characters.
+</p>
+</li>
+
+<li>
+<b>average_clients</b>
+<code class="typeof">object</code>
+<span class="required-false">optional</span>
+<p>
+History object containing the average number of clients connecting to
+this bridge.
+Keys are string representations of the time period covered by the clients
+history object.
+Keys are fixed strings <strong>"1_week"</strong>,
+<strong>"1_month"</strong>, <strong>"3_months"</strong>,
+<strong>"1_year"</strong>, and <strong>"5_years"</strong>.
+Keys refer to the last known clients history of a bridge, not to the time
+when the clients document was published.
+A clients history object is only contained if the time period it covers
+is not already contained in another clients history object with shorter
+time period and higher data resolution.
+Each clients history object contains the following key-value pairs:
+</p>
+
+<ul class="properties">
+
+<li>
+<b>first</b>
+<code class="typeof">string</code>
+<span class="required-true">required</span>
+<p>
+UTC timestamp (YYYY-MM-DD hh:mm:ss) of the first
+data point in the clients history.
+</p>
+</li>
+
+<li>
+<b>last</b>
+<code class="typeof">string</code>
+<span class="required-true">required</span>
+<p>
+UTC timestamp (YYYY-MM-DD hh:mm:ss) of the last
+data point in the clients history.
+</p>
+</li>
+
+<li>
+<b>interval</b>
+<code class="typeof">number</code>
+<span class="required-true">required</span>
+<p>
+Time interval between two data points in seconds.
+</p>
+</li>
+
+<li>
+<b>factor</b>
+<code class="typeof">number</code>
+<span class="required-true">required</span>
+<p>
+Factor by which subsequent clients values need to
+be multiplied to get the average number of clients.
+The idea is that contained clients values are normalized to a range from 0
+to 999 to reduce document size while still providing sufficient detail for
+both heavily used and mostly unused bridges.
+</p>
+</li>
+
+<li>
+<b>count</b>
+<code class="typeof">number</code>
+<span class="required-false">optional</span>
+<p>
+Number of provided data points, included mostly for
+debugging purposes.
+Can also be derived from the number of elements in the subsequent array.
+</p>
+</li>
+
+<li>
+<b>values</b>
+<code class="typeof">array of numbers</code>
+<span class="required-true">required</span>
+<p>
+Array of normalized clients values.
+May contain null values if the bridge did not report client statistics for
+at least 50% of a given time period.
+Contains at least two subsequent non-null values to enable drawing of line
+graphs.
+</p>
+</li>
+
+<li>
+<b>countries</b>
+<code class="typeof">object</code>
+<span class="required-false">optional</span>
+<p>
+Object containing fractions of clients by country in the considered time
+period.
+Keys are two-letter lower-case country codes as found in a GeoIP database.
+Values are numbers between 0 and 1 standing for the fraction of clients by
+country.
+A country is only included if at least 1% of clients came from this
+country.
+Omitted if the bridge did not report client statistics by country.
+<font color="red"><strong>BETA:</strong> This field breaks compatibility
+with the history objects contained in other documents pretty badly.
+It might be removed in the future without notice.</font>
+</p>
+</li>
+
+<li>
+<b>transports</b>
+<code class="typeof">object</code>
+<span class="required-false">optional</span>
+<p>
+Object containing fractions of clients by transport in the considered time
+period.
+Keys are transport names, or <strong>"&lt;OR&gt;"</strong> for the default
+onion-routing transport protocol.
+Values are numbers between 0 and 1 standing for the fraction of clients by
+transport.
+Omitted if the bridge did not report client statistics by transport.
+<font color="red"><strong>BETA:</strong> This field breaks compatibility
+with the history objects contained in other documents pretty badly.
+It might be removed in the future without notice.</font>
+</p>
+</li>
+
+<li>
+<b>versions</b>
+<code class="typeof">object</code>
+<span class="required-false">optional</span>
+<p>
+Object containing fractions of clients by IP version in the considered
+time period.
+Keys are either <strong>"v4"</strong> for IPv4 or <strong>"v6"</strong>
+for IPv6.
+Values are numbers between 0 and 1 standing for the fraction of clients by
+version.
+Omitted if the bridge did not report client statistics by IP version.
+<font color="red"><strong>BETA:</strong> This field breaks compatibility
+with the history objects contained in other documents pretty badly.
+It might be removed in the future without notice.</font>
+</p>
+</li>
+
+</ul>
+
+</li>
+
+</ul>
+
+</li>
+
+</ul>
+
+</div> <!-- box -->
+
 </body>
 </html>
 






More information about the tor-commits mailing list