tor-commits
Threads by month
- ----- 2025 -----
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
February 2018
- 19 participants
- 1579 discussions

[collector/release] Add webstats module with sync and local import functionality.
by karsten@torproject.org 26 Feb '18
26 Feb '18
commit 97e577ae73ec631ac5d7448cb9f525594baa0c8a
Author: iwakeh <iwakeh(a)torproject.org>
Date: Mon Oct 9 12:23:53 2017 +0000
Add webstats module with sync and local import functionality.
Implements task-22428.
---
CHANGELOG.md | 6 +-
build.xml | 2 +-
src/main/java/org/torproject/collector/Main.java | 2 +
.../torproject/collector/conf/Configuration.java | 3 +-
.../java/org/torproject/collector/conf/Key.java | 9 +-
.../collector/persist/DescriptorPersistence.java | 2 +
.../persist/WebServerAccessLogPersistence.java | 73 ++++++++
.../torproject/collector/sync/SyncPersistence.java | 7 +
.../torproject/collector/webstats/LogFileMap.java | 115 ++++++++++++
.../torproject/collector/webstats/LogMetadata.java | 87 +++++++++
.../collector/webstats/SanitizeWeblogs.java | 198 +++++++++++++++++++++
src/main/resources/collector.properties | 20 ++-
.../collector/conf/ConfigurationTest.java | 2 +-
.../collector/cron/CollecTorMainTest.java | 1 +
.../collector/sync/SyncPersistenceTest.java | 68 +++----
.../collector/webstats/LogFileMapTest.java | 33 ++++
.../collector/webstats/LogMetadataTest.java | 82 +++++++++
...eotrichon.torproject.org_access.log_20151007.xz | Bin 0 -> 4056 bytes
...meronense.torproject.org_access.log_20170531.gz | Bin 0 -> 388 bytes
19 files changed, 671 insertions(+), 39 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2f4cd21..a0b5d1f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,8 @@
-# Changes in version 1.?.? - 201?-??-??
+# Changes in version 1.5.0 - 2018-01-31
+
+ * Major changes
+ - Update to metrics-lib 2.2.0.
+ - Add new module for processing and sanitizing Tor web server logs.
* Minor changes
- Exclude lastModifiedMillis in index.json.
diff --git a/build.xml b/build.xml
index f004f29..48f6e33 100644
--- a/build.xml
+++ b/build.xml
@@ -11,7 +11,7 @@
<property name="release.version" value="1.4.1-dev" />
<property name="project-main-class" value="org.torproject.collector.Main" />
<property name="name" value="collector"/>
- <property name="metricslibversion" value="2.1.1" />
+ <property name="metricslibversion" value="2.2.0" />
<property name="jarincludes" value="collector.properties logback.xml" />
<patternset id="runtime" >
diff --git a/src/main/java/org/torproject/collector/Main.java b/src/main/java/org/torproject/collector/Main.java
index 50cc8be..70cdbfa 100644
--- a/src/main/java/org/torproject/collector/Main.java
+++ b/src/main/java/org/torproject/collector/Main.java
@@ -14,6 +14,7 @@ import org.torproject.collector.exitlists.ExitListDownloader;
import org.torproject.collector.index.CreateIndexJson;
import org.torproject.collector.onionperf.OnionPerfDownloader;
import org.torproject.collector.relaydescs.ArchiveWriter;
+import org.torproject.collector.webstats.SanitizeWeblogs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -51,6 +52,7 @@ public class Main {
collecTorMains.put(Key.UpdateindexActivated, CreateIndexJson.class);
collecTorMains.put(Key.RelaydescsActivated, ArchiveWriter.class);
collecTorMains.put(Key.OnionPerfActivated, OnionPerfDownloader.class);
+ collecTorMains.put(Key.WebstatsActivated, SanitizeWeblogs.class);
}
private static Configuration conf = new Configuration();
diff --git a/src/main/java/org/torproject/collector/conf/Configuration.java b/src/main/java/org/torproject/collector/conf/Configuration.java
index 57f9731..72bd5fc 100644
--- a/src/main/java/org/torproject/collector/conf/Configuration.java
+++ b/src/main/java/org/torproject/collector/conf/Configuration.java
@@ -92,7 +92,8 @@ public class Configuration extends Observable implements Cloneable {
|| this.getBool(Key.BridgedescsActivated)
|| this.getBool(Key.ExitlistsActivated)
|| this.getBool(Key.UpdateindexActivated)
- || this.getBool(Key.OnionPerfActivated))) {
+ || this.getBool(Key.OnionPerfActivated)
+ || this.getBool(Key.WebstatsActivated))) {
throw new ConfigurationException("Nothing is activated!\n"
+ "Please edit collector.properties. Exiting.");
}
diff --git a/src/main/java/org/torproject/collector/conf/Key.java b/src/main/java/org/torproject/collector/conf/Key.java
index e0a20a7..6454009 100644
--- a/src/main/java/org/torproject/collector/conf/Key.java
+++ b/src/main/java/org/torproject/collector/conf/Key.java
@@ -28,6 +28,7 @@ public enum Key {
BridgeSources(SourceType[].class),
ExitlistSources(SourceType[].class),
OnionPerfSources(SourceType[].class),
+ WebstatsSources(SourceType[].class),
RelayCacheOrigins(String[].class),
RelayLocalOrigins(Path.class),
RelaySyncOrigins(URL[].class),
@@ -35,6 +36,8 @@ public enum Key {
BridgeLocalOrigins(Path.class),
ExitlistSyncOrigins(URL[].class),
OnionPerfSyncOrigins(URL[].class),
+ WebstatsSyncOrigins(URL[].class),
+ WebstatsLocalOrigins(Path.class),
BridgedescsActivated(Boolean.class),
BridgedescsOffsetMinutes(Integer.class),
BridgedescsPeriodMinutes(Integer.class),
@@ -58,7 +61,11 @@ public enum Key {
KeepDirectoryArchiveImportHistory(Boolean.class),
ReplaceIpAddressesWithHashes(Boolean.class),
BridgeDescriptorMappingsLimit(Integer.class),
- OnionPerfHosts(URL[].class);
+ OnionPerfHosts(URL[].class),
+ WebstatsActivated(Boolean.class),
+ WebstatsLimits(Boolean.class),
+ WebstatsOffsetMinutes(Integer.class),
+ WebstatsPeriodMinutes(Integer.class);
private Class clazz;
private static Set<String> keys;
diff --git a/src/main/java/org/torproject/collector/persist/DescriptorPersistence.java b/src/main/java/org/torproject/collector/persist/DescriptorPersistence.java
index 3e464fe..01c9fad 100644
--- a/src/main/java/org/torproject/collector/persist/DescriptorPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/DescriptorPersistence.java
@@ -19,6 +19,7 @@ public abstract class DescriptorPersistence<T extends Descriptor> {
protected static final String BRIDGEDESCS = "bridge-descriptors";
protected static final String DASH = "-";
+ protected static final String DOT = ".";
protected static final String MICRODESC = "microdesc";
protected static final String MICRODESCS = "microdescs";
protected static final String RELAYDESCS = "relay-descriptors";
@@ -26,6 +27,7 @@ public abstract class DescriptorPersistence<T extends Descriptor> {
protected static final String EXTRA_INFOS = "extra-infos";
protected static final String SERVERDESC = "server-descriptor";
protected static final String SERVERDESCS = "server-descriptors";
+ protected static final String WEBSTATS = "webstats";
protected final T desc;
protected final byte[] annotation;
diff --git a/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
new file mode 100644
index 0000000..792d3a9
--- /dev/null
+++ b/src/main/java/org/torproject/collector/persist/WebServerAccessLogPersistence.java
@@ -0,0 +1,73 @@
+/* Copyright 2016--2018 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.collector.persist;
+
+import org.torproject.descriptor.WebServerAccessLog;
+import org.torproject.descriptor.internal.FileType;
+import org.torproject.descriptor.log.InternalLogDescriptor;
+import org.torproject.descriptor.log.InternalWebServerAccessLog;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+import java.time.format.DateTimeFormatter;
+
+public class WebServerAccessLogPersistence
+ extends DescriptorPersistence<WebServerAccessLog> {
+
+ public static final String SEP = InternalWebServerAccessLog.SEP;
+ public static final FileType COMPRESSION = FileType.XZ;
+ private static final Logger log
+ = LoggerFactory.getLogger(WebServerAccessLogPersistence.class);
+
+ private DateTimeFormatter yearPattern = DateTimeFormatter.ofPattern("yyyy");
+ private DateTimeFormatter monthPattern = DateTimeFormatter.ofPattern("MM");
+ private DateTimeFormatter dayPattern = DateTimeFormatter.ofPattern("dd");
+
+ /** Prepare storing the given descriptor. */
+ public WebServerAccessLogPersistence(WebServerAccessLog desc) {
+ super(desc, new byte[0]);
+ byte[] compressedBytes = null;
+ try { // The descriptor bytes have to be stored compressed.
+ compressedBytes = COMPRESSION.compress(desc.getRawDescriptorBytes());
+ ((InternalLogDescriptor)desc).setRawDescriptorBytes(compressedBytes);
+ } catch (Exception ex) {
+ log.warn("Cannot compress ’{}’. Storing uncompressed.", ex);
+ }
+ calculatePaths();
+ }
+
+ private void calculatePaths() {
+ String name =
+ this.desc.getVirtualHost() + SEP + this.desc.getPhysicalHost()
+ + SEP + "access.log"
+ + SEP + this.desc.getLogDate().format(DateTimeFormatter.BASIC_ISO_DATE)
+ + DOT + COMPRESSION.name().toLowerCase();
+ this.recentPath = Paths.get(WEBSTATS, name).toString();
+ this.storagePath = Paths.get(
+ WEBSTATS,
+ this.desc.getVirtualHost(),
+ this.desc.getLogDate().format(yearPattern), // year
+ this.desc.getLogDate().format(monthPattern), // month
+ this.desc.getLogDate().format(dayPattern), // day
+ name).toString();
+ }
+
+ /** Logs are not appended. */
+ @Override
+ public boolean storeAll(String recentRoot, String outRoot) {
+ return storeAll(recentRoot, outRoot, StandardOpenOption.CREATE_NEW,
+ StandardOpenOption.CREATE_NEW);
+ }
+
+ /** Logs are not appended. */
+ @Override
+ public boolean storeRecent(String recentRoot) {
+ return storeRecent(recentRoot, StandardOpenOption.CREATE_NEW);
+ }
+
+}
+
diff --git a/src/main/java/org/torproject/collector/sync/SyncPersistence.java b/src/main/java/org/torproject/collector/sync/SyncPersistence.java
index e230fca..142be7a 100644
--- a/src/main/java/org/torproject/collector/sync/SyncPersistence.java
+++ b/src/main/java/org/torproject/collector/sync/SyncPersistence.java
@@ -18,6 +18,7 @@ import org.torproject.collector.persist.PersistenceUtils;
import org.torproject.collector.persist.ServerDescriptorPersistence;
import org.torproject.collector.persist.StatusPersistence;
import org.torproject.collector.persist.VotePersistence;
+import org.torproject.collector.persist.WebServerAccessLogPersistence;
import org.torproject.descriptor.BridgeExtraInfoDescriptor;
import org.torproject.descriptor.BridgeNetworkStatus;
import org.torproject.descriptor.BridgeServerDescriptor;
@@ -28,6 +29,7 @@ import org.torproject.descriptor.RelayNetworkStatusConsensus;
import org.torproject.descriptor.RelayNetworkStatusVote;
import org.torproject.descriptor.RelayServerDescriptor;
import org.torproject.descriptor.TorperfResult;
+import org.torproject.descriptor.WebServerAccessLog;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -132,6 +134,10 @@ public class SyncPersistence {
case "TorperfResult":
descPersist = new OnionPerfPersistence((TorperfResult) desc);
break;
+ case "WebServerAccessLog":
+ descPersist = new WebServerAccessLogPersistence(
+ (WebServerAccessLog) desc);
+ break;
default:
log.trace("Invalid descriptor type {} for sync-merge.",
clazz.getName());
@@ -149,3 +155,4 @@ public class SyncPersistence {
}
}
}
+
diff --git a/src/main/java/org/torproject/collector/webstats/LogFileMap.java b/src/main/java/org/torproject/collector/webstats/LogFileMap.java
new file mode 100644
index 0000000..c1a6802
--- /dev/null
+++ b/src/main/java/org/torproject/collector/webstats/LogFileMap.java
@@ -0,0 +1,115 @@
+/* Copyright 2017--2018 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.collector.webstats;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.time.LocalDate;
+import java.util.Optional;
+import java.util.TreeMap;
+import java.util.stream.Stream;
+
+/**
+ * Processes the given path and stores metadata for log files.
+ */
+public class LogFileMap
+ extends TreeMap<String, TreeMap<String, TreeMap<LocalDate, LogMetadata>>> {
+
+ private static final Logger log = LoggerFactory.getLogger(LogFileMap.class);
+
+ /**
+ * The map to keep track of the logfiles by virtual host,
+ * physical host, and date.
+ */
+ public LogFileMap(Path startDir) {
+ collectFiles(this, startDir);
+ }
+
+ private void collectFiles(final LogFileMap logFileMap, Path startDir) {
+ try {
+ Files.walkFileTree(startDir, new SimpleFileVisitor<Path>() {
+ @Override
+ public FileVisitResult visitFile(Path path, BasicFileAttributes att)
+ throws IOException {
+ Optional<LogMetadata> optionalMetadata = LogMetadata.create(path);
+ if (optionalMetadata.isPresent()) {
+ logFileMap.add(optionalMetadata.get());
+ }
+ return FileVisitResult.CONTINUE;
+ }
+
+ @Override
+ public FileVisitResult visitFileFailed(Path path, IOException ex)
+ throws IOException {
+ return logIfError(path, ex);
+ }
+
+ @Override
+ public FileVisitResult postVisitDirectory(Path path, IOException ex)
+ throws IOException {
+ return logIfError(path, ex);
+ }
+
+ private FileVisitResult logIfError(Path path, IOException ex) {
+ if (null != ex) {
+ log.warn("Cannot process '{}'.", path, ex);
+ }
+ return FileVisitResult.CONTINUE;
+ }
+ });
+ } catch (IOException ex) {
+ log.error("Cannot read directory '{}'.", startDir, ex);
+ }
+ }
+
+ /** Add log metadata to the map structure. */
+ public void add(LogMetadata metadata) {
+ TreeMap<String, TreeMap<LocalDate, LogMetadata>> virtualHosts
+ = this.get(metadata.virtualHost);
+ if (null == virtualHosts) {
+ virtualHosts = new TreeMap<String, TreeMap<LocalDate, LogMetadata>>();
+ this.put(metadata.virtualHost, virtualHosts);
+ }
+ TreeMap<LocalDate, LogMetadata> physicalHosts
+ = virtualHosts.get(metadata.physicalHost);
+ if (null == physicalHosts) {
+ physicalHosts = new TreeMap<LocalDate, LogMetadata>();
+ virtualHosts.put(metadata.physicalHost, physicalHosts);
+ }
+ physicalHosts.put(metadata.date, metadata);
+ }
+
+ /**
+ * Takes the given metadata and returns the LogMetadata for the entry
+ * of the next day.
+ */
+ public Optional<LogMetadata> nextDayLogFor(LogMetadata metadata) {
+ TreeMap<String, TreeMap<LocalDate, LogMetadata>> virtualHosts
+ = this.get(metadata.virtualHost);
+ if (null == virtualHosts) {
+ return Optional.empty();
+ }
+ TreeMap<LocalDate, LogMetadata> physicalHosts
+ = virtualHosts.get(metadata.physicalHost);
+ if (null == physicalHosts) {
+ return Optional.empty();
+ }
+ return Optional.ofNullable(physicalHosts.get(metadata.date.plusDays(1)));
+ }
+
+ /** Returns a stream of all contained log metadata. */
+ public Stream<LogMetadata> metadataStream() {
+ return this.values().stream()
+ .flatMap((virtualHosts) -> virtualHosts.values().stream())
+ .flatMap((physicalHosts) -> physicalHosts.values().stream());
+ }
+}
+
diff --git a/src/main/java/org/torproject/collector/webstats/LogMetadata.java b/src/main/java/org/torproject/collector/webstats/LogMetadata.java
new file mode 100644
index 0000000..ee0db1a
--- /dev/null
+++ b/src/main/java/org/torproject/collector/webstats/LogMetadata.java
@@ -0,0 +1,87 @@
+/* Copyright 2017--2018 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.collector.webstats;
+
+import static org.torproject.descriptor.log.WebServerAccessLogImpl.MARKER;
+
+import org.torproject.descriptor.internal.FileType;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.file.Path;
+import java.time.LocalDate;
+import java.time.format.DateTimeFormatter;
+import java.util.Optional;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class LogMetadata {
+
+ private static final Logger log
+ = LoggerFactory.getLogger(LogMetadata.class);
+
+ /** The mandatory web server log descriptor file name pattern. */
+ public static final Pattern filenamePattern
+ = Pattern.compile("(\\S*)-" + MARKER
+ + "-(\\d{8})(?:\\.?)([a-zA-Z0-9]+)$");
+
+ /** The path of the log file to be imported. */
+ public final Path path;
+
+ /** The date the log entries were created. */
+ public final LocalDate date;
+
+ /** The log's compression type. */
+ public final FileType fileType;
+
+ /** The name of the physical host. */
+ public final String physicalHost;
+
+ /** The name of the virtual host. */
+ public final String virtualHost;
+
+ private LogMetadata(Path logPath, String physicalHost, String virtualHost,
+ LocalDate logDate, FileType fileType) {
+ this.path = logPath;
+ this.date = logDate;
+ this.fileType = fileType;
+ this.physicalHost = physicalHost;
+ this.virtualHost = virtualHost;
+ }
+
+ /**
+ * Only way to create a LogMetadata object from a given log path.
+ */
+ public static Optional<LogMetadata> create(Path logPath) {
+ LogMetadata metadata = null;
+ try {
+ Path parentPath = logPath.getName(logPath.getNameCount() - 2);
+ Path file = logPath.getFileName();
+ if (null != parentPath && null != file) {
+ String physicalHost = parentPath.toString();
+ Matcher mat = filenamePattern.matcher(file.toString());
+ if (mat.find()) {
+ String virtualHost = mat.group(1);
+ // verify date given
+ LocalDate logDate
+ = LocalDate.parse(mat.group(2), DateTimeFormatter.BASIC_ISO_DATE);
+ if (null == virtualHost || null == physicalHost || null == logDate
+ || virtualHost.isEmpty() || physicalHost.isEmpty()) {
+ log.debug("Non-matching file encountered: '{}/{}'.",
+ parentPath, file);
+ } else {
+ metadata = new LogMetadata(logPath, physicalHost, virtualHost,
+ logDate, FileType.findType(mat.group(3)));
+ }
+ }
+ }
+ } catch (Throwable ex) {
+ metadata = null;
+ log.debug("Problem parsing path '{}'.", logPath, ex);
+ }
+ return Optional.ofNullable(metadata);
+ }
+}
+
diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
new file mode 100644
index 0000000..88d62fa
--- /dev/null
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -0,0 +1,198 @@
+/* Copyright 2017--2018 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.collector.webstats;
+
+import org.torproject.collector.conf.Configuration;
+import org.torproject.collector.conf.ConfigurationException;
+import org.torproject.collector.conf.Key;
+import org.torproject.collector.conf.SourceType;
+import org.torproject.collector.cron.CollecTorMain;
+
+import org.torproject.collector.persist.PersistenceUtils;
+import org.torproject.collector.persist.WebServerAccessLogPersistence;
+import org.torproject.descriptor.DescriptorParseException;
+import org.torproject.descriptor.WebServerAccessLog;
+import org.torproject.descriptor.log.InternalLogDescriptor;
+import org.torproject.descriptor.log.InternalWebServerAccessLog;
+import org.torproject.descriptor.log.WebServerAccessLogImpl;
+import org.torproject.descriptor.log.WebServerAccessLogLine;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStreamReader;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.LocalDate;
+import java.time.format.DateTimeFormatter;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.StringJoiner;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * This module processes web-logs for CollecTor according to the weblog
+ * sanitation specification published on metrics.torproject.org</p>
+ */
+public class SanitizeWeblogs extends CollecTorMain {
+
+ private static final Logger log =
+ LoggerFactory.getLogger(SanitizeWeblogs.class);
+ private static final int LIMIT = 2;
+
+ private static final String WEBSTATS = "webstats";
+ private String outputPathName;
+ private String recentPathName;
+ private boolean limits;
+
+ /**
+ * Possibly privacy impacting data is replaced by dummy data producing a
+ * log-file (or files) that confirm(s) to Apache's Combined Log Format.
+ */
+ public SanitizeWeblogs(Configuration conf) {
+ super(conf);
+ this.mapPathDescriptors.put("recent/webstats", WebServerAccessLog.class);
+ }
+
+ @Override
+ public String module() {
+ return WEBSTATS;
+ }
+
+ @Override
+ protected String syncMarker() {
+ return "Webstats";
+ }
+
+ @Override
+ protected void startProcessing() throws ConfigurationException {
+ try {
+ Files.createDirectories(this.config.getPath(Key.OutputPath));
+ Files.createDirectories(this.config.getPath(Key.RecentPath));
+ this.outputPathName = this.config.getPath(Key.OutputPath).toString();
+ this.recentPathName = this.config.getPath(Key.RecentPath).toString();
+ this.limits = this.config.getBool(Key.WebstatsLimits);
+ Set<SourceType> sources = this.config.getSourceTypeSet(
+ Key.WebstatsSources);
+ if (sources.contains(SourceType.Local)) {
+ findCleanWrite(this.config.getPath(Key.WebstatsLocalOrigins));
+ PersistenceUtils.cleanDirectory(this.config.getPath(Key.RecentPath));
+ }
+ } catch (Exception e) {
+ log.error("Cannot sanitize web-logs: " + e.getMessage(), e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void findCleanWrite(Path dir) {
+ LogFileMap fileMapIn = new LogFileMap(dir);
+ log.info("Found log files for {} virtual hosts.", fileMapIn.size());
+ for (Map.Entry<String,TreeMap<String,TreeMap<LocalDate,LogMetadata>>>
+ virtualEntry : fileMapIn.entrySet()) {
+ String virtualHost = virtualEntry.getKey();
+ for (Map.Entry<String, TreeMap<LocalDate, LogMetadata>> physicalEntry
+ : virtualEntry.getValue().entrySet()) {
+ String physicalHost = physicalEntry.getKey();
+ log.info("Processing logs for {} on {}.", virtualHost, physicalHost);
+ Map<LocalDate, List<WebServerAccessLogLine>> linesByDate
+ = physicalEntry.getValue().values().stream().parallel()
+ .flatMap((LogMetadata metadata)
+ -> lineStream(metadata).filter((line) -> line.isValid()))
+ .collect(Collectors.groupingBy(WebServerAccessLogLine::getDate,
+ Collectors.toList()));
+ LocalDate[] interval = determineInterval(linesByDate.keySet());
+ linesByDate.entrySet().stream()
+ .filter((entry) -> entry.getKey().isAfter(interval[0])
+ && entry.getKey().isBefore(interval[1]))
+ .forEach((entry) -> storeSanitized(virtualHost, physicalHost,
+ entry.getKey(), entry.getValue()));
+ }
+ }
+ }
+
+ private void storeSanitized(String virtualHost, String physicalHost,
+ LocalDate date, List<WebServerAccessLogLine> lines) {
+ String name = new StringJoiner(InternalLogDescriptor.SEP)
+ .add(virtualHost).add(physicalHost)
+ .add(InternalWebServerAccessLog.MARKER)
+ .add(date.format(DateTimeFormatter.BASIC_ISO_DATE)).toString();
+ log.debug("Sanitizing {}.", name);
+ List<String> retainedLines = lines
+ .stream().map((line) -> sanitize(line, date))
+ .filter((line) -> line.isPresent()).map((line) -> line.get())
+ .collect(Collectors.toList());
+ retainedLines.sort(null);
+ try {
+ WebServerAccessLogPersistence walp
+ = new WebServerAccessLogPersistence(
+ new WebServerAccessLogImpl(retainedLines, name));
+ log.debug("Storing {}.", name);
+ walp.storeOut(this.outputPathName);
+ walp.storeRecent(this.recentPathName);
+ } catch (DescriptorParseException dpe) {
+ log.error("Cannot store log desriptor {}.", name, dpe);
+ }
+ }
+
+ static Optional<String> sanitize(WebServerAccessLogLine logLine,
+ LocalDate date) {
+ if (!logLine.isValid()
+ || !("GET".equals(logLine.getMethod())
+ || "HEAD".equals(logLine.getMethod()))
+ || !logLine.getProtocol().startsWith("HTTP")
+ || 400 == logLine.getResponse() || 404 == logLine.getResponse()) {
+ return Optional.empty();
+ }
+ if (!logLine.getIp().startsWith("0.0.0.")) {
+ logLine.setIp("0.0.0.0");
+ }
+ int queryStart = logLine.getRequest().indexOf("?");
+ if (queryStart > 0) {
+ logLine.setRequest(logLine.getRequest().substring(0, queryStart));
+ }
+ return Optional.of(logLine.toLogString());
+ }
+
+ LocalDate[] determineInterval(Set<LocalDate> dates) {
+ SortedSet<LocalDate> sorted = new TreeSet<>();
+ sorted.addAll(dates);
+ if (this.limits) {
+ for (int i = 0; i < LIMIT - 1; i++) {
+ sorted.remove(sorted.last());
+ }
+ }
+ if (sorted.isEmpty()) {
+ return new LocalDate[]{LocalDate.MAX, LocalDate.MIN};
+ }
+ if (!this.limits) {
+ sorted.add(sorted.first().minusDays(1));
+ sorted.add(sorted.last().plusDays(1));
+ }
+ return new LocalDate[]{sorted.first(), sorted.last()};
+ }
+
+ private Stream<WebServerAccessLogLine> lineStream(LogMetadata metadata) {
+ log.debug("Processing file {}.", metadata.path);
+ try (BufferedReader br
+ = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(
+ metadata.fileType.decompress(Files.readAllBytes(metadata.path)))))) {
+ return br.lines()
+ .map((String line) -> WebServerAccessLogLine.makeLine(line))
+ .collect(Collectors.toList()).stream();
+ } catch (Exception ex) {
+ log.debug("Skipping log-file {}.", metadata.path, ex);
+ }
+ return Stream.empty();
+ }
+
+}
+
diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties
index 0a9f932..30dda2a 100644
--- a/src/main/resources/collector.properties
+++ b/src/main/resources/collector.properties
@@ -41,6 +41,12 @@ UpdateindexActivated = false
UpdateindexPeriodMinutes = 2
# offset in minutes since the epoch and
UpdateindexOffsetMinutes = 0
+# the following defines, if this module is activated
+WebstatsActivated = false
+# period in minutes
+WebstatsPeriodMinutes = 360
+# offset in minutes since the epoch and
+WebstatsOffsetMinutes = 31
##########################################
## All below can be changed at runtime.
#####
@@ -154,4 +160,16 @@ OnionPerfSyncOrigins = https://collector.torproject.org
## the second, etc.:
## OnionPerfHosts = http://first.torproject.org/, http://second.torproject.org/
OnionPerfHosts = https://op-us.onionperf.torproject.net/
-
+######## Tor Weblogs ########
+#
+## Define descriptor sources
+# possible values: Local, Sync
+WebstatsSources = Local
+# Retrieve files from the following CollecTor instances.
+# List of URLs separated by comma.
+WebstatsSyncOrigins = https://collector.torproject.org
+## Relative path to directory to import logfiles from.
+WebstatsLocalOrigins = in/webstats
+# Default 'true' behaves as stated in section 4 of
+# https://metrics.torproject.org/web-server-logs.html
+WebstatsLimits = true
diff --git a/src/test/java/org/torproject/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
index dfb06b2..fcaa71f 100644
--- a/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
+++ b/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
@@ -40,7 +40,7 @@ public class ConfigurationTest {
public void testKeyCount() throws Exception {
assertEquals("The number of properties keys in enum Key changed."
+ "\n This test class should be adapted.",
- 45, Key.values().length);
+ 52, Key.values().length);
}
@Test()
diff --git a/src/test/java/org/torproject/collector/cron/CollecTorMainTest.java b/src/test/java/org/torproject/collector/cron/CollecTorMainTest.java
index 79c1bd7..025f96c 100644
--- a/src/test/java/org/torproject/collector/cron/CollecTorMainTest.java
+++ b/src/test/java/org/torproject/collector/cron/CollecTorMainTest.java
@@ -71,6 +71,7 @@ public class CollecTorMainTest {
case "Bridge":
case "Exitlist":
case "OnionPerf":
+ case "Webstats":
assertNotNull("Property '" + key
+ "' not specified in " + Main.CONF_FILE + ".",
props.getProperty(key));
diff --git a/src/test/java/org/torproject/collector/sync/SyncPersistenceTest.java b/src/test/java/org/torproject/collector/sync/SyncPersistenceTest.java
index 2774c8d..489a413 100644
--- a/src/test/java/org/torproject/collector/sync/SyncPersistenceTest.java
+++ b/src/test/java/org/torproject/collector/sync/SyncPersistenceTest.java
@@ -28,6 +28,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
+import java.util.stream.Collectors;
@RunWith(Parameterized.class)
public class SyncPersistenceTest {
@@ -49,6 +50,26 @@ public class SyncPersistenceTest {
Integer.valueOf(1),
Integer.valueOf(1)},
+ {"webstats/archive.torproject.org_"
+ + "archeotrichon.torproject.org_access.log_20151007.xz",
+ new String[]{"webstats/archive.torproject.org/2015/10/07/"
+ + "archive.torproject.org_archeotrichon.torproject.org"
+ + "_access.log_20151007.xz"},
+ "archeotrichon.torproject.org/archive.torproject.org_"
+ + "archeotrichon.torproject.org_access.log_20151007.xz",
+ Integer.valueOf(1),
+ Integer.valueOf(1)},
+
+ {"webstats/metrics.torproject.org_"
+ + "meronense.torproject.org_access.log_20170531.xz",
+ new String[]{"webstats/metrics.torproject.org/2017/05/31/"
+ + "metrics.torproject.org_meronense.torproject.org"
+ + "_access.log_20170531.xz"},
+ "meronense.torproject.org/metrics.torproject.org_"
+ + "meronense.torproject.org_access.log_20170531.gz",
+ Integer.valueOf(1),
+ Integer.valueOf(1)},
+
{"relay-descriptors/server-descriptors/"
+ "2016-10-05-19-06-17-server-descriptors",
new String[]{"relay-descriptors/server-descriptor/2016/10/e/3/"
@@ -266,6 +287,9 @@ public class SyncPersistenceTest {
@Test()
public void testRecentFileContent() throws Exception {
+ if (this.filename.contains(".log")) {
+ return; // Skip this test, because logs are compressed and sanitized.
+ }
conf = new Configuration();
makeTemporaryFolders();
DescriptorParser dp = DescriptorSourceFactory.createDescriptorParser();
@@ -292,6 +316,9 @@ public class SyncPersistenceTest {
@Test()
public void testOutFileContent() throws Exception {
+ if (this.filename.contains(".log")) {
+ return; // Skip this test, because logs are compressed and sanitized.
+ }
conf = new Configuration();
makeTemporaryFolders();
DescriptorParser dp = DescriptorSourceFactory.createDescriptorParser();
@@ -305,9 +332,8 @@ public class SyncPersistenceTest {
List<String> expContent = linesFromResource(filename);
int expContentSize = expContent.size();
for (File file : outputList) {
- List<String> content = Files.readAllLines(file.toPath(),
- StandardCharsets.UTF_8);
- for (String line : content) {
+ for (String line : Files.readAllLines(file.toPath(),
+ StandardCharsets.UTF_8)) {
assertTrue("Couldn't remove " + line + ".", expContent.remove(line));
assertEquals(--expContentSize, expContent.size());
}
@@ -325,49 +351,25 @@ public class SyncPersistenceTest {
}
private byte[] bytesFromResource() throws Exception {
- StringBuilder sb = new StringBuilder();
- BufferedReader br = new BufferedReader(new InputStreamReader(getClass()
- .getClassLoader().getResourceAsStream(filename)));
- String line = br.readLine();
- while (null != line) {
- sb.append(line).append('\n');
- line = br.readLine();
- }
- return sb.toString().getBytes();
+ return Files.readAllBytes((new File(getClass()
+ .getClassLoader().getResource(filename).toURI())).toPath());
}
private String stringFromResource() throws Exception {
- StringBuilder sb = new StringBuilder();
BufferedReader br = new BufferedReader(new InputStreamReader(getClass()
.getClassLoader().getResourceAsStream(filename)));
- String line = br.readLine();
- while (null != line) {
- sb.append(line).append('\n');
- line = br.readLine();
- }
- return sb.toString();
+ return br.lines().collect(Collectors.joining("\n", "", "\n"));
}
private String stringFromFile(File file) throws Exception {
- StringBuilder sb = new StringBuilder();
- List<String> lines = Files.readAllLines(file.toPath(),
- StandardCharsets.UTF_8);
- for (String line : lines) {
- sb.append(line).append('\n');
- }
- return sb.toString();
+ return Files.lines(file.toPath(), StandardCharsets.UTF_8)
+ .collect(Collectors.joining("\n", "", "\n"));
}
private List<String> linesFromResource(String filename) throws Exception {
- List<String> res = new ArrayList<>();
BufferedReader br = new BufferedReader(new InputStreamReader(getClass()
.getClassLoader().getResourceAsStream(filename)));
- String line = br.readLine();
- while (null != line) {
- res.add(line);
- line = br.readLine();
- }
- return res;
+ return br.lines().collect(Collectors.toList());
}
}
diff --git a/src/test/java/org/torproject/collector/webstats/LogFileMapTest.java b/src/test/java/org/torproject/collector/webstats/LogFileMapTest.java
new file mode 100644
index 0000000..d55ba40
--- /dev/null
+++ b/src/test/java/org/torproject/collector/webstats/LogFileMapTest.java
@@ -0,0 +1,33 @@
+/* Copyright 2017--2018 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.collector.webstats;
+
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import java.nio.file.Paths;
+import java.util.Optional;
+
+public class LogFileMapTest {
+
+ @Rule
+ public TemporaryFolder tmpf = new TemporaryFolder();
+
+ @Test
+ public void makeLogFileMap() throws Exception {
+ LogFileMap lfm = new LogFileMap(tmpf.newFolder().toPath());
+ for (String path : new String[] {"in/ph1/vh1-access.log-20170901.gz",
+ "in/ph1/vh1-access.log-20170902.xz"}) {
+ Optional<LogMetadata> element
+ = LogMetadata.create(Paths.get(path));
+ assertTrue(element.isPresent());
+ lfm.add(element.get());
+ }
+ }
+
+}
+
diff --git a/src/test/java/org/torproject/collector/webstats/LogMetadataTest.java b/src/test/java/org/torproject/collector/webstats/LogMetadataTest.java
new file mode 100644
index 0000000..6121e8d
--- /dev/null
+++ b/src/test/java/org/torproject/collector/webstats/LogMetadataTest.java
@@ -0,0 +1,82 @@
+/* Copyright 2017--2018 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.collector.webstats;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.LocalDate;
+import java.time.format.DateTimeFormatter;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Optional;
+
+@RunWith(Parameterized.class)
+public class LogMetadataTest {
+
+ /** Path and expected values of LogMetadata. */
+ @Parameters
+ public static Collection<Object[]> pathResult() {
+ return Arrays.asList(new Object[][] {
+ {Paths.get("in", "ph1", "vh1-error.log-20170902.xz"),
+ "10001010", Boolean.FALSE,
+ "Non-access logs should be discarded."},
+ {Paths.get("in", "ph1", "vh1-access.log-2017.xz"),
+ "10001010", Boolean.FALSE,
+ "Log file should be discarded, because of wrong date format."},
+ {Paths.get("in", "ph1", "vh1-access.log.xz"),
+ "10001010", Boolean.FALSE,
+ "Log file should be discarded, because of the missing date."},
+ {Paths.get("vh1-access.log-20170901.gz"),
+ "10001010", Boolean.FALSE,
+ "Should be discarded because of missing physical host information."},
+ {Paths.get("in", "ph1", "vh1-access.log-20170901.gz"),
+ "20170901", Boolean.TRUE,
+ "Should have been accepted."},
+ {Paths.get("", "vh1-access.log-20170901.gz"),
+ "20170901", Boolean.FALSE,
+ "Should not result in metadata."},
+ {Paths.get("x", "vh1-access.log-.gz"),
+ "20170901", Boolean.FALSE,
+ "Should not result in metadata."},
+ {Paths.get("/collection/download/in/ph2", "vh2-access.log-20180901.xz"),
+ "20180901", Boolean.TRUE,
+ "Should have been accepted."}
+ });
+ }
+
+ private Path path;
+ private LocalDate date;
+ private boolean valid;
+ private String failureMessage;
+
+ /** Set all test values. */
+ public LogMetadataTest(Path path, String dateString, boolean valid,
+ String message) {
+ this.path = path;
+ this.date = LocalDate.parse(dateString, DateTimeFormatter.BASIC_ISO_DATE);
+ this.valid = valid;
+ this.failureMessage = message;
+ }
+
+ @Test
+ public void testCreate() throws Exception {
+ Optional<LogMetadata> element = LogMetadata.create(this.path);
+ assertEquals(this.failureMessage, this.valid, element.isPresent());
+ if (!this.valid) {
+ return;
+ }
+ LogMetadata lmd = element.get();
+ assertEquals(this.date, lmd.date);
+ assertEquals(this.path, lmd.path);
+ }
+
+}
+
diff --git a/src/test/resources/archeotrichon.torproject.org/archive.torproject.org_ar… b/src/test/resources/archeotrichon.torproject.org/archive.torproject.org_ar…
new file mode 100644
index 0000000..b459742
Binary files /dev/null and b/src/test/resources/archeotrichon.torproject.org/archive.torproject.org_ar… differ
diff --git a/src/test/resources/meronense.torproject.org/metrics.torproject.org_merone… b/src/test/resources/meronense.torproject.org/metrics.torproject.org_merone…
new file mode 100644
index 0000000..8c2333b
Binary files /dev/null and b/src/test/resources/meronense.torproject.org/metrics.torproject.org_merone… differ
1
0
commit 7f01208aedf150822f589375bf2cf45b3a6af5c0
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Tue Jan 9 10:23:10 2018 +0100
Update copyright to 2018.
---
src/main/java/org/torproject/collector/Main.java | 2 +-
.../org/torproject/collector/bridgedescs/BridgeDescriptorParser.java | 2 +-
.../java/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java | 2 +-
.../org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java | 2 +-
src/main/java/org/torproject/collector/conf/Annotation.java | 2 +-
src/main/java/org/torproject/collector/conf/Configuration.java | 2 +-
src/main/java/org/torproject/collector/conf/ConfigurationException.java | 2 +-
src/main/java/org/torproject/collector/conf/Key.java | 2 +-
src/main/java/org/torproject/collector/conf/SourceType.java | 2 +-
src/main/java/org/torproject/collector/cron/CollecTorMain.java | 2 +-
src/main/java/org/torproject/collector/cron/Scheduler.java | 2 +-
src/main/java/org/torproject/collector/cron/ShutdownHook.java | 2 +-
.../java/org/torproject/collector/exitlists/ExitListDownloader.java | 2 +-
src/main/java/org/torproject/collector/index/CreateIndexJson.java | 2 +-
.../java/org/torproject/collector/onionperf/OnionPerfDownloader.java | 2 +-
.../org/torproject/collector/persist/BridgeExtraInfoPersistence.java | 2 +-
.../torproject/collector/persist/BridgeServerDescriptorPersistence.java | 2 +-
.../java/org/torproject/collector/persist/ConsensusPersistence.java | 2 +-
.../java/org/torproject/collector/persist/DescriptorPersistence.java | 2 +-
src/main/java/org/torproject/collector/persist/ExitlistPersistence.java | 2 +-
.../java/org/torproject/collector/persist/ExtraInfoPersistence.java | 2 +-
.../org/torproject/collector/persist/MicroConsensusPersistence.java | 2 +-
.../java/org/torproject/collector/persist/OnionPerfPersistence.java | 2 +-
src/main/java/org/torproject/collector/persist/PersistenceUtils.java | 2 +-
.../org/torproject/collector/persist/ServerDescriptorPersistence.java | 2 +-
src/main/java/org/torproject/collector/persist/StatusPersistence.java | 2 +-
src/main/java/org/torproject/collector/persist/VotePersistence.java | 2 +-
src/main/java/org/torproject/collector/persist/package-info.java | 2 +-
src/main/java/org/torproject/collector/relaydescs/ArchiveReader.java | 2 +-
src/main/java/org/torproject/collector/relaydescs/ArchiveWriter.java | 2 +-
.../torproject/collector/relaydescs/CachedRelayDescriptorReader.java | 2 +-
src/main/java/org/torproject/collector/relaydescs/ReferenceChecker.java | 2 +-
.../org/torproject/collector/relaydescs/RelayDescriptorDownloader.java | 2 +-
.../java/org/torproject/collector/relaydescs/RelayDescriptorParser.java | 2 +-
src/main/java/org/torproject/collector/sync/Criterium.java | 2 +-
src/main/java/org/torproject/collector/sync/ProcessCriterium.java | 2 +-
src/main/java/org/torproject/collector/sync/SyncManager.java | 2 +-
src/main/java/org/torproject/collector/sync/SyncPersistence.java | 2 +-
src/main/java/org/torproject/collector/sync/package-info.java | 2 +-
src/main/resources/bootstrap-development.sh | 2 +-
src/main/resources/create-tarballs.sh | 2 +-
src/test/java/org/torproject/collector/MainTest.java | 2 +-
.../torproject/collector/bridgedescs/BridgeDescriptorParserTest.java | 2 +-
.../java/org/torproject/collector/bridgedescs/DescriptorBuilder.java | 2 +-
.../torproject/collector/bridgedescs/ExtraInfoDescriptorBuilder.java | 2 +-
.../java/org/torproject/collector/bridgedescs/NetworkStatusBuilder.java | 2 +-
.../torproject/collector/bridgedescs/SanitizedBridgesWriterTest.java | 2 +-
.../org/torproject/collector/bridgedescs/ServerDescriptorBuilder.java | 2 +-
src/test/java/org/torproject/collector/bridgedescs/TarballBuilder.java | 2 +-
src/test/java/org/torproject/collector/conf/ConfigurationTest.java | 2 +-
src/test/java/org/torproject/collector/cron/Broken.java | 2 +-
src/test/java/org/torproject/collector/cron/CollecTorMainTest.java | 2 +-
src/test/java/org/torproject/collector/cron/SchedulerTest.java | 2 +-
src/test/java/org/torproject/collector/persist/PersistUtilsTest.java | 2 +-
.../java/org/torproject/collector/relaydescs/ReferenceCheckerTest.java | 2 +-
src/test/java/org/torproject/collector/sync/FileCollector.java | 2 +-
src/test/java/org/torproject/collector/sync/SyncPersistenceTest.java | 2 +-
57 files changed, 57 insertions(+), 57 deletions(-)
diff --git a/src/main/java/org/torproject/collector/Main.java b/src/main/java/org/torproject/collector/Main.java
index 1377fec..50cc8be 100644
--- a/src/main/java/org/torproject/collector/Main.java
+++ b/src/main/java/org/torproject/collector/Main.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector;
diff --git a/src/main/java/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java b/src/main/java/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java
index fae2ba3..1b115ad 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/BridgeDescriptorParser.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2017 The Tor Project
+/* Copyright 2010--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.bridgedescs;
diff --git a/src/main/java/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java b/src/main/java/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java
index 54aa891..487aac8 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/BridgeSnapshotReader.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2017 The Tor Project
+/* Copyright 2010--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.bridgedescs;
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index 12b42ac..e257245 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2017 The Tor Project
+/* Copyright 2010--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.bridgedescs;
diff --git a/src/main/java/org/torproject/collector/conf/Annotation.java b/src/main/java/org/torproject/collector/conf/Annotation.java
index 23f485c..6a8094e 100644
--- a/src/main/java/org/torproject/collector/conf/Annotation.java
+++ b/src/main/java/org/torproject/collector/conf/Annotation.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.conf;
diff --git a/src/main/java/org/torproject/collector/conf/Configuration.java b/src/main/java/org/torproject/collector/conf/Configuration.java
index 1a62bc1..57f9731 100644
--- a/src/main/java/org/torproject/collector/conf/Configuration.java
+++ b/src/main/java/org/torproject/collector/conf/Configuration.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.conf;
diff --git a/src/main/java/org/torproject/collector/conf/ConfigurationException.java b/src/main/java/org/torproject/collector/conf/ConfigurationException.java
index 723c3d8..1d55341 100644
--- a/src/main/java/org/torproject/collector/conf/ConfigurationException.java
+++ b/src/main/java/org/torproject/collector/conf/ConfigurationException.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.conf;
diff --git a/src/main/java/org/torproject/collector/conf/Key.java b/src/main/java/org/torproject/collector/conf/Key.java
index 72af4bb..e0a20a7 100644
--- a/src/main/java/org/torproject/collector/conf/Key.java
+++ b/src/main/java/org/torproject/collector/conf/Key.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.conf;
diff --git a/src/main/java/org/torproject/collector/conf/SourceType.java b/src/main/java/org/torproject/collector/conf/SourceType.java
index 9b42e9f..0ae18ce 100644
--- a/src/main/java/org/torproject/collector/conf/SourceType.java
+++ b/src/main/java/org/torproject/collector/conf/SourceType.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.conf;
diff --git a/src/main/java/org/torproject/collector/cron/CollecTorMain.java b/src/main/java/org/torproject/collector/cron/CollecTorMain.java
index 42530b7..d2a92f6 100644
--- a/src/main/java/org/torproject/collector/cron/CollecTorMain.java
+++ b/src/main/java/org/torproject/collector/cron/CollecTorMain.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.cron;
diff --git a/src/main/java/org/torproject/collector/cron/Scheduler.java b/src/main/java/org/torproject/collector/cron/Scheduler.java
index 4e1a726..ca8bc0a 100644
--- a/src/main/java/org/torproject/collector/cron/Scheduler.java
+++ b/src/main/java/org/torproject/collector/cron/Scheduler.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.cron;
diff --git a/src/main/java/org/torproject/collector/cron/ShutdownHook.java b/src/main/java/org/torproject/collector/cron/ShutdownHook.java
index 1e359b1..b9bade6 100644
--- a/src/main/java/org/torproject/collector/cron/ShutdownHook.java
+++ b/src/main/java/org/torproject/collector/cron/ShutdownHook.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.cron;
diff --git a/src/main/java/org/torproject/collector/exitlists/ExitListDownloader.java b/src/main/java/org/torproject/collector/exitlists/ExitListDownloader.java
index b49189d..587f563 100644
--- a/src/main/java/org/torproject/collector/exitlists/ExitListDownloader.java
+++ b/src/main/java/org/torproject/collector/exitlists/ExitListDownloader.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2017 The Tor Project
+/* Copyright 2010--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.exitlists;
diff --git a/src/main/java/org/torproject/collector/index/CreateIndexJson.java b/src/main/java/org/torproject/collector/index/CreateIndexJson.java
index c4399b8..fb693a7 100644
--- a/src/main/java/org/torproject/collector/index/CreateIndexJson.java
+++ b/src/main/java/org/torproject/collector/index/CreateIndexJson.java
@@ -1,4 +1,4 @@
-/* Copyright 2015--2017 The Tor Project
+/* Copyright 2015--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.index;
diff --git a/src/main/java/org/torproject/collector/onionperf/OnionPerfDownloader.java b/src/main/java/org/torproject/collector/onionperf/OnionPerfDownloader.java
index 36b006a..16a05e7 100644
--- a/src/main/java/org/torproject/collector/onionperf/OnionPerfDownloader.java
+++ b/src/main/java/org/torproject/collector/onionperf/OnionPerfDownloader.java
@@ -1,4 +1,4 @@
-/* Copyright 2012-2017 The Tor Project
+/* Copyright 2012--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.onionperf;
diff --git a/src/main/java/org/torproject/collector/persist/BridgeExtraInfoPersistence.java b/src/main/java/org/torproject/collector/persist/BridgeExtraInfoPersistence.java
index 5bab4c0..1b8a602 100644
--- a/src/main/java/org/torproject/collector/persist/BridgeExtraInfoPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/BridgeExtraInfoPersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/BridgeServerDescriptorPersistence.java b/src/main/java/org/torproject/collector/persist/BridgeServerDescriptorPersistence.java
index 3b4227e..8ff7a1a 100644
--- a/src/main/java/org/torproject/collector/persist/BridgeServerDescriptorPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/BridgeServerDescriptorPersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/ConsensusPersistence.java b/src/main/java/org/torproject/collector/persist/ConsensusPersistence.java
index cf8b027..4425634 100644
--- a/src/main/java/org/torproject/collector/persist/ConsensusPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/ConsensusPersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/DescriptorPersistence.java b/src/main/java/org/torproject/collector/persist/DescriptorPersistence.java
index e7bce5b..3e464fe 100644
--- a/src/main/java/org/torproject/collector/persist/DescriptorPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/DescriptorPersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/ExitlistPersistence.java b/src/main/java/org/torproject/collector/persist/ExitlistPersistence.java
index 73895e4..d992cd5 100644
--- a/src/main/java/org/torproject/collector/persist/ExitlistPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/ExitlistPersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/ExtraInfoPersistence.java b/src/main/java/org/torproject/collector/persist/ExtraInfoPersistence.java
index a6ae774..93bdfe5 100644
--- a/src/main/java/org/torproject/collector/persist/ExtraInfoPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/ExtraInfoPersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/MicroConsensusPersistence.java b/src/main/java/org/torproject/collector/persist/MicroConsensusPersistence.java
index a3e29a2..1858334 100644
--- a/src/main/java/org/torproject/collector/persist/MicroConsensusPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/MicroConsensusPersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/OnionPerfPersistence.java b/src/main/java/org/torproject/collector/persist/OnionPerfPersistence.java
index 22093eb..d8e1b33 100644
--- a/src/main/java/org/torproject/collector/persist/OnionPerfPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/OnionPerfPersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2017 The Tor Project
+/* Copyright 2017--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/PersistenceUtils.java b/src/main/java/org/torproject/collector/persist/PersistenceUtils.java
index 7d20882..0537145 100644
--- a/src/main/java/org/torproject/collector/persist/PersistenceUtils.java
+++ b/src/main/java/org/torproject/collector/persist/PersistenceUtils.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/ServerDescriptorPersistence.java b/src/main/java/org/torproject/collector/persist/ServerDescriptorPersistence.java
index e1adfbc..cd5d92c 100644
--- a/src/main/java/org/torproject/collector/persist/ServerDescriptorPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/ServerDescriptorPersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/StatusPersistence.java b/src/main/java/org/torproject/collector/persist/StatusPersistence.java
index cca7201..165cf6c 100644
--- a/src/main/java/org/torproject/collector/persist/StatusPersistence.java
+++ b/src/main/java/org/torproject/collector/persist/StatusPersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/VotePersistence.java b/src/main/java/org/torproject/collector/persist/VotePersistence.java
index 26f9a72..45f117f 100644
--- a/src/main/java/org/torproject/collector/persist/VotePersistence.java
+++ b/src/main/java/org/torproject/collector/persist/VotePersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/persist/package-info.java b/src/main/java/org/torproject/collector/persist/package-info.java
index 8473821..6f75116 100644
--- a/src/main/java/org/torproject/collector/persist/package-info.java
+++ b/src/main/java/org/torproject/collector/persist/package-info.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/main/java/org/torproject/collector/relaydescs/ArchiveReader.java b/src/main/java/org/torproject/collector/relaydescs/ArchiveReader.java
index 88b2525..c665acb 100644
--- a/src/main/java/org/torproject/collector/relaydescs/ArchiveReader.java
+++ b/src/main/java/org/torproject/collector/relaydescs/ArchiveReader.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2017 The Tor Project
+/* Copyright 2010--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.relaydescs;
diff --git a/src/main/java/org/torproject/collector/relaydescs/ArchiveWriter.java b/src/main/java/org/torproject/collector/relaydescs/ArchiveWriter.java
index 6660213..e8d717a 100644
--- a/src/main/java/org/torproject/collector/relaydescs/ArchiveWriter.java
+++ b/src/main/java/org/torproject/collector/relaydescs/ArchiveWriter.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2017 The Tor Project
+/* Copyright 2010--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.relaydescs;
diff --git a/src/main/java/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java b/src/main/java/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java
index 34da5fc..4986509 100644
--- a/src/main/java/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java
+++ b/src/main/java/org/torproject/collector/relaydescs/CachedRelayDescriptorReader.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2017 The Tor Project
+/* Copyright 2010--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.relaydescs;
diff --git a/src/main/java/org/torproject/collector/relaydescs/ReferenceChecker.java b/src/main/java/org/torproject/collector/relaydescs/ReferenceChecker.java
index 843156c..43706dd 100644
--- a/src/main/java/org/torproject/collector/relaydescs/ReferenceChecker.java
+++ b/src/main/java/org/torproject/collector/relaydescs/ReferenceChecker.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.relaydescs;
diff --git a/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java b/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java
index 80955be..453a466 100644
--- a/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java
+++ b/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorDownloader.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2017 The Tor Project
+/* Copyright 2010--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.relaydescs;
diff --git a/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorParser.java b/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorParser.java
index 537572b..be26040 100644
--- a/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorParser.java
+++ b/src/main/java/org/torproject/collector/relaydescs/RelayDescriptorParser.java
@@ -1,4 +1,4 @@
-/* Copyright 2010--2017 The Tor Project
+/* Copyright 2010--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.relaydescs;
diff --git a/src/main/java/org/torproject/collector/sync/Criterium.java b/src/main/java/org/torproject/collector/sync/Criterium.java
index dbb7d8e..b612794 100644
--- a/src/main/java/org/torproject/collector/sync/Criterium.java
+++ b/src/main/java/org/torproject/collector/sync/Criterium.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.sync;
diff --git a/src/main/java/org/torproject/collector/sync/ProcessCriterium.java b/src/main/java/org/torproject/collector/sync/ProcessCriterium.java
index c128e14..8edcf15 100644
--- a/src/main/java/org/torproject/collector/sync/ProcessCriterium.java
+++ b/src/main/java/org/torproject/collector/sync/ProcessCriterium.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.sync;
diff --git a/src/main/java/org/torproject/collector/sync/SyncManager.java b/src/main/java/org/torproject/collector/sync/SyncManager.java
index 0c85a47..c7300b4 100644
--- a/src/main/java/org/torproject/collector/sync/SyncManager.java
+++ b/src/main/java/org/torproject/collector/sync/SyncManager.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.sync;
diff --git a/src/main/java/org/torproject/collector/sync/SyncPersistence.java b/src/main/java/org/torproject/collector/sync/SyncPersistence.java
index baca83d..e230fca 100644
--- a/src/main/java/org/torproject/collector/sync/SyncPersistence.java
+++ b/src/main/java/org/torproject/collector/sync/SyncPersistence.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.sync;
diff --git a/src/main/java/org/torproject/collector/sync/package-info.java b/src/main/java/org/torproject/collector/sync/package-info.java
index ede4727..241fde7 100644
--- a/src/main/java/org/torproject/collector/sync/package-info.java
+++ b/src/main/java/org/torproject/collector/sync/package-info.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.sync;
diff --git a/src/main/resources/bootstrap-development.sh b/src/main/resources/bootstrap-development.sh
index 28fb975..d301e25 100755
--- a/src/main/resources/bootstrap-development.sh
+++ b/src/main/resources/bootstrap-development.sh
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# Copyright 2016--2017 The Tor Project
+# Copyright 2016--2018 The Tor Project
# See LICENSE for licensing information
#
# Use for submodule initialization and checkout.
diff --git a/src/main/resources/create-tarballs.sh b/src/main/resources/create-tarballs.sh
index 976b15e..c20303e 100755
--- a/src/main/resources/create-tarballs.sh
+++ b/src/main/resources/create-tarballs.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#
-# Copyright 2016--2017 The Tor Project
+# Copyright 2016--2018 The Tor Project
# See LICENSE for licensing information.
#
# Script for creating descriptor tarballs on a CollecTor instance,
diff --git a/src/test/java/org/torproject/collector/MainTest.java b/src/test/java/org/torproject/collector/MainTest.java
index b9cf5f8..28d3571 100644
--- a/src/test/java/org/torproject/collector/MainTest.java
+++ b/src/test/java/org/torproject/collector/MainTest.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector;
diff --git a/src/test/java/org/torproject/collector/bridgedescs/BridgeDescriptorParserTest.java b/src/test/java/org/torproject/collector/bridgedescs/BridgeDescriptorParserTest.java
index 5e55c76..2d4e175 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/BridgeDescriptorParserTest.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/BridgeDescriptorParserTest.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.bridgedescs;
diff --git a/src/test/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/test/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
index 429a0ea..eaa6335 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.bridgedescs;
diff --git a/src/test/java/org/torproject/collector/bridgedescs/ExtraInfoDescriptorBuilder.java b/src/test/java/org/torproject/collector/bridgedescs/ExtraInfoDescriptorBuilder.java
index 9fe35ec..2c77d47 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/ExtraInfoDescriptorBuilder.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/ExtraInfoDescriptorBuilder.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.bridgedescs;
diff --git a/src/test/java/org/torproject/collector/bridgedescs/NetworkStatusBuilder.java b/src/test/java/org/torproject/collector/bridgedescs/NetworkStatusBuilder.java
index 1008ff8..31cccf7 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/NetworkStatusBuilder.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/NetworkStatusBuilder.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.bridgedescs;
diff --git a/src/test/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriterTest.java b/src/test/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriterTest.java
index 3afe9cb..7e58497 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriterTest.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriterTest.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.bridgedescs;
diff --git a/src/test/java/org/torproject/collector/bridgedescs/ServerDescriptorBuilder.java b/src/test/java/org/torproject/collector/bridgedescs/ServerDescriptorBuilder.java
index 4b5a73e..e49e126 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/ServerDescriptorBuilder.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/ServerDescriptorBuilder.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.bridgedescs;
diff --git a/src/test/java/org/torproject/collector/bridgedescs/TarballBuilder.java b/src/test/java/org/torproject/collector/bridgedescs/TarballBuilder.java
index aad4690..c85e93b 100644
--- a/src/test/java/org/torproject/collector/bridgedescs/TarballBuilder.java
+++ b/src/test/java/org/torproject/collector/bridgedescs/TarballBuilder.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.bridgedescs;
diff --git a/src/test/java/org/torproject/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
index 634bca5..dfb06b2 100644
--- a/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
+++ b/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.conf;
diff --git a/src/test/java/org/torproject/collector/cron/Broken.java b/src/test/java/org/torproject/collector/cron/Broken.java
index 93f59e0..95fec3b 100644
--- a/src/test/java/org/torproject/collector/cron/Broken.java
+++ b/src/test/java/org/torproject/collector/cron/Broken.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.cron;
diff --git a/src/test/java/org/torproject/collector/cron/CollecTorMainTest.java b/src/test/java/org/torproject/collector/cron/CollecTorMainTest.java
index 49f07ef..79c1bd7 100644
--- a/src/test/java/org/torproject/collector/cron/CollecTorMainTest.java
+++ b/src/test/java/org/torproject/collector/cron/CollecTorMainTest.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.cron;
diff --git a/src/test/java/org/torproject/collector/cron/SchedulerTest.java b/src/test/java/org/torproject/collector/cron/SchedulerTest.java
index 062009f..5e38a36 100644
--- a/src/test/java/org/torproject/collector/cron/SchedulerTest.java
+++ b/src/test/java/org/torproject/collector/cron/SchedulerTest.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.cron;
diff --git a/src/test/java/org/torproject/collector/persist/PersistUtilsTest.java b/src/test/java/org/torproject/collector/persist/PersistUtilsTest.java
index 83e93d0..d2801be 100644
--- a/src/test/java/org/torproject/collector/persist/PersistUtilsTest.java
+++ b/src/test/java/org/torproject/collector/persist/PersistUtilsTest.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.persist;
diff --git a/src/test/java/org/torproject/collector/relaydescs/ReferenceCheckerTest.java b/src/test/java/org/torproject/collector/relaydescs/ReferenceCheckerTest.java
index d076e26..adb0b48 100644
--- a/src/test/java/org/torproject/collector/relaydescs/ReferenceCheckerTest.java
+++ b/src/test/java/org/torproject/collector/relaydescs/ReferenceCheckerTest.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.relaydescs;
diff --git a/src/test/java/org/torproject/collector/sync/FileCollector.java b/src/test/java/org/torproject/collector/sync/FileCollector.java
index c5b88cb..681b113 100644
--- a/src/test/java/org/torproject/collector/sync/FileCollector.java
+++ b/src/test/java/org/torproject/collector/sync/FileCollector.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.sync;
diff --git a/src/test/java/org/torproject/collector/sync/SyncPersistenceTest.java b/src/test/java/org/torproject/collector/sync/SyncPersistenceTest.java
index f0d30bc..2774c8d 100644
--- a/src/test/java/org/torproject/collector/sync/SyncPersistenceTest.java
+++ b/src/test/java/org/torproject/collector/sync/SyncPersistenceTest.java
@@ -1,4 +1,4 @@
-/* Copyright 2016--2017 The Tor Project
+/* Copyright 2016--2018 The Tor Project
* See LICENSE for licensing information */
package org.torproject.collector.sync;
1
0

26 Feb '18
commit b23232bd44c82defee92cbe1d697cafb7862205a
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Dec 14 10:13:11 2017 +0100
Exclude lastModifiedMillis in index.json.
Fixes #24621.
---
CHANGELOG.md | 6 ++++++
src/main/java/org/torproject/collector/index/CreateIndexJson.java | 3 ++-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c924f5e..2f4cd21 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+# Changes in version 1.?.? - 201?-??-??
+
+ * Minor changes
+ - Exclude lastModifiedMillis in index.json.
+
+
# Changes in version 1.4.1 - 2017-10-26
* Medium changes
diff --git a/src/main/java/org/torproject/collector/index/CreateIndexJson.java b/src/main/java/org/torproject/collector/index/CreateIndexJson.java
index 5c4daf9..c4399b8 100644
--- a/src/main/java/org/torproject/collector/index/CreateIndexJson.java
+++ b/src/main/java/org/torproject/collector/index/CreateIndexJson.java
@@ -163,7 +163,8 @@ public class CreateIndexJson extends CollecTorMain {
private void writeIndex(IndexNode indexNode) throws Exception {
indexJsonFile.getParentFile().mkdirs();
- Gson gson = new GsonBuilder().create();
+ Gson gson = new GsonBuilder().excludeFieldsWithoutExposeAnnotation()
+ .create();
String indexNodeString = gson.toJson(indexNode);
for (String filename : new String[] {indexJsonFile.toString(),
indexJsonFile + ".gz", indexJsonFile + ".xz", indexJsonFile + ".bz2"}) {
1
0
commit 60dfface9783b5715717dd10fbd90c4dc93e4321
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Thu Oct 26 10:16:35 2017 +0200
Bump version to 1.4.1-dev.
---
build.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/build.xml b/build.xml
index fb87709..f004f29 100644
--- a/build.xml
+++ b/build.xml
@@ -8,7 +8,7 @@
<property name="javadoc-title" value="CollecTor API Documentation"/>
<property name="implementation-title" value="CollecTor" />
- <property name="release.version" value="1.4.1" />
+ <property name="release.version" value="1.4.1-dev" />
<property name="project-main-class" value="org.torproject.collector.Main" />
<property name="name" value="collector"/>
<property name="metricslibversion" value="2.1.1" />
1
0
commit ee7f1353a22b3d19857722b5b68604e2517012c5
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Dec 15 17:01:27 2017 +0100
Update metrics-base.
---
src/build | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/build b/src/build
index 4b34756..23c6e0b 160000
--- a/src/build
+++ b/src/build
@@ -1 +1 @@
-Subproject commit 4b34756ddd71ccaf0fc30e5f5bf0a813a297d4a3
+Subproject commit 23c6e0be5fab9463f137615053ef412e4da2315e
1
0

26 Feb '18
commit 15db1e2a793ac7e67a1e7aa87c2ea857825a98a2
Author: iwakeh <iwakeh(a)torproject.org>
Date: Wed Jan 31 13:31:25 2018 +0000
Parallelize two more processing steps.
---
src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index a3d2a7e..4496861 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -113,7 +113,7 @@ public class SanitizeWeblogs extends CollecTorMain {
LocalDate[] interval = determineInterval(linesByDate.keySet());
linesByDate.entrySet().stream()
.filter((entry) -> entry.getKey().isAfter(interval[0])
- && entry.getKey().isBefore(interval[1]))
+ && entry.getKey().isBefore(interval[1])).parallel()
.forEach((entry) -> storeSanitized(virtualHost, physicalHost,
entry.getKey(), entry.getValue()));
}
@@ -128,7 +128,7 @@ public class SanitizeWeblogs extends CollecTorMain {
.add(date.format(DateTimeFormatter.BASIC_ISO_DATE)).toString();
log.debug("Sanitizing {}.", name);
List<String> retainedLines = lines
- .stream().map((line) -> sanitize(line, date))
+ .stream().parallel().map((line) -> sanitize(line, date))
.filter((line) -> line.isPresent()).map((line) -> line.get())
.collect(Collectors.toList());
retainedLines.sort(null);
1
0
commit ddfa7bad243dc8e0a3105b14418794c87786e88f
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Mon Feb 26 14:23:05 2018 +0100
Prepare for 1.5.0 release.
---
CERT | 20 ++++++++++----------
CHANGELOG.md | 2 +-
build.xml | 2 +-
3 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/CERT b/CERT
index d94c0c3..43be56e 100644
--- a/CERT
+++ b/CERT
@@ -1,8 +1,8 @@
-----BEGIN CERTIFICATE-----
-MIIDaTCCAlGgAwIBAgIEZTniETANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV
+MIIDaTCCAlGgAwIBAgIEIk6NnzANBgkqhkiG9w0BAQsFADBlMQswCQYDVQQGEwJV
UzELMAkGA1UECBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBU
-b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTcw
-ODE3MTg1MDQ0WhcNMTcxMTE1MTg1MDQ0WjBlMQswCQYDVQQGEwJVUzELMAkGA1UE
+b3IgUHJvamVjdCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwHhcNMTgw
+MjI2MTQwMzUzWhcNMTgwNTI3MTQwMzUzWjBlMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCV0ExEDAOBgNVBAcTB1NlYXR0bGUxHTAbBgNVBAoTFFRoZSBUb3IgUHJvamVj
dCwgSW5jMRgwFgYDVQQDEw9LYXJzdGVuIExvZXNpbmcwggEiMA0GCSqGSIb3DQEB
AQUAA4IBDwAwggEKAoIBAQChXn+IUp+o6G+k4ffxk3TkxZb3iXfiG7byNsG63olU
@@ -11,11 +11,11 @@ Qw+VAhKTcEIv4yiR0BWapQyR07pgmKirYVjN6s6ef8NJzUptpxLlaYJ3ZfQfc4aE
MXzScgaccwDFIWQ661lzLGCfeSxxa3Xy4wWsGwzNzLITYrrABcbg7yogLo2btNvD
oEwGL3/baQdhl0dra6biVCZr9ydn3Hg57S55pUU0rBY25id78zUO8xrfNHw54wwX
lOblGt75OOkahP/ZZSBxxoiknJ6y5VQV8y+noA4vigXFAgMBAAGjITAfMB0GA1Ud
-DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAbsAc
-gwl5KJH3pVKw4b+ACCOMgW+27MisCFbT1Izq2Wx+JcLMt3N//MoIpYOZWhsIeazW
-/NE0fNbkLi0IYA0F1nUC9pHl44Hd8Gjfqa/YQUi9ALtgsY7l6W0sceW8WnZ8bu8J
-DfrqnmB0bD2xc9ZjOn58al8dVjVWs95M87D9WCRU6LiaKFj5c45wciABQsTmC0qD
-pyHYOaSGtXxXKDw5pAntdtHkCbowV5tDi/QQ8Tg7i5O7xwSh71Q7TZiNFMpLomBL
-QllHfTZryFmoHyGn5MfngBUVCVHig5nXmk0dUMGuLiK4789dkgiPRz0vpB5Yf8Yy
-CCE2jB6VBi2g5fMx0w==
+DgQWBBSeh60M+/wMYyYhlxtuff2Hk9n7bzANBgkqhkiG9w0BAQsFAAOCAQEAlUkU
+qqf+4yfXwAWFr2q6iijr54NDDEQwybCblIzVnsuGHPUDuie3ZWSHirtblBs/uJ9x
+RxmwkBrJr9IGMmGhN2GKXIPeUH0EZBYo7bsgo5d+E61OCnd/O+1JZzdG9dK+0kfq
+MLfo6ltFZZouHIIXfvOm8sLLRrdkXPrLQ/E8fTHB7dL6T8Hqg6pHRrRZDtuSM9CO
+zSYropxqlFzzlzciOdTU05D8Cnx2j/RtaycxHxFS7QtriDB0uOfqvyiVeqpr72wG
+qetlu3h46fXj3ALGVSXy+YZpYxcRNZsQyiBXdlXbgY0OfOVPFOH3HiZuv3zhfRJW
+2DiJiA8BLxZToe2XDA==
-----END CERTIFICATE-----
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a0b5d1f..eb14839 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-# Changes in version 1.5.0 - 2018-01-31
+# Changes in version 1.5.0 - 2018-02-26
* Major changes
- Update to metrics-lib 2.2.0.
diff --git a/build.xml b/build.xml
index 48f6e33..ff8302b 100644
--- a/build.xml
+++ b/build.xml
@@ -8,7 +8,7 @@
<property name="javadoc-title" value="CollecTor API Documentation"/>
<property name="implementation-title" value="CollecTor" />
- <property name="release.version" value="1.4.1-dev" />
+ <property name="release.version" value="1.5.0" />
<property name="project-main-class" value="org.torproject.collector.Main" />
<property name="name" value="collector"/>
<property name="metricslibversion" value="2.2.0" />
1
0

[collector/release] Circumvent Collection (integer) size limit.
by karsten@torproject.org 26 Feb '18
by karsten@torproject.org 26 Feb '18
26 Feb '18
commit d05b4e4aee3bc15c3e4d5bac660dfcee5bc26279
Author: iwakeh <iwakeh(a)torproject.org>
Date: Tue Feb 20 16:30:14 2018 +0000
Circumvent Collection (integer) size limit.
Clean log lines immediately when they are read and also make use of sanitized
logs' high redundancy immediately, i.e., continue with maps of type
Map<LocalDate, Map<String, Long>>.
Rename method(s) to reflect what they do.
---
.../collector/webstats/SanitizeWeblogs.java | 89 ++++++++++++++++------
1 file changed, 65 insertions(+), 24 deletions(-)
diff --git a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
index 1f2e922..5a270dd 100644
--- a/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
+++ b/src/main/java/org/torproject/collector/webstats/SanitizeWeblogs.java
@@ -4,8 +4,10 @@
package org.torproject.collector.webstats;
import static java.util.stream.Collectors.counting;
+import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.groupingByConcurrent;
-import static java.util.stream.Collectors.toList;
+import static java.util.stream.Collectors.reducing;
+import static java.util.stream.Collectors.summingLong;
import org.torproject.collector.conf.Configuration;
import org.torproject.collector.conf.ConfigurationException;
@@ -35,6 +37,8 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -111,35 +115,36 @@ public class SanitizeWeblogs extends CollecTorMain {
: virtualEntry.getValue().entrySet()) {
String physicalHost = physicalEntry.getKey();
log.info("Processing logs for {} on {}.", virtualHost, physicalHost);
- Map<LocalDate, List<WebServerAccessLogLine>> linesByDate
+ Map<LocalDate, Map<String, Long>> linesByDate
= physicalEntry.getValue().values().stream().parallel()
- .flatMap((LogMetadata metadata) -> lineStream(metadata)
- .filter((line) -> line.isValid())).parallel()
- .collect(groupingByConcurrent(WebServerAccessLogLine::getDate));
+ .flatMap(metadata -> sanitzedLineStream(metadata).entrySet()
+ .stream())
+ .collect(groupingBy(Map.Entry::getKey,
+ reducing(Collections.emptyMap(), Map.Entry::getValue,
+ (e1, e2) -> Stream.concat(e1.entrySet().stream(), e2.entrySet()
+ .stream())
+ .collect(groupingByConcurrent(Map.Entry::getKey,
+ summingLong(Map.Entry::getValue))))));
LocalDate[] interval = determineInterval(linesByDate.keySet());
linesByDate.entrySet().stream()
.filter((entry) -> entry.getKey().isAfter(interval[0])
&& entry.getKey().isBefore(interval[1])).parallel()
- .forEach((entry) -> storeSanitized(virtualHost, physicalHost,
+ .forEach((entry) -> storeSortedAndForget(virtualHost, physicalHost,
entry.getKey(), entry.getValue()));
}
}
}
- private void storeSanitized(String virtualHost, String physicalHost,
- LocalDate date, List<WebServerAccessLogLine> lines) {
+ private void storeSortedAndForget(String virtualHost, String physicalHost,
+ LocalDate date, Map<String, Long> lineCounts) {
String name = new StringJoiner(InternalLogDescriptor.SEP)
.add(virtualHost).add(physicalHost)
.add(InternalWebServerAccessLog.MARKER)
.add(date.format(DateTimeFormatter.BASIC_ISO_DATE))
.toString() + "." + FileType.XZ.name().toLowerCase();
- log.debug("Sanitizing {}.", name);
- Map<String, Long> retainedLines = new TreeMap<>(lines
- .stream().parallel().map((line) -> sanitize(line, date))
- .filter((line) -> line.isPresent())
- .map((line) -> line.get())
- .collect(groupingByConcurrent(line -> line, counting())));
- lines.clear(); // not needed anymore
+ log.debug("Storing {}.", name);
+ Map<String, Long> retainedLines = new TreeMap<>(lineCounts);
+ lineCounts.clear(); // not needed anymore
try {
WebServerAccessLogPersistence walp
= new WebServerAccessLogPersistence(
@@ -187,8 +192,8 @@ public class SanitizeWeblogs extends CollecTorMain {
.collect(Collectors.joining("\n", "", "\n")).getBytes();
}
- static Optional<String> sanitize(WebServerAccessLogLine logLine,
- LocalDate date) {
+ static Optional<WebServerAccessLogLine>
+ sanitize(WebServerAccessLogLine logLine) {
if (!logLine.isValid()
|| !(Method.GET == logLine.getMethod()
|| Method.HEAD == logLine.getMethod())
@@ -203,10 +208,13 @@ public class SanitizeWeblogs extends CollecTorMain {
if (queryStart > 0) {
logLine.setRequest(logLine.getRequest().substring(0, queryStart));
}
- return Optional.of(logLine.toLogString());
+ return Optional.of(logLine);
}
LocalDate[] determineInterval(Set<LocalDate> dates) {
+ if (dates.isEmpty()) { // return the empty interval
+ return new LocalDate[]{LocalDate.MAX, LocalDate.MIN};
+ }
SortedSet<LocalDate> sorted = new TreeSet<>();
sorted.addAll(dates);
if (this.limits) {
@@ -214,7 +222,7 @@ public class SanitizeWeblogs extends CollecTorMain {
sorted.remove(sorted.last());
}
}
- if (sorted.isEmpty()) {
+ if (sorted.isEmpty()) { // return the empty interval
return new LocalDate[]{LocalDate.MAX, LocalDate.MIN};
}
if (!this.limits) {
@@ -224,18 +232,51 @@ public class SanitizeWeblogs extends CollecTorMain {
return new LocalDate[]{sorted.first(), sorted.last()};
}
- private Stream<WebServerAccessLogLine> lineStream(LogMetadata metadata) {
+ private static final int LISTLIMIT = Integer.MAX_VALUE / 2;
+
+ private Map<LocalDate, Map<String, Long>>
+ sanitzedLineStream(LogMetadata metadata) {
log.debug("Processing file {}.", metadata.path);
try (BufferedReader br
= new BufferedReader(new InputStreamReader(
metadata.fileType.decompress(Files.newInputStream(metadata.path))))) {
- return br.lines()
- .map((String line) -> WebServerAccessLogLine.makeLine(line))
- .collect(toList()).stream();
+ List<List<WebServerAccessLogLine>> lists = new ArrayList<>();
+ List<WebServerAccessLogLine> currentList = new ArrayList<>();
+ lists.add(currentList);
+ String lineStr = br.readLine();
+ int count = 0;
+ while (null != lineStr) {
+ WebServerAccessLogLine wsal = WebServerAccessLogLine.makeLine(lineStr);
+ if (wsal.isValid()) {
+ currentList.add(wsal);
+ count++;
+ }
+ if (count >= LISTLIMIT) {
+ currentList = new ArrayList<>();
+ lists.add(currentList);
+ count = 0;
+ }
+ lineStr = br.readLine();
+ }
+ br.close();
+ return lists.parallelStream()
+ .map(list -> list.stream()
+ .map(line -> sanitize(line))
+ .filter(line -> line.isPresent())
+ .map(line -> line.get())
+ .collect(groupingBy(WebServerAccessLogLine::getDate,
+ groupingBy(WebServerAccessLogLine::toLogString, counting()))))
+ .flatMap(map -> map.entrySet().stream()).parallel()
+ .collect(groupingByConcurrent(Map.Entry::getKey,
+ reducing(Collections.emptyMap(), Map.Entry::getValue,
+ (e1, e2) -> Stream.concat(e1.entrySet().stream(),
+ e2.entrySet().stream()).parallel()
+ .collect(groupingByConcurrent(Map.Entry::getKey,
+ summingLong(Map.Entry::getValue))))));
} catch (Exception ex) {
log.debug("Skipping log-file {}.", metadata.path, ex);
}
- return Stream.empty();
+ return Collections.emptyMap();
}
}
1
0

[collector/release] Add hasContent method to make even more use of DescriptorBuilder.
by karsten@torproject.org 26 Feb '18
by karsten@torproject.org 26 Feb '18
26 Feb '18
commit 5b68aaf8aa7c5f3769544061344e75f7884e87ef
Author: iwakeh <iwakeh(a)torproject.org>
Date: Fri Oct 27 17:35:19 2017 +0000
Add hasContent method to make even more use of DescriptorBuilder.
---
.../collector/bridgedescs/DescriptorBuilder.java | 4 +++
.../bridgedescs/SanitizedBridgesWriter.java | 42 +++++++++++-----------
2 files changed, 26 insertions(+), 20 deletions(-)
diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
index f530368..9d23adf 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -80,6 +80,10 @@ class DescriptorBuilder {
return this;
}
+ public boolean hasContent() {
+ return this.parts.size() > 1 || lastPart.length() > 0;
+ }
+
@Override
public String toString() {
if (!this.finalized) {
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index 22bf8f7..af54e03 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -462,7 +462,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
boolean includesFingerprintLine = false;
SortedMap<String, String> scrubbedLines = new TreeMap<>();
try {
- StringBuilder scrubbed = new StringBuilder();
+ DescriptorBuilder scrubbed = new DescriptorBuilder();
BufferedReader br = new BufferedReader(new StringReader(new String(
data, "US-ASCII")));
String line = null;
@@ -499,10 +499,10 @@ public class SanitizedBridgesWriter extends CollecTorMain {
} else if (line.startsWith("r ")) {
/* Clear buffer from previously scrubbed lines. */
- if (scrubbed.length() > 0) {
+ if (scrubbed.hasContent()) {
String scrubbedLine = scrubbed.toString();
scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
- scrubbed = new StringBuilder();
+ scrubbed = new DescriptorBuilder();
}
/* Parse the relevant parts of this r line. */
@@ -549,11 +549,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
fingerprintBytes, descPublicationTime);
String scrubbedDirPort = this.scrubTcpPort(dirPort,
fingerprintBytes, descPublicationTime);
- scrubbed.append("r " + nickname + " "
- + hashedBridgeIdentityBase64 + " "
- + hashedDescriptorIdentifier + " " + descPublicationTime
- + " " + scrubbedAddress + " " + scrubbedOrPort + " "
- + scrubbedDirPort + "\n");
+ scrubbed.append("r ").append(nickname).space()
+ .append(hashedBridgeIdentityBase64).space()
+ .append(hashedDescriptorIdentifier).space()
+ .append(descPublicationTime).space()
+ .append(scrubbedAddress).space()
+ .append(scrubbedOrPort).space()
+ .append(scrubbedDirPort).newLine();
/* Sanitize any addresses in a lines using the fingerprint and
* descriptor publication time from the previous r line. */
@@ -562,7 +564,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
line.substring("a ".length()), fingerprintBytes,
descPublicationTime);
if (scrubbedOrAddress != null) {
- scrubbed.append("a " + scrubbedOrAddress + "\n");
+ scrubbed.append("a ").append(scrubbedOrAddress).newLine();
} else {
logger.warn("Invalid address in line '{}' "
+ "in bridge network status. Skipping line!", line);
@@ -572,7 +574,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
} else if (line.startsWith("s ") || line.equals("s")
|| line.startsWith("w ") || line.equals("w")
|| line.startsWith("p ") || line.equals("p")) {
- scrubbed.append(line + "\n");
+ scrubbed.append(line).newLine();
/* There should be nothing else but r, a, w, p, and s lines in the
* network status. If there is, we should probably learn before
@@ -584,10 +586,10 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
}
br.close();
- if (scrubbed.length() > 0) {
+ if (scrubbed.hasContent()) {
String scrubbedLine = scrubbed.toString();
scrubbedLines.put(hashedBridgeIdentityHex, scrubbedLine);
- scrubbed = new StringBuilder();
+ scrubbed = new DescriptorBuilder();
}
if (!includesFingerprintLine) {
header.append("fingerprint ").append(authorityFingerprint).newLine();
@@ -1119,7 +1121,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
BufferedReader br = new BufferedReader(new StringReader(new String(
data, "US-ASCII")));
String line = null;
- StringBuilder scrubbed = null;
+ DescriptorBuilder scrubbed = null;
String hashedBridgeIdentity = null;
String masterKeyEd25519 = null;
while ((line = br.readLine()) != null) {
@@ -1135,12 +1137,12 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
hashedBridgeIdentity = DigestUtils.sha1Hex(Hex.decodeHex(
parts[2].toCharArray())).toLowerCase();
- scrubbed = new StringBuilder("extra-info " + parts[1] + " "
- + hashedBridgeIdentity.toUpperCase() + "\n");
+ scrubbed = new DescriptorBuilder("extra-info ").append(parts[1])
+ .space().append(hashedBridgeIdentity.toUpperCase()).newLine();
/* Parse the publication time to determine the file name. */
} else if (line.startsWith("published ")) {
- scrubbed.append(line + "\n");
+ scrubbed.append(line).newLine();
published = line.substring("published ".length());
if (published.compareTo(maxExtraInfoDescriptorPublishedTime)
> 0) {
@@ -1155,7 +1157,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
+ "Skipping descriptor.", line);
return;
}
- scrubbed.append("transport " + parts[1] + "\n");
+ scrubbed.append("transport ").append(parts[1]).newLine();
/* Skip transport-info lines entirely. */
} else if (line.startsWith("transport-info ")) {
@@ -1177,8 +1179,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
DigestUtils.sha256(Base64.decodeBase64(
masterKeyEd25519FromIdentityEd25519 + "=")))
.replaceAll("=", "");
- scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519
- + "\n");
+ scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519)
+ .newLine();
if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
masterKeyEd25519FromIdentityEd25519)) {
logger.warn("Mismatch between identity-ed25519 and "
@@ -1213,7 +1215,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
|| line.startsWith("exit-")
|| line.startsWith("hidserv-")
|| line.startsWith("padding-counts ")) {
- scrubbed.append(line + "\n");
+ scrubbed.append(line).newLine();
/* When we reach the signature, we're done. Write the sanitized
* descriptor to disk below. */
1
0

26 Feb '18
commit 4e61bb792bc4cd4db9df6eb49ab88890b34ff489
Author: iwakeh <iwakeh(a)torproject.org>
Date: Fri Oct 27 17:35:17 2017 +0000
Use DescriptorBuilder more often.
Add convenience constructor accepting the first string as argument.
---
.../torproject/collector/bridgedescs/DescriptorBuilder.java | 5 +++++
.../collector/bridgedescs/SanitizedBridgesWriter.java | 12 ++++++------
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
index 9c47b5e..f530368 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/DescriptorBuilder.java
@@ -34,6 +34,11 @@ class DescriptorBuilder {
this.parts.add(this.lastPart);
}
+ public DescriptorBuilder(String firstString) {
+ this();
+ this.append(firstString);
+ }
+
private void throwExceptionIfFinalized() {
if (this.finalized) {
throw new IllegalStateException("This DescriptorBuilder is finalized and"
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index 1d264a5..b4cd49e 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -461,7 +461,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
/* Parse the given network status line by line. */
- StringBuilder header = new StringBuilder();
+ DescriptorBuilder header = new DescriptorBuilder();
boolean includesFingerprintLine = false;
SortedMap<String, String> scrubbedLines = new TreeMap<>();
try {
@@ -483,7 +483,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
/* Additional header lines don't have to be cleaned up. */
} else if (line.startsWith("flag-thresholds ")) {
- header.append(line + "\n");
+ header.append(line).newLine();
/* The authority fingerprint in the "fingerprint" line can go in
* unscrubbed. */
@@ -494,7 +494,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
+ "\"fingerprint\" line (\"" + line + "\").");
return;
}
- header.append(line + "\n");
+ header.append(line).newLine();
includesFingerprintLine = true;
/* r lines contain sensitive information that needs to be removed
@@ -593,7 +593,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
scrubbed = new StringBuilder();
}
if (!includesFingerprintLine) {
- header.append("fingerprint ").append(authorityFingerprint).append("\n");
+ header.append("fingerprint ").append(authorityFingerprint).newLine();
}
/* Check if we can tell from the descriptor publication times
@@ -879,13 +879,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
/* Replace node fingerprints in the family line with their hashes
* and leave nicknames unchanged. */
} else if (line.startsWith("family ")) {
- StringBuilder familyLine = new StringBuilder("family");
+ DescriptorBuilder familyLine = new DescriptorBuilder("family");
for (String s : line.substring(7).split(" ")) {
if (s.startsWith("$")) {
familyLine.append(" $").append(DigestUtils.sha1Hex(Hex.decodeHex(
s.substring(1).toCharArray())).toUpperCase());
} else {
- familyLine.append(" ").append(s);
+ familyLine.space().append(s);
}
}
scrubbed.append(familyLine.toString()).newLine();
1
0