tor-commits
November 2019
commit 8763af44465445b2257f9740d4ce341f8334eead
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Nov 1 09:17:42 2019 +0100
Bump version to 2.9.0-dev.
---
CHANGELOG.md | 3 +++
build.xml | 2 +-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab5c711..99048e8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+# Changes in version 2.?.? - 2019-1?-??
+
+
# Changes in version 2.9.0 - 2019-11-01
* Medium changes
diff --git a/build.xml b/build.xml
index ff5db58..cbf4c8d 100644
--- a/build.xml
+++ b/build.xml
@@ -7,7 +7,7 @@
<project default="usage" name="metrics-lib" basedir="."
xmlns:ivy="antlib:org.apache.ivy.ant">
- <property name="release.version" value="2.9.0" />
+ <property name="release.version" value="2.9.0-dev" />
<property name="javadoc-title" value="Tor Metrics Library API Documentation"/>
<property name="javadoc-excludes" value="**/impl/** **/index/** **/internal/** **/log/**" />
<property name="implementation-title" value="Tor Metrics Library" />
commit fba9137499ef6255f040d134f488403cae214793
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Sat Nov 9 12:20:36 2019 +0100
Ignore unknown fields in index.json.
This is required for processing index.json files produced by CollecTor
1.13.0 or higher. We don't need those newly added fields or any other
fields added in the future. But we must not fail when fields are
added.
---
CHANGELOG.md | 5 ++++-
src/main/java/org/torproject/descriptor/index/IndexNode.java | 4 +++-
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 99048e8..2fd361d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,7 @@
-# Changes in version 2.?.? - 2019-1?-??
+# Changes in version 2.9.1 - 2019-11-09
+
+ * Minor changes
+ - Do not fail when processing index.json files with unknown fields.
# Changes in version 2.9.0 - 2019-11-01
diff --git a/src/main/java/org/torproject/descriptor/index/IndexNode.java b/src/main/java/org/torproject/descriptor/index/IndexNode.java
index b6855e3..9136b65 100644
--- a/src/main/java/org/torproject/descriptor/index/IndexNode.java
+++ b/src/main/java/org/torproject/descriptor/index/IndexNode.java
@@ -10,6 +10,7 @@ import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import com.fasterxml.jackson.annotation.PropertyAccessor;
+import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
@@ -54,7 +55,8 @@ public class IndexNode {
.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE)
.setSerializationInclusion(JsonInclude.Include.NON_EMPTY)
.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE)
- .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
+ .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY)
+ .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
/** The created date-time is exposed in JSON as 'index_created' field. */
@JsonProperty("index_created")
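For illustration, a minimal self-contained sketch (not part of the commit above) of what disabling DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES does: Jackson then silently ignores JSON fields that the target class does not declare, instead of throwing an exception. The Example class and the field names in the JSON string below are hypothetical.

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;

public class IgnoreUnknownFieldsSketch {

  /** Hypothetical target class that declares only one of the JSON fields. */
  static class Example {
    public String path;
  }

  public static void main(String[] args) throws Exception {
    String json = "{\"path\":\"recent\",\"some_future_field\":42}";
    ObjectMapper objectMapper = new ObjectMapper()
        // Without this line, readValue() would fail with
        // UnrecognizedPropertyException on "some_future_field".
        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
    Example example = objectMapper.readValue(json, Example.class);
    System.out.println(example.path);  // prints "recent"
  }
}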
commit 6c8470a0b79a5262162898087a5aa126f366f51e
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Sat Nov 9 12:25:11 2019 +0100
Prepare for 2.9.1 release.
---
build.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/build.xml b/build.xml
index cbf4c8d..0f8ac3c 100644
--- a/build.xml
+++ b/build.xml
@@ -7,7 +7,7 @@
<project default="usage" name="metrics-lib" basedir="."
xmlns:ivy="antlib:org.apache.ivy.ant">
- <property name="release.version" value="2.9.0-dev" />
+ <property name="release.version" value="2.9.1" />
<property name="javadoc-title" value="Tor Metrics Library API Documentation"/>
<property name="javadoc-excludes" value="**/impl/** **/index/** **/internal/** **/log/**" />
<property name="implementation-title" value="Tor Metrics Library" />
[translation/tpo-web] https://gitweb.torproject.org/translation.git/commit/?h=tpo-web
by translation@torproject.org 09 Nov '19
commit e20ca5e1153f3bbcb1efa5401f91ae7fcbe4cea8
Author: Translation commit bot <translation(a)torproject.org>
Date: Sat Nov 9 11:22:34 2019 +0000
https://gitweb.torproject.org/translation.git/commit/?h=tpo-web
---
contents+nl.po | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/contents+nl.po b/contents+nl.po
index df2264f6e..400068308 100644
--- a/contents+nl.po
+++ b/contents+nl.po
@@ -8,8 +8,8 @@
# erinm, 2019
# Bert Massop <bert.massop(a)gmail.com>, 2019
# IDRASSI Mounir <mounir.idrassi(a)idrix.fr>, 2019
-# Tonnes <tonnes.mb(a)gmail.com>, 2019
# Meteor 0id, 2019
+# Tonnes <tonnes.mb(a)gmail.com>, 2019
#
msgid ""
msgstr ""
@@ -17,7 +17,7 @@ msgstr ""
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2019-11-06 15:48+CET\n"
"PO-Revision-Date: 2019-03-09 10:41+0000\n"
-"Last-Translator: Meteor 0id, 2019\n"
+"Last-Translator: Tonnes <tonnes.mb(a)gmail.com>, 2019\n"
"Language-Team: Dutch (https://www.transifex.com/otf/teams/1519/nl/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
@@ -227,9 +227,9 @@ msgid ""
"add more. Want to help us translate? [See "
"here](https://community.torproject.org/localization/)"
msgstr ""
-"We willen dat iedereen van Tor Browser kan genieten in zijn eigen taal. Tor "
-"Browser is nu beschikbaar in 32 verschillende talen en we werken er aan om "
-"daar meer aan toe te voegen. Wil je ons helpen vertalen? [Kijk "
+"We willen dat iedereen in de eigen taal van Tor Browser kan genieten. Tor "
+"Browser is nu beschikbaar in 32 verschillende talen, en we werken eraan om "
+"daar meer aan toe te voegen. Wilt u ons helpen vertalen? [Kijk "
"hier](https://community.torproject.org/localization/)"
#: https//www.torproject.org/download/tor/
@@ -312,9 +312,9 @@ msgid ""
" the most popular and widely used free, open source privacy technologies: "
"Tor Browser and the Tor network."
msgstr ""
-"Het Tor Project en de bijbehorende gemeenschap ontwikkelen en verspreiden de"
-" meest populaire en wijdverspreid gebruikte vrije openbron "
-"privacytechnologieën: tor Browser en het Tor-netwerk."
+"Het Tor Project en de bijbehorende gemeenschap ontwikkelen en verspreiden "
+"enkele van de meest populaire en veelgebruikte vrije en open source "
+"privacytechnologieën: Tor Browser en het Tor-netwerk."
#: https//www.torproject.org/about/cy-pres/
#: (content/about/cy-pres/contents+en.lrpage.body)
commit de8ab5f1e9ca1b0c995be6b858aad7ae3aa6f5cd
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Sat Nov 9 11:35:02 2019 +0100
Bump version to 1.13.0-dev.
---
CHANGELOG.md | 3 +++
build.xml | 2 +-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4ac0b3c..2edc44d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+# Changes in version 1.1?.? - 2019-1?-??
+
+
# Changes in version 1.13.0 - 2019-11-09
* Medium changes
diff --git a/build.xml b/build.xml
index c1c8699..9c72447 100644
--- a/build.xml
+++ b/build.xml
@@ -9,7 +9,7 @@
<property name="javadoc-title" value="CollecTor API Documentation"/>
<property name="implementation-title" value="CollecTor" />
- <property name="release.version" value="1.13.0" />
+ <property name="release.version" value="1.13.0-dev" />
<property name="project-main-class" value="org.torproject.metrics.collector.Main" />
<property name="name" value="collector"/>
<property name="metricslibversion" value="2.9.0" />
commit 0e57dae0bfd6f3f7e0c5b5c78773818343ba9550
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Oct 18 11:11:38 2019 +0200
Bump version to 1.12.0-dev.
---
CHANGELOG.md | 3 +++
build.xml | 2 +-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f30ea3e..bb328ad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+# Changes in version 1.??.? - 2019-??-??
+
+
# Changes in version 1.12.0 - 2019-10-18
* Medium changes
diff --git a/build.xml b/build.xml
index c78f5c6..8276e63 100644
--- a/build.xml
+++ b/build.xml
@@ -9,7 +9,7 @@
<property name="javadoc-title" value="CollecTor API Documentation"/>
<property name="implementation-title" value="CollecTor" />
- <property name="release.version" value="1.12.0" />
+ <property name="release.version" value="1.12.0-dev" />
<property name="project-main-class" value="org.torproject.metrics.collector.Main" />
<property name="name" value="collector"/>
<property name="metricslibversion" value="2.8.0" />
commit 31034e8781c76df218426db030cac964566e3d34
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Fri Nov 1 17:40:11 2019 +0100
Tweak BridgeDB metrics file names.
We were using the same path for BridgeDB metrics in out/ and recent/,
and file names didn't contain the "-bridgedb-metrics" suffix that we
intended to add.
We're now using paths generated by BridgedbMetricsPersistence.
Also update create-tarballs.sh to create BridgeDB metrics tarballs.
Still part of #19332.
---
.../collector/bridgedb/BridgedbMetricsProcessor.java | 20 +++++++-------------
src/main/resources/create-tarballs.sh | 7 +++++++
2 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
index 7ae4502..9bb4f36 100644
--- a/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
+++ b/src/main/java/org/torproject/metrics/collector/bridgedb/BridgedbMetricsProcessor.java
@@ -11,6 +11,7 @@ import org.torproject.metrics.collector.conf.Configuration;
import org.torproject.metrics.collector.conf.ConfigurationException;
import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
+import org.torproject.metrics.collector.persist.BridgedbMetricsPersistence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -21,7 +22,6 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
-import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.Stack;
@@ -50,12 +50,6 @@ public class BridgedbMetricsProcessor extends CollecTorMain {
private String recentPathName;
/**
- * File name format.
- */
- private DateTimeFormatter filenameFormat = DateTimeFormatter.ofPattern(
- "uuuu/MM/dd/uuuu-MM-dd-HH-mm-ss");
-
- /**
* Initialize this class with the given configuration.
*/
public BridgedbMetricsProcessor(Configuration config) {
@@ -101,10 +95,12 @@ public class BridgedbMetricsProcessor extends CollecTorMain {
.readDescriptors(this.inputDirectory)) {
if (descriptor instanceof BridgedbMetrics) {
BridgedbMetrics bridgedbMetrics = (BridgedbMetrics) descriptor;
+ BridgedbMetricsPersistence persistence
+ = new BridgedbMetricsPersistence(bridgedbMetrics);
Path tarballPath = Paths.get(this.outputPathName,
- bridgedbMetrics.bridgedbMetricsEnd().format(this.filenameFormat));
+ persistence.getStoragePath());
Path rsyncPath = Paths.get(this.recentPathName,
- bridgedbMetrics.bridgedbMetricsEnd().format(this.filenameFormat));
+ persistence.getRecentPath());
this.writeDescriptor(bridgedbMetrics.getRawDescriptorBytes(),
tarballPath, rsyncPath);
} else if (descriptor instanceof UnparseableDescriptor) {
@@ -127,10 +123,8 @@ public class BridgedbMetricsProcessor extends CollecTorMain {
* storing them in instance attributes.
*/
private void initializeConfiguration() throws ConfigurationException {
- this.outputPathName = Paths.get(config.getPath(Key.OutputPath).toString(),
- "bridgedb-metrics").toString();
- this.recentPathName = Paths.get(config.getPath(Key.RecentPath).toString(),
- "bridgedb-metrics").toString();
+ this.outputPathName = config.getPath(Key.OutputPath).toString();
+ this.recentPathName = config.getPath(Key.RecentPath).toString();
this.inputDirectory =
config.getPath(Key.BridgedbMetricsLocalOrigins).toFile();
}
diff --git a/src/main/resources/create-tarballs.sh b/src/main/resources/create-tarballs.sh
index 695bb24..3927960 100755
--- a/src/main/resources/create-tarballs.sh
+++ b/src/main/resources/create-tarballs.sh
@@ -63,6 +63,8 @@ TARBALLS=(
snowflakes-$YEARTWO-$MONTHTWO
bridge-pool-assignments-$YEARONE-$MONTHONE
bridge-pool-assignments-$YEARTWO-$MONTHTWO
+ bridgedb-metrics-$YEARONE-$MONTHONE
+ bridgedb-metrics-$YEARTWO-$MONTHTWO
)
TARBALLS=($(printf "%s\n" "${TARBALLS[@]}" | uniq))
@@ -94,6 +96,8 @@ DIRECTORIES=(
$OUTDIR/snowflakes/$YEARTWO/$MONTHTWO/
$OUTDIR/bridge-pool-assignments/$YEARONE/$MONTHONE/
$OUTDIR/bridge-pool-assignments/$YEARTWO/$MONTHTWO/
+ $OUTDIR/bridgedb-metrics/$YEARONE/$MONTHONE/
+ $OUTDIR/bridgedb-metrics/$YEARTWO/$MONTHTWO/
)
DIRECTORIES=($(printf "%s\n" "${DIRECTORIES[@]}" | uniq))
@@ -183,4 +187,7 @@ ln -f -s -t $ARCHIVEDIR/snowflakes/ $TARBALLTARGETDIR/snowflakes-20??-??.tar.xz
mkdir -p $ARCHIVEDIR/bridge-pool-assignments/
ln -f -s -t $ARCHIVEDIR/bridge-pool-assignments/ $TARBALLTARGETDIR/bridge-pool-assignments-20??-??.tar.xz
+mkdir -p $ARCHIVEDIR/bridgedb-metrics/
+ln -f -s -t $ARCHIVEDIR/bridgedb-metrics/ $TARBALLTARGETDIR/bridgedb-metrics-20??-??.tar.xz
+
echo `date` "Finished."
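As a condensed sketch of the new path handling (not part of the commit above): the processor now delegates directory layout and file naming to BridgedbMetricsPersistence. The configuration values below are examples, and only the constructor and the two getters already shown in the diff are assumed.

import org.torproject.descriptor.BridgedbMetrics;
import org.torproject.metrics.collector.persist.BridgedbMetricsPersistence;

import java.nio.file.Path;
import java.nio.file.Paths;

class BridgedbTargetPathsSketch {

  /** Return tarball and rsync target paths for one BridgeDB metrics file. */
  static Path[] targetPaths(BridgedbMetrics bridgedbMetrics) {
    // Example configuration values (Key.OutputPath and Key.RecentPath).
    String outputPathName = "/srv/collector/out";
    String recentPathName = "/srv/collector/recent";
    // The persistence class decides the relative storage paths, so the
    // processor no longer appends a "bridgedb-metrics" subdirectory or
    // formats file names itself.
    BridgedbMetricsPersistence persistence
        = new BridgedbMetricsPersistence(bridgedbMetrics);
    Path tarballPath = Paths.get(outputPathName, persistence.getStoragePath());
    Path rsyncPath = Paths.get(recentPathName, persistence.getRecentPath());
    return new Path[] { tarballPath, rsyncPath };
  }
}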
[collector/release] Extend index.json by additional file meta data.
by karsten@torproject.org 09 Nov '19
commit 500b7c5ad3d94a0f0f8b8c7fdb110813895c25f0
Author: Karsten Loesing <karsten.loesing(a)gmx.net>
Date: Wed Oct 30 21:56:41 2019 +0100
Extend index.json by additional file meta data.
Implements #31204.
---
CHANGELOG.md | 13 +
build.xml | 2 +-
src/build | 2 +-
.../org/torproject/metrics/collector/conf/Key.java | 5 +-
.../metrics/collector/indexer/CreateIndexJson.java | 678 +++++++++++++++++----
.../metrics/collector/indexer/DirectoryNode.java | 36 ++
.../metrics/collector/indexer/FileNode.java | 125 ++++
.../metrics/collector/indexer/IndexNode.java | 30 +
.../metrics/collector/indexer/IndexerTask.java | 225 +++++++
src/main/resources/collector.properties | 14 +-
src/main/resources/create-tarballs.sh | 2 +-
.../metrics/collector/conf/ConfigurationTest.java | 2 +-
.../collector/indexer/CreateIndexJsonTest.java | 522 ++++++++++++++++
.../metrics/collector/indexer/IndexerTaskTest.java | 226 +++++++
14 files changed, 1756 insertions(+), 126 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb328ad..38754c4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
# Changes in version 1.??.? - 2019-??-??
+ * Medium changes
+ - Extend index.json by including descriptor types, first and last
+ publication timestamp, and SHA-256 file digest. Requires making
+ configuration changes in collector.properties:
+ 1) IndexedPath is a new directory with subdirectories for
+ archived and recent descriptors,
+ 2) ArchivePath and IndexPath are hard-wired to be subdirectories
+ of IndexedPath,
+ 3) RecentPath must be set to be a subdirectory of IndexedPath,
+ 4) ContribPath has disappeared, and
+ 5) HtdocsPath is a new directory with files served by the web
+ server.
+
# Changes in version 1.12.0 - 2019-10-18
diff --git a/build.xml b/build.xml
index 8276e63..019461d 100644
--- a/build.xml
+++ b/build.xml
@@ -12,7 +12,7 @@
<property name="release.version" value="1.12.0-dev" />
<property name="project-main-class" value="org.torproject.metrics.collector.Main" />
<property name="name" value="collector"/>
- <property name="metricslibversion" value="2.8.0" />
+ <property name="metricslibversion" value="2.9.0" />
<property name="jarincludes" value="collector.properties logback.xml" />
<patternset id="runtime" >
diff --git a/src/build b/src/build
index d82fff9..eb16cb3 160000
--- a/src/build
+++ b/src/build
@@ -1 +1 @@
-Subproject commit d82fff984634fe006ac7b0b102e7f48a52ca20d9
+Subproject commit eb16cb359db41722e6089bafb1e26808df4338df
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Key.java b/src/main/java/org/torproject/metrics/collector/conf/Key.java
index 390feed..d59438b 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Key.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Key.java
@@ -18,13 +18,12 @@ public enum Key {
RunOnce(Boolean.class),
ExitlistUrl(URL.class),
InstanceBaseUrl(String.class),
- ArchivePath(Path.class),
- ContribPath(Path.class),
+ IndexedPath(Path.class),
RecentPath(Path.class),
OutputPath(Path.class),
- IndexPath(Path.class),
StatsPath(Path.class),
SyncPath(Path.class),
+ HtdocsPath(Path.class),
RelaySources(SourceType[].class),
BridgeSources(SourceType[].class),
BridgePoolAssignmentsSources(SourceType[].class),
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java b/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java
index a40798e..15aa31d 100644
--- a/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java
+++ b/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java
@@ -1,73 +1,190 @@
-/* Copyright 2015--2018 The Tor Project
+/* Copyright 2015--2019 The Tor Project
* See LICENSE for licensing information */
package org.torproject.metrics.collector.indexer;
-import org.torproject.descriptor.index.DirectoryNode;
-import org.torproject.descriptor.index.FileNode;
-import org.torproject.descriptor.index.IndexNode;
-import org.torproject.descriptor.internal.FileType;
import org.torproject.metrics.collector.conf.Configuration;
+import org.torproject.metrics.collector.conf.ConfigurationException;
import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.PropertyAccessor;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.PropertyNamingStrategy;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
+import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileOutputStream;
+import java.io.IOException;
import java.io.InputStream;
-import java.io.OutputStreamWriter;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.Locale;
+import java.io.OutputStream;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.StandardCopyOption;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.temporal.TemporalAmount;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
import java.util.Properties;
+import java.util.Set;
+import java.util.SortedMap;
import java.util.SortedSet;
-import java.util.TimeZone;
+import java.util.TreeMap;
import java.util.TreeSet;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
-/* Create a fresh index.json containing all directories and files in the
- * archive/ and recent/ directories.
+/**
+ * Create an index file called {@code index.json} containing metadata of all
+ * files in the {@code indexed/} directory and update the {@code htdocs/}
+ * directory to contain all files to be served via the web server.
*
- * Note that if this ever takes longer than a few seconds, we'll have to
- * cache index parts of directories or files that haven't changed.
- * Example: if we parse include cryptographic hashes or @type information,
- * we'll likely have to do that. */
+ * <p>File metadata includes:</p>
+ * <ul>
+ * <li>Path for downloading this file from the web server.</li>
+ * <li>Size of the file in bytes.</li>
+ * <li>Timestamp when the file was last modified.</li>
+ * <li>Descriptor types as found in {@code @type} annotations of contained
+ * descriptors.</li>
+ * <li>Earliest and latest publication timestamp of contained
+ * descriptors.</li>
+ * <li>SHA-256 digest of the file.</li>
+ * </ul>
+ *
+ * <p>This class maintains its own working directory {@code htdocs/} with
+ * subdirectories like {@code htdocs/archive/} or {@code htdocs/recent/} and
+ * another subdirectory {@code htdocs/index/}. The first two subdirectories
+ * contain (hard) links created and deleted by this class, the third
+ * subdirectory contains the {@code index.json} file in uncompressed and
+ * compressed forms.</p>
+ *
+ * <p>The main reason for having the {@code htdocs/} directory is that indexing
+ * a large descriptor file can be time consuming. New or updated files in
+ * {@code indexed/} first need to be indexed before their metadata can be
+ * included in {@code index.json}. Another reason is that files removed from
+ * {@code indexed/} shall still be available for download for a limited period
+ * of time after disappearing from {@code index.json}.</p>
+ *
+ * <p>The reason for creating (hard) links in {@code htdocs/}, rather than
+ * copies, is that links do not consume additional disk space. All directories
+ * must be located on the same file system. Storing symbolic links in
+ * {@code htdocs/} would not have worked with replaced or deleted files in the
+ * original directories. Symbolic links in original directories are allowed as
+ * long as they target to the same file system.</p>
+ *
+ * <p>This class does not write, modify, or delete any files in the
+ * {@code indexed/} directory. At the same time it does not expect any other
+ * classes to write, modify, or delete contents in the {@code htdocs/}
+ * directory.</p>
+ */
public class CreateIndexJson extends CollecTorMain {
+ /**
+ * Class logger.
+ */
private static final Logger logger =
LoggerFactory.getLogger(CreateIndexJson.class);
- private static File indexJsonFile;
+ /**
+ * Delay between finding out that a file has been deleted and deleting its
+ * link.
+ */
+ private static final TemporalAmount deletionDelay = Duration.ofHours(2L);
- private static String basePath;
+ /**
+ * Index tarballs with no more than this many threads at a time.
+ */
+ private static final int tarballIndexerThreads = 3;
- private static File[] indexedDirectories;
+ /**
+ * Index flat files with no more than this many threads at a time.
+ */
+ private static final int flatFileIndexerThreads = 3;
- private static final String dateTimePattern = "yyyy-MM-dd HH:mm";
+ /**
+ * Parser and formatter for all timestamps found in {@code index.json}.
+ */
+ private static DateTimeFormatter dateTimeFormatter = DateTimeFormatter
+ .ofPattern("uuuu-MM-dd HH:mm").withZone(ZoneOffset.UTC);
- private static final Locale dateTimeLocale = Locale.US;
+ /**
+ * Object mapper for parsing and formatting {@code index.json} files.
+ */
+ private static ObjectMapper objectMapper = new ObjectMapper()
+ .setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE)
+ .setSerializationInclusion(JsonInclude.Include.NON_EMPTY)
+ .setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE)
+ .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
- private static final TimeZone dateTimezone = TimeZone.getTimeZone("UTC");
+ /**
+ * Path to the {@code indexed/} directory.
+ */
+ private Path indexedPath;
- private static String buildRevision = null;
+ /**
+ * Path to the {@code htdocs/} directory.
+ */
+ private Path htdocsPath;
- /** Creates indexes of directories containing archived and recent
- * descriptors and write index files to disk. */
- public CreateIndexJson(Configuration conf) {
- super(conf);
- Properties buildProperties = new Properties();
- try (InputStream is = getClass().getClassLoader()
- .getResourceAsStream("collector.buildrevision.properties")) {
- buildProperties.load(is);
- buildRevision = buildProperties.getProperty("collector.build.revision",
- null);
- } catch (Exception ex) {
- // This doesn't hamper the index creation: only log a warning.
- logger.warn("No build revision available.", ex);
- buildRevision = null;
- }
+ /**
+ * Path to the uncompressed {@code index.json} file.
+ */
+ private Path indexJsonPath;
+
+ /**
+ * Base URL of all resources included in {@code index.json}.
+ */
+ private String basePathString;
+
+ /**
+ * Git revision of this software to be included in {@code index.json} or
+ * omitted if unknown.
+ */
+ private String buildRevisionString;
+
+ /**
+ * Index containing metadata of files in {@code indexed/}, including new or
+ * updated files that still need to be indexed and deleted files that are
+ * still linked in {@code htdocs/}.
+ *
+ * <p>This map is initialized by reading the last known {@code index.json}
+ * file and remains available in memory between executions until shutdown.</p>
+ */
+ private SortedMap<Path, FileNode> index;
+
+ /**
+ * Executor for indexing tarballs.
+ */
+ private ExecutorService tarballsExecutor
+ = Executors.newFixedThreadPool(tarballIndexerThreads);
+
+ /**
+ * Executor for indexing flat files (non-tarballs).
+ */
+ private ExecutorService flatFilesExecutor
+ = Executors.newFixedThreadPool(flatFileIndexerThreads);
+
+ /**
+ * Initialize this class with the given {@code configuration}.
+ *
+ * @param configuration Configuration values.
+ */
+ public CreateIndexJson(Configuration configuration) {
+ super(configuration);
}
@Override
@@ -80,96 +197,433 @@ public class CreateIndexJson extends CollecTorMain {
return "IndexJson";
}
+
+ /**
+ * Run the indexer by (1) adding new files from {@code indexed/} to the index,
+ * (2) adding old files from {@code htdocs/} for which only links exist to the
+ * index, (3) scheduling new tasks and updating links in {@code htdocs/} to
+ * reflect what's contained in the in-memory index, and (4) writing new
+ * uncompressed and compressed {@code index.json} files to disk.
+ */
@Override
- protected void startProcessing() {
+ public void startProcessing() {
+ this.startProcessing(Instant.now());
+ }
+
+ /**
+ * Helper method to {@link #startProcessing()} that accepts the current
+ * execution time and which is used by tests.
+ *
+ * @param now Current execution time.
+ */
+ protected void startProcessing(Instant now) {
+ try {
+ this.basePathString = this.config.getProperty(Key.InstanceBaseUrl.name());
+ this.indexedPath = config.getPath(Key.IndexedPath);
+ this.htdocsPath = config.getPath(Key.HtdocsPath);
+ } catch (ConfigurationException e) {
+ logger.error("Unable to read one or more configuration values. Not "
+ + "indexing in this execution.", e);
+ }
+ this.buildRevisionString = this.obtainBuildRevision();
+ this.indexJsonPath = this.htdocsPath
+ .resolve(Paths.get("index", "index.json"));
try {
- indexJsonFile = new File(config.getPath(Key.IndexPath).toFile(),
- "index.json");
- basePath = config.getProperty(Key.InstanceBaseUrl.name());
- indexedDirectories = new File[] {
- config.getPath(Key.ArchivePath).toFile(),
- config.getPath(Key.ContribPath).toFile(),
- config.getPath(Key.RecentPath).toFile() };
- writeIndex(indexDirectories());
- } catch (Exception e) {
- logger.error("Cannot run index creation: {}", e.getMessage(), e);
- throw new RuntimeException(e);
+ this.prepareHtdocsDirectory();
+ if (null == this.index) {
+ logger.info("Reading index.json file from last execution.");
+ this.index = this.readIndex();
+ }
+ logger.info("Going through indexed/ and adding new files to the index.");
+ this.addNewFilesToIndex(this.indexedPath);
+ logger.info("Going through htdocs/ and adding links to deleted files to "
+ + "the index.");
+ this.addOldLinksToIndex();
+ logger.info("Going through the index, scheduling tasks, and updating "
+ + "links.");
+ this.scheduleTasksAndUpdateLinks(now);
+ logger.info("Writing uncompressed and compressed index.json files to "
+ + "disk.");
+ this.writeIndex(this.index, now);
+ logger.info("Pausing until next index update run.");
+ } catch (IOException e) {
+ logger.error("I/O error while updating index.json files. Trying again in "
+ + "the next execution.", e);
}
}
- private static DateFormat dateTimeFormat;
-
- static {
- dateTimeFormat = new SimpleDateFormat(dateTimePattern,
- dateTimeLocale);
- dateTimeFormat.setLenient(false);
- dateTimeFormat.setTimeZone(dateTimezone);
+ /**
+ * Prepare the {@code htdocs/} directory by checking whether all required
+ * subdirectories exist and by creating them if not.
+ *
+ * @throws IOException Thrown if one or more directories could not be created.
+ */
+ private void prepareHtdocsDirectory() throws IOException {
+ for (Path requiredPath : new Path[] {
+ this.htdocsPath,
+ this.indexJsonPath.getParent() }) {
+ if (!Files.exists(requiredPath)) {
+ Files.createDirectories(requiredPath);
+ }
+ }
}
- private IndexNode indexDirectories() {
- SortedSet<DirectoryNode> directoryNodes = new TreeSet<>();
- logger.trace("indexing: {} {}", indexedDirectories[0],
- indexedDirectories[1]);
- for (File directory : indexedDirectories) {
- if (directory.exists() && directory.isDirectory()) {
- DirectoryNode dn = indexDirectory(directory);
- if (null != dn) {
- directoryNodes.add(dn);
+ /**
+ * Read the {@code index.json} file written by the previous execution and
+ * populate our index with its contents, or leave the index empty if this is
+ * the first execution and that file does not yet exist.
+ *
+ * @return Index read from disk, or empty map if {@code index.json} does not
+ * exist.
+ */
+ private SortedMap<Path, FileNode> readIndex() throws IOException {
+ SortedMap<Path, FileNode> index = new TreeMap<>();
+ if (Files.exists(this.indexJsonPath)) {
+ IndexNode indexNode = objectMapper.readValue(
+ Files.newInputStream(this.indexJsonPath), IndexNode.class);
+ SortedMap<Path, DirectoryNode> directoryNodes = new TreeMap<>();
+ directoryNodes.put(Paths.get(""), indexNode);
+ while (!directoryNodes.isEmpty()) {
+ Path directoryPath = directoryNodes.firstKey();
+ DirectoryNode directoryNode = directoryNodes.remove(directoryPath);
+ if (null != directoryNode.files) {
+ for (FileNode fileNode : directoryNode.files) {
+ Path filePath = this.indexedPath.resolve(directoryPath)
+ .resolve(Paths.get(fileNode.path));
+ index.put(filePath, fileNode);
+ }
}
+ if (null != directoryNode.directories) {
+ boolean isRootDirectory = directoryNode == indexNode;
+ for (DirectoryNode subdirectoryNode : directoryNode.directories) {
+ Path subdirectoryPath = isRootDirectory
+ ? Paths.get(subdirectoryNode.path)
+ : directoryPath.resolve(Paths.get(subdirectoryNode.path));
+ directoryNodes.put(subdirectoryPath, subdirectoryNode);
+ }
+ }
+ }
+ }
+ return index;
+ }
+
+ /**
+ * Obtain and return the build revision string that was generated during the
+ * build process with {@code git rev-parse --short HEAD} and written to
+ * {@code collector.buildrevision.properties}, or return {@code null} if the
+ * build revision string cannot be obtained.
+ *
+ * @return Build revision string.
+ */
+ protected String obtainBuildRevision() {
+ String buildRevision = null;
+ Properties buildProperties = new Properties();
+ String propertiesFile = "collector.buildrevision.properties";
+ try (InputStream is = getClass().getClassLoader()
+ .getResourceAsStream(propertiesFile)) {
+ if (null == is) {
+ logger.warn("File {}, which is supposed to contain the build revision "
+ + "string, does not exist in our class path. Writing index.json "
+ + "without the \"build_revision\" field.", propertiesFile);
+ return null;
}
+ buildProperties.load(is);
+ buildRevision = buildProperties.getProperty(
+ "collector.build.revision", null);
+ } catch (IOException e) {
+ logger.warn("I/O error while trying to obtain build revision string. "
+ + "Writing index.json without the \"build_revision\" field.");
}
- return new IndexNode(dateTimeFormat.format(
- System.currentTimeMillis()), buildRevision, basePath, null,
- directoryNodes);
- }
-
- private DirectoryNode indexDirectory(File directory) {
- SortedSet<FileNode> fileNodes = new TreeSet<>();
- SortedSet<DirectoryNode> directoryNodes = new TreeSet<>();
- logger.trace("indexing: {}", directory);
- File[] fileList = directory.listFiles();
- if (null == fileList) {
- logger.warn("Indexing dubious directory: {}", directory);
- return null;
+ return buildRevision;
+ }
+
+ /**
+ * Walk the given file tree and add all previously unknown files to the
+ * in-memory index (except for files starting with "." or ending with ".tmp").
+ *
+ * @param path File tree to walk.
+ */
+ private void addNewFilesToIndex(Path path) throws IOException {
+ if (!Files.exists(path)) {
+ return;
}
- for (File fileOrDirectory : fileList) {
- if (fileOrDirectory.getName().startsWith(".")
- || fileOrDirectory.getName().endsWith(".tmp")) {
- continue;
+ Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
+ @Override
+ public FileVisitResult visitFile(Path filePath,
+ BasicFileAttributes basicFileAttributes) {
+ if (!filePath.toString().startsWith(".")
+ && !filePath.toString().endsWith(".tmp")) {
+ index.putIfAbsent(filePath, new FileNode());
+ }
+ return FileVisitResult.CONTINUE;
+ }
+ });
+ }
+
+ /**
+ * Walk the file tree of the {@code htdocs/} directory and add all previously
+ * unknown links to the in-memory index to ensure their deletion when they're
+ * known to be deleted from their original directories.
+ */
+ private void addOldLinksToIndex() throws IOException {
+ Path htdocsIndexPath = this.indexJsonPath.getParent();
+ Files.walkFileTree(this.htdocsPath, new SimpleFileVisitor<Path>() {
+ @Override
+ public FileVisitResult visitFile(Path linkPath,
+ BasicFileAttributes basicFileAttributes) {
+ if (!linkPath.startsWith(htdocsIndexPath)) {
+ Path filePath = indexedPath.resolve(htdocsPath.relativize(linkPath));
+ index.putIfAbsent(filePath, new FileNode());
+ }
+ return FileVisitResult.CONTINUE;
}
- if (fileOrDirectory.isFile()) {
- fileNodes.add(indexFile(fileOrDirectory));
+ });
+ }
+
+ /**
+ * Go through the index, schedule tasks to index files, and update links.
+ *
+ * @throws IOException Thrown if an I/O exception occurs while creating or
+ * deleting links.
+ */
+ private void scheduleTasksAndUpdateLinks(Instant now) throws IOException {
+ int queuedIndexerTasks = 0;
+ Map<Path, FileNode> indexingResults = new HashMap<>();
+ SortedSet<Path> filesToIndex = new TreeSet<>();
+ Map<Path, Path> linksToCreate = new HashMap<>();
+ Set<FileNode> linksToMarkForDeletion = new HashSet<>();
+ Map<Path, Path> linksToDelete = new HashMap<>();
+ for (Map.Entry<Path, FileNode> e : this.index.entrySet()) {
+ Path filePath = e.getKey();
+ Path linkPath = this.htdocsPath
+ .resolve(this.indexedPath.relativize(filePath));
+ FileNode fileNode = e.getValue();
+ if (Files.exists(filePath)) {
+ if (null != fileNode.indexerResult) {
+ if (!fileNode.indexerResult.isDone()) {
+ /* This file is currently being indexed, so we should just skip it
+ * and wait until the indexer is done. */
+ queuedIndexerTasks++;
+ continue;
+ }
+ try {
+ /* Indexing is done, obtain index results. */
+ fileNode = fileNode.indexerResult.get();
+ indexingResults.put(filePath, fileNode);
+ } catch (InterruptedException | ExecutionException ex) {
+ /* Clear index result, so that we can give this file another try
+ * next time. */
+ fileNode.indexerResult = null;
+ }
+ }
+ String originalLastModified = dateTimeFormatter
+ .format(Files.getLastModifiedTime(filePath).toInstant());
+ if (!originalLastModified.equals(fileNode.lastModified)) {
+ /* We either don't have any index results for this file, or we only
+ * have index results for an older version of this file. */
+ filesToIndex.add(filePath);
+ } else if (!Files.exists(linkPath)) {
+ /* We do have index results, but we don't have a link yet, so we're
+ * going to create a link. */
+ linksToCreate.put(linkPath, filePath);
+ } else {
+ String linkLastModified = dateTimeFormatter
+ .format(Files.getLastModifiedTime(linkPath).toInstant());
+ if (!linkLastModified.equals(fileNode.lastModified)) {
+ /* We do have index results plus a link to an older version of this
+ * file, so we'll have to update the link. */
+ linksToCreate.put(linkPath, filePath);
+ }
+ }
} else {
- DirectoryNode dn = indexDirectory(fileOrDirectory);
- if (null != dn) {
- directoryNodes.add(dn);
+ if (null == fileNode.markedForDeletion) {
+ /* We're noticing just now that the file doesn't exist anymore, so
+ * we're going to mark it for deletion but not deleting the link right
+ * away. */
+ linksToMarkForDeletion.add(fileNode);
+ } else if (fileNode.markedForDeletion
+ .isBefore(now.minus(deletionDelay))) {
+ /* The file doesn't exist anymore, and we found out long enough ago,
+ * so we can now go ahead and delete the link. */
+ linksToDelete.put(linkPath, filePath);
+ }
+ }
+ }
+ if (queuedIndexerTasks > 0) {
+ logger.info("Counting {} file(s) being currently indexed or in the "
+ + "queue.", queuedIndexerTasks);
+ }
+ this.updateIndex(indexingResults);
+ this.scheduleTasks(filesToIndex);
+ this.createLinks(linksToCreate);
+ this.markForDeletion(linksToMarkForDeletion, now);
+ this.deleteLinks(linksToDelete);
+ }
+
+ /**
+ * Update index with index results.
+ */
+ private void updateIndex(Map<Path, FileNode> indexResults) {
+ if (!indexResults.isEmpty()) {
+ logger.info("Updating {} index entries with index results.",
+ indexResults.size());
+ this.index.putAll(indexResults);
+ }
+ }
+
+ /**
+ * Schedule indexing the given set of descriptor files, using different queues
+ * for tarballs and flat files.
+ *
+ * @param filesToIndex Paths to descriptor files to index.
+ */
+ private void scheduleTasks(SortedSet<Path> filesToIndex) {
+ if (!filesToIndex.isEmpty()) {
+ logger.info("Scheduling {} indexer task(s).", filesToIndex.size());
+ for (Path fileToIndex : filesToIndex) {
+ IndexerTask indexerTask = this.createIndexerTask(fileToIndex);
+ if (fileToIndex.getFileName().toString().endsWith(".tar.xz")) {
+ this.index.get(fileToIndex).indexerResult
+ = this.tarballsExecutor.submit(indexerTask);
+ } else {
+ this.index.get(fileToIndex).indexerResult
+ = this.flatFilesExecutor.submit(indexerTask);
}
}
}
- return new DirectoryNode(
- directory.getName(), fileNodes.isEmpty() ? null : fileNodes,
- directoryNodes.isEmpty() ? null : directoryNodes);
- }
-
- private FileNode indexFile(File file) {
- return new FileNode(file.getName(), file.length(),
- dateTimeFormat.format(file.lastModified()));
- }
-
- private void writeIndex(IndexNode indexNode) throws Exception {
- indexJsonFile.getParentFile().mkdirs();
- String indexNodeString = IndexNode.makeJsonString(indexNode);
- for (String filename : new String[] {indexJsonFile.toString(),
- indexJsonFile + ".gz", indexJsonFile + ".xz", indexJsonFile + ".bz2"}) {
- FileType type = FileType.valueOf(
- filename.substring(filename.lastIndexOf(".") + 1).toUpperCase());
- try (BufferedWriter bufferedWriter
- = new BufferedWriter(new OutputStreamWriter(type.outputStream(
- new FileOutputStream(filename))))) {
- bufferedWriter.write(indexNodeString);
+ }
+
+ /**
+ * Create an indexer task for indexing the given file.
+ *
+ * <p>The reason why this is a separate method is that it can be overriden by
+ * tests that don't actually want to index files but instead provide their own
+ * index results.</p>
+ *
+ * @param fileToIndex File to index.
+ * @return Indexer task.
+ */
+ protected IndexerTask createIndexerTask(Path fileToIndex) {
+ return new IndexerTask(fileToIndex);
+ }
+
+ /**
+ * Create links in {@code htdocs/}, including all necessary parent
+ * directories.
+ *
+ * @param linksToCreate Map of links to be created with keys being link paths
+ * and values being original file paths.
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ private void createLinks(Map<Path, Path> linksToCreate) throws IOException {
+ if (!linksToCreate.isEmpty()) {
+ logger.info("Creating {} new link(s).", linksToCreate.size());
+ for (Map.Entry<Path, Path> e : linksToCreate.entrySet()) {
+ Path linkPath = e.getKey();
+ Path originalPath = e.getValue();
+ Files.createDirectories(linkPath.getParent());
+ Files.deleteIfExists(linkPath);
+ Files.createLink(linkPath, originalPath);
+ }
+ }
+ }
+
+ /**
+ * Mark the given links for deletion in the in-memory index.
+ *
+ * @param linksToMarkForDeletion Files to be marked for deletion.
+ */
+ private void markForDeletion(Set<FileNode> linksToMarkForDeletion,
+ Instant now) {
+ if (!linksToMarkForDeletion.isEmpty()) {
+ logger.info("Marking {} old link(s) for deletion.",
+ linksToMarkForDeletion.size());
+ for (FileNode fileNode : linksToMarkForDeletion) {
+ fileNode.markedForDeletion = now;
+ }
+ }
+ }
+
+ /**
+ * Delete the given links from {@code htdocs/}.
+ *
+ * @param linksToDelete Map of links to be deleted with keys being link paths
+ * and values being original file paths.
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ private void deleteLinks(Map<Path, Path> linksToDelete) throws IOException {
+ if (!linksToDelete.isEmpty()) {
+ logger.info("Deleting {} old link(s).", linksToDelete.size());
+ for (Map.Entry<Path, Path> e : linksToDelete.entrySet()) {
+ Path linkPath = e.getKey();
+ Path originalPath = e.getValue();
+ Files.deleteIfExists(linkPath);
+ index.remove(originalPath);
}
}
}
+ /**
+ * Write the in-memory index to {@code index.json} and its compressed
+ * variants, but exclude files that have not yet been indexed or that are
+ * marked for deletion.
+ *
+ * @throws IOException Thrown if an I/O error occurs while writing files.
+ */
+ private void writeIndex(SortedMap<Path, FileNode> index,
+ Instant now) throws IOException {
+ IndexNode indexNode = new IndexNode();
+ indexNode.indexCreated = dateTimeFormatter.format(now);
+ indexNode.buildRevision = this.buildRevisionString;
+ indexNode.path = this.basePathString;
+ SortedMap<Path, DirectoryNode> directoryNodes = new TreeMap<>();
+ for (Map.Entry<Path, FileNode> indexEntry : index.entrySet()) {
+ Path filePath = this.indexedPath.relativize(indexEntry.getKey());
+ FileNode fileNode = indexEntry.getValue();
+ if (null == fileNode.lastModified || null != fileNode.markedForDeletion) {
+ /* Skip unindexed or deleted files. */
+ continue;
+ }
+ Path directoryPath = null;
+ DirectoryNode parentDirectoryNode = indexNode;
+ if (null != filePath.getParent()) {
+ for (Path pathPart : filePath.getParent()) {
+ directoryPath = null == directoryPath ? pathPart
+ : directoryPath.resolve(pathPart);
+ DirectoryNode directoryNode = directoryNodes.get(directoryPath);
+ if (null == directoryNode) {
+ directoryNode = new DirectoryNode();
+ directoryNode.path = pathPart.toString();
+ if (null == parentDirectoryNode.directories) {
+ parentDirectoryNode.directories = new ArrayList<>();
+ }
+ parentDirectoryNode.directories.add(directoryNode);
+ directoryNodes.put(directoryPath, directoryNode);
+ }
+ parentDirectoryNode = directoryNode;
+ }
+ }
+ if (null == parentDirectoryNode.files) {
+ parentDirectoryNode.files = new ArrayList<>();
+ }
+ parentDirectoryNode.files.add(fileNode);
+ }
+ Path htdocsIndexPath = this.indexJsonPath.getParent();
+ try (OutputStream uncompressed
+ = Files.newOutputStream(htdocsIndexPath.resolve(".index.json.tmp"));
+ OutputStream bz2Compressed = new BZip2CompressorOutputStream(
+ Files.newOutputStream(htdocsIndexPath.resolve("index.json.bz2")));
+ OutputStream gzCompressed = new GzipCompressorOutputStream(
+ Files.newOutputStream(htdocsIndexPath.resolve("index.json.gz")));
+ OutputStream xzCompressed = new XZCompressorOutputStream(
+ Files.newOutputStream(htdocsIndexPath.resolve("index.json.xz")))) {
+ objectMapper.writeValue(uncompressed, indexNode);
+ objectMapper.writeValue(bz2Compressed, indexNode);
+ objectMapper.writeValue(gzCompressed, indexNode);
+ objectMapper.writeValue(xzCompressed, indexNode);
+ }
+ Files.move(htdocsIndexPath.resolve(".index.json.tmp"), this.indexJsonPath,
+ StandardCopyOption.REPLACE_EXISTING);
+ }
}
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/DirectoryNode.java b/src/main/java/org/torproject/metrics/collector/indexer/DirectoryNode.java
new file mode 100644
index 0000000..a369d08
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/DirectoryNode.java
@@ -0,0 +1,36 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+
+import java.util.List;
+
+/**
+ * Directory node in {@code index.json} which is discarded after reading and
+ * re-created before writing that file.
+ */
+@JsonPropertyOrder({ "path", "files", "directories" })
+class DirectoryNode {
+
+ /**
+ * Relative path of the directory.
+ */
+ @JsonProperty("path")
+ String path;
+
+ /**
+ * List of file objects of files available from this directory.
+ */
+ @JsonProperty("files")
+ List<FileNode> files;
+
+ /**
+ * List of directory objects of directories available from this directory.
+ */
+ @JsonProperty("directories")
+ List<DirectoryNode> directories;
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/FileNode.java b/src/main/java/org/torproject/metrics/collector/indexer/FileNode.java
new file mode 100644
index 0000000..c007196
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/FileNode.java
@@ -0,0 +1,125 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+
+import java.time.Instant;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.concurrent.Future;
+
+/**
+ * File node in {@code index.json}, also used for storing volatile metadata
+ * like whether a descriptor file is currently being indexed or whether its
+ * link in {@code htdocs/} is marked for deletion.
+ */
+@JsonPropertyOrder({ "path", "size", "last_modified", "types",
+ "first_published", "last_published", "sha256" })
+class FileNode {
+
+ /**
+ * Relative path of the file.
+ */
+ @JsonProperty("path")
+ String path;
+
+ /**
+ * Size of the file in bytes.
+ */
+ @JsonProperty("size")
+ Long size;
+
+ /**
+ * Timestamp when the file was last modified using pattern
+ * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ */
+ @JsonProperty("last_modified")
+ String lastModified;
+
+ /**
+ * Descriptor types as found in {@code @type} annotations of contained
+ * descriptors.
+ */
+ @JsonProperty("types")
+ SortedSet<String> types;
+
+ /**
+ * Earliest publication timestamp of contained descriptors using pattern
+ * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ */
+ @JsonProperty("first_published")
+ String firstPublished;
+
+ /**
+ * Latest publication timestamp of contained descriptors using pattern
+ * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ */
+ @JsonProperty("last_published")
+ String lastPublished;
+
+ /**
+ * SHA-256 digest of this file.
+ */
+ @JsonProperty("sha256")
+ String sha256;
+
+ /**
+ * Indexer result that will be available as soon as the indexer has completed
+ * its task.
+ */
+ @JsonIgnore
+ Future<FileNode> indexerResult;
+
+ /**
+ * Timestamp when this file was first not found anymore in {@code indexed/},
+ * used to keep the link in {@code htdocs/} around for another 2 hours before
+ * deleting it, too.
+ *
+ * <p>This field is ignored when writing {@code index.json}, because it's an
+ * internal detail that nobody else cares about. The effect is that links
+ * might be around for longer than 2 hours in case of a restart, which seems
+ * acceptable.</p>
+ */
+ @JsonIgnore
+ Instant markedForDeletion;
+
+ /**
+ * Create and return a {@link FileNode} instance with the given values.
+ *
+ * @param path Relative path of the file.
+ * @param size Size of the file in bytes.
+ * @param lastModified Timestamp when the file was last modified using pattern
+ * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ * @param types Descriptor types as found in {@code @type} annotations of
+ * contained descriptors.
+ * @param firstPublished Earliest publication timestamp of contained
+ * descriptors using pattern {@code "YYYY-MM-DD HH:MM"} in the UTC
+ * timezone.
+ * @param lastPublished Latest publication timestamp of contained descriptors
+ * using pattern {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ * @param sha256 SHA-256 digest of this file.
+ *
+ * @return {@link FileNode} instance with the given values.
+ */
+ static FileNode of(String path, Long size, String lastModified,
+ Iterable<String> types, String firstPublished, String lastPublished,
+ String sha256) {
+ FileNode fileNode = new FileNode();
+ fileNode.path = path;
+ fileNode.size = size;
+ fileNode.lastModified = lastModified;
+ fileNode.types = new TreeSet<>();
+ for (String type : types) {
+ fileNode.types.add(type);
+ }
+ fileNode.firstPublished = firstPublished;
+ fileNode.lastPublished = lastPublished;
+ fileNode.sha256 = sha256;
+ return fileNode;
+ }
+}
+
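To show the shape of a single index.json file entry built from these fields, here is a minimal sketch (not part of the commit above) that serializes a FileNode with the same ObjectMapper settings used in CreateIndexJson. It assumes same-package access, for example from a test, since FileNode is package-private, and all values are made up.

package org.torproject.metrics.collector.indexer;

import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.PropertyAccessor;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;

import java.util.Collections;

public class FileNodeSerializationSketch {

  public static void main(String[] args) throws Exception {
    // Same mapper settings as in CreateIndexJson above.
    ObjectMapper objectMapper = new ObjectMapper()
        .setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE)
        .setSerializationInclusion(JsonInclude.Include.NON_EMPTY)
        .setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE)
        .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
    // Example values only; the path, type, timestamps, and digest are made up.
    FileNode fileNode = FileNode.of(
        "2019-11-09-12-00-00-bridgedb-metrics", 2978L, "2019-11-09 12:00",
        Collections.singleton("bridgedb-metrics 1.0"),
        "2019-11-09 12:00", "2019-11-09 12:00",
        "(Base64-encoded SHA-256 digest)");
    // Prints a single index.json file entry, e.g.:
    // {"path":"2019-11-09-12-00-00-bridgedb-metrics","size":2978,
    //  "last_modified":"2019-11-09 12:00","types":["bridgedb-metrics 1.0"],
    //  "first_published":"2019-11-09 12:00",
    //  "last_published":"2019-11-09 12:00",
    //  "sha256":"(Base64-encoded SHA-256 digest)"}
    System.out.println(objectMapper.writeValueAsString(fileNode));
  }
}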
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/IndexNode.java b/src/main/java/org/torproject/metrics/collector/indexer/IndexNode.java
new file mode 100644
index 0000000..8b7a46b
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/IndexNode.java
@@ -0,0 +1,30 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+
+/**
+ * Root node in {@code index.json} containing additional information about index
+ * creation time or Git revision used for creating it.
+ */
+@JsonPropertyOrder({ "index_created", "build_revision", "path", "files",
+ "directories" })
+class IndexNode extends DirectoryNode {
+
+ /**
+ * Timestamp when this index was created using pattern
+ * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+ */
+ @JsonProperty("index_created")
+ String indexCreated;
+
+ /**
+ * Git revision of this software.
+ */
+ @JsonProperty("build_revision")
+ String buildRevision;
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/IndexerTask.java b/src/main/java/org/torproject/metrics/collector/indexer/IndexerTask.java
new file mode 100644
index 0000000..03c750b
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/IndexerTask.java
@@ -0,0 +1,225 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import org.torproject.descriptor.BandwidthFile;
+import org.torproject.descriptor.BridgeNetworkStatus;
+import org.torproject.descriptor.BridgePoolAssignment;
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.DirectoryKeyCertificate;
+import org.torproject.descriptor.ExitList;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.Microdescriptor;
+import org.torproject.descriptor.RelayDirectory;
+import org.torproject.descriptor.RelayNetworkStatus;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.RelayNetworkStatusVote;
+import org.torproject.descriptor.ServerDescriptor;
+import org.torproject.descriptor.SnowflakeStats;
+import org.torproject.descriptor.TorperfResult;
+import org.torproject.descriptor.UnparseableDescriptor;
+import org.torproject.descriptor.WebServerAccessLog;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.concurrent.Callable;
+
+/**
+ * Callable task that indexes a given descriptor file.
+ */
+class IndexerTask implements Callable<FileNode> {
+
+ /**
+ * Class logger.
+ */
+ private static final Logger logger
+ = LoggerFactory.getLogger(IndexerTask.class);
+
+ /**
+ * Formatter for all timestamps found in {@code index.json}.
+ */
+ private static DateTimeFormatter dateTimeFormatter = DateTimeFormatter
+ .ofPattern("uuuu-MM-dd HH:mm").withZone(ZoneOffset.UTC);
+
+ /**
+ * Path to the descriptor file to index.
+ */
+ private Path path;
+
+ /**
+ * Index results object, which starts out empty and gets populated as indexing
+ * proceeds.
+ */
+ private FileNode indexResult;
+
+ /**
+ * Create a new instance to parse the given descriptor file, but don't start
+ * parsing just yet.
+ *
+ * @param path Descriptor file to index.
+ */
+ IndexerTask(Path path) {
+ this.path = path;
+ }
+
+ /**
+ * Index the given file and return index results when done.
+ *
+ * @return Index results.
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ @Override
+ public FileNode call() throws IOException {
+ this.indexResult = new FileNode();
+ this.requestBasicFileAttributes();
+ this.computeFileDigest();
+ this.parseDescriptorFile();
+ return this.indexResult;
+ }
+
+ /**
+ * Request and store basic file attributes like file name, last-modified time,
+ * and size.
+ *
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ private void requestBasicFileAttributes() throws IOException {
+ this.indexResult.path = this.path.getFileName().toString();
+ this.indexResult.lastModified = dateTimeFormatter
+ .format(Files.getLastModifiedTime(this.path).toInstant());
+ this.indexResult.size = Files.size(this.path);
+ }
+
+ /**
+ * Compute and store the file's SHA-256 digest.
+ *
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ private void computeFileDigest() throws IOException {
+ try (InputStream stream = Files.newInputStream(this.path)) {
+ this.indexResult.sha256
+ = Base64.encodeBase64String(DigestUtils.sha256(stream));
+ }
+ }
+
+ /**
+ * Parse the descriptor file to extract the contained descriptor types and the
+ * first and last publication timestamps.
+ */
+ private void parseDescriptorFile() {
+ Long firstPublishedMillis = null;
+ Long lastPublishedMillis = null;
+ this.indexResult.types = new TreeSet<>();
+ SortedSet<String> unknownDescriptorSubclasses = new TreeSet<>();
+ for (Descriptor descriptor : DescriptorSourceFactory
+ .createDescriptorReader().readDescriptors(this.path.toFile())) {
+ if (descriptor instanceof UnparseableDescriptor) {
+ /* Skip unparseable descriptor. */
+ continue;
+ }
+ for (String annotation : descriptor.getAnnotations()) {
+ if (annotation.startsWith("@type ")) {
+ this.indexResult.types.add(annotation.substring(6));
+ }
+ }
+ Long publishedMillis;
+ if (descriptor instanceof BandwidthFile) {
+ BandwidthFile bandwidthFile = (BandwidthFile) descriptor;
+ LocalDateTime fileCreatedOrTimestamp
+ = bandwidthFile.fileCreated().isPresent()
+ ? bandwidthFile.fileCreated().get()
+ : bandwidthFile.timestamp();
+ publishedMillis = fileCreatedOrTimestamp
+ .toInstant(ZoneOffset.UTC).toEpochMilli();
+ } else if (descriptor instanceof BridgeNetworkStatus) {
+ publishedMillis = ((BridgeNetworkStatus) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof BridgePoolAssignment) {
+ publishedMillis = ((BridgePoolAssignment) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof BridgedbMetrics) {
+ publishedMillis = ((BridgedbMetrics) descriptor)
+ .bridgedbMetricsEnd().toInstant(ZoneOffset.UTC).toEpochMilli();
+ } else if (descriptor instanceof DirectoryKeyCertificate) {
+ publishedMillis = ((DirectoryKeyCertificate) descriptor)
+ .getDirKeyPublishedMillis();
+ } else if (descriptor instanceof ExitList) {
+ publishedMillis = ((ExitList) descriptor)
+ .getDownloadedMillis();
+ } else if (descriptor instanceof ExtraInfoDescriptor) {
+ publishedMillis = ((ExtraInfoDescriptor) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof Microdescriptor) {
+ /* Microdescriptors don't contain useful timestamps for this purpose.
+ * This is expected, so there's no need to log a warning below. */
+ continue;
+ } else if (descriptor instanceof RelayDirectory) {
+ publishedMillis = ((RelayDirectory) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof RelayNetworkStatus) {
+ publishedMillis = ((RelayNetworkStatus) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof RelayNetworkStatusConsensus) {
+ publishedMillis = ((RelayNetworkStatusConsensus) descriptor)
+ .getValidAfterMillis();
+ } else if (descriptor instanceof RelayNetworkStatusVote) {
+ publishedMillis = ((RelayNetworkStatusVote) descriptor)
+ .getValidAfterMillis();
+ } else if (descriptor instanceof ServerDescriptor) {
+ publishedMillis = ((ServerDescriptor) descriptor)
+ .getPublishedMillis();
+ } else if (descriptor instanceof SnowflakeStats) {
+ publishedMillis = ((SnowflakeStats) descriptor)
+ .snowflakeStatsEnd().toInstant(ZoneOffset.UTC).toEpochMilli();
+ } else if (descriptor instanceof TorperfResult) {
+ publishedMillis = ((TorperfResult) descriptor)
+ .getStartMillis();
+ } else if (descriptor instanceof WebServerAccessLog) {
+ publishedMillis = ((WebServerAccessLog) descriptor)
+ .getLogDate().atStartOfDay(ZoneOffset.UTC)
+ .toInstant().toEpochMilli();
+ } else {
+ /* Skip published timestamp if descriptor type is unknown or doesn't
+ * contain such a timestamp. */
+ unknownDescriptorSubclasses.add(
+ descriptor.getClass().getSimpleName());
+ continue;
+ }
+ if (null == firstPublishedMillis
+ || publishedMillis < firstPublishedMillis) {
+ firstPublishedMillis = publishedMillis;
+ }
+ if (null == lastPublishedMillis
+ || publishedMillis > lastPublishedMillis) {
+ lastPublishedMillis = publishedMillis;
+ }
+ }
+ if (!unknownDescriptorSubclasses.isEmpty()) {
+ logger.warn("Ran into unknown/unexpected Descriptor subclass(es) in "
+ + "{}: {}. Ignoring for index.json, but maybe worth looking into.",
+ this.path, unknownDescriptorSubclasses);
+ }
+ this.indexResult.firstPublished = null == firstPublishedMillis ? null
+ : dateTimeFormatter.format(Instant.ofEpochMilli(firstPublishedMillis));
+ this.indexResult.lastPublished = null == lastPublishedMillis ? null
+ : dateTimeFormatter.format(Instant.ofEpochMilli(lastPublishedMillis));
+ }
+}
+
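Because IndexerTask is a plain Callable<FileNode>, the scheduling side (not part of this excerpt) can hand it to any executor and pick up the FileNode result later. A minimal sketch of that interaction; the single-thread executor and error handling are assumptions for illustration only:

import java.nio.file.Path;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Sketch only: shows how a caller might run one IndexerTask and wait for its
// result; the executor choice is not taken from this commit.
static FileNode indexSingleFile(Path fileToIndex) throws Exception {
  ExecutorService executor = Executors.newSingleThreadExecutor();
  try {
    // submit() returns a Future<FileNode>; get() blocks until indexing is done.
    return executor.submit(new IndexerTask(fileToIndex)).get();
  } finally {
    executor.shutdown();
  }
}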
diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties
index e7cadf7..65e0f99 100644
--- a/src/main/resources/collector.properties
+++ b/src/main/resources/collector.properties
@@ -76,16 +76,11 @@ BridgedbMetricsOffsetMinutes = 340
# The URL of this instance. This will be the base URL
# written to index.json, i.e. please change this to the mirrors url!
InstanceBaseUrl = https://collector.torproject.org
-# The target location for index.json and its compressed
-# versions index.json.gz, index.json.bz2, and index.json.xz
-IndexPath = index
# The top-level directory for archived descriptors.
-ArchivePath = archive
-# The top-level directory for third party data.
-ContribPath = contrib
+IndexedPath = indexed
# The top-level directory for the recent descriptors that were
# published in the last 72 hours.
-RecentPath = recent
+RecentPath = indexed/recent
# The top-level directory for the retrieved descriptors that will
# be archived.
OutputPath = out
@@ -93,6 +88,11 @@ OutputPath = out
StatsPath = stats
# Path for descriptors downloaded from other instances
SyncPath = sync
+# Directory served via an external web server and managed by us which contains
+# (hard) links to files in IndexedPath and RecentPath and which therefore must
+# be located on the same file system. Also contains index.json and its
+# compressed versions index.json.gz, index.json.bz2, and index.json.xz.
+HtdocsPath = htdocs
######## Relay descriptors ########
#
## Define descriptor sources
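The HtdocsPath comment above hinges on hard links between the served directory and the files below IndexedPath/RecentPath; the code that maintains those links is outside this hunk. A minimal sketch of the relationship it describes, with illustrative paths only:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

// Sketch only: the real link management lives in the indexer module and is
// not shown in this hunk. Hard links require both paths to be on the same
// file system, which is exactly what the comment above demands.
static void publishToHtdocs(Path indexedFile, Path htdocsLink) throws IOException {
  Files.createDirectories(htdocsLink.getParent());
  Files.createLink(htdocsLink, indexedFile); // new link first, existing file second
}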
diff --git a/src/main/resources/create-tarballs.sh b/src/main/resources/create-tarballs.sh
index 5802020..695bb24 100755
--- a/src/main/resources/create-tarballs.sh
+++ b/src/main/resources/create-tarballs.sh
@@ -10,7 +10,7 @@
# Configuration section:
# The following path should be adjusted, if the CollecTor server layout differs.
# All paths should be given absolute.
-ARCHIVEDIR="/srv/collector.torproject.org/collector/archive"
+ARCHIVEDIR="/srv/collector.torproject.org/collector/indexed/archive"
WORKDIR="/srv/collector.torproject.org/collector/tarballs"
OUTDIR="/srv/collector.torproject.org/collector/out"
TARBALLTARGETDIR="/srv/collector.torproject.org/collector/data"
diff --git a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
index 7e9ea28..887f3ae 100644
--- a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
+++ b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
@@ -39,7 +39,7 @@ public class ConfigurationTest {
public void testKeyCount() {
assertEquals("The number of properties keys in enum Key changed."
+ "\n This test class should be adapted.",
- 71, Key.values().length);
+ 70, Key.values().length);
}
@Test()
diff --git a/src/test/java/org/torproject/metrics/collector/indexer/CreateIndexJsonTest.java b/src/test/java/org/torproject/metrics/collector/indexer/CreateIndexJsonTest.java
new file mode 100644
index 0000000..db00032
--- /dev/null
+++ b/src/test/java/org/torproject/metrics/collector/indexer/CreateIndexJsonTest.java
@@ -0,0 +1,522 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.torproject.metrics.collector.conf.Configuration;
+import org.torproject.metrics.collector.conf.Key;
+
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.FileTime;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Test class for {@link CreateIndexJson}.
+ */
+public class CreateIndexJsonTest {
+
+ /**
+ * Mocked indexer task that does not actually index a file but instead waits
+ * for the test class to set index results.
+ */
+ static class MockedIndexerTask extends IndexerTask {
+
+ /**
+ * Index result, to be set by the test.
+ */
+ private FileNode result;
+
+ /**
+ * Create a new mocked indexer task for the given path.
+ *
+ * @param path Path to index.
+ */
+ MockedIndexerTask(Path path) {
+ super(path);
+ }
+
+ /**
+ * Set index results.
+ *
+ * @param result Index results.
+ */
+ synchronized void setResult(FileNode result) {
+ this.result = result;
+ this.notifyAll();
+ }
+
+ /**
+ * Execute the task by waiting for the test to set index results.
+ *
+ * @return Index results provided by the test.
+ */
+ @Override
+ public FileNode call() {
+ synchronized (this) {
+ while (null == result) {
+ try {
+ wait();
+ } catch (InterruptedException e) {
+ /* Don't care about being interrupted, just keep waiting. */
+ }
+ }
+ return this.result;
+ }
+ }
+ }
+
+ /**
+ * List of mocked indexer tasks in the order of creation.
+ */
+ private List<MockedIndexerTask> indexerTasks = new ArrayList<>();
+
+ /**
+ * Testable version of the class under test.
+ */
+ class TestableCreateIndexJson extends CreateIndexJson {
+
+ /**
+ * Create a new instance with the given configuration.
+ *
+ * @param configuration Configuration for this test.
+ */
+ TestableCreateIndexJson(Configuration configuration) {
+ super(configuration);
+ }
+
+ /**
+ * Create an indexer task that doesn't actually index a file but that can
+ * be controlled by the test, and add that task to the list of tasks.
+ *
+ * @param fileToIndex File to index.
+ * @return Created (mocked) indexer task.
+ */
+ @Override
+ protected IndexerTask createIndexerTask(Path fileToIndex) {
+ MockedIndexerTask indexerTask = new MockedIndexerTask(fileToIndex);
+ indexerTasks.add(indexerTask);
+ return indexerTask;
+ }
+
+ /**
+ * Return {@code null} as build revision string to make it easier to compare
+ * written {@code index.json} files in tests.
+ *
+ * @return Always {@code null}.
+ */
+ @Override
+ protected String obtainBuildRevision() {
+ return null;
+ }
+ }
+
+ /**
+ * Temporary folder containing all files for this test.
+ */
+ @Rule
+ public TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+ /**
+ * Path to recent exit list file in {@code indexed/recent/}.
+ */
+ private Path recentExitListFilePath;
+
+ /**
+ * Path to archived exit list file in {@code indexed/archive/}.
+ */
+ private Path archiveExitListFilePath;
+
+ /**
+ * Path to exit list link in {@code htdocs/recent/}.
+ */
+ private Path recentExitListLinkPath;
+
+ /**
+ * Path to {@code index.json} file in {@code htdocs/index/}.
+ */
+ private Path indexJsonPath;
+
+ /**
+ * Class under test.
+ */
+ private CreateIndexJson cij;
+
+ /**
+ * Prepares the temporary folder and configuration for this test.
+ *
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ @Before
+ public void prepareDirectoriesAndConfiguration() throws IOException {
+ Path indexedPath = this.temporaryFolder.newFolder("indexed").toPath();
+ this.recentExitListFilePath = indexedPath.resolve(
+ Paths.get("recent", "exit-lists", "2016-09-20-13-02-00"));
+ this.archiveExitListFilePath = indexedPath.resolve(
+ Paths.get("archive", "exit-lists", "exit-list-2016-09.tar.xz"));
+ Path htdocsPath = this.temporaryFolder.newFolder("htdocs").toPath();
+ this.recentExitListLinkPath = htdocsPath.resolve(
+ Paths.get("recent", "exit-lists", "2016-09-20-13-02-00"));
+ this.indexJsonPath = htdocsPath.resolve(
+ Paths.get("index", "index.json"));
+ Configuration configuration = new Configuration();
+ configuration.setProperty(Key.IndexedPath.name(),
+ indexedPath.toAbsolutePath().toString());
+ configuration.setProperty(Key.HtdocsPath.name(),
+ htdocsPath.toAbsolutePath().toString());
+ configuration.setProperty(Key.InstanceBaseUrl.name(),
+ "https://collector.torproject.org");
+ this.cij = new TestableCreateIndexJson(configuration);
+ }
+
+ /**
+ * First execution time.
+ */
+ private static final Instant firstExecution
+ = Instant.parse("2016-09-20T13:04:00Z");
+
+ /**
+ * Second execution time, two minutes after the first execution time, which is
+ * the default interval for executing this module.
+ */
+ private static final Instant secondExecution
+ = Instant.parse("2016-09-20T13:06:00Z");
+
+ /**
+ * Third execution time, three hours after the second execution time, to see
+ * whether links to files that have been marked for deletion are actually
+ * deleted.
+ */
+ private static final Instant thirdExecution
+ = Instant.parse("2016-09-20T16:06:00Z");
+
+ /**
+ * Index result from indexing recent exit list.
+ */
+ private FileNode recentExitListFileNode = FileNode.of(
+ "2016-09-20-13-02-00", 177_090L, "2016-09-20 13:02",
+ Collections.singletonList("tordnsel 1.0"), "2016-09-20 13:02",
+ "2016-09-20 13:02", "4aXdw+jQ5O33AS8n+fUOwD5ZzHCICnwzvxkK8fWDhdw=");
+
+ /**
+ * Index result from indexing archived exit list.
+ */
+ private FileNode archiveExitListFileNode = FileNode.of(
+ "exit-list-2016-09.tar.xz", 1_008_748L, "2016-10-04 03:31",
+ Collections.singletonList("tordnsel 1.0"), "2016-09-01 00:02",
+ "2016-09-30 23:02", "P4zUKVOJFtKzxOXpN3NLU0UBZTBqCAM95yDPJ5JH62g=");
+
+ /**
+ * Index result from indexing <i>updated</i> archived exit list.
+ */
+ private FileNode updatedArchiveExitListFileNode = FileNode.of(
+ "exit-list-2016-09.tar.xz", 1_008_748L, "2016-10-07 03:31",
+ Collections.singletonList("tordnsel 1.0"), "2016-09-01 00:02",
+ "2016-09-30 23:02", "P4zUKVOJFtKzxOXpN3NLU0UBZTBqCAM95yDPJ5JH62g=");
+
+ /**
+ * Finish the oldest indexer task by providing the given file node as index
+ * result.
+ *
+ * @param fileNode Index result.
+ */
+ private void finishIndexing(FileNode fileNode) {
+ assertFalse(this.indexerTasks.isEmpty());
+ this.indexerTasks.remove(0).setResult(fileNode);
+ }
+
+ /**
+ * (Almost) empty {@code index.json} file.
+ */
+ private static final String emptyIndexJsonString
+ = "{\"index_created\":\"2016-09-20 13:06\","
+ + "\"path\":\"https://collector.torproject.org\"}";
+
+ /**
+ * {@code index.json} file containing a single recent exit list.
+ */
+ private static final String recentExitListIndexJsonString
+ = "{\"index_created\":\"2016-09-20 13:06\","
+ + "\"path\":\"https://collector.torproject.org\",\"directories\":[{"
+ + "\"path\":\"recent\",\"directories\":[{"
+ + "\"path\":\"exit-lists\",\"files\":[{"
+ + "\"path\":\"2016-09-20-13-02-00\",\"size\":177090,"
+ + "\"last_modified\":\"2016-09-20 13:02\","
+ + "\"types\":[\"tordnsel 1.0\"],"
+ + "\"first_published\":\"2016-09-20 13:02\","
+ + "\"last_published\":\"2016-09-20 13:02\","
+ + "\"sha256\":\"4aXdw+jQ5O33AS8n+fUOwD5ZzHCICnwzvxkK8fWDhdw=\"}]}]}]}";
+
+ /**
+ * {@code index.json} file containing a single archived exit list with a
+ * placeholder for the last-modified time.
+ */
+ private static final String archiveExitListIndexJsonString
+ = "{\"index_created\":\"2016-09-20 13:06\","
+ + "\"path\":\"https://collector.torproject.org\",\"directories\":[{"
+ + "\"path\":\"archive\",\"directories\":[{"
+ + "\"path\":\"exit-lists\",\"files\":[{"
+ + "\"path\":\"exit-list-2016-09.tar.xz\",\"size\":1008748,"
+ + "\"last_modified\":\"%s\","
+ + "\"types\":[\"tordnsel 1.0\"],"
+ + "\"first_published\":\"2016-09-01 00:02\","
+ + "\"last_published\":\"2016-09-30 23:02\","
+ + "\"sha256\":\"P4zUKVOJFtKzxOXpN3NLU0UBZTBqCAM95yDPJ5JH62g=\"}]}]}]}";
+
+ /**
+ * Delete the given file.
+ *
+ * @param fileToDelete Path to file to delete.
+ */
+ private static void deleteFile(Path fileToDelete) {
+ try {
+ Files.delete(fileToDelete);
+ } catch (IOException e) {
+ fail(String.format("I/O error while deleting %s.", fileToDelete));
+ }
+ }
+
+ /**
+ * Create the given file.
+ *
+ * @param fileToCreate Path to file to create.
+ * @param lastModified Last-modified time of file to create.
+ */
+ private static void createFile(Path fileToCreate, Instant lastModified) {
+ try {
+ Files.createDirectories(fileToCreate.getParent());
+ Files.createFile(fileToCreate);
+ Files.setLastModifiedTime(fileToCreate, FileTime.from(lastModified));
+ } catch (IOException e) {
+ fail(String.format("I/O error while creating %s.", fileToCreate));
+ }
+ }
+
+ /**
+ * Return whether the given file exists.
+ *
+ * @param fileToCheck Path to file to check.
+ * @return Whether the file exists.
+ */
+ private boolean fileExists(Path fileToCheck) {
+ return Files.exists(fileToCheck);
+ }
+
+ /**
+ * Change last-modified time of the given file.
+ *
+ * @param fileToChange File to change.
+ * @param lastModified New last-modified time.
+ */
+ private void changeLastModified(Path fileToChange, Instant lastModified) {
+ try {
+ Files.setLastModifiedTime(fileToChange, FileTime.from(lastModified));
+ } catch (IOException e) {
+ fail(String.format("I/O error while changing last-modified time of %s.",
+ fileToChange));
+ }
+ }
+
+ /**
+ * Write the given string to the {@code index.json} file.
+ *
+ * @param indexJsonString String to write.
+ * @param formatArguments Optional format arguments.
+ */
+ private void writeIndexJson(String indexJsonString,
+ Object ... formatArguments) {
+ try {
+ Files.createDirectories(indexJsonPath.getParent());
+ Files.write(indexJsonPath,
+ String.format(indexJsonString, formatArguments).getBytes());
+ } catch (IOException e) {
+ fail("I/O error while writing index.json file.");
+ }
+ }
+
+ /**
+ * Read and return the first line from the {@code index.json} file.
+ *
+ * @return First line from the {@code index.json} file.
+ */
+ private String readIndexJson() {
+ try {
+ return Files.readAllLines(indexJsonPath).get(0);
+ } catch (IOException e) {
+ fail("I/O error while reading index.json file.");
+ return null;
+ }
+ }
+
+ /**
+ * Run the module with the given system time.
+ *
+ * @param now Time when running the module.
+ */
+ private void startProcessing(Instant now) {
+ this.cij.startProcessing(now);
+ }
+
+ /**
+ * Test whether two executions on an empty {@code indexed/} directory produce
+ * an {@code index.json} file without any files or directories.
+ */
+ @Test
+ public void testEmptyDirs() {
+ startProcessing(firstExecution);
+ startProcessing(secondExecution);
+ assertEquals(emptyIndexJsonString, readIndexJson());
+ }
+
+ /**
+ * Test whether a new exit list in {@code indexed/recent/} gets indexed and
+ * then included in {@code index.json}.
+ */
+ @Test
+ public void testNewRecentExitList() {
+ createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+ startProcessing(firstExecution);
+ finishIndexing(this.recentExitListFileNode);
+ startProcessing(secondExecution);
+ assertEquals(recentExitListIndexJsonString, readIndexJson());
+ }
+
+ /**
+ * Test whether an existing exit list in {@code indexed/recent/} that is
+ * already contained in {@code index.json} gets ignored by the indexers.
+ */
+ @Test
+ public void testExistingRecentExitList() {
+ createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+ writeIndexJson(recentExitListIndexJsonString);
+ startProcessing(firstExecution);
+ startProcessing(secondExecution);
+ assertEquals(recentExitListIndexJsonString, readIndexJson());
+ }
+
+ /**
+ * Test whether a deleted exit list in {@code indexed/recent/} is first
+ * removed from {@code index.json} and later deleted from
+ * {@code htdocs/recent/}.
+ */
+ @Test
+ public void testDeletedRecentExitList() {
+ createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+ writeIndexJson(recentExitListIndexJsonString);
+ startProcessing(firstExecution);
+ assertTrue(fileExists(recentExitListLinkPath));
+ deleteFile(recentExitListFilePath);
+ startProcessing(secondExecution);
+ assertEquals(emptyIndexJsonString, readIndexJson());
+ assertTrue(fileExists(recentExitListLinkPath));
+ startProcessing(thirdExecution);
+ assertFalse(fileExists(recentExitListLinkPath));
+ }
+
+ /**
+ * Test whether a link in {@code htdocs/recent/} for which no corresponding
+ * file in {@code indexed/recent/} exists is eventually deleted.
+ */
+ @Test
+ public void testDeletedLink() {
+ createFile(recentExitListLinkPath, Instant.parse("2016-09-20T13:02:00Z"));
+ startProcessing(firstExecution);
+ assertTrue(Files.exists(recentExitListLinkPath));
+ startProcessing(secondExecution);
+ assertTrue(Files.exists(recentExitListLinkPath));
+ startProcessing(thirdExecution);
+ assertFalse(Files.exists(recentExitListLinkPath));
+ }
+
+ /**
+ * Test whether a file that gets deleted while being indexed is not included
+ * in {@code index.json} even after indexing is completed.
+ */
+ @Test
+ public void testIndexingDisappearingTarball() {
+ createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+ startProcessing(firstExecution);
+ deleteFile(recentExitListFilePath);
+ finishIndexing(recentExitListFileNode);
+ startProcessing(secondExecution);
+ assertEquals(emptyIndexJsonString, readIndexJson());
+ }
+
+ /**
+ * Test whether a tarball that gets updated in {@code indexed/archive/} gets
+ * re-indexed and updated in {@code index.json}.
+ */
+ @Test
+ public void testUpdatedFile() {
+ writeIndexJson(archiveExitListIndexJsonString, "2016-10-04 03:31");
+ createFile(archiveExitListFilePath, Instant.parse("2016-10-07T03:31:00Z"));
+ startProcessing(firstExecution);
+ finishIndexing(updatedArchiveExitListFileNode);
+ startProcessing(secondExecution);
+ assertEquals(String.format(archiveExitListIndexJsonString,
+ "2016-10-07 03:31"), readIndexJson());
+ }
+
+ /**
+ * Test whether an update to a tarball that happens while the tarball is being
+ * indexed is not reflected in {@code index.json} until the updated file has
+ * been re-indexed.
+ */
+ @Test
+ public void testUpdateFileWhileIndexing() {
+ createFile(archiveExitListFilePath, Instant.parse("2016-10-07T03:31:00Z"));
+ startProcessing(firstExecution);
+ changeLastModified(archiveExitListFilePath,
+ Instant.parse("2016-10-07T03:31:00Z"));
+ finishIndexing(archiveExitListFileNode);
+ startProcessing(secondExecution);
+ assertEquals(String.format(archiveExitListIndexJsonString,
+ "2016-10-04 03:31"), readIndexJson());
+ }
+
+ /**
+ * Test whether a tarball that gets updated after being indexed, but before
+ * being included in {@code index.json}, is not updated in {@code index.json}
+ * until the updated file has been re-indexed.
+ */
+ @Test
+ public void testUpdateFileAfterIndexing() {
+ createFile(archiveExitListFilePath, Instant.parse("2016-10-04T03:31:00Z"));
+ startProcessing(firstExecution);
+ finishIndexing(archiveExitListFileNode);
+ changeLastModified(archiveExitListFilePath,
+ Instant.parse("2016-10-07T03:31:00Z"));
+ startProcessing(secondExecution);
+ assertEquals(String.format(archiveExitListIndexJsonString,
+ "2016-10-04 03:31"), readIndexJson());
+ }
+
+ /**
+ * Test whether a long-running indexer task is given time to finish, rather
+ * than a second task being started for the same file.
+ */
+ @Test
+ public void testLongRunningIndexerTask() {
+ createFile(archiveExitListFilePath, Instant.parse("2016-10-04T03:31:00Z"));
+ startProcessing(firstExecution);
+ startProcessing(secondExecution);
+ assertEquals(emptyIndexJsonString, readIndexJson());
+ finishIndexing(archiveExitListFileNode);
+ startProcessing(thirdExecution);
+ assertTrue(this.indexerTasks.isEmpty());
+ }
+}
+
diff --git a/src/test/java/org/torproject/metrics/collector/indexer/IndexerTaskTest.java b/src/test/java/org/torproject/metrics/collector/indexer/IndexerTaskTest.java
new file mode 100644
index 0000000..8e5e6f4
--- /dev/null
+++ b/src/test/java/org/torproject/metrics/collector/indexer/IndexerTaskTest.java
@@ -0,0 +1,226 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.FileTime;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+/**
+ * Test class for {@link IndexerTask}.
+ */
+@RunWith(Parameterized.class)
+public class IndexerTaskTest {
+
+ @Parameterized.Parameter
+ public String path;
+
+ @Parameterized.Parameter(1)
+ public Long size;
+
+ @Parameterized.Parameter(2)
+ public String lastModified;
+
+ @Parameterized.Parameter(3)
+ public String[] types;
+
+ @Parameterized.Parameter(4)
+ public String firstPublished;
+
+ @Parameterized.Parameter(5)
+ public String lastPublished;
+
+ @Parameterized.Parameter(6)
+ public String sha256;
+
+ /**
+ * Initialize test parameters.
+ *
+ * @return Test parameters.
+ */
+ @Parameterized.Parameters
+ public static Collection<Object[]> pathFilename() {
+ return Arrays.asList(new Object[][]{
+
+ {"2016-09-20-13-00-00-consensus", /* Path in src/test/resources/ */
+ 1_618_103L, /* Size in bytes */
+ "2017-09-07 12:13", /* Last-modified time */
+ new String[] { "network-status-consensus-3 1.17" }, /* Types */
+ "2016-09-20 13:00", /* First published */
+ "2016-09-20 13:00", /* Last published */
+ "3mLpDZmP/NSgOgmuPDyljxh0Lup1L6FtD16266ZCGAw="}, /* SHA-256 */
+
+ {"2016-09-20-13-00-00-vote-49015F787433103580E3B66A1707A00E60F2D15B-"
+ + "60ADC6BEC262AE921A1037D54C8A3976367DBE87",
+ 3_882_514L,
+ "2017-09-07 12:13",
+ new String[] { "network-status-vote-3 1.17" },
+ "2016-09-20 13:00",
+ "2016-09-20 13:00",
+ "UCnSSrvdm26dJOriFgEQNQVrBLpVKbH/fF0VPRX3TGc="},
+
+ {"2016-09-20-13-02-00",
+ 177_090L,
+ "2017-01-13 16:55",
+ new String[] { "tordnsel 1.0" },
+ "2016-09-20 13:02",
+ "2016-09-20 13:02",
+ "4aXdw+jQ5O33AS8n+fUOwD5ZzHCICnwzvxkK8fWDhdw="},
+
+ {"2016-10-01-16-00-00-vote-0232AF901C31A04EE9848595AF9BB7620D4C5B2E-"
+ + "FEE63B4AB7CE5A6BDD09E9A5C4F01BD61EB7E4F1",
+ 3_226_152L,
+ "2017-01-13 16:55",
+ new String[] { "network-status-vote-3 1.0" },
+ "2016-10-01 16:00",
+ "2016-10-01 16:00",
+ "bilv6zEXr0Y9f5o24RMN0lUujsJJiSQAn9LkG0XJrZE="},
+
+ {"2016-10-02-17-00-00-consensus-microdesc",
+ 1_431_627L,
+ "2017-09-07 12:13",
+ new String[] { "network-status-microdesc-consensus-3 1.17" },
+ "2016-10-02 17:00",
+ "2016-10-02 17:00",
+ "rrkxuLahYENLExX99Jio587/kUz9NtOoaYyKXxvX5EA="},
+
+ {"20160920-063816-1D8F3A91C37C5D1C4C19B1AD1D0CFBE8BF72D8E1",
+ 339_256L,
+ "2017-09-07 12:13",
+ new String[] { "bridge-network-status 1.17" },
+ "2016-09-20 06:38",
+ "2016-09-20 06:38",
+ "sMAcyFrZ2rxj50b6iGe3icCNMC4gBSA1y9ZH4EWTa8s="},
+
+ {"bridge-2016-10-02-08-09-00-extra-infos",
+ 11_561L,
+ "2017-09-07 12:13",
+ new String[] { "bridge-extra-info 1.3" },
+ "2016-10-02 06:09",
+ "2016-10-02 06:09",
+ "hat+vbyE04eH9JBQa0s6ezB6sLaStUUhvUj8CZ1aoEY="},
+
+ {"bridge-2016-10-02-16-09-00-server-descriptors",
+ 5_336L,
+ "2017-01-13 16:55",
+ new String[] { "bridge-server-descriptor 1.2" },
+ "2016-10-02 14:09",
+ "2016-10-02 14:09",
+ "6CtHdo+eRFOi5xBjJcOVszC1hibC5gTB+YWvn1VmIIc="},
+
+ {"moria-1048576-2016-10-05.tpf",
+ 20_405L,
+ "2017-09-07 12:13",
+ new String[0],
+ null,
+ null,
+ "DZyk6c0lQQ7OVZo1cmA+SuxPA+1thmuiooVifQPPOiA="},
+
+ {"op-nl-1048576-2017-04-11.tpf",
+ 4_220L,
+ "2017-09-20 12:14",
+ new String[] { "torperf 1.1" },
+ "2017-04-11 06:24",
+ "2017-04-11 15:54",
+ "Gwex5yN3+s2PrhekjA68XmPg+UorOfx7mUa4prd7Dt8="},
+
+ {"relay-2016-10-02-08-05-00-extra-infos",
+ 20_541L,
+ "2017-01-13 16:55",
+ new String[] { "extra-info 1.0" },
+ "2016-10-02 07:01",
+ "2016-10-02 07:01",
+ "3ZSO3+9ed9OwMVPx2LcVIiJfC+O30eEXEdbz64Hrp0w="},
+
+ {"relay-2016-10-02-16-05-00-server-descriptors",
+ 17_404L,
+ "2017-01-13 16:55",
+ new String[] { "server-descriptor 1.0" },
+ "2016-10-02 14:58",
+ "2016-10-02 15:01",
+ "uWKHHzq4+oVNdOGh0mfkLUSjwGrBlLtEtN2DtF5qcLU="},
+
+ {"siv-1048576-2016-10-03.tpf",
+ 39_193L,
+ "2017-01-13 16:55",
+ new String[] { "torperf 1.0" },
+ "2016-10-03 00:02",
+ "2016-10-03 23:32",
+ "paaFPI6BVuIDQ32aIuHYNCuKmBvFxsDvVCCwp+oM0GE="},
+
+ {"torperf-51200-2016-10-02.tpf",
+ 233_763L,
+ "2017-01-13 16:55",
+ new String[] { "torperf 1.0" },
+ "2016-10-02 00:00",
+ "2016-10-02 23:55",
+ "fqeVAXamvB4yQ/8UlZAxhJx0+1Y7IfipqIpOUqQ57rE="}
+ });
+ }
+
+ /**
+ * Formatter for all timestamps found in {@code index.json}.
+ */
+ private static DateTimeFormatter dateTimeFormatter = DateTimeFormatter
+ .ofPattern("uuuu-MM-dd HH:mm").withZone(ZoneOffset.UTC);
+
+ /**
+ * Temporary folder containing all files for this test.
+ */
+ @Rule
+ public TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+ /**
+ * Test indexing a file.
+ *
+ * @throws IOException Thrown if an I/O error occurs.
+ */
+ @Test
+ public void testIndexFile() throws IOException {
+ Path indexedDirectory = this.temporaryFolder.newFolder().toPath();
+ Path temporaryFile = indexedDirectory.resolve(this.path);
+ try (InputStream is = getClass()
+ .getClassLoader().getResourceAsStream(this.path)) {
+ if (null == is) {
+ fail(String.format("Unable to read test resource %s.", this.path));
+ return;
+ }
+ Files.copy(is, temporaryFile);
+ }
+ Files.setLastModifiedTime(temporaryFile,
+ FileTime.from(LocalDateTime.parse(this.lastModified, dateTimeFormatter)
+ .toInstant(ZoneOffset.UTC)));
+ assertTrue(Files.exists(temporaryFile));
+ IndexerTask indexerTask = new IndexerTask(temporaryFile);
+ FileNode indexResult = indexerTask.call();
+ assertEquals(this.path, indexResult.path);
+ assertEquals(this.size, indexResult.size);
+ assertEquals(this.lastModified, indexResult.lastModified);
+ SortedSet<String> expectedTypes = new TreeSet<>(Arrays.asList(this.types));
+ assertEquals(expectedTypes, indexResult.types);
+ assertEquals(this.firstPublished, indexResult.firstPublished);
+ assertEquals(this.lastPublished, indexResult.lastPublished);
+ assertEquals(this.sha256, indexResult.sha256);
+ }
+}
+