[tor-commits] [collector/master] Extend index.json by additional file meta data.

karsten at torproject.org karsten at torproject.org
Sat Nov 9 10:01:47 UTC 2019


commit 500b7c5ad3d94a0f0f8b8c7fdb110813895c25f0
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Wed Oct 30 21:56:41 2019 +0100

    Extend index.json by additional file meta data.
    
    Implements #31204.
---
 CHANGELOG.md                                       |  13 +
 build.xml                                          |   2 +-
 src/build                                          |   2 +-
 .../org/torproject/metrics/collector/conf/Key.java |   5 +-
 .../metrics/collector/indexer/CreateIndexJson.java | 678 +++++++++++++++++----
 .../metrics/collector/indexer/DirectoryNode.java   |  36 ++
 .../metrics/collector/indexer/FileNode.java        | 125 ++++
 .../metrics/collector/indexer/IndexNode.java       |  30 +
 .../metrics/collector/indexer/IndexerTask.java     | 225 +++++++
 src/main/resources/collector.properties            |  14 +-
 src/main/resources/create-tarballs.sh              |   2 +-
 .../metrics/collector/conf/ConfigurationTest.java  |   2 +-
 .../collector/indexer/CreateIndexJsonTest.java     | 522 ++++++++++++++++
 .../metrics/collector/indexer/IndexerTaskTest.java | 226 +++++++
 14 files changed, 1756 insertions(+), 126 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb328ad..38754c4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Changes in version 1.??.? - 2019-??-??
 
+ * Medium changes
+   - Extend index.json by including descriptor types, first and last
+     publication timestamp, and SHA-256 file digest. Requires making
+     configuration changes in collector.properties:
+      1) IndexedPath is a new directory with subdirectories for
+         archived and recent descriptors,
+      2) ArchivePath and IndexPath are hard-wired to be subdirectories
+         of IndexedPath,
+      3) RecentPath must be set to be a subdirectory of IndexedPath,
+      4) ContribPath has disappeared, and
+      5) HtdocsPath is a new directory with files served by the web
+         server.
+
 
 # Changes in version 1.12.0 - 2019-10-18
 
diff --git a/build.xml b/build.xml
index 8276e63..019461d 100644
--- a/build.xml
+++ b/build.xml
@@ -12,7 +12,7 @@
   <property name="release.version" value="1.12.0-dev" />
   <property name="project-main-class" value="org.torproject.metrics.collector.Main" />
   <property name="name" value="collector"/>
-  <property name="metricslibversion" value="2.8.0" />
+  <property name="metricslibversion" value="2.9.0" />
   <property name="jarincludes" value="collector.properties logback.xml" />
 
   <patternset id="runtime" >
diff --git a/src/build b/src/build
index d82fff9..eb16cb3 160000
--- a/src/build
+++ b/src/build
@@ -1 +1 @@
-Subproject commit d82fff984634fe006ac7b0b102e7f48a52ca20d9
+Subproject commit eb16cb359db41722e6089bafb1e26808df4338df
diff --git a/src/main/java/org/torproject/metrics/collector/conf/Key.java b/src/main/java/org/torproject/metrics/collector/conf/Key.java
index 390feed..d59438b 100644
--- a/src/main/java/org/torproject/metrics/collector/conf/Key.java
+++ b/src/main/java/org/torproject/metrics/collector/conf/Key.java
@@ -18,13 +18,12 @@ public enum Key {
   RunOnce(Boolean.class),
   ExitlistUrl(URL.class),
   InstanceBaseUrl(String.class),
-  ArchivePath(Path.class),
-  ContribPath(Path.class),
+  IndexedPath(Path.class),
   RecentPath(Path.class),
   OutputPath(Path.class),
-  IndexPath(Path.class),
   StatsPath(Path.class),
   SyncPath(Path.class),
+  HtdocsPath(Path.class),
   RelaySources(SourceType[].class),
   BridgeSources(SourceType[].class),
   BridgePoolAssignmentsSources(SourceType[].class),
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java b/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java
index a40798e..15aa31d 100644
--- a/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java
+++ b/src/main/java/org/torproject/metrics/collector/indexer/CreateIndexJson.java
@@ -1,73 +1,190 @@
-/* Copyright 2015--2018 The Tor Project
+/* Copyright 2015--2019 The Tor Project
  * See LICENSE for licensing information */
 
 package org.torproject.metrics.collector.indexer;
 
-import org.torproject.descriptor.index.DirectoryNode;
-import org.torproject.descriptor.index.FileNode;
-import org.torproject.descriptor.index.IndexNode;
-import org.torproject.descriptor.internal.FileType;
 import org.torproject.metrics.collector.conf.Configuration;
+import org.torproject.metrics.collector.conf.ConfigurationException;
 import org.torproject.metrics.collector.conf.Key;
 import org.torproject.metrics.collector.cron.CollecTorMain;
 
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.PropertyAccessor;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.PropertyNamingStrategy;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
+import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileOutputStream;
+import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStreamWriter;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.Locale;
+import java.io.OutputStream;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.StandardCopyOption;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.time.temporal.TemporalAmount;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
 import java.util.Properties;
+import java.util.Set;
+import java.util.SortedMap;
 import java.util.SortedSet;
-import java.util.TimeZone;
+import java.util.TreeMap;
 import java.util.TreeSet;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
 
-/* Create a fresh index.json containing all directories and files in the
- * archive/ and recent/ directories.
+/**
+ * Create an index file called {@code index.json} containing metadata of all
+ * files in the {@code indexed/} directory and update the {@code htdocs/}
+ * directory to contain all files to be served via the web server.
  *
- * Note that if this ever takes longer than a few seconds, we'll have to
- * cache index parts of directories or files that haven't changed.
- * Example: if we parse include cryptographic hashes or @type information,
- * we'll likely have to do that. */
+ * <p>File metadata includes:</p>
+ * <ul>
+ *   <li>Path for downloading this file from the web server.</li>
+ *   <li>Size of the file in bytes.</li>
+ *   <li>Timestamp when the file was last modified.</li>
+ *   <li>Descriptor types as found in {@code @type} annotations of contained
+ *   descriptors.</li>
+ *   <li>Earliest and latest publication timestamp of contained
+ *   descriptors.</li>
+ *   <li>SHA-256 digest of the file.</li>
+ * </ul>
+ *
+ * <p>This class maintains its own working directory {@code htdocs/} with
+ * subdirectories like {@code htdocs/archive/} or {@code htdocs/recent/} and
+ * another subdirectory {@code htdocs/index/}. The first two subdirectories
+ * contain (hard) links created and deleted by this class, the third
+ * subdirectory contains the {@code index.json} file in uncompressed and
+ * compressed forms.</p>
+ *
+ * <p>The main reason for having the {@code htdocs/} directory is that indexing
+ * a large descriptor file can be time consuming. New or updated files in
+ * {@code indexed/} first need to be indexed before their metadata can be
+ * included in {@code index.json}. Another reason is that files removed from
+ * {@code indexed/} shall still be available for download for a limited period
+ * of time after disappearing from {@code index.json}.</p>
+ *
+ * <p>The reason for creating (hard) links in {@code htdocs/}, rather than
+ * copies, is that links do not consume additional disk space. All directories
+ * must be located on the same file system. Storing symbolic links in
+ * {@code htdocs/} would not have worked with replaced or deleted files in the
+ * original directories. Symbolic links in original directories are allowed as
+ * long as they point to targets on the same file system.</p>
+ *
+ * <p>This class does not write, modify, or delete any files in the
+ * {@code indexed/} directory. At the same time it does not expect any other
+ * classes to write, modify, or delete contents in the {@code htdocs/}
+ * directory.</p>
+ */
 public class CreateIndexJson extends CollecTorMain {
 
+  /**
+   * Class logger.
+   */
   private static final Logger logger =
       LoggerFactory.getLogger(CreateIndexJson.class);
 
-  private static File indexJsonFile;
+  /**
+   * Delay between finding out that a file has been deleted and deleting its
+   * link.
+   */
+  private static final TemporalAmount deletionDelay = Duration.ofHours(2L);
 
-  private static String basePath;
+  /**
+   * Index tarballs with no more than this many threads at a time.
+   */
+  private static final int tarballIndexerThreads = 3;
 
-  private static File[] indexedDirectories;
+  /**
+   * Index flat files with no more than this many threads at a time.
+   */
+  private static final int flatFileIndexerThreads = 3;
 
-  private static final String dateTimePattern = "yyyy-MM-dd HH:mm";
+  /**
+   * Parser and formatter for all timestamps found in {@code index.json}.
+   */
+  private static DateTimeFormatter dateTimeFormatter = DateTimeFormatter
+      .ofPattern("uuuu-MM-dd HH:mm").withZone(ZoneOffset.UTC);
 
-  private static final Locale dateTimeLocale = Locale.US;
+  /**
+   * Object mapper for parsing and formatting {@code index.json} files.
+   */
+  private static ObjectMapper objectMapper = new ObjectMapper()
+      .setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE)
+      .setSerializationInclusion(JsonInclude.Include.NON_EMPTY)
+      .setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE)
+      .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
 
-  private static final TimeZone dateTimezone = TimeZone.getTimeZone("UTC");
+  /**
+   * Path to the {@code indexed/} directory.
+   */
+  private Path indexedPath;
 
-  private static String buildRevision = null;
+  /**
+   * Path to the {@code htdocs/} directory.
+   */
+  private Path htdocsPath;
 
-  /** Creates indexes of directories containing archived and recent
-   * descriptors and write index files to disk. */
-  public CreateIndexJson(Configuration conf) {
-    super(conf);
-    Properties buildProperties = new Properties();
-    try (InputStream is = getClass().getClassLoader()
-        .getResourceAsStream("collector.buildrevision.properties")) {
-      buildProperties.load(is);
-      buildRevision = buildProperties.getProperty("collector.build.revision",
-          null);
-    } catch (Exception ex) {
-      // This doesn't hamper the index creation: only log a warning.
-      logger.warn("No build revision available.", ex);
-      buildRevision = null;
-    }
+  /**
+   * Path to the uncompressed {@code index.json} file.
+   */
+  private Path indexJsonPath;
+
+  /**
+   * Base URL of all resources included in {@code index.json}.
+   */
+  private String basePathString;
+
+  /**
+   * Git revision of this software to be included in {@code index.json} or
+   * omitted if unknown.
+   */
+  private String buildRevisionString;
+
+  /**
+   * Index containing metadata of files in {@code indexed/}, including new or
+   * updated files that still need to be indexed and deleted files that are
+   * still linked in {@code htdocs/}.
+   *
+   * <p>This map is initialized by reading the last known {@code index.json}
+   * file and remains available in memory between executions until shutdown.</p>
+   */
+  private SortedMap<Path, FileNode> index;
+
+  /**
+   * Executor for indexing tarballs.
+   */
+  private ExecutorService tarballsExecutor
+      = Executors.newFixedThreadPool(tarballIndexerThreads);
+
+  /**
+   * Executor for indexing flat files (non-tarballs).
+   */
+  private ExecutorService flatFilesExecutor
+      = Executors.newFixedThreadPool(flatFileIndexerThreads);
+
+  /**
+   * Initialize this class with the given {@code configuration}.
+   *
+   * @param configuration Configuration values.
+   */
+  public CreateIndexJson(Configuration configuration) {
+    super(configuration);
   }
 
   @Override
@@ -80,96 +197,433 @@ public class CreateIndexJson extends CollecTorMain {
     return "IndexJson";
   }
 
+
+  /**
+   * Run the indexer by (1) adding new files from {@code indexed/} to the index,
+   * (2) adding old files from {@code htdocs/} for which only links exist to the
+   * index, (3) scheduling new tasks and updating links in {@code htdocs/} to
+   * reflect what's contained in the in-memory index, and (4) writing new
+   * uncompressed and compressed {@code index.json} files to disk.
+   */
   @Override
-  protected void startProcessing() {
+  public void startProcessing() {
+    this.startProcessing(Instant.now());
+  }
+
+  /**
+   * Helper method to {@link #startProcessing()} that accepts the current
+   * execution time and which is used by tests.
+   *
+   * @param now Current execution time.
+   */
+  protected void startProcessing(Instant now) {
+    try {
+      this.basePathString = this.config.getProperty(Key.InstanceBaseUrl.name());
+      this.indexedPath = config.getPath(Key.IndexedPath);
+      this.htdocsPath = config.getPath(Key.HtdocsPath);
+    } catch (ConfigurationException e) {
+      logger.error("Unable to read one or more configuration values. Not "
+          + "indexing in this execution.", e);
+    }
+    this.buildRevisionString = this.obtainBuildRevision();
+    this.indexJsonPath = this.htdocsPath
+        .resolve(Paths.get("index", "index.json"));
     try {
-      indexJsonFile = new File(config.getPath(Key.IndexPath).toFile(),
-          "index.json");
-      basePath = config.getProperty(Key.InstanceBaseUrl.name());
-      indexedDirectories = new File[] {
-          config.getPath(Key.ArchivePath).toFile(),
-          config.getPath(Key.ContribPath).toFile(),
-          config.getPath(Key.RecentPath).toFile() };
-      writeIndex(indexDirectories());
-    } catch (Exception e) {
-      logger.error("Cannot run index creation: {}", e.getMessage(), e);
-      throw new RuntimeException(e);
+      this.prepareHtdocsDirectory();
+      if (null == this.index) {
+        logger.info("Reading index.json file from last execution.");
+        this.index = this.readIndex();
+      }
+      logger.info("Going through indexed/ and adding new files to the index.");
+      this.addNewFilesToIndex(this.indexedPath);
+      logger.info("Going through htdocs/ and adding links to deleted files to "
+          + "the index.");
+      this.addOldLinksToIndex();
+      logger.info("Going through the index, scheduling tasks, and updating "
+          + "links.");
+      this.scheduleTasksAndUpdateLinks(now);
+      logger.info("Writing uncompressed and compressed index.json files to "
+          + "disk.");
+      this.writeIndex(this.index, now);
+      logger.info("Pausing until next index update run.");
+    } catch (IOException e) {
+      logger.error("I/O error while updating index.json files. Trying again in "
+          + "the next execution.", e);
     }
   }
 
-  private static DateFormat dateTimeFormat;
-
-  static {
-    dateTimeFormat = new SimpleDateFormat(dateTimePattern,
-        dateTimeLocale);
-    dateTimeFormat.setLenient(false);
-    dateTimeFormat.setTimeZone(dateTimezone);
+  /**
+   * Prepare the {@code htdocs/} directory by checking whether all required
+   * subdirectories exist and by creating them if not.
+   *
+   * @throws IOException Thrown if one or more directories could not be created.
+   */
+  private void prepareHtdocsDirectory() throws IOException {
+    for (Path requiredPath : new Path[] {
+        this.htdocsPath,
+        this.indexJsonPath.getParent() }) {
+      if (!Files.exists(requiredPath)) {
+        Files.createDirectories(requiredPath);
+      }
+    }
   }
 
-  private IndexNode indexDirectories() {
-    SortedSet<DirectoryNode> directoryNodes = new TreeSet<>();
-    logger.trace("indexing: {} {}", indexedDirectories[0],
-        indexedDirectories[1]);
-    for (File directory : indexedDirectories) {
-      if (directory.exists() && directory.isDirectory()) {
-        DirectoryNode dn = indexDirectory(directory);
-        if (null != dn) {
-          directoryNodes.add(dn);
+  /**
+   * Read the {@code index.json} file written by the previous execution and
+   * populate our index with its contents, or leave the index empty if this is
+   * the first execution and that file does not yet exist.
+   *
+   * @return Index read from disk, or empty map if {@code index.json} does not
+   *     exist.
+   */
+  private SortedMap<Path, FileNode> readIndex() throws IOException {
+    SortedMap<Path, FileNode> index = new TreeMap<>();
+    if (Files.exists(this.indexJsonPath)) {
+      IndexNode indexNode = objectMapper.readValue(
+          Files.newInputStream(this.indexJsonPath), IndexNode.class);
+      SortedMap<Path, DirectoryNode> directoryNodes = new TreeMap<>();
+      directoryNodes.put(Paths.get(""), indexNode);
+      while (!directoryNodes.isEmpty()) {
+        Path directoryPath = directoryNodes.firstKey();
+        DirectoryNode directoryNode = directoryNodes.remove(directoryPath);
+        if (null != directoryNode.files) {
+          for (FileNode fileNode : directoryNode.files) {
+            Path filePath = this.indexedPath.resolve(directoryPath)
+                .resolve(Paths.get(fileNode.path));
+            index.put(filePath, fileNode);
+          }
         }
+        if (null != directoryNode.directories) {
+          boolean isRootDirectory = directoryNode == indexNode;
+          for (DirectoryNode subdirectoryNode : directoryNode.directories) {
+            Path subdirectoryPath = isRootDirectory
+                ? Paths.get(subdirectoryNode.path)
+                : directoryPath.resolve(Paths.get(subdirectoryNode.path));
+            directoryNodes.put(subdirectoryPath, subdirectoryNode);
+          }
+        }
+      }
+    }
+    return index;
+  }
+
+  /**
+   * Obtain and return the build revision string that was generated during the
+   * build process with {@code git rev-parse --short HEAD} and written to
+   * {@code collector.buildrevision.properties}, or return {@code null} if the
+   * build revision string cannot be obtained.
+   *
+   * @return Build revision string.
+   */
+  protected String obtainBuildRevision() {
+    String buildRevision = null;
+    Properties buildProperties = new Properties();
+    String propertiesFile = "collector.buildrevision.properties";
+    try (InputStream is = getClass().getClassLoader()
+        .getResourceAsStream(propertiesFile)) {
+      if (null == is) {
+        logger.warn("File {}, which is supposed to contain the build revision "
+            + "string, does not exist in our class path. Writing index.json "
+            + "without the \"build_revision\" field.", propertiesFile);
+        return null;
       }
+      buildProperties.load(is);
+      buildRevision = buildProperties.getProperty(
+          "collector.build.revision", null);
+    } catch (IOException e) {
+      logger.warn("I/O error while trying to obtain build revision string. "
+          + "Writing index.json without the \"build_revision\" field.");
     }
-    return new IndexNode(dateTimeFormat.format(
-        System.currentTimeMillis()), buildRevision, basePath, null,
-        directoryNodes);
-  }
-
-  private DirectoryNode indexDirectory(File directory) {
-    SortedSet<FileNode> fileNodes = new TreeSet<>();
-    SortedSet<DirectoryNode> directoryNodes = new TreeSet<>();
-    logger.trace("indexing: {}", directory);
-    File[] fileList = directory.listFiles();
-    if (null == fileList) {
-      logger.warn("Indexing dubious directory: {}", directory);
-      return null;
+    return buildRevision;
+  }
+
+  /**
+   * Walk the given file tree and add all previously unknown files to the
+   * in-memory index (except for files starting with "." or ending with ".tmp").
+   *
+   * @param path File tree to walk.
+   */
+  private void addNewFilesToIndex(Path path) throws IOException {
+    if (!Files.exists(path)) {
+      return;
     }
-    for (File fileOrDirectory : fileList) {
-      if (fileOrDirectory.getName().startsWith(".")
-          || fileOrDirectory.getName().endsWith(".tmp")) {
-        continue;
+    Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
+      @Override
+      public FileVisitResult visitFile(Path filePath,
+          BasicFileAttributes basicFileAttributes) {
+        if (!filePath.toString().startsWith(".")
+            && !filePath.toString().endsWith(".tmp")) {
+          index.putIfAbsent(filePath, new FileNode());
+        }
+        return FileVisitResult.CONTINUE;
+      }
+    });
+  }
+
+  /**
+   * Walk the file tree of the {@code htdocs/} directory and add all previously
+   * unknown links to the in-memory index to ensure their deletion when they're
+   * known to be deleted from their original directories.
+   */
+  private void addOldLinksToIndex() throws IOException {
+    Path htdocsIndexPath = this.indexJsonPath.getParent();
+    Files.walkFileTree(this.htdocsPath, new SimpleFileVisitor<Path>() {
+      @Override
+      public FileVisitResult visitFile(Path linkPath,
+          BasicFileAttributes basicFileAttributes) {
+        if (!linkPath.startsWith(htdocsIndexPath)) {
+          Path filePath = indexedPath.resolve(htdocsPath.relativize(linkPath));
+          index.putIfAbsent(filePath, new FileNode());
+        }
+        return FileVisitResult.CONTINUE;
       }
-      if (fileOrDirectory.isFile()) {
-        fileNodes.add(indexFile(fileOrDirectory));
+    });
+  }
+
+  /**
+   * Go through the index, schedule tasks to index files, and update links.
+   *
+   * @throws IOException Thrown if an I/O exception occurs while creating or
+   *     deleting links.
+   */
+  private void scheduleTasksAndUpdateLinks(Instant now) throws IOException {
+    int queuedIndexerTasks = 0;
+    Map<Path, FileNode> indexingResults = new HashMap<>();
+    SortedSet<Path> filesToIndex = new TreeSet<>();
+    Map<Path, Path> linksToCreate = new HashMap<>();
+    Set<FileNode> linksToMarkForDeletion = new HashSet<>();
+    Map<Path, Path> linksToDelete = new HashMap<>();
+    for (Map.Entry<Path, FileNode> e : this.index.entrySet()) {
+      Path filePath = e.getKey();
+      Path linkPath = this.htdocsPath
+          .resolve(this.indexedPath.relativize(filePath));
+      FileNode fileNode = e.getValue();
+      if (Files.exists(filePath)) {
+        if (null != fileNode.indexerResult) {
+          if (!fileNode.indexerResult.isDone()) {
+            /* This file is currently being indexed, so we should just skip it
+             * and wait until the indexer is done. */
+            queuedIndexerTasks++;
+            continue;
+          }
+          try {
+            /* Indexing is done, obtain index results. */
+            fileNode = fileNode.indexerResult.get();
+            indexingResults.put(filePath, fileNode);
+          } catch (InterruptedException | ExecutionException ex) {
+            /* Clear index result, so that we can give this file another try
+             * next time. */
+            fileNode.indexerResult = null;
+          }
+        }
+        String originalLastModified = dateTimeFormatter
+            .format(Files.getLastModifiedTime(filePath).toInstant());
+        if (!originalLastModified.equals(fileNode.lastModified)) {
+          /* We either don't have any index results for this file, or we only
+           * have index results for an older version of this file. */
+          filesToIndex.add(filePath);
+        } else if (!Files.exists(linkPath)) {
+          /* We do have index results, but we don't have a link yet, so we're
+           * going to create a link. */
+          linksToCreate.put(linkPath, filePath);
+        } else {
+          String linkLastModified = dateTimeFormatter
+              .format(Files.getLastModifiedTime(linkPath).toInstant());
+          if (!linkLastModified.equals(fileNode.lastModified)) {
+            /* We do have index results plus a link to an older version of this
+             * file, so we'll have to update the link. */
+            linksToCreate.put(linkPath, filePath);
+          }
+        }
       } else {
-        DirectoryNode dn = indexDirectory(fileOrDirectory);
-        if (null != dn) {
-          directoryNodes.add(dn);
+        if (null == fileNode.markedForDeletion) {
+          /* We're noticing just now that the file doesn't exist anymore, so
+           * we're going to mark it for deletion but not delete the link right
+           * away. */
+          linksToMarkForDeletion.add(fileNode);
+        } else if (fileNode.markedForDeletion
+            .isBefore(now.minus(deletionDelay))) {
+          /* The file doesn't exist anymore, and we found out long enough ago,
+           * so we can now go ahead and delete the link. */
+          linksToDelete.put(linkPath, filePath);
+        }
+      }
+    }
+    if (queuedIndexerTasks > 0) {
+      logger.info("Counting {} file(s) being currently indexed or in the "
+          + "queue.", queuedIndexerTasks);
+    }
+    this.updateIndex(indexingResults);
+    this.scheduleTasks(filesToIndex);
+    this.createLinks(linksToCreate);
+    this.markForDeletion(linksToMarkForDeletion, now);
+    this.deleteLinks(linksToDelete);
+  }
+
+  /**
+   * Update index with index results.
+   */
+  private void updateIndex(Map<Path, FileNode> indexResults) {
+    if (!indexResults.isEmpty()) {
+      logger.info("Updating {} index entries with index results.",
+          indexResults.size());
+      this.index.putAll(indexResults);
+    }
+  }
+
+  /**
+   * Schedule indexing the given set of descriptor files, using different queues
+   * for tarballs and flat files.
+   *
+   * @param filesToIndex Paths to descriptor files to index.
+   */
+  private void scheduleTasks(SortedSet<Path> filesToIndex) {
+    if (!filesToIndex.isEmpty()) {
+      logger.info("Scheduling {} indexer task(s).", filesToIndex.size());
+      for (Path fileToIndex : filesToIndex) {
+        IndexerTask indexerTask = this.createIndexerTask(fileToIndex);
+        if (fileToIndex.getFileName().toString().endsWith(".tar.xz")) {
+          this.index.get(fileToIndex).indexerResult
+              = this.tarballsExecutor.submit(indexerTask);
+        } else {
+          this.index.get(fileToIndex).indexerResult
+              = this.flatFilesExecutor.submit(indexerTask);
         }
       }
     }
-    return new DirectoryNode(
-        directory.getName(), fileNodes.isEmpty() ? null : fileNodes,
-        directoryNodes.isEmpty() ? null : directoryNodes);
-  }
-
-  private FileNode indexFile(File file) {
-    return new FileNode(file.getName(), file.length(),
-        dateTimeFormat.format(file.lastModified()));
-  }
-
-  private void writeIndex(IndexNode indexNode) throws Exception {
-    indexJsonFile.getParentFile().mkdirs();
-    String indexNodeString = IndexNode.makeJsonString(indexNode);
-    for (String filename : new String[] {indexJsonFile.toString(),
-        indexJsonFile + ".gz", indexJsonFile + ".xz", indexJsonFile + ".bz2"}) {
-      FileType type = FileType.valueOf(
-          filename.substring(filename.lastIndexOf(".") + 1).toUpperCase());
-      try (BufferedWriter bufferedWriter
-          = new BufferedWriter(new OutputStreamWriter(type.outputStream(
-          new FileOutputStream(filename))))) {
-        bufferedWriter.write(indexNodeString);
+  }
+
+  /**
+   * Create an indexer task for indexing the given file.
+   *
+   * <p>The reason why this is a separate method is that it can be overridden by
+   * tests that don't actually want to index files but instead provide their own
+   * index results.</p>
+   *
+   * @param fileToIndex File to index.
+   * @return Indexer task.
+   */
+  protected IndexerTask createIndexerTask(Path fileToIndex) {
+    return new IndexerTask(fileToIndex);
+  }
+
+  /**
+   * Create links in {@code htdocs/}, including all necessary parent
+   * directories.
+   *
+   * @param linksToCreate Map of links to be created with keys being link paths
+   *     and values being original file paths.
+   * @throws IOException Thrown if an I/O error occurs.
+   */
+  private void createLinks(Map<Path, Path> linksToCreate) throws IOException {
+    if (!linksToCreate.isEmpty()) {
+      logger.info("Creating {} new link(s).", linksToCreate.size());
+      for (Map.Entry<Path, Path> e : linksToCreate.entrySet()) {
+        Path linkPath = e.getKey();
+        Path originalPath = e.getValue();
+        Files.createDirectories(linkPath.getParent());
+        Files.deleteIfExists(linkPath);
+        Files.createLink(linkPath, originalPath);
+      }
+    }
+  }
+
+  /**
+   * Mark the given links for deletion in the in-memory index.
+   *
+   * @param linksToMarkForDeletion Files to be marked for deletion.
+   */
+  private void markForDeletion(Set<FileNode> linksToMarkForDeletion,
+      Instant now) {
+    if (!linksToMarkForDeletion.isEmpty()) {
+      logger.info("Marking {} old link(s) for deletion.",
+          linksToMarkForDeletion.size());
+      for (FileNode fileNode : linksToMarkForDeletion) {
+        fileNode.markedForDeletion = now;
+      }
+    }
+  }
+
+  /**
+   * Delete the given links from {@code htdocs/}.
+   *
+   * @param linksToDelete Map of links to be deleted with keys being link paths
+   *     and values being original file paths.
+   * @throws IOException Thrown if an I/O error occurs.
+   */
+  private void deleteLinks(Map<Path, Path> linksToDelete) throws IOException {
+    if (!linksToDelete.isEmpty()) {
+      logger.info("Deleting {} old link(s).", linksToDelete.size());
+      for (Map.Entry<Path, Path> e : linksToDelete.entrySet()) {
+        Path linkPath = e.getKey();
+        Path originalPath = e.getValue();
+        Files.deleteIfExists(linkPath);
+        index.remove(originalPath);
       }
     }
   }
 
+  /**
+   * Write the in-memory index to {@code index.json} and its compressed
+   * variants, but exclude files that have not yet been indexed or that are
+   * marked for deletion.
+   *
+   * @throws IOException Thrown if an I/O error occurs while writing files.
+   */
+  private void writeIndex(SortedMap<Path, FileNode> index,
+      Instant now) throws IOException {
+    IndexNode indexNode = new IndexNode();
+    indexNode.indexCreated = dateTimeFormatter.format(now);
+    indexNode.buildRevision = this.buildRevisionString;
+    indexNode.path = this.basePathString;
+    SortedMap<Path, DirectoryNode> directoryNodes = new TreeMap<>();
+    for (Map.Entry<Path, FileNode> indexEntry : index.entrySet()) {
+      Path filePath = this.indexedPath.relativize(indexEntry.getKey());
+      FileNode fileNode = indexEntry.getValue();
+      if (null == fileNode.lastModified || null != fileNode.markedForDeletion) {
+        /* Skip unindexed or deleted files. */
+        continue;
+      }
+      Path directoryPath = null;
+      DirectoryNode parentDirectoryNode = indexNode;
+      if (null != filePath.getParent()) {
+        for (Path pathPart : filePath.getParent()) {
+          directoryPath = null == directoryPath ? pathPart
+              : directoryPath.resolve(pathPart);
+          DirectoryNode directoryNode = directoryNodes.get(directoryPath);
+          if (null == directoryNode) {
+            directoryNode = new DirectoryNode();
+            directoryNode.path = pathPart.toString();
+            if (null == parentDirectoryNode.directories) {
+              parentDirectoryNode.directories = new ArrayList<>();
+            }
+            parentDirectoryNode.directories.add(directoryNode);
+            directoryNodes.put(directoryPath, directoryNode);
+          }
+          parentDirectoryNode = directoryNode;
+        }
+      }
+      if (null == parentDirectoryNode.files) {
+        parentDirectoryNode.files = new ArrayList<>();
+      }
+      parentDirectoryNode.files.add(fileNode);
+    }
+    Path htdocsIndexPath = this.indexJsonPath.getParent();
+    try (OutputStream uncompressed
+            = Files.newOutputStream(htdocsIndexPath.resolve(".index.json.tmp"));
+        OutputStream bz2Compressed = new BZip2CompressorOutputStream(
+            Files.newOutputStream(htdocsIndexPath.resolve("index.json.bz2")));
+        OutputStream gzCompressed = new GzipCompressorOutputStream(
+            Files.newOutputStream(htdocsIndexPath.resolve("index.json.gz")));
+        OutputStream xzCompressed = new XZCompressorOutputStream(
+            Files.newOutputStream(htdocsIndexPath.resolve("index.json.xz")))) {
+      objectMapper.writeValue(uncompressed, indexNode);
+      objectMapper.writeValue(bz2Compressed, indexNode);
+      objectMapper.writeValue(gzCompressed, indexNode);
+      objectMapper.writeValue(xzCompressed, indexNode);
+    }
+    Files.move(htdocsIndexPath.resolve(".index.json.tmp"), this.indexJsonPath,
+        StandardCopyOption.REPLACE_EXISTING);
+  }
 }
 
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/DirectoryNode.java b/src/main/java/org/torproject/metrics/collector/indexer/DirectoryNode.java
new file mode 100644
index 0000000..a369d08
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/DirectoryNode.java
@@ -0,0 +1,36 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+
+import java.util.List;
+
+/**
+ * Directory node in {@code index.json} which is discarded after reading and
+ * re-created before writing that file.
+ */
+@JsonPropertyOrder({ "path", "files", "directories" })
+class DirectoryNode {
+
+  /**
+   * Relative path of the directory, e.g., {@code "recent"}.
+   */
+  @JsonProperty("path")
+  String path;
+
+  /**
+   * Files available from this directory; may be {@code null} if there are none.
+   */
+  @JsonProperty("files")
+  List<FileNode> files;
+
+  /**
+   * Subdirectories of this directory; may be {@code null} if there are none.
+   */
+  @JsonProperty("directories")
+  List<DirectoryNode> directories;
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/FileNode.java b/src/main/java/org/torproject/metrics/collector/indexer/FileNode.java
new file mode 100644
index 0000000..c007196
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/FileNode.java
@@ -0,0 +1,125 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+
+import java.time.Instant;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.concurrent.Future;
+
+/**
+ * File node in {@code index.json}, also used for storing volatile metadata
+ * like whether a descriptor file is currently being indexed or whether its
+ * link in {@code htdocs/} is marked for deletion.
+ */
+@JsonPropertyOrder({ "path", "size", "last_modified", "types",
+    "first_published", "last_published", "sha256" })
+class FileNode {
+
+  /**
+   * Relative path of the file, i.e., its name within its directory.
+   */
+  @JsonProperty("path")
+  String path;
+
+  /**
+   * Size of the file in bytes.
+   */
+  @JsonProperty("size")
+  Long size;
+
+  /**
+   * Timestamp when the file was last modified using pattern
+   * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+   */
+  @JsonProperty("last_modified")
+  String lastModified;
+
+  /**
+   * Descriptor types as found in {@code @type} annotations of contained
+   * descriptors, sorted and without duplicates.
+   */
+  @JsonProperty("types")
+  SortedSet<String> types;
+
+  /**
+   * Earliest publication timestamp of contained descriptors using pattern
+   * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone, or {@code null} if unknown.
+   */
+  @JsonProperty("first_published")
+  String firstPublished;
+
+  /**
+   * Latest publication timestamp of contained descriptors using pattern
+   * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone, or {@code null} if unknown.
+   */
+  @JsonProperty("last_published")
+  String lastPublished;
+
+  /**
+   * Base64-encoded SHA-256 digest of this file.
+   */
+  @JsonProperty("sha256")
+  String sha256;
+
+  /**
+   * Indexer result that will be available as soon as the indexer has completed
+   * its task; not written to {@code index.json}.
+   */
+  @JsonIgnore
+  Future<FileNode> indexerResult;
+
+  /**
+   * Timestamp when this file was first not found anymore in {@code indexed/},
+   * used to keep the link in {@code htdocs/} around for another 2 hours before
+   * deleting it, too.
+   *
+   * <p>This field is ignored when writing {@code index.json}, because it's an
+   * internal detail that nobody else cares about. The effect is that links
+   * might be around for longer than 2 hours in case of a restart, which seems
+   * acceptable.</p>
+   */
+  @JsonIgnore
+  Instant markedForDeletion;
+
+  /**
+   * Create and return a {@link FileNode} instance with the given values.
+   *
+   * @param path Relative path of the file.
+   * @param size Size of the file in bytes.
+   * @param lastModified Timestamp when the file was last modified using pattern
+   *     {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+   * @param types Descriptor types as found in {@code @type} annotations of
+   *     contained descriptors, copied into a sorted set; not {@code null}.
+   * @param firstPublished Earliest publication timestamp of contained
+   *     descriptors using pattern {@code "YYYY-MM-DD HH:MM"} in the UTC
+   *     timezone.
+   * @param lastPublished Latest publication timestamp of contained descriptors
+   *     using pattern {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+   * @param sha256 Base64-encoded SHA-256 digest of this file.
+   *
+   * @return {@link FileNode} instance with the given values.
+   */
+  static FileNode of(String path, Long size, String lastModified,
+      Iterable<String> types, String firstPublished, String lastPublished,
+      String sha256) {
+    FileNode fileNode = new FileNode();
+    fileNode.path = path;
+    fileNode.size = size;
+    fileNode.lastModified = lastModified;
+    fileNode.types = new TreeSet<>();
+    for (String type : types) {
+      fileNode.types.add(type);
+    }
+    fileNode.firstPublished = firstPublished;
+    fileNode.lastPublished = lastPublished;
+    fileNode.sha256 = sha256;
+    return fileNode;
+  }
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/IndexNode.java b/src/main/java/org/torproject/metrics/collector/indexer/IndexNode.java
new file mode 100644
index 0000000..8b7a46b
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/IndexNode.java
@@ -0,0 +1,30 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonPropertyOrder;
+
+/**
+ * Root node in {@code index.json} containing additional information about index
+ * creation time or Git revision used for creating it.
+ */
+@JsonPropertyOrder({ "index_created", "build_revision", "path", "files",
+    "directories" })
+class IndexNode extends DirectoryNode {
+
+  /**
+   * Timestamp when this index was created using pattern
+   * {@code "YYYY-MM-DD HH:MM"} in the UTC timezone.
+   */
+  @JsonProperty("index_created")
+  String indexCreated;
+
+  /**
+   * Git revision of this software; may be {@code null} (e.g., in tests).
+   */
+  @JsonProperty("build_revision")
+  String buildRevision;
+}
+
diff --git a/src/main/java/org/torproject/metrics/collector/indexer/IndexerTask.java b/src/main/java/org/torproject/metrics/collector/indexer/IndexerTask.java
new file mode 100644
index 0000000..03c750b
--- /dev/null
+++ b/src/main/java/org/torproject/metrics/collector/indexer/IndexerTask.java
@@ -0,0 +1,225 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import org.torproject.descriptor.BandwidthFile;
+import org.torproject.descriptor.BridgeNetworkStatus;
+import org.torproject.descriptor.BridgePoolAssignment;
+import org.torproject.descriptor.BridgedbMetrics;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.DirectoryKeyCertificate;
+import org.torproject.descriptor.ExitList;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.Microdescriptor;
+import org.torproject.descriptor.RelayDirectory;
+import org.torproject.descriptor.RelayNetworkStatus;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+import org.torproject.descriptor.RelayNetworkStatusVote;
+import org.torproject.descriptor.ServerDescriptor;
+import org.torproject.descriptor.SnowflakeStats;
+import org.torproject.descriptor.TorperfResult;
+import org.torproject.descriptor.UnparseableDescriptor;
+import org.torproject.descriptor.WebServerAccessLog;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.concurrent.Callable;
+
+/**
+ * Callable task that indexes a given descriptor file.
+ */
+class IndexerTask implements Callable<FileNode> {
+
+  /**
+   * Class logger.
+   */
+  private static final Logger logger
+      = LoggerFactory.getLogger(IndexerTask.class);
+
+  /**
+   * Formatter for all timestamps found in {@code index.json}.
+   */
+  private static final DateTimeFormatter dateTimeFormatter = DateTimeFormatter
+      .ofPattern("uuuu-MM-dd HH:mm").withZone(ZoneOffset.UTC);
+
+  /**
+   * Path to the descriptor file to index.
+   */
+  private final Path path;
+
+  /**
+   * Index results object, which starts out empty and gets populated as indexing
+   * proceeds.
+   */
+  private FileNode indexResult;
+
+  /**
+   * Create a new instance to parse the given descriptor file, but don't start
+   * parsing just yet.
+   *
+   * @param path Descriptor file to index.
+   */
+  IndexerTask(Path path) {
+    this.path = path;
+  }
+
+  /**
+   * Index the given file and return index results when done.
+   *
+   * @return Index results.
+   * @throws IOException Thrown if an I/O error occurs.
+   */
+  @Override
+  public FileNode call() throws IOException {
+    this.indexResult = new FileNode();
+    this.requestBasicFileAttributes();
+    this.computeFileDigest();
+    this.parseDescriptorFile();
+    return this.indexResult;
+  }
+
+  /**
+   * Request and store basic file attributes like file name, last-modified time,
+   * and size.
+   *
+   * @throws IOException Thrown if an I/O error occurs.
+   */
+  private void requestBasicFileAttributes() throws IOException {
+    this.indexResult.path = this.path.getFileName().toString();
+    this.indexResult.lastModified = dateTimeFormatter
+        .format(Files.getLastModifiedTime(this.path).toInstant());
+    this.indexResult.size = Files.size(this.path);
+  }
+
+  /**
+   * Compute and store the file's SHA-256 digest, encoded as Base64.
+   *
+   * @throws IOException Thrown if an I/O error occurs.
+   */
+  private void computeFileDigest() throws IOException {
+    try (InputStream stream = Files.newInputStream(this.path)) {
+      this.indexResult.sha256
+          = Base64.encodeBase64String(DigestUtils.sha256(stream));
+    }
+  }
+
+  /**
+   * Parse the descriptor file to extract contained descriptor types and first
+   * and last published time, skipping any unparseable descriptors.
+   */
+  private void parseDescriptorFile() {
+    Long firstPublishedMillis = null;
+    Long lastPublishedMillis = null;
+    this.indexResult.types = new TreeSet<>();
+    SortedSet<String> unknownDescriptorSubclasses = new TreeSet<>();
+    for (Descriptor descriptor : DescriptorSourceFactory
+        .createDescriptorReader().readDescriptors(this.path.toFile())) {
+      if (descriptor instanceof UnparseableDescriptor) {
+        /* Skip unparseable descriptor. */
+        continue;
+      }
+      for (String annotation : descriptor.getAnnotations()) {
+        if (annotation.startsWith("@type ")) {
+          this.indexResult.types.add(annotation.substring(6));
+        }
+      }
+      Long publishedMillis;
+      if (descriptor instanceof BandwidthFile) {
+        BandwidthFile bandwidthFile = (BandwidthFile) descriptor;
+        LocalDateTime fileCreatedOrTimestamp
+            = bandwidthFile.fileCreated().isPresent()
+            ? bandwidthFile.fileCreated().get()
+            : bandwidthFile.timestamp();
+        publishedMillis = fileCreatedOrTimestamp
+            .toInstant(ZoneOffset.UTC).toEpochMilli();
+      } else if (descriptor instanceof BridgeNetworkStatus) {
+        publishedMillis = ((BridgeNetworkStatus) descriptor)
+            .getPublishedMillis();
+      } else if (descriptor instanceof BridgePoolAssignment) {
+        publishedMillis = ((BridgePoolAssignment) descriptor)
+            .getPublishedMillis();
+      } else if (descriptor instanceof BridgedbMetrics) {
+        publishedMillis = ((BridgedbMetrics) descriptor)
+            .bridgedbMetricsEnd().toInstant(ZoneOffset.UTC).toEpochMilli();
+      } else if (descriptor instanceof DirectoryKeyCertificate) {
+        publishedMillis = ((DirectoryKeyCertificate) descriptor)
+            .getDirKeyPublishedMillis();
+      } else if (descriptor instanceof ExitList) {
+        publishedMillis = ((ExitList) descriptor)
+            .getDownloadedMillis();
+      } else if (descriptor instanceof ExtraInfoDescriptor) {
+        publishedMillis = ((ExtraInfoDescriptor) descriptor)
+            .getPublishedMillis();
+      } else if (descriptor instanceof Microdescriptor) {
+        /* Microdescriptors don't contain useful timestamps for this purpose,
+         * but we already knew that, so there's no need to log a warning
+         * further down below. */
+        continue;
+      } else if (descriptor instanceof RelayDirectory) {
+        publishedMillis = ((RelayDirectory) descriptor)
+            .getPublishedMillis();
+      } else if (descriptor instanceof RelayNetworkStatus) {
+        publishedMillis = ((RelayNetworkStatus) descriptor)
+            .getPublishedMillis();
+      } else if (descriptor instanceof RelayNetworkStatusConsensus) {
+        publishedMillis = ((RelayNetworkStatusConsensus) descriptor)
+            .getValidAfterMillis();
+      } else if (descriptor instanceof RelayNetworkStatusVote) {
+        publishedMillis = ((RelayNetworkStatusVote) descriptor)
+            .getValidAfterMillis();
+      } else if (descriptor instanceof ServerDescriptor) {
+        publishedMillis = ((ServerDescriptor) descriptor)
+            .getPublishedMillis();
+      } else if (descriptor instanceof SnowflakeStats) {
+        publishedMillis = ((SnowflakeStats) descriptor)
+            .snowflakeStatsEnd().toInstant(ZoneOffset.UTC).toEpochMilli();
+      } else if (descriptor instanceof TorperfResult) {
+        publishedMillis = ((TorperfResult) descriptor)
+            .getStartMillis();
+      } else if (descriptor instanceof WebServerAccessLog) {
+        publishedMillis = ((WebServerAccessLog) descriptor)
+            .getLogDate().atStartOfDay(ZoneOffset.UTC)
+            .toInstant().toEpochMilli();
+      } else {
+        /* Skip published timestamp if descriptor type is unknown or doesn't
+         * contain such a timestamp. */
+        unknownDescriptorSubclasses.add(
+            descriptor.getClass().getSimpleName());
+        continue;
+      }
+      if (null == firstPublishedMillis
+          || publishedMillis < firstPublishedMillis) {
+        firstPublishedMillis = publishedMillis;
+      }
+      if (null == lastPublishedMillis
+          || publishedMillis > lastPublishedMillis) {
+        lastPublishedMillis = publishedMillis;
+      }
+    }
+    if (!unknownDescriptorSubclasses.isEmpty()) {
+      logger.warn("Ran into unknown/unexpected Descriptor subclass(es) in "
+          + "{}: {}. Ignoring for index.json, but maybe worth looking into.",
+          this.path, unknownDescriptorSubclasses);
+    }
+    this.indexResult.firstPublished = null == firstPublishedMillis ? null
+        : dateTimeFormatter.format(Instant.ofEpochMilli(firstPublishedMillis));
+    this.indexResult.lastPublished = null == lastPublishedMillis ? null
+        : dateTimeFormatter.format(Instant.ofEpochMilli(lastPublishedMillis));
+  }
+}
+
diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties
index e7cadf7..65e0f99 100644
--- a/src/main/resources/collector.properties
+++ b/src/main/resources/collector.properties
@@ -76,16 +76,11 @@ BridgedbMetricsOffsetMinutes = 340
 # The URL of this instance.  This will be the base URL
 # written to index.json, i.e. please change this to the mirrors url!
 InstanceBaseUrl = https://collector.torproject.org
-# The target location for index.json and its compressed
-# versions index.json.gz, index.json.bz2, and index.json.xz
-IndexPath = index
 # The top-level directory for archived descriptors.
-ArchivePath = archive
-# The top-level directory for third party data.
-ContribPath = contrib
+IndexedPath = indexed
 # The top-level directory for the recent descriptors that were
 # published in the last 72 hours.
-RecentPath = recent
+RecentPath = indexed/recent
 # The top-level directory for the retrieved descriptors that will
 # be archived.
 OutputPath = out
@@ -93,6 +88,11 @@ OutputPath = out
 StatsPath = stats
 # Path for descriptors downloaded from other instances
 SyncPath = sync
+# Directory served via an external web server and managed by us which contains
+# (hard) links to files in ArchivePath and RecentPath and which therefore must
+# be located on the same file system. Also contains index.json and its
+# compressed versions index.json.gz, index.json.bz2, and index.json.xz.
+HtdocsPath = htdocs
 ######## Relay descriptors ########
 #
 ## Define descriptor sources
diff --git a/src/main/resources/create-tarballs.sh b/src/main/resources/create-tarballs.sh
index 5802020..695bb24 100755
--- a/src/main/resources/create-tarballs.sh
+++ b/src/main/resources/create-tarballs.sh
@@ -10,7 +10,7 @@
 # Configuration section:
 # The following path should be adjusted, if the CollecTor server layout differs.
 # All paths should be given absolute.
-ARCHIVEDIR="/srv/collector.torproject.org/collector/archive"
+ARCHIVEDIR="/srv/collector.torproject.org/collector/indexed/archive"
 WORKDIR="/srv/collector.torproject.org/collector/tarballs"
 OUTDIR="/srv/collector.torproject.org/collector/out"
 TARBALLTARGETDIR="/srv/collector.torproject.org/collector/data"
diff --git a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
index 7e9ea28..887f3ae 100644
--- a/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
+++ b/src/test/java/org/torproject/metrics/collector/conf/ConfigurationTest.java
@@ -39,7 +39,7 @@ public class ConfigurationTest {
   public void testKeyCount() {
     assertEquals("The number of properties keys in enum Key changed."
         + "\n This test class should be adapted.",
-        71, Key.values().length);
+        70, Key.values().length);
   }
 
   @Test()
diff --git a/src/test/java/org/torproject/metrics/collector/indexer/CreateIndexJsonTest.java b/src/test/java/org/torproject/metrics/collector/indexer/CreateIndexJsonTest.java
new file mode 100644
index 0000000..db00032
--- /dev/null
+++ b/src/test/java/org/torproject/metrics/collector/indexer/CreateIndexJsonTest.java
@@ -0,0 +1,522 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.torproject.metrics.collector.conf.Configuration;
+import org.torproject.metrics.collector.conf.Key;
+
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.FileTime;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Test class for {@link CreateIndexJson}.
+ */
+public class CreateIndexJsonTest {
+
+  /**
+   * Mocked indexer task that does not actually index a file but instead waits
+   * for the test class to set index results.
+   */
+  static class MockedIndexerTask extends IndexerTask {
+
+    /**
+     * Index result, to be set by the test; {@code null} until then.
+     */
+    private FileNode result;
+
+    /**
+     * Create a new mocked indexer task for the given path.
+     *
+     * @param path Path to index.
+     */
+    MockedIndexerTask(Path path) {
+      super(path);
+    }
+
+    /**
+     * Set index results and wake up any thread waiting in {@link #call()}.
+     *
+     * @param result Index results.
+     */
+    synchronized void setResult(FileNode result) {
+      this.result = result;
+      this.notifyAll();
+    }
+
+    /**
+     * Execute the task by blocking until the test has set index results.
+     *
+     * @return Index results provided by the test.
+     */
+    @Override
+    public FileNode call() {
+      synchronized (this) {
+        while (null == result) {
+          try {
+            wait();
+          } catch (InterruptedException e) {
+            /* Don't care about being interrupted, just keep waiting. */
+          }
+        }
+        return this.result;
+      }
+    }
+  }
+
+  /**
+   * List of mocked indexer tasks in the order of creation.
+   */
+  private List<MockedIndexerTask> indexerTasks = new ArrayList<>();
+
+  /**
+   * Testable version of the class under test handing out mocked indexer tasks.
+   */
+  class TestableCreateIndexJson extends CreateIndexJson {
+
+    /**
+     * Create a new instance with the given configuration.
+     *
+     * @param configuration Configuration for this test.
+     */
+    TestableCreateIndexJson(Configuration configuration) {
+      super(configuration);
+    }
+
+    /**
+     * Create an indexer task that doesn't actually index a file but that can
+     * be controlled by the test, and append it to the test's list of tasks.
+     *
+     * @param fileToIndex File to index.
+     * @return Created (mocked) indexer task.
+     */
+    @Override
+    protected IndexerTask createIndexerTask(Path fileToIndex) {
+      MockedIndexerTask indexerTask = new MockedIndexerTask(fileToIndex);
+      indexerTasks.add(indexerTask);
+      return indexerTask;
+    }
+
+    /**
+     * Return {@code null} as build revision string to make it easier to compare
+     * written {@code index.json} files in tests.
+     *
+     * @return Always {@code null}.
+     */
+    @Override
+    protected String obtainBuildRevision() {
+      return null;
+    }
+  }
+
+  /**
+   * Temporary folder containing all files for this test.
+   */
+  @Rule
+  public TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+  /**
+   * Path to recent exit list file in {@code indexed/recent/}.
+   */
+  private Path recentExitListFilePath;
+
+  /**
+   * Path to archived exit list file in {@code indexed/archive/}.
+   */
+  private Path archiveExitListFilePath;
+
+  /**
+   * Path to exit list link in {@code htdocs/recent/}.
+   */
+  private Path recentExitListLinkPath;
+
+  /**
+   * Path to {@code index.json} file in {@code htdocs/index/}.
+   */
+  private Path indexJsonPath;
+
+  /**
+   * Class under test.
+   */
+  private CreateIndexJson cij;
+
+  /**
+   * Create {@code indexed/} and {@code htdocs/} and configure the module.
+   *
+   * @throws IOException Thrown if an I/O error occurs.
+   */
+  @Before
+  public void prepareDirectoriesAndConfiguration() throws IOException {
+    Path indexedPath = this.temporaryFolder.newFolder("indexed").toPath();
+    this.recentExitListFilePath = indexedPath.resolve(
+        Paths.get("recent", "exit-lists", "2016-09-20-13-02-00"));
+    this.archiveExitListFilePath = indexedPath.resolve(
+        Paths.get("archive", "exit-lists", "exit-list-2016-09.tar.xz"));
+    Path htdocsPath = this.temporaryFolder.newFolder("htdocs").toPath();
+    this.recentExitListLinkPath = htdocsPath.resolve(
+        Paths.get("recent", "exit-lists", "2016-09-20-13-02-00"));
+    this.indexJsonPath = htdocsPath.resolve(
+        Paths.get("index", "index.json"));
+    Configuration configuration = new Configuration();
+    configuration.setProperty(Key.IndexedPath.name(),
+        indexedPath.toAbsolutePath().toString());
+    configuration.setProperty(Key.HtdocsPath.name(),
+        htdocsPath.toAbsolutePath().toString());
+    configuration.setProperty(Key.InstanceBaseUrl.name(),
+        "https://collector.torproject.org");
+    this.cij = new TestableCreateIndexJson(configuration);
+  }
+
+  /**
+   * First execution time.
+   */
+  private static final Instant firstExecution
+      = Instant.parse("2016-09-20T13:04:00Z");
+
+  /**
+   * Second execution time, two minutes after the first execution time, which is
+   * the default rate for executing this module.
+   */
+  private static final Instant secondExecution
+      = Instant.parse("2016-09-20T13:06:00Z");
+
+  /**
+   * Third execution, three hours later than the second execution time, to see
+   * if links to files that have been marked for deletion are actually deleted.
+   */
+  private static final Instant thirdExecution
+      = Instant.parse("2016-09-20T16:06:00Z");
+
+  /**
+   * Index result from indexing recent exit list.
+   */
+  private FileNode recentExitListFileNode = FileNode.of(
+      "2016-09-20-13-02-00", 177_090L, "2016-09-20 13:02",
+      Collections.singletonList("tordnsel 1.0"), "2016-09-20 13:02",
+      "2016-09-20 13:02", "4aXdw+jQ5O33AS8n+fUOwD5ZzHCICnwzvxkK8fWDhdw=");
+
+  /**
+   * Index result from indexing archived exit list.
+   */
+  private FileNode archiveExitListFileNode = FileNode.of(
+      "exit-list-2016-09.tar.xz", 1_008_748L, "2016-10-04 03:31",
+      Collections.singletonList("tordnsel 1.0"), "2016-09-01 00:02",
+      "2016-09-30 23:02", "P4zUKVOJFtKzxOXpN3NLU0UBZTBqCAM95yDPJ5JH62g=");
+
+  /**
+   * Index result from indexing <i>updated</i> archived exit list.
+   */
+  private FileNode updatedArchiveExitListFileNode = FileNode.of(
+      "exit-list-2016-09.tar.xz", 1_008_748L, "2016-10-07 03:31",
+      Collections.singletonList("tordnsel 1.0"), "2016-09-01 00:02",
+      "2016-09-30 23:02", "P4zUKVOJFtKzxOXpN3NLU0UBZTBqCAM95yDPJ5JH62g=");
+
+  /**
+   * Finish the oldest pending indexer task with the given index result,
+   * failing the test if no task is pending.
+   *
+   * @param fileNode Index result.
+   */
+  private void finishIndexing(FileNode fileNode) {
+    assertFalse(this.indexerTasks.isEmpty());
+    this.indexerTasks.remove(0).setResult(fileNode);
+  }
+
+  /**
+   * (Almost) empty {@code index.json} file.
+   */
+  private static final String emptyIndexJsonString
+      = "{\"index_created\":\"2016-09-20 13:06\","
+      + "\"path\":\"https://collector.torproject.org\"}";
+
+  /**
+   * {@code index.json} file containing a single recent exit list.
+   */
+  private static final String recentExitListIndexJsonString
+      = "{\"index_created\":\"2016-09-20 13:06\","
+      + "\"path\":\"https://collector.torproject.org\",\"directories\":[{"
+      + "\"path\":\"recent\",\"directories\":[{"
+      + "\"path\":\"exit-lists\",\"files\":[{"
+      + "\"path\":\"2016-09-20-13-02-00\",\"size\":177090,"
+      + "\"last_modified\":\"2016-09-20 13:02\","
+      + "\"types\":[\"tordnsel 1.0\"],"
+      + "\"first_published\":\"2016-09-20 13:02\","
+      + "\"last_published\":\"2016-09-20 13:02\","
+      + "\"sha256\":\"4aXdw+jQ5O33AS8n+fUOwD5ZzHCICnwzvxkK8fWDhdw=\"}]}]}]}";
+
+  /**
+   * {@code index.json} file containing a single archived exit list with a
+   * placeholder for the last-modified time.
+   */
+  private static final String archiveExitListIndexJsonString
+      = "{\"index_created\":\"2016-09-20 13:06\","
+      + "\"path\":\"https://collector.torproject.org\",\"directories\":[{"
+      + "\"path\":\"archive\",\"directories\":[{"
+      + "\"path\":\"exit-lists\",\"files\":[{"
+      + "\"path\":\"exit-list-2016-09.tar.xz\",\"size\":1008748,"
+      + "\"last_modified\":\"%s\","
+      + "\"types\":[\"tordnsel 1.0\"],"
+      + "\"first_published\":\"2016-09-01 00:02\","
+      + "\"last_published\":\"2016-09-30 23:02\","
+      + "\"sha256\":\"P4zUKVOJFtKzxOXpN3NLU0UBZTBqCAM95yDPJ5JH62g=\"}]}]}]}";
+
+  /**
+   * Delete the given file, failing the test on I/O errors.
+   *
+   * @param fileToDelete Path to file to delete.
+   */
+  private static void deleteFile(Path fileToDelete) {
+    try {
+      Files.delete(fileToDelete);
+    } catch (IOException e) {
+      fail(String.format("I/O error while deleting %s.", fileToDelete));
+    }
+  }
+
+  /**
+   * Create the given empty file and any missing parent directories.
+   *
+   * @param fileToCreate Path to file to create.
+   * @param lastModified Last-modified time of file to create.
+   */
+  private static void createFile(Path fileToCreate, Instant lastModified) {
+    try {
+      Files.createDirectories(fileToCreate.getParent());
+      Files.createFile(fileToCreate);
+      Files.setLastModifiedTime(fileToCreate, FileTime.from(lastModified));
+    } catch (IOException e) {
+      fail(String.format("I/O error while creating %s.", fileToCreate));
+    }
+  }
+
+  /**
+   * Return whether the given file exists.
+   *
+   * @param fileToCheck Path to file to check.
+   * @return {@code true} if the file exists, {@code false} otherwise.
+   */
+  private boolean fileExists(Path fileToCheck) {
+    return Files.exists(fileToCheck);
+  }
+
+  /**
+   * Change last-modified time of the given file, failing on I/O errors.
+   *
+   * @param fileToChange File to change.
+   * @param lastModified New last-modified time.
+   */
+  private void changeLastModified(Path fileToChange, Instant lastModified) {
+    try {
+      Files.setLastModifiedTime(fileToChange, FileTime.from(lastModified));
+    } catch (IOException e) {
+      fail(String.format("I/O error while changing last-modified time of %s.",
+          fileToChange));
+    }
+  }
+
+  /**
+   * Write the formatted string to {@code index.json}, creating its directory.
+   *
+   * @param indexJsonString Format string to write.
+   * @param formatArguments Arguments for placeholders in the format string.
+   */
+  private void writeIndexJson(String indexJsonString,
+      Object ... formatArguments) {
+    try {
+      Files.createDirectories(indexJsonPath.getParent());
+      Files.write(indexJsonPath,
+          String.format(indexJsonString, formatArguments).getBytes());
+    } catch (IOException e) {
+      fail("I/O error while writing index.json file.");
+    }
+  }
+
+  /**
+   * Read the first line from {@code index.json}, failing on I/O errors.
+   *
+   * @return First line from the {@code index.json} file.
+   */
+  private String readIndexJson() {
+    try {
+      return Files.readAllLines(indexJsonPath).get(0);
+    } catch (IOException e) {
+      fail("I/O error while reading index.json file.");
+      return null;
+    }
+  }
+
+  /**
+   * Run the module under test once, passing the given time as current time.
+   *
+   * @param now Time when running the module.
+   */
+  private void startProcessing(Instant now) {
+    this.cij.startProcessing(now);
+  }
+
+  /**
+   * Test whether two executions on an empty {@code indexed/} directory produce
+   * an {@code index.json} file listing neither files nor directories.
+   */
+  @Test
+  public void testEmptyDirs() {
+    startProcessing(firstExecution);
+    startProcessing(secondExecution);
+    assertEquals(emptyIndexJsonString, readIndexJson());
+  }
+
+  /**
+   * Test whether a new exit list in {@code indexed/recent/} gets indexed and,
+   * once indexing has finished, is included in {@code index.json}.
+   */
+  @Test
+  public void testNewRecentExitList() {
+    createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+    startProcessing(firstExecution);
+    finishIndexing(this.recentExitListFileNode);
+    startProcessing(secondExecution);
+    assertEquals(recentExitListIndexJsonString, readIndexJson());
+  }
+
+  /**
+   * Test whether an existing exit list in {@code indexed/recent/} that is
+   * already contained in {@code index.json} stays in there unchanged.
+   */
+  @Test
+  public void testExistingRecentExitList() {
+    createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+    writeIndexJson(recentExitListIndexJsonString);
+    startProcessing(firstExecution);
+    startProcessing(secondExecution);
+    assertEquals(recentExitListIndexJsonString, readIndexJson());
+  }
+
+  /**
+   * Test whether a deleted exit list in {@code indexed/recent/} is first
+   * removed from {@code index.json} and later deleted from
+   * {@code htdocs/recent/}.
+   */
+  @Test
+  public void testDeletedRecentExitList() {
+    createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+    writeIndexJson(recentExitListIndexJsonString);
+    startProcessing(firstExecution);
+    assertTrue(fileExists(recentExitListLinkPath));
+    deleteFile(recentExitListFilePath);
+    startProcessing(secondExecution);
+    assertEquals(emptyIndexJsonString, readIndexJson());
+    /* The link is kept for a while after its file has disappeared. */
+    assertTrue(fileExists(recentExitListLinkPath));
+    startProcessing(thirdExecution);
+    assertFalse(fileExists(recentExitListLinkPath));
+  }
+
+  /**
+   * Test whether a link in {@code htdocs/recent/} for which no corresponding
+   * file in {@code indexed/recent/} exists is eventually deleted.
+   */
+  @Test
+  public void testDeletedLink() {
+    createFile(recentExitListLinkPath, Instant.parse("2016-09-20T13:02:00Z"));
+    startProcessing(firstExecution);
+    assertTrue(Files.exists(recentExitListLinkPath));
+    startProcessing(secondExecution);
+    assertTrue(Files.exists(recentExitListLinkPath));
+    startProcessing(thirdExecution);
+    assertFalse(Files.exists(recentExitListLinkPath));
+  }
+
+  /**
+   * Test whether a tarball that gets deleted while being indexed is not
+   * included in {@code index.json} even after indexing is completed.
+   */
+  @Test
+  public void testIndexingDisappearingTarball() {
+    createFile(recentExitListFilePath, Instant.parse("2016-09-20T13:02:00Z"));
+    startProcessing(firstExecution);
+    deleteFile(recentExitListFilePath);
+    finishIndexing(recentExitListFileNode);
+    startProcessing(secondExecution);
+    assertEquals(emptyIndexJsonString, readIndexJson());
+  }
+
+  /**
+   * Test whether a tarball that gets updated in {@code indexed/archive/} gets
+   * re-indexed and updated in {@code index.json}.
+   */
+  @Test
+  public void testUpdatedFile() {
+    writeIndexJson(archiveExitListIndexJsonString, "2016-10-04 03:31");
+    createFile(archiveExitListFilePath, Instant.parse("2016-10-07T03:31:00Z"));
+    startProcessing(firstExecution);
+    finishIndexing(updatedArchiveExitListFileNode);
+    startProcessing(secondExecution);
+    assertEquals(String.format(archiveExitListIndexJsonString,
+        "2016-10-07 03:31"), readIndexJson());
+  }
+
+  /**
+   * Test how a tarball whose last-modified time is set again while it is
+   * being indexed shows up in {@code index.json} after indexing is completed.
+   *
+   * <p>NOTE(review): the file is created and "updated" with the very same
+   * last-modified time, and the assertion expects an {@code index.json} entry
+   * with an earlier time rather than no entry at all. Please confirm that
+   * this still matches the intent of not including a file that was updated
+   * while being indexed.</p>
+   */
+  @Test
+  public void testUpdateFileWhileIndexing() {
+    createFile(archiveExitListFilePath, Instant.parse("2016-10-07T03:31:00Z"));
+    startProcessing(firstExecution);
+    /* Same timestamp as used for creating the file above. */
+    changeLastModified(archiveExitListFilePath,
+        Instant.parse("2016-10-07T03:31:00Z"));
+    finishIndexing(archiveExitListFileNode);
+    startProcessing(secondExecution);
+    assertEquals(String.format(archiveExitListIndexJsonString,
+        "2016-10-04 03:31"), readIndexJson());
+  }
+
+  /**
+   * Test whether a tarball that gets updated after being indexed but before
+   * being included in {@code index.json} is not being updated in
+   * {@code index.json} until the updated file is being indexed. */
+  @Test
+  public void testUpdateFileAfterIndexing() {
+    createFile(archiveExitListFilePath, Instant.parse("2016-10-04T03:31:00Z"));
+    startProcessing(firstExecution);
+    finishIndexing(archiveExitListFileNode);
+    changeLastModified(archiveExitListFilePath,
+        Instant.parse("2016-10-07T03:31:00Z"));
+    startProcessing(secondExecution);
+    assertEquals(String.format(archiveExitListIndexJsonString,
+        "2016-10-04 03:31"), readIndexJson());
+  }
+
+  /**
+   * Test whether a long-running indexer task is being given the time to finish,
+   * rather than starting another task for the same file.
+   */
+  @Test
+  public void testLongRunningIndexerTask() {
+    createFile(archiveExitListFilePath, Instant.parse("2016-10-04T03:31:00Z"));
+    startProcessing(firstExecution);
+    startProcessing(secondExecution);
+    assertEquals(emptyIndexJsonString, readIndexJson());
+    finishIndexing(archiveExitListFileNode);
+    startProcessing(thirdExecution);
+    assertTrue(this.indexerTasks.isEmpty());
+  }
+}
+
diff --git a/src/test/java/org/torproject/metrics/collector/indexer/IndexerTaskTest.java b/src/test/java/org/torproject/metrics/collector/indexer/IndexerTaskTest.java
new file mode 100644
index 0000000..8e5e6f4
--- /dev/null
+++ b/src/test/java/org/torproject/metrics/collector/indexer/IndexerTaskTest.java
@@ -0,0 +1,226 @@
+/* Copyright 2019 The Tor Project
+ * See LICENSE for licensing information */
+
+package org.torproject.metrics.collector.indexer;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.FileTime;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+/**
+ * Parameterized test class for {@link IndexerTask}: each parameter set names
+ * a test resource together with the meta data expected from indexing it.
+ */
+@RunWith(Parameterized.class)
+public class IndexerTaskTest {
+
+  /**
+   * Name of the test resource to index, which is also the expected path.
+   */
+  @Parameterized.Parameter
+  public String path;
+
+  /**
+   * Expected file size in bytes.
+   */
+  @Parameterized.Parameter(1)
+  public Long size;
+
+  /**
+   * Last-modified time to set on the copied file and to expect in the result.
+   */
+  @Parameterized.Parameter(2)
+  public String lastModified;
+
+  /**
+   * Expected descriptor types, possibly empty.
+   */
+  @Parameterized.Parameter(3)
+  public String[] types;
+
+  /**
+   * Expected first publication time, or {@code null} if none is expected.
+   */
+  @Parameterized.Parameter(4)
+  public String firstPublished;
+
+  /**
+   * Expected last publication time, or {@code null} if none is expected.
+   */
+  @Parameterized.Parameter(5)
+  public String lastPublished;
+
+  /**
+   * Expected SHA-256 digest of the file as contained in the result.
+   */
+  @Parameterized.Parameter(6)
+  public String sha256;
+
+  /**
+   * Initialize test parameters.
+   *
+   * <p>The first parameter doubles as display name of each test run, so that
+   * a failure can be attributed to a test resource.</p>
+   *
+   * @return Test parameters.
+   */
+  @Parameterized.Parameters(name = "{0}")
+  public static Collection<Object[]> pathFilename() {
+    return Arrays.asList(new Object[][]{
+
+        {"2016-09-20-13-00-00-consensus", /* Path in src/test/resources/ */
+            1_618_103L, /* Size in bytes */
+            "2017-09-07 12:13", /* Last-modified time */
+            new String[] { "network-status-consensus-3 1.17" }, /* Types */
+            "2016-09-20 13:00", /* First published */
+            "2016-09-20 13:00", /* Last published */
+            "3mLpDZmP/NSgOgmuPDyljxh0Lup1L6FtD16266ZCGAw="}, /* SHA-256 */
+
+        {"2016-09-20-13-00-00-vote-49015F787433103580E3B66A1707A00E60F2D15B-"
+            + "60ADC6BEC262AE921A1037D54C8A3976367DBE87",
+            3_882_514L,
+            "2017-09-07 12:13",
+            new String[] { "network-status-vote-3 1.17" },
+            "2016-09-20 13:00",
+            "2016-09-20 13:00",
+            "UCnSSrvdm26dJOriFgEQNQVrBLpVKbH/fF0VPRX3TGc="},
+
+        {"2016-09-20-13-02-00",
+            177_090L,
+            "2017-01-13 16:55",
+            new String[] { "tordnsel 1.0" },
+            "2016-09-20 13:02",
+            "2016-09-20 13:02",
+            "4aXdw+jQ5O33AS8n+fUOwD5ZzHCICnwzvxkK8fWDhdw="},
+
+        {"2016-10-01-16-00-00-vote-0232AF901C31A04EE9848595AF9BB7620D4C5B2E-"
+            + "FEE63B4AB7CE5A6BDD09E9A5C4F01BD61EB7E4F1",
+            3_226_152L,
+            "2017-01-13 16:55",
+            new String[] { "network-status-vote-3 1.0" },
+            "2016-10-01 16:00",
+            "2016-10-01 16:00",
+            "bilv6zEXr0Y9f5o24RMN0lUujsJJiSQAn9LkG0XJrZE="},
+
+        {"2016-10-02-17-00-00-consensus-microdesc",
+            1_431_627L,
+            "2017-09-07 12:13",
+            new String[] { "network-status-microdesc-consensus-3 1.17" },
+            "2016-10-02 17:00",
+            "2016-10-02 17:00",
+            "rrkxuLahYENLExX99Jio587/kUz9NtOoaYyKXxvX5EA="},
+
+        {"20160920-063816-1D8F3A91C37C5D1C4C19B1AD1D0CFBE8BF72D8E1",
+            339_256L,
+            "2017-09-07 12:13",
+            new String[] { "bridge-network-status 1.17" },
+            "2016-09-20 06:38",
+            "2016-09-20 06:38",
+            "sMAcyFrZ2rxj50b6iGe3icCNMC4gBSA1y9ZH4EWTa8s="},
+
+        {"bridge-2016-10-02-08-09-00-extra-infos",
+            11_561L,
+            "2017-09-07 12:13",
+            new String[] { "bridge-extra-info 1.3" },
+            "2016-10-02 06:09",
+            "2016-10-02 06:09",
+            "hat+vbyE04eH9JBQa0s6ezB6sLaStUUhvUj8CZ1aoEY="},
+
+        {"bridge-2016-10-02-16-09-00-server-descriptors",
+            5_336L,
+            "2017-01-13 16:55",
+            new String[] { "bridge-server-descriptor 1.2" },
+            "2016-10-02 14:09",
+            "2016-10-02 14:09",
+            "6CtHdo+eRFOi5xBjJcOVszC1hibC5gTB+YWvn1VmIIc="},
+
+        {"moria-1048576-2016-10-05.tpf",
+            20_405L,
+            "2017-09-07 12:13",
+            new String[0],
+            null,
+            null,
+            "DZyk6c0lQQ7OVZo1cmA+SuxPA+1thmuiooVifQPPOiA="},
+
+        {"op-nl-1048576-2017-04-11.tpf",
+            4_220L,
+            "2017-09-20 12:14",
+            new String[] { "torperf 1.1" },
+            "2017-04-11 06:24",
+            "2017-04-11 15:54",
+            "Gwex5yN3+s2PrhekjA68XmPg+UorOfx7mUa4prd7Dt8="},
+
+        {"relay-2016-10-02-08-05-00-extra-infos",
+            20_541L,
+            "2017-01-13 16:55",
+            new String[] { "extra-info 1.0" },
+            "2016-10-02 07:01",
+            "2016-10-02 07:01",
+            "3ZSO3+9ed9OwMVPx2LcVIiJfC+O30eEXEdbz64Hrp0w="},
+
+        {"relay-2016-10-02-16-05-00-server-descriptors",
+            17_404L,
+            "2017-01-13 16:55",
+            new String[] { "server-descriptor 1.0" },
+            "2016-10-02 14:58",
+            "2016-10-02 15:01",
+            "uWKHHzq4+oVNdOGh0mfkLUSjwGrBlLtEtN2DtF5qcLU="},
+
+        {"siv-1048576-2016-10-03.tpf",
+            39_193L,
+            "2017-01-13 16:55",
+            new String[] { "torperf 1.0" },
+            "2016-10-03 00:02",
+            "2016-10-03 23:32",
+            "paaFPI6BVuIDQ32aIuHYNCuKmBvFxsDvVCCwp+oM0GE="},
+
+        {"torperf-51200-2016-10-02.tpf",
+            233_763L,
+            "2017-01-13 16:55",
+            new String[] { "torperf 1.0" },
+            "2016-10-02 00:00",
+            "2016-10-02 23:55",
+            "fqeVAXamvB4yQ/8UlZAxhJx0+1Y7IfipqIpOUqQ57rE="}
+    });
+  }
+
+  /**
+   * Formatter for all timestamps found in {@code index.json}.
+   */
+  private static final DateTimeFormatter dateTimeFormatter = DateTimeFormatter
+      .ofPattern("uuuu-MM-dd HH:mm").withZone(ZoneOffset.UTC);
+
+  /**
+   * Temporary folder containing all files for this test.
+   */
+  @Rule
+  public TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+  /**
+   * Test indexing a file: copy the test resource to a temporary directory,
+   * set its last-modified time, run an indexer task on it, and compare all
+   * fields of the result with the expected values.
+   *
+   * @throws IOException Thrown if an I/O error occurs.
+   */
+  @Test
+  public void testIndexFile() throws IOException {
+    Path indexedDirectory = this.temporaryFolder.newFolder().toPath();
+    Path temporaryFile = indexedDirectory.resolve(this.path);
+    try (InputStream is = getClass()
+        .getClassLoader().getResourceAsStream(this.path)) {
+      if (null == is) {
+        fail(String.format("Unable to read test resource %s.", this.path));
+        return;
+      }
+      Files.copy(is, temporaryFile);
+    }
+    /* Copying does not carry over the last-modified time, so set the one
+     * that is expected to be contained in the index result. */
+    Files.setLastModifiedTime(temporaryFile,
+        FileTime.from(LocalDateTime.parse(this.lastModified, dateTimeFormatter)
+            .toInstant(ZoneOffset.UTC)));
+    assertTrue(Files.exists(temporaryFile));
+    IndexerTask indexerTask = new IndexerTask(temporaryFile);
+    FileNode indexResult = indexerTask.call();
+    assertEquals(this.path, indexResult.path);
+    assertEquals(this.size, indexResult.size);
+    assertEquals(this.lastModified, indexResult.lastModified);
+    SortedSet<String> expectedTypes = new TreeSet<>(Arrays.asList(this.types));
+    assertEquals(expectedTypes, indexResult.types);
+    assertEquals(this.firstPublished, indexResult.firstPublished);
+    assertEquals(this.lastPublished, indexResult.lastPublished);
+    assertEquals(this.sha256, indexResult.sha256);
+  }
+}
+



More information about the tor-commits mailing list