[tor-commits] [metrics-db/master] Make copying to rsync/ more efficient.

karsten at torproject.org karsten at torproject.org
Sat Oct 27 20:07:46 UTC 2012


commit 663cdab7217ef66fec2c60757c88c42bd2e08d8f
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Fri Oct 26 20:57:08 2012 -0400

    Make copying to rsync/ more efficient.
    
    Each module now writes to its own subdirectory in rsync/ in parallel to
    writing to its output directory for making tarballs.  This should be more
    efficient than going over the output directories and copying files to
    rsync/.
---
 .../db/bridgedescs/SanitizedBridgesWriter.java     |  107 ++++++++++--------
 .../BridgePoolAssignmentsProcessor.java            |   51 ++++++---
 .../ernie/db/exitlists/ExitListDownloader.java     |   50 ++++++--
 .../ernie/db/main/RsyncDataProvider.java           |  118 --------------------
 .../ernie/db/relaydescs/ArchiveWriter.java         |  109 ++++++++++--------
 .../ernie/db/torperf/TorperfDownloader.java        |   42 +++++--
 6 files changed, 226 insertions(+), 251 deletions(-)

diff --git a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
index 8db267a..87593bd 100644
--- a/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/org/torproject/ernie/db/bridgedescs/SanitizedBridgesWriter.java
@@ -15,9 +15,11 @@ import java.security.SecureRandom;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
+import java.util.Stack;
 import java.util.TimeZone;
 import java.util.TreeMap;
 import java.util.logging.Level;
@@ -28,7 +30,6 @@ import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.codec.binary.Hex;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.RsyncDataProvider;
 
 /**
  * Sanitizes bridge descriptors, i.e., removes all possibly sensitive
@@ -192,16 +193,7 @@ public class SanitizedBridgesWriter extends Thread {
     // Finish writing sanitized bridge descriptors to disk
     this.finishWriting();
 
-    // Copy sanitized bridge descriptors from the last 3 days to the rsync
-    // directory.
-    RsyncDataProvider rdp = new RsyncDataProvider();
-    rdp.copyFiles(new File(sanitizedBridgesDirectory, "statuses"),
-        "bridge-descriptors/statuses");
-    rdp.copyFiles(
-        new File(sanitizedBridgesDirectory, "server-descriptor"),
-        "bridge-descriptors/server-descriptors");
-    rdp.copyFiles(new File(sanitizedBridgesDirectory, "extra-info"),
-        "bridge-descriptors/extra-infos");
+    this.cleanUpRsyncDirectory();
   }
 
   private String scrubOrAddress(String orAddress, byte[] fingerprintBytes,
@@ -519,32 +511,31 @@ public class SanitizedBridgesWriter extends Thread {
 
     /* Write the sanitized network status to disk. */
     try {
-
-      /* Determine file name. */
       String syear = publicationTime.substring(0, 4);
       String smonth = publicationTime.substring(5, 7);
       String sday = publicationTime.substring(8, 10);
       String stime = publicationTime.substring(11, 13)
           + publicationTime.substring(14, 16)
           + publicationTime.substring(17, 19);
-      File statusFile = new File(
+      File tarballFile = new File(
           this.sanitizedBridgesDirectory.getAbsolutePath() + "/" + syear
           + "/" + smonth + "/statuses/" + sday + "/" + syear + smonth
           + sday + "-" + stime + "-"
           + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
-
-      /* Create all parent directories to write this network status. */
-      statusFile.getParentFile().mkdirs();
-
-      /* Write sanitized network status to disk. */
-      BufferedWriter bw = new BufferedWriter(new FileWriter(statusFile));
-      bw.write("@type bridge-network-status 1.0\n");
-      bw.write("published " + publicationTime + "\n");
-      for (String scrubbed : scrubbedLines.values()) {
-        bw.write(scrubbed);
+      File rsyncFile = new File("rsync/bridge-descriptors/statuses/"
+          + tarballFile.getName());
+      File[] outputFiles = new File[] { tarballFile, rsyncFile };
+      for (File outputFile : outputFiles) {
+        outputFile.getParentFile().mkdirs();
+        BufferedWriter bw = new BufferedWriter(new FileWriter(
+            outputFile));
+        bw.write("@type bridge-network-status 1.0\n");
+        bw.write("published " + publicationTime + "\n");
+        for (String scrubbed : scrubbedLines.values()) {
+          bw.write(scrubbed);
+        }
+        bw.close();
       }
-      bw.close();
-
     } catch (IOException e) {
       this.logger.log(Level.WARNING, "Could not write sanitized bridge "
           + "network status to disk.", e);
@@ -781,22 +772,26 @@ public class SanitizedBridgesWriter extends Thread {
     }
     String dyear = published.substring(0, 4);
     String dmonth = published.substring(5, 7);
-    File newFile = new File(
+    File tarballFile = new File(
         this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
         + dyear + "/" + dmonth + "/server-descriptors/"
         + "/" + descriptorDigest.charAt(0) + "/"
         + descriptorDigest.charAt(1) + "/"
         + descriptorDigest);
-
-    /* Write sanitized server descriptor to disk, including all its parent
-     * directories. */
+    File rsyncFile = new File(
+        "rsync/bridge-descriptors/server-descriptors/"
+        + tarballFile.getName());
+    File[] outputFiles = new File[] { tarballFile, rsyncFile };
     try {
-      newFile.getParentFile().mkdirs();
-      BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
-      bw.write("@type bridge-server-descriptor 1.0\n");
-      bw.write(scrubbedDesc);
-      bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
-      bw.close();
+      for (File outputFile : outputFiles) {
+        outputFile.getParentFile().mkdirs();
+        BufferedWriter bw = new BufferedWriter(new FileWriter(
+            outputFile));
+        bw.write("@type bridge-server-descriptor 1.0\n");
+        bw.write(scrubbedDesc);
+        bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
+        bw.close();
+      }
     } catch (IOException e) {
       this.logger.log(Level.WARNING, "Could not write sanitized server "
           + "descriptor to disk.", e);
@@ -910,22 +905,25 @@ public class SanitizedBridgesWriter extends Thread {
     }
     String dyear = published.substring(0, 4);
     String dmonth = published.substring(5, 7);
-    File newFile = new File(
+    File tarballFile = new File(
         this.sanitizedBridgesDirectory.getAbsolutePath() + "/"
         + dyear + "/" + dmonth + "/extra-infos/"
         + descriptorDigest.charAt(0) + "/"
         + descriptorDigest.charAt(1) + "/"
         + descriptorDigest);
-
-    /* Write sanitized extra-info descriptor to disk, including all its
-     * parent directories. */
+    File rsyncFile = new File("rsync/bridge-descriptors/extra-infos/"
+        + tarballFile.getName());
+    File[] outputFiles = new File[] { tarballFile, rsyncFile };
     try {
-      newFile.getParentFile().mkdirs();
-      BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
-      bw.write("@type bridge-extra-info 1.1\n");
-      bw.write(scrubbedDesc);
-      bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
-      bw.close();
+      for (File outputFile : outputFiles) {
+        outputFile.getParentFile().mkdirs();
+        BufferedWriter bw = new BufferedWriter(new FileWriter(
+            outputFile));
+        bw.write("@type bridge-extra-info 1.1\n");
+        bw.write(scrubbedDesc);
+        bw.write("router-digest " + descriptorDigest.toUpperCase() + "\n");
+        bw.close();
+      }
     } catch (Exception e) {
       this.logger.log(Level.WARNING, "Could not write sanitized "
           + "extra-info descriptor to disk.", e);
@@ -969,5 +967,22 @@ public class SanitizedBridgesWriter extends Thread {
       }
     }
   }
+
+  /* Delete all files from the rsync directory that have not been modified
+   * in the last three days. */
+  public void cleanUpRsyncDirectory() {
+    long cutOffMillis = System.currentTimeMillis()
+        - 3L * 24L * 60L * 60L * 1000L;
+    Stack<File> allFiles = new Stack<File>();
+    allFiles.add(new File("rsync/bridge-descriptors"));
+    while (!allFiles.isEmpty()) {
+      File file = allFiles.pop();
+      if (file.isDirectory()) {
+        allFiles.addAll(Arrays.asList(file.listFiles()));
+      } else if (file.lastModified() < cutOffMillis) {
+        file.delete();
+      }
+    }
+  }
 }
 
diff --git a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
index 4f8dcb6..0ac6f90 100644
--- a/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
+++ b/src/org/torproject/ernie/db/bridgepools/BridgePoolAssignmentsProcessor.java
@@ -27,7 +27,6 @@ import org.apache.commons.codec.binary.Hex;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.RsyncDataProvider;
 
 public class BridgePoolAssignmentsProcessor extends Thread {
 
@@ -121,19 +120,26 @@ public class BridgePoolAssignmentsProcessor extends Thread {
                 long bridgePoolAssignmentTime = assignmentFormat.parse(
                     bridgePoolAssignmentLine.substring(
                     "bridge-pool-assignment ".length())).getTime();
-                File sanitizedAssignmentsFile = new File(
+                File tarballFile = new File(
                     sanitizedAssignmentsDirectory, filenameFormat.format(
                     bridgePoolAssignmentTime));
-                if (!sanitizedAssignmentsFile.exists()) {
-                  sanitizedAssignmentsFile.getParentFile().mkdirs();
-                  BufferedWriter bw = new BufferedWriter(new FileWriter(
-                      sanitizedAssignmentsFile));
-                  bw.write("@type bridge-pool-assignment 1.0\n");
-                  bw.write(bridgePoolAssignmentLine + "\n");
-                  for (String assignmentLine : sanitizedAssignments) {
-                    bw.write(assignmentLine + "\n");
+                File rsyncFile = new File(
+                    "rsync/bridge-pool-assignments/"
+                    + tarballFile.getName());
+                File[] outputFiles = new File[] { tarballFile,
+                    rsyncFile };
+                for (File outputFile : outputFiles) {
+                  if (!outputFile.exists()) {
+                    outputFile.getParentFile().mkdirs();
+                    BufferedWriter bw = new BufferedWriter(new FileWriter(
+                        outputFile));
+                    bw.write("@type bridge-pool-assignment 1.0\n");
+                    bw.write(bridgePoolAssignmentLine + "\n");
+                    for (String assignmentLine : sanitizedAssignments) {
+                      bw.write(assignmentLine + "\n");
+                    }
+                    bw.close();
                   }
-                  bw.close();
                 }
               } catch (IOException e) {
                 logger.log(Level.WARNING, "Could not write sanitized "
@@ -186,13 +192,26 @@ public class BridgePoolAssignmentsProcessor extends Thread {
       }
     }
 
-    // Copy sanitized bridge pool assignments from the last 3 days to the
-    // rsync directory.
-    RsyncDataProvider rdp = new RsyncDataProvider();
-    rdp.copyFiles(sanitizedAssignmentsDirectory,
-        "bridge-pool-assignments");
+    this.cleanUpRsyncDirectory();
 
     logger.info("Finished processing bridge pool assignment file(s).");
   }
+
+  /* Delete all files from the rsync directory that have not been modified
+   * in the last three days. */
+  public void cleanUpRsyncDirectory() {
+    long cutOffMillis = System.currentTimeMillis()
+        - 3L * 24L * 60L * 60L * 1000L;
+    Stack<File> allFiles = new Stack<File>();
+    allFiles.add(new File("rsync/bridge-pool-assignments"));
+    while (!allFiles.isEmpty()) {
+      File file = allFiles.pop();
+      if (file.isDirectory()) {
+        allFiles.addAll(Arrays.asList(file.listFiles()));
+      } else if (file.lastModified() < cutOffMillis) {
+        file.delete();
+      }
+    }
+  }
 }
 
diff --git a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
index bbd93ac..26e944c 100644
--- a/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
+++ b/src/org/torproject/ernie/db/exitlists/ExitListDownloader.java
@@ -10,6 +10,7 @@ import java.io.IOException;
 import java.net.HttpURLConnection;
 import java.net.URL;
 import java.text.SimpleDateFormat;
+import java.util.Arrays;
 import java.util.Date;
 import java.util.SortedSet;
 import java.util.Stack;
@@ -19,7 +20,6 @@ import java.util.logging.Level;
 import java.util.logging.Logger;
 
 import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.RsyncDataProvider;
 
 public class ExitListDownloader extends Thread {
 
@@ -55,24 +55,33 @@ public class ExitListDownloader extends Thread {
           new SimpleDateFormat("yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
       printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
       Date downloadedDate = new Date();
-      File exitListFile = new File("exitlist/" + printFormat.format(
+      File tarballFile = new File("exitlist/" + printFormat.format(
           downloadedDate));
-      exitListFile.getParentFile().mkdirs();
+      tarballFile.getParentFile().mkdirs();
+      File rsyncFile = new File("rsync/exit-lists/"
+          + tarballFile.getName());
+      rsyncFile.getParentFile().mkdirs();
       SimpleDateFormat dateTimeFormat =
           new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-      dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-      BufferedWriter bw = new BufferedWriter(new FileWriter(
-          exitListFile));
-      bw.write("@type tordnsel 1.0\n");
-      bw.write("Downloaded " + dateTimeFormat.format(downloadedDate)
+      BufferedWriter bwT = new BufferedWriter(new FileWriter(
+          tarballFile));
+      BufferedWriter bwR = new BufferedWriter(new FileWriter(
+          rsyncFile));
+      bwT.write("@type tordnsel 1.0\n");
+      bwT.write("Downloaded " + dateTimeFormat.format(downloadedDate)
+          + "\n");
+      bwR.write("@type tordnsel 1.0\n");
+      bwR.write("Downloaded " + dateTimeFormat.format(downloadedDate)
           + "\n");
       int len;
       byte[] data = new byte[1024];
       while ((len = in.read(data, 0, 1024)) >= 0) {
-        bw.write(new String(data, 0, len));
+        bwT.write(new String(data, 0, len));
+        bwR.write(new String(data, 0, len));
       }   
       in.close();
-      bw.close();
+      bwT.close();
+      bwR.close();
       logger.fine("Finished downloading exit list.");
     } catch (IOException e) {
       logger.log(Level.WARNING, "Failed downloading exit list", e);
@@ -110,9 +119,24 @@ public class ExitListDownloader extends Thread {
     }
     logger.info(dumpStats.toString());
 
-    /* Copy exit lists from the last 3 days to the rsync directory. */
-    RsyncDataProvider rdp = new RsyncDataProvider();
-    rdp.copyFiles(new File("exitlist"), "exit-lists");
+    this.cleanUpRsyncDirectory();
+  }
+
+  /* Delete all files from the rsync directory that have not been modified
+   * in the last three days. */
+  public void cleanUpRsyncDirectory() {
+    long cutOffMillis = System.currentTimeMillis()
+        - 3L * 24L * 60L * 60L * 1000L;
+    Stack<File> allFiles = new Stack<File>();
+    allFiles.add(new File("rsync/exit-lists"));
+    while (!allFiles.isEmpty()) {
+      File file = allFiles.pop();
+      if (file.isDirectory()) {
+        allFiles.addAll(Arrays.asList(file.listFiles()));
+      } else if (file.lastModified() < cutOffMillis) {
+        file.delete();
+      }
+    }
   }
 }
 
diff --git a/src/org/torproject/ernie/db/main/RsyncDataProvider.java b/src/org/torproject/ernie/db/main/RsyncDataProvider.java
deleted file mode 100644
index 7788584..0000000
--- a/src/org/torproject/ernie/db/main/RsyncDataProvider.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/* Copyright 2012 The Tor Project
- * See LICENSE for licensing information */
-package org.torproject.ernie.db.main;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.Stack;
-import java.util.logging.Logger;
-
-/**
- * Copy files published in the last 3 days to a local directory that can
- * then be served via rsync.
- */
-public class RsyncDataProvider {
-
-  private Logger logger;
-
-  private long cutOffMillis;
-
-  private File rsyncDirectory;
-
-  public RsyncDataProvider() {
-
-    /* Initialize logger. */
-    this.logger = Logger.getLogger(RsyncDataProvider.class.getName());
-
-    /* Determine the cut-off time for files in rsync/. */
-    this.cutOffMillis = System.currentTimeMillis()
-        - 3L * 24L * 60L * 60L * 1000L;
-
-    /* Create rsync/ directory if it doesn't exist. */
-    this.rsyncDirectory = new File("rsync");
-    if (!rsyncDirectory.exists()) {
-      rsyncDirectory.mkdirs();
-    }
-  }
-
-  public void copyFiles(File fromDirectory, String toRsyncSubDirectory) {
-
-    File toDirectory = new File(this.rsyncDirectory, toRsyncSubDirectory);
-
-    /* Make a list of all files in the rsync/ subdirectory to delete those
-     * that we didn't copy in this run. */
-    Set<String> fileNamesInRsync = new HashSet<String>();
-    Stack<File> files = new Stack<File>();
-    files.add(toDirectory);
-    while (!files.isEmpty()) {
-      File pop = files.pop();
-      if (pop.isDirectory()) {
-        files.addAll(Arrays.asList(pop.listFiles()));
-      } else {
-        fileNamesInRsync.add(pop.getName());
-      }
-    }
-    logger.info("Found " + fileNamesInRsync.size() + " files in "
-        + toDirectory.getAbsolutePath() + " that we're either "
-        + "overwriting or deleting in this execution.");
-
-    /* Copy files modified in the last 3 days. */
-    files.add(fromDirectory);
-    while (!files.isEmpty()) {
-      File pop = files.pop();
-      if (pop.isDirectory()) {
-        files.addAll(Arrays.asList(pop.listFiles()));
-      } else if (pop.lastModified() >= this.cutOffMillis) {
-        String fileName = pop.getName();
-        this.copyFile(pop, new File(toDirectory, fileName));
-        fileNamesInRsync.remove(fileName);
-      }
-    }
-
-    /* Delete all files that we didn't (over-)write in this run. */
-    files.add(toDirectory);
-    while (!files.isEmpty()) {
-      File pop = files.pop();
-      if (pop.isDirectory()) {
-        files.addAll(Arrays.asList(pop.listFiles()));
-      } else if (fileNamesInRsync.contains(pop.getName())) {
-        fileNamesInRsync.remove(pop.getName());
-        pop.delete();
-      }
-    }
-    logger.info("After deleting files that we didn't overwrite in this "
-        + "run, there are " + fileNamesInRsync.size() + " files left in "
-        + toDirectory.getAbsolutePath() + ".");
-  }
-
-  private void copyFile(File from, File to) {
-    if (from.exists() && to.exists() &&
-        from.lastModified() == to.lastModified() &&
-        from.length() == to.length()) {
-      return;
-    }
-    try {
-      to.getParentFile().mkdirs();
-      FileInputStream fis = new FileInputStream(from);
-      BufferedInputStream bis = new BufferedInputStream(fis);
-      FileOutputStream fos = new FileOutputStream(to);
-      int len;
-      byte[] data = new byte[1024];
-      while ((len = bis.read(data, 0, 1024)) >= 0) {
-        fos.write(data, 0, len);
-      }
-      bis.close();
-      fos.close();
-      to.setLastModified(from.lastModified());
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-}
-
diff --git a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
index 5c3d2e9..f95bbf7 100644
--- a/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
+++ b/src/org/torproject/ernie/db/relaydescs/ArchiveWriter.java
@@ -10,6 +10,7 @@ import java.io.FileReader;
 import java.io.IOException;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.util.Arrays;
 import java.util.Date;
 import java.util.List;
 import java.util.SortedSet;
@@ -25,7 +26,6 @@ import org.torproject.descriptor.DescriptorParser;
 import org.torproject.descriptor.DescriptorSourceFactory;
 import org.torproject.descriptor.impl.DescriptorParseException;
 import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.RsyncDataProvider;
 
 public class ArchiveWriter extends Thread {
 
@@ -94,51 +94,36 @@ public class ArchiveWriter extends Thread {
     // Write output to disk that only depends on relay descriptors
     this.dumpStats();
 
-    /* Copy relay descriptors from the last 3 days to the rsync
-     * directory. */
-    RsyncDataProvider rsdp = new RsyncDataProvider();
-    rsdp.copyFiles(
-        new File(outputDirectory, "consensus"),
-        "relay-descriptors/consensuses");
-    rsdp.copyFiles(
-        new File(outputDirectory, "vote"),
-        "relay-descriptors/votes");
-    rsdp.copyFiles(
-        new File(outputDirectory, "server-descriptor"),
-        "relay-descriptors/server-descriptors");
-    rsdp.copyFiles(
-        new File(outputDirectory, "extra-info"),
-        "relay-descriptors/extra-infos");
+    this.cleanUpRsyncDirectory();
   }
 
   private boolean store(byte[] typeAnnotation, byte[] data,
-      String filename) {
+      File[] outputFiles) {
     try {
-      File file = new File(filename);
-      if (!file.exists()) {
-        this.logger.finer("Storing " + filename);
-        if (this.descriptorParser.parseDescriptors(data, filename).size()
-            != 1) {
-          this.logger.info("Relay descriptor file " + filename
-              + " doesn't contain exactly one descriptor.  Not storing.");
-          return false;
-        }
-        file.getParentFile().mkdirs();
+      this.logger.finer("Storing " + outputFiles[0]);
+      if (this.descriptorParser.parseDescriptors(data,
+          outputFiles[0].getName()).size() != 1) {
+        this.logger.info("Relay descriptor file " + outputFiles[0]
+            + " doesn't contain exactly one descriptor.  Not storing.");
+        return false;
+      }
+      for (File outputFile : outputFiles) {
+        outputFile.getParentFile().mkdirs();
         BufferedOutputStream bos = new BufferedOutputStream(
-            new FileOutputStream(file));
+            new FileOutputStream(outputFile));
         if (data.length > 0 && data[0] != '@') {
           bos.write(typeAnnotation, 0, typeAnnotation.length);
         }
         bos.write(data, 0, data.length);
         bos.close();
-        return true;
       }
+      return true;
     } catch (DescriptorParseException e) {
       this.logger.log(Level.WARNING, "Could not parse relay descriptor "
-          + filename + " before storing it to disk.  Skipping.", e);
+          + outputFiles[0] + " before storing it to disk.  Skipping.", e);
     } catch (IOException e) {
       this.logger.log(Level.WARNING, "Could not store relay descriptor "
-          + filename, e);
+          + outputFiles[0], e);
     }
     return false;
   }
@@ -149,9 +134,12 @@ public class ArchiveWriter extends Thread {
     SimpleDateFormat printFormat = new SimpleDateFormat(
         "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
     printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    String filename = outputDirectory + "/consensus/"
-        + printFormat.format(new Date(validAfter)) + "-consensus";
-    if (this.store(CONSENSUS_ANNOTATION, data, filename)) {
+    File tarballFile = new File(this.outputDirectory + "/consensus/"
+        + printFormat.format(new Date(validAfter)) + "-consensus");
+    File rsyncFile = new File("rsync/relay-descriptors/consensuses/"
+        + tarballFile.getName());
+    File[] outputFiles = new File[] { tarballFile, rsyncFile };
+    if (this.store(CONSENSUS_ANNOTATION, data, outputFiles)) {
       this.storedConsensuses++;
     }
   }
@@ -163,10 +151,13 @@ public class ArchiveWriter extends Thread {
     SimpleDateFormat printFormat = new SimpleDateFormat(
         "yyyy/MM/dd/yyyy-MM-dd-HH-mm-ss");
     printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    String filename = outputDirectory + "/vote/"
+    File tarballFile = new File(this.outputDirectory + "/vote/"
         + printFormat.format(new Date(validAfter)) + "-vote-"
-        + fingerprint + "-" + digest;
-    if (this.store(VOTE_ANNOTATION, data, filename)) {
+        + fingerprint + "-" + digest);
+    File rsyncFile = new File("rsync/relay-descriptors/votes/"
+        + tarballFile.getName());
+    File[] outputFiles = new File[] { tarballFile, rsyncFile };
+    if (this.store(VOTE_ANNOTATION, data, outputFiles)) {
       this.storedVotes++;
     }
   }
@@ -178,9 +169,10 @@ public class ArchiveWriter extends Thread {
     SimpleDateFormat printFormat = new SimpleDateFormat(
         "yyyy-MM-dd-HH-mm-ss");
     printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    String filename = outputDirectory + "/certs/"
-        + fingerprint + "-" + printFormat.format(new Date(published));
-    if (this.store(CERTIFICATE_ANNOTATION, data, filename)) {
+    File tarballFile = new File(this.outputDirectory + "/certs/"
+        + fingerprint + "-" + printFormat.format(new Date(published)));
+    File[] outputFiles = new File[] { tarballFile };
+    if (this.store(CERTIFICATE_ANNOTATION, data, outputFiles)) {
       this.storedCerts++;
     }
   }
@@ -191,11 +183,14 @@ public class ArchiveWriter extends Thread {
       long published) {
     SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
     printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    String filename = outputDirectory + "/server-descriptor/"
-        + printFormat.format(new Date(published))
+    File tarballFile = new File(this.outputDirectory
+        + "/server-descriptor/" + printFormat.format(new Date(published))
         + digest.substring(0, 1) + "/" + digest.substring(1, 2) + "/"
-        + digest;
-    if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, filename)) {
+        + digest);
+    File rsyncFile = new File(
+        "rsync/relay-descriptors/server-descriptors/" + digest);
+    File[] outputFiles = new File[] { tarballFile, rsyncFile };
+    if (this.store(SERVER_DESCRIPTOR_ANNOTATION, data, outputFiles)) {
       this.storedServerDescriptors++;
     }
   }
@@ -206,12 +201,15 @@ public class ArchiveWriter extends Thread {
       String extraInfoDigest, long published) {
     SimpleDateFormat descriptorFormat = new SimpleDateFormat("yyyy/MM/");
     descriptorFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-    String filename = outputDirectory + "/extra-info/"
+    File tarballFile = new File(this.outputDirectory + "/extra-info/"
         + descriptorFormat.format(new Date(published))
         + extraInfoDigest.substring(0, 1) + "/"
         + extraInfoDigest.substring(1, 2) + "/"
-        + extraInfoDigest;
-    if (this.store(EXTRA_INFO_ANNOTATION, data, filename)) {
+        + extraInfoDigest);
+    File rsyncFile = new File("rsync/relay-descriptors/extra-infos/"
+        + extraInfoDigest);
+    File[] outputFiles = new File[] { tarballFile, rsyncFile };
+    if (this.store(EXTRA_INFO_ANNOTATION, data, outputFiles)) {
       this.storedExtraInfoDescriptors++;
     }
   }
@@ -404,4 +402,21 @@ public class ArchiveWriter extends Thread {
           e);
     }
   }
+
+  /* Delete all files from the rsync directory that have not been modified
+   * in the last three days. */
+  public void cleanUpRsyncDirectory() {
+    long cutOffMillis = System.currentTimeMillis()
+        - 3L * 24L * 60L * 60L * 1000L;
+    Stack<File> allFiles = new Stack<File>();
+    allFiles.add(new File("rsync/relay-descriptors"));
+    while (!allFiles.isEmpty()) {
+      File file = allFiles.pop();
+      if (file.isDirectory()) {
+        allFiles.addAll(Arrays.asList(file.listFiles()));
+      } else if (file.lastModified() < cutOffMillis) {
+        file.delete();
+      }
+    }
+  }
 }
diff --git a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
index 143010c..19b4a54 100644
--- a/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
+++ b/src/org/torproject/ernie/db/torperf/TorperfDownloader.java
@@ -12,16 +12,17 @@ import java.io.InputStreamReader;
 import java.net.HttpURLConnection;
 import java.net.URL;
 import java.text.SimpleDateFormat;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
+import java.util.Stack;
 import java.util.TimeZone;
 import java.util.TreeMap;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
 import org.torproject.ernie.db.main.Configuration;
-import org.torproject.ernie.db.main.RsyncDataProvider;
 
 /* Download possibly truncated Torperf .data and .extradata files from
  * configured sources, append them to the files we already have, and merge
@@ -68,9 +69,7 @@ public class TorperfDownloader extends Thread {
     }
     this.writeLastMergedTimestamps();
 
-    /* Copy Torperf files from the last 3 days to the rsync directory. */
-    RsyncDataProvider rdp = new RsyncDataProvider();
-    rdp.copyFiles(torperfOutputDirectory, "torperf");
+    this.cleanUpRsyncDirectory();
   }
 
   private File torperfLastMergedFile =
@@ -572,22 +571,43 @@ public class TorperfDownloader extends Thread {
         this.cachedStartDate == null || this.cachedTpfLines == null) {
       return;
     }
-    File tpfFile = new File(torperfOutputDirectory,
+    File tarballFile = new File(torperfOutputDirectory,
         this.cachedStartDate.replaceAll("-", "/")
         + "/" + this.cachedSource + "-"
         + String.valueOf(this.cachedFileSize) + "-"
         + this.cachedStartDate + ".tpf");
-    tpfFile.getParentFile().mkdirs();
-    BufferedWriter bw = new BufferedWriter(new FileWriter(tpfFile));
-    bw.write("@type torperf 1.0\n");
-    for (String line : this.cachedTpfLines.values()) {
-      bw.write(line + "\n");
+    File rsyncFile = new File("rsync/torperf/" + tarballFile.getName());
+    File[] outputFiles = new File[] { tarballFile, rsyncFile };
+    for (File outputFile : outputFiles) {
+      outputFile.getParentFile().mkdirs();
+      BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile));
+      bw.write("@type torperf 1.0\n");
+      for (String line : this.cachedTpfLines.values()) {
+        bw.write(line + "\n");
+      }
+      bw.close();
     }
-    bw.close();
     this.cachedSource = null;
     this.cachedFileSize = 0;
     this.cachedStartDate = null;
     this.cachedTpfLines = null;
   }
+
+  /* Delete all files from the rsync directory that have not been modified
+   * in the last three days. */
+  public void cleanUpRsyncDirectory() {
+    long cutOffMillis = System.currentTimeMillis()
+        - 3L * 24L * 60L * 60L * 1000L;
+    Stack<File> allFiles = new Stack<File>();
+    allFiles.add(new File("rsync/torperf"));
+    while (!allFiles.isEmpty()) {
+      File file = allFiles.pop();
+      if (file.isDirectory()) {
+        allFiles.addAll(Arrays.asList(file.listFiles()));
+      } else if (file.lastModified() < cutOffMillis) {
+        file.delete();
+      }
+    }
+  }
 }
 





More information about the tor-commits mailing list