commit 32992eea202770065b4cb61d4dc39bbee1b87628
Author: Karsten Loesing <karsten.loesing@gmx.net>
Date:   Wed May 17 15:58:22 2017 +0200
Switch to metrics-lib's DescriptorCollector.
Implements #22287.
---
 CHANGELOG.md                                       |   2 +
 .../onionoo/updater/DescriptorDownloader.java      | 184 ---------------------
 .../onionoo/updater/DescriptorSource.java          |  41 ++---
 3 files changed, 13 insertions(+), 214 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md index bb38dd0..e028dd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ - Remove optional fields "countries", "transports", and "versions" from clients objects which were still labeled as beta. - Add new "version" parameter to filter for Tor version. + - Switch from our own CollecTor downloader to metrics-lib's + DescriptorCollector.
# Changes in version 4.0-1.3.0 - 2017-08-04 diff --git a/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java b/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java deleted file mode 100644 index 1e41f25..0000000 --- a/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java +++ /dev/null @@ -1,184 +0,0 @@ -/* Copyright 2016--2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.onionoo.updater; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.SortedSet; -import java.util.TreeSet; -import java.util.zip.GZIPInputStream; - -class DescriptorDownloader { - - private static Logger log = LoggerFactory.getLogger( - DescriptorDownloader.class); - - private final String protocolHostNameResourcePrefix = - "https://collector.torproject.org/recent/"; - - private String directory; - - private final File inDir = new File("in/recent"); - - public DescriptorDownloader(DescriptorType descriptorType) { - switch (descriptorType) { - case RELAY_CONSENSUSES: - this.directory = "relay-descriptors/consensuses/"; - break; - case RELAY_SERVER_DESCRIPTORS: - this.directory = "relay-descriptors/server-descriptors/"; - break; - case RELAY_EXTRA_INFOS: - this.directory = "relay-descriptors/extra-infos/"; - break; - case EXIT_LISTS: - this.directory = "exit-lists/"; - break; - case BRIDGE_STATUSES: - this.directory = "bridge-descriptors/statuses/"; - break; - case BRIDGE_SERVER_DESCRIPTORS: - this.directory = "bridge-descriptors/server-descriptors/"; - break; - case BRIDGE_EXTRA_INFOS: - this.directory = "bridge-descriptors/extra-infos/"; - break; - default: - 
log.error("Unknown descriptor type."); - return; - } - } - - private SortedSet<String> localFiles = new TreeSet<>(); - - public int statLocalFiles() { - File localDirectory = new File(this.inDir, this.directory); - if (localDirectory.exists()) { - for (File file : localDirectory.listFiles()) { - this.localFiles.add(file.getName()); - } - } - return this.localFiles.size(); - } - - private SortedSet<String> remoteFiles = new TreeSet<>(); - - public int fetchRemoteDirectory() { - String directoryUrl = this.protocolHostNameResourcePrefix - + this.directory; - try { - URL url = new URL(directoryUrl); - HttpURLConnection huc = (HttpURLConnection) url.openConnection(); - huc.setRequestMethod("GET"); - huc.connect(); - if (huc.getResponseCode() != 200) { - log.error("Could not fetch " + directoryUrl - + ": " + huc.getResponseCode() + " " - + huc.getResponseMessage() + ". Skipping."); - return 0; - } - try (BufferedReader br = new BufferedReader(new InputStreamReader( - huc.getInputStream()))) { - String line; - while ((line = br.readLine()) != null) { - if (!line.trim().startsWith("<tr>") - || !line.contains("<a href="")) { - continue; - } - String linePart = line.substring( - line.indexOf("<a href="") + "<a href="".length()); - if (!linePart.contains(""")) { - continue; - } - linePart = linePart.substring(0, linePart.indexOf(""")); - if (linePart.endsWith("/")) { - continue; - } - this.remoteFiles.add(linePart); - } - } - } catch (IOException e) { - log.error("Could not fetch or parse " + directoryUrl - + ". Skipping. 
Reason: " + e.getMessage()); - } - return this.remoteFiles.size(); - } - - public int fetchRemoteFiles() { - int fetchedFiles = 0; - for (String remoteFile : this.remoteFiles) { - if (this.localFiles.contains(remoteFile)) { - continue; - } - String fileUrl = this.protocolHostNameResourcePrefix - + this.directory + remoteFile; - File localTempFile = new File(this.inDir, this.directory - + remoteFile + ".tmp"); - File localFile = new File(this.inDir, this.directory + remoteFile); - try { - localFile.getParentFile().mkdirs(); - URL url = new URL(fileUrl); - HttpURLConnection huc = (HttpURLConnection) url.openConnection(); - huc.setRequestMethod("GET"); - huc.addRequestProperty("Accept-Encoding", "gzip"); - huc.connect(); - if (huc.getResponseCode() != 200) { - log.error("Could not fetch \n\t" + fileUrl - + ": " + huc.getResponseCode() + " " - + huc.getResponseMessage() + ". Skipping."); - continue; - } - InputStream is; - if (huc.getContentEncoding() != null - && huc.getContentEncoding().equalsIgnoreCase("gzip")) { - is = new GZIPInputStream(huc.getInputStream()); - } else { - is = huc.getInputStream(); - } - try (BufferedInputStream bis = new BufferedInputStream(is); - BufferedOutputStream bos = new BufferedOutputStream( - new FileOutputStream(localTempFile))) { - int len; - byte[] data = new byte[1024]; - while ((len = bis.read(data, 0, 1024)) >= 0) { - bos.write(data, 0, len); - } - } - localTempFile.renameTo(localFile); - long lastModified = huc.getHeaderFieldDate("Last-Modified", -1L); - if (lastModified >= 0) { - localFile.setLastModified(lastModified); - } - fetchedFiles++; - } catch (IOException e) { - log.error("Could not fetch or store \n\t" + fileUrl - + ". 
Skipping.\n\tReason: " + e.getMessage()); - } - } - return fetchedFiles; - } - - public int deleteOldLocalFiles() { - int deletedFiles = 0; - for (String localFile : this.localFiles) { - if (!this.remoteFiles.contains(localFile)) { - new File(this.inDir, this.directory + localFile).delete(); - deletedFiles++; - } - } - return deletedFiles; - } -} - diff --git a/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java b/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java index 45b40ee..d32727f 100644 --- a/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java +++ b/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java @@ -4,6 +4,7 @@ package org.torproject.onionoo.updater;
import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorCollector; import org.torproject.onionoo.util.FormattingUtils;
import org.slf4j.Logger; @@ -22,9 +23,11 @@ public class DescriptorSource { private static final Logger log = LoggerFactory.getLogger( DescriptorSource.class);
- private final File inRecentDir = new File("in/recent"); + private final File inDir = new File("in");
- private final File inArchiveDir = new File("in/archive"); + private final File inRecentDir = new File(inDir, "recent"); + + private final File inArchiveDir = new File(inDir, "archive");
private final File statusDir = new File("status");
@@ -65,28 +68,14 @@ public class DescriptorSource {
   /** Downloads descriptors from CollecTor. */
   public void downloadDescriptors() {
+    List<String> remoteDirectories = new ArrayList<>();
     for (DescriptorType descriptorType : DescriptorType.values()) {
-      log.info("Loading: " + descriptorType);
-      this.downloadDescriptors(descriptorType);
+      remoteDirectories.add("/recent/" + descriptorType.getDir());
     }
-  }
-
-  private int localFilesBefore = 0;
-
-  private int foundRemoteFiles = 0;
-
-  private int downloadedFiles = 0;
-
-  private int deletedLocalFiles = 0;
-
-  private void downloadDescriptors(DescriptorType descriptorType) {
-    DescriptorDownloader descriptorDownloader =
-        new DescriptorDownloader(descriptorType);
-    this.localFilesBefore += descriptorDownloader.statLocalFiles();
-    this.foundRemoteFiles +=
-        descriptorDownloader.fetchRemoteDirectory();
-    this.downloadedFiles += descriptorDownloader.fetchRemoteFiles();
-    this.deletedLocalFiles += descriptorDownloader.deleteOldLocalFiles();
+    DescriptorCollector dc = org.torproject.descriptor.DescriptorSourceFactory
+        .createDescriptorCollector();
+    dc.collectDescriptors("https://collector.torproject.org",
+        remoteDirectories.toArray(new String[0]), 0L, inDir, true);
   }
/** Reads archived and recent descriptors from disk and feeds them into @@ -206,14 +195,6 @@ public class DescriptorSource { * descriptors during the current execution. */ public String getStatsString() { StringBuilder sb = new StringBuilder(); - sb.append(" ").append(this.localFilesBefore) - .append(" recent descriptor files ").append("found locally\n"); - sb.append(" ").append(this.foundRemoteFiles) - .append(" recent descriptor files ").append("found remotely\n"); - sb.append(" ").append(this.downloadedFiles) - .append(" recent descriptor files ").append("downloaded from remote\n"); - sb.append(" ").append(this.deletedLocalFiles) - .append(" recent descriptor ").append("files deleted locally\n"); sb.append(" ").append(this.descriptorQueues.size()) .append(" descriptor ").append("queues created for recent descriptors\n"); int historySizeBefore = 0;
tor-commits@lists.torproject.org