commit 0a324223d90c2aac1c5198a7f8c9c00548ee1f0b Author: iwakeh <iwakeh@torproject.org> Date: Thu Apr 27 09:49:30 2017 +0000
Rename Torperf class and package to Onionperf. --- src/main/java/org/torproject/collector/Main.java | 4 +- .../collector/onionperf/OnionperfDownloader.java | 304 ++++++++++++++++++++ .../collector/torperf/TorperfDownloader.java | 307 --------------------- 3 files changed, 306 insertions(+), 309 deletions(-)
diff --git a/src/main/java/org/torproject/collector/Main.java b/src/main/java/org/torproject/collector/Main.java index 9ce709a..95da01a 100644 --- a/src/main/java/org/torproject/collector/Main.java +++ b/src/main/java/org/torproject/collector/Main.java @@ -12,8 +12,8 @@ import org.torproject.collector.cron.Scheduler; import org.torproject.collector.cron.ShutdownHook; import org.torproject.collector.exitlists.ExitListDownloader; import org.torproject.collector.index.CreateIndexJson; +import org.torproject.collector.onionperf.OnionperfDownloader; import org.torproject.collector.relaydescs.ArchiveWriter; -import org.torproject.collector.torperf.TorperfDownloader;
import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -50,7 +50,7 @@ public class Main { collecTorMains.put(Key.ExitlistsActivated, ExitListDownloader.class); collecTorMains.put(Key.UpdateindexActivated, CreateIndexJson.class); collecTorMains.put(Key.RelaydescsActivated, ArchiveWriter.class); - collecTorMains.put(Key.TorperfActivated, TorperfDownloader.class); + collecTorMains.put(Key.TorperfActivated, OnionperfDownloader.class); }
private static Configuration conf = new Configuration(); diff --git a/src/main/java/org/torproject/collector/onionperf/OnionperfDownloader.java b/src/main/java/org/torproject/collector/onionperf/OnionperfDownloader.java new file mode 100644 index 0000000..7453234 --- /dev/null +++ b/src/main/java/org/torproject/collector/onionperf/OnionperfDownloader.java @@ -0,0 +1,304 @@ +/* Copyright 2012-2017 The Tor Project + * See LICENSE for licensing information */ + +package org.torproject.collector.onionperf; + +import org.torproject.collector.conf.Configuration; +import org.torproject.collector.conf.ConfigurationException; +import org.torproject.collector.conf.Key; +import org.torproject.collector.cron.CollecTorMain; +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorParseException; +import org.torproject.descriptor.DescriptorParser; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.TorperfResult; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.SortedSet; +import java.util.Stack; +import java.util.TimeZone; +import java.util.TreeSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** Download .tpf files from OnionPerf hosts. 
*/ +public class OnionperfDownloader extends CollecTorMain { + + private static final Logger logger = LoggerFactory.getLogger( + OnionperfDownloader.class); + + private static final String TORPERF = "torperf"; + + public OnionperfDownloader(Configuration config) { + super(config); + } + + /** File containing the download history, which is necessary, because + * OnionPerf does not delete older .tpf files, but which enables us to do + * so. */ + private File onionPerfDownloadedFile; + + /** Full URLs of .tpf files downloaded in the current or in past + * executions. */ + private SortedSet<String> downloadedTpfFiles = new TreeSet<>(); + + /** Base URLs of configured OnionPerf hosts. */ + private URL[] onionPerfHosts = null; + + /** Directory for storing archived .tpf files. */ + private File archiveDirectory = null; + + /** Directory for storing recent .tpf files. */ + private File recentDirectory = null; + + @Override + public String module() { + return TORPERF; + } + + @Override + protected String syncMarker() { + return "TorperfFiles"; + } + + @Override + protected void startProcessing() throws ConfigurationException { + this.onionPerfDownloadedFile = + new File(config.getPath(Key.StatsPath).toFile(), + "onionperf-downloaded"); + this.onionPerfHosts = config.getUrlArray(Key.OnionPerfHosts); + this.readDownloadedOnionPerfTpfFiles(); + this.archiveDirectory = new File(config.getPath(Key.OutputPath).toFile(), + TORPERF); + this.recentDirectory = new File(config.getPath(Key.RecentPath).toFile(), + TORPERF); + for (URL baseUrl : this.onionPerfHosts) { + this.downloadFromOnionPerfHost(baseUrl); + } + this.writeDownloadedOnionPerfTpfFiles(); + this.cleanUpRsyncDirectory(); + } + + private void readDownloadedOnionPerfTpfFiles() { + if (!this.onionPerfDownloadedFile.exists()) { + return; + } + try (BufferedReader br = new BufferedReader(new FileReader( + this.onionPerfDownloadedFile))) { + String line; + while ((line = br.readLine()) != null) { + 
this.downloadedTpfFiles.add(line); + } + } catch (IOException e) { + logger.info("Unable to read download history file '" + + this.onionPerfDownloadedFile.getAbsolutePath() + "'. Ignoring " + + "download history and downloading all available .tpf files."); + this.downloadedTpfFiles.clear(); + } + } + + private void downloadFromOnionPerfHost(URL baseUrl) { + logger.info("Downloading from OnionPerf host {}", baseUrl); + List<String> tpfFileNames = + this.downloadOnionPerfDirectoryListing(baseUrl); + String source = baseUrl.getHost().split("\\.")[0]; + for (String tpfFileName : tpfFileNames) { + this.downloadAndParseOnionPerfTpfFile(baseUrl, source, tpfFileName); + } + } + + /** Pattern for links contained in directory listings. */ + private static final Pattern TPF_FILE_URL_PATTERN = + Pattern.compile(".*<a href=\"([^\"]+\\.tpf)\">.*"); + + private List<String> downloadOnionPerfDirectoryListing(URL baseUrl) { + List<String> tpfFileUrls = new ArrayList<>(); + try (BufferedReader br = new BufferedReader(new InputStreamReader( + baseUrl.openStream()))) { + String line; + while ((line = br.readLine()) != null) { + Matcher matcher = TPF_FILE_URL_PATTERN.matcher(line); + if (matcher.matches() && !matcher.group(1).startsWith("/")) { + tpfFileUrls.add(matcher.group(1)); + } + } + } catch (IOException e) { + logger.warn("Unable to download directory listing from '{}'. Skipping " + + "this OnionPerf host.", baseUrl); + tpfFileUrls.clear(); + } + return tpfFileUrls; + } + + private static final DateFormat DATE_FORMAT; + + static { + DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd"); + DATE_FORMAT.setLenient(false); + DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + private void downloadAndParseOnionPerfTpfFile(URL baseUrl, String source, + String tpfFileName) { + URL tpfFileUrl; + try { + tpfFileUrl = new URL(baseUrl, tpfFileName); + } catch (MalformedURLException e1) { + logger.warn("Unable to put together base URL '{}' and .tpf file path " + + "'{}' to a URL. 
Skipping.", baseUrl, tpfFileName); + return; + } + + /* Skip if we successfully downloaded this file before. */ + if (this.downloadedTpfFiles.contains(tpfFileUrl.toString())) { + return; + } + + /* Verify file name before downloading: source-filesize-yyyy-MM-dd.tpf */ + String[] tpfFileNameParts = tpfFileName.split("-"); + if (!tpfFileName.startsWith(source + "-") + || tpfFileName.length() < "s-f-yyyy-MM-dd".length() + || tpfFileNameParts.length < 5) { + logger.warn("Invalid .tpf file name '{}{}'. Skipping.", baseUrl, + tpfFileName); + return; + } + int fileSize = 0; + String date = null; + try { + fileSize = Integer.parseInt( + tpfFileNameParts[tpfFileNameParts.length - 4]); + date = tpfFileName.substring(tpfFileName.length() - 14, + tpfFileName.length() - 4); + DATE_FORMAT.parse(date); + } catch (NumberFormatException | ParseException e) { + logger.warn("Invalid .tpf file name '{}{}'. Skipping.", baseUrl, + tpfFileName, e); + return; + } + + /* Download file contents to temporary file. */ + File tempFile = new File(this.recentDirectory, "." + tpfFileName); + tempFile.getParentFile().mkdirs(); + try (InputStream is = new URL(baseUrl + tpfFileName).openStream()) { + Files.copy(is, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + logger.warn("Unable to download '{}{}' to temporary file '{}'. " + + "Skipping.", baseUrl, tpfFileName, tempFile, e); + return; + } + + /* Validate contained descriptors. */ + DescriptorParser descriptorParser = + DescriptorSourceFactory.createDescriptorParser(); + List<Descriptor> descriptors; + try { + descriptors = descriptorParser.parseDescriptors( + Files.readAllBytes(tempFile.toPath()), tpfFileName); + } catch (IOException | DescriptorParseException e) { + logger.warn("OnionPerf file '{}{}' could not be parsed. 
" + + "Skipping.", baseUrl, tpfFileName, e); + tempFile.delete(); + return; + } + String message = null; + for (Descriptor descriptor : descriptors) { + if (!(descriptor instanceof TorperfResult)) { + message = "File contains descriptors other than Torperf results."; + break; + } + TorperfResult torperf = (TorperfResult) descriptor; + if (!source.equals(torperf.getSource())) { + message = "File contains Torperf result from another source."; + break; + } + if (fileSize != torperf.getFileSize()) { + message = "File contains Torperf result from another file size."; + break; + } + if (!date.equals(DATE_FORMAT.format(torperf.getStartMillis()))) { + message = "File contains Torperf result from another date."; + break; + } + } + if (null != message) { + logger.warn("OnionPerf file '{}{}' was found to be invalid: {}. " + + "Skipping.", baseUrl, tpfFileName, message); + tempFile.delete(); + return; + } + + /* Copy/move files in place. */ + File archiveFile = new File(this.archiveDirectory, + date.replaceAll("-", "/") + "/" + tpfFileName); + archiveFile.getParentFile().mkdirs(); + try { + Files.copy(tempFile.toPath(), archiveFile.toPath(), + StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + logger.warn("Unable to copy OnionPerf file {} to {}. Skipping.", + tempFile, archiveFile, e); + tempFile.delete(); + return; + } + File recentFile = new File(this.recentDirectory, tpfFileName); + tempFile.renameTo(recentFile); + + /* Add to download history to avoid downloading it again. */ + this.downloadedTpfFiles.add(baseUrl + tpfFileName); + } + + private void writeDownloadedOnionPerfTpfFiles() { + this.onionPerfDownloadedFile.getParentFile().mkdirs(); + try (BufferedWriter bw = new BufferedWriter(new FileWriter( + this.onionPerfDownloadedFile))) { + for (String line : this.downloadedTpfFiles) { + bw.write(line); + bw.newLine(); + } + } catch (IOException e) { + logger.warn("Unable to write download history file '{}'. 
This may " + + "result in ignoring history and downloading all available .tpf " + + "files in the next execution.", + this.onionPerfDownloadedFile.getAbsolutePath(), e); + } + } + + /** Delete all files from the rsync directory that have not been modified + * in the last three days. */ + public void cleanUpRsyncDirectory() throws ConfigurationException { + long cutOffMillis = System.currentTimeMillis() + - 3L * 24L * 60L * 60L * 1000L; + Stack<File> allFiles = new Stack<>(); + allFiles.add(new File(config.getPath(Key.RecentPath).toFile(), TORPERF)); + while (!allFiles.isEmpty()) { + File file = allFiles.pop(); + if (file.isDirectory()) { + allFiles.addAll(Arrays.asList(file.listFiles())); + } else if (file.lastModified() < cutOffMillis) { + file.delete(); + } + } + } +} + diff --git a/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java b/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java deleted file mode 100644 index b4f91fd..0000000 --- a/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java +++ /dev/null @@ -1,307 +0,0 @@ -/* Copyright 2012-2017 The Tor Project - * See LICENSE for licensing information */ - -package org.torproject.collector.torperf; - -import org.torproject.collector.conf.Configuration; -import org.torproject.collector.conf.ConfigurationException; -import org.torproject.collector.conf.Key; -import org.torproject.collector.cron.CollecTorMain; -import org.torproject.descriptor.Descriptor; -import org.torproject.descriptor.DescriptorParseException; -import org.torproject.descriptor.DescriptorParser; -import org.torproject.descriptor.DescriptorSourceFactory; -import org.torproject.descriptor.TorperfResult; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStream; -import 
java.io.InputStreamReader; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; -import java.text.DateFormat; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.SortedSet; -import java.util.Stack; -import java.util.TimeZone; -import java.util.TreeSet; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/* Download possibly truncated Torperf .data and .extradata files from - * configured sources, append them to the files we already have, and merge - * the two files into the .tpf format; - * also download .tpf files from OnionPerf hosts. */ -public class TorperfDownloader extends CollecTorMain { - - private static final Logger logger = LoggerFactory.getLogger( - TorperfDownloader.class); - - private static final String TORPERF = "torperf"; - - public TorperfDownloader(Configuration config) { - super(config); - } - - /** File containing the download history, which is necessary, because - * OnionPerf does not delete older .tpf files, but which enables us to do - * so. */ - private File onionPerfDownloadedFile; - - /** Full URLs of .tpf files downloaded in the current or in past - * executions. */ - private SortedSet<String> downloadedTpfFiles = new TreeSet<>(); - - /** Base URLs of configured OnionPerf hosts. */ - private URL[] onionPerfHosts = null; - - /** Directory for storing archived .tpf files. */ - private File archiveDirectory = null; - - /** Directory for storing recent .tpf files. 
*/ - private File recentDirectory = null; - - @Override - public String module() { - return TORPERF; - } - - @Override - protected String syncMarker() { - return "TorperfFiles"; - } - - @Override - protected void startProcessing() throws ConfigurationException { - this.onionPerfDownloadedFile = - new File(config.getPath(Key.StatsPath).toFile(), - "onionperf-downloaded"); - this.onionPerfHosts = config.getUrlArray(Key.OnionPerfHosts); - this.readDownloadedOnionPerfTpfFiles(); - this.archiveDirectory = new File(config.getPath(Key.OutputPath).toFile(), - TORPERF); - this.recentDirectory = new File(config.getPath(Key.RecentPath).toFile(), - TORPERF); - for (URL baseUrl : this.onionPerfHosts) { - this.downloadFromOnionPerfHost(baseUrl); - } - this.writeDownloadedOnionPerfTpfFiles(); - this.cleanUpRsyncDirectory(); - } - - private void readDownloadedOnionPerfTpfFiles() { - if (!this.onionPerfDownloadedFile.exists()) { - return; - } - try (BufferedReader br = new BufferedReader(new FileReader( - this.onionPerfDownloadedFile))) { - String line; - while ((line = br.readLine()) != null) { - this.downloadedTpfFiles.add(line); - } - } catch (IOException e) { - logger.info("Unable to read download history file '" - + this.onionPerfDownloadedFile.getAbsolutePath() + "'. Ignoring " - + "download history and downloading all available .tpf files."); - this.downloadedTpfFiles.clear(); - } - } - - private void downloadFromOnionPerfHost(URL baseUrl) { - logger.info("Downloading from OnionPerf host {}", baseUrl); - List<String> tpfFileNames = - this.downloadOnionPerfDirectoryListing(baseUrl); - String source = baseUrl.getHost().split("\\.")[0]; - for (String tpfFileName : tpfFileNames) { - this.downloadAndParseOnionPerfTpfFile(baseUrl, source, tpfFileName); - } - } - - /** Pattern for links contained in directory listings. 
*/ - private static final Pattern TPF_FILE_URL_PATTERN = - Pattern.compile(".*<a href=\"([^\"]+\\.tpf)\">.*"); - - private List<String> downloadOnionPerfDirectoryListing(URL baseUrl) { - List<String> tpfFileUrls = new ArrayList<>(); - try (BufferedReader br = new BufferedReader(new InputStreamReader( - baseUrl.openStream()))) { - String line; - while ((line = br.readLine()) != null) { - Matcher matcher = TPF_FILE_URL_PATTERN.matcher(line); - if (matcher.matches() && !matcher.group(1).startsWith("/")) { - tpfFileUrls.add(matcher.group(1)); - } - } - } catch (IOException e) { - logger.warn("Unable to download directory listing from '{}'. Skipping " - + "this OnionPerf host.", baseUrl); - tpfFileUrls.clear(); - } - return tpfFileUrls; - } - - private static final DateFormat DATE_FORMAT; - - static { - DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd"); - DATE_FORMAT.setLenient(false); - DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); - } - - private void downloadAndParseOnionPerfTpfFile(URL baseUrl, String source, - String tpfFileName) { - URL tpfFileUrl; - try { - tpfFileUrl = new URL(baseUrl, tpfFileName); - } catch (MalformedURLException e1) { - logger.warn("Unable to put together base URL '{}' and .tpf file path " - + "'{}' to a URL. Skipping.", baseUrl, tpfFileName); - return; - } - - /* Skip if we successfully downloaded this file before. */ - if (this.downloadedTpfFiles.contains(tpfFileUrl.toString())) { - return; - } - - /* Verify file name before downloading: source-filesize-yyyy-MM-dd.tpf */ - String[] tpfFileNameParts = tpfFileName.split("-"); - if (!tpfFileName.startsWith(source + "-") - || tpfFileName.length() < "s-f-yyyy-MM-dd".length() - || tpfFileNameParts.length < 5) { - logger.warn("Invalid .tpf file name '{}{}'. 
Skipping.", baseUrl, - tpfFileName); - return; - } - int fileSize = 0; - String date = null; - try { - fileSize = Integer.parseInt( - tpfFileNameParts[tpfFileNameParts.length - 4]); - date = tpfFileName.substring(tpfFileName.length() - 14, - tpfFileName.length() - 4); - DATE_FORMAT.parse(date); - } catch (NumberFormatException | ParseException e) { - logger.warn("Invalid .tpf file name '{}{}'. Skipping.", baseUrl, - tpfFileName, e); - return; - } - - /* Download file contents to temporary file. */ - File tempFile = new File(this.recentDirectory, "." + tpfFileName); - tempFile.getParentFile().mkdirs(); - try (InputStream is = new URL(baseUrl + tpfFileName).openStream()) { - Files.copy(is, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - } catch (IOException e) { - logger.warn("Unable to download '{}{}' to temporary file '{}'. " - + "Skipping.", baseUrl, tpfFileName, tempFile, e); - return; - } - - /* Validate contained descriptors. */ - DescriptorParser descriptorParser = - DescriptorSourceFactory.createDescriptorParser(); - List<Descriptor> descriptors; - try { - descriptors = descriptorParser.parseDescriptors( - Files.readAllBytes(tempFile.toPath()), tpfFileName); - } catch (IOException | DescriptorParseException e) { - logger.warn("OnionPerf file '{}{}' could not be parsed. 
" - + "Skipping.", baseUrl, tpfFileName, e); - tempFile.delete(); - return; - } - String message = null; - for (Descriptor descriptor : descriptors) { - if (!(descriptor instanceof TorperfResult)) { - message = "File contains descriptors other than Torperf results."; - break; - } - TorperfResult torperf = (TorperfResult) descriptor; - if (!source.equals(torperf.getSource())) { - message = "File contains Torperf result from another source."; - break; - } - if (fileSize != torperf.getFileSize()) { - message = "File contains Torperf result from another file size."; - break; - } - if (!date.equals(DATE_FORMAT.format(torperf.getStartMillis()))) { - message = "File contains Torperf result from another date."; - break; - } - } - if (null != message) { - logger.warn("OnionPerf file '{}{}' was found to be invalid: {}. " - + "Skipping.", baseUrl, tpfFileName, message); - tempFile.delete(); - return; - } - - /* Copy/move files in place. */ - File archiveFile = new File(this.archiveDirectory, - date.replaceAll("-", "/") + "/" + tpfFileName); - archiveFile.getParentFile().mkdirs(); - try { - Files.copy(tempFile.toPath(), archiveFile.toPath(), - StandardCopyOption.REPLACE_EXISTING); - } catch (IOException e) { - logger.warn("Unable to copy OnionPerf file {} to {}. Skipping.", - tempFile, archiveFile, e); - tempFile.delete(); - return; - } - File recentFile = new File(this.recentDirectory, tpfFileName); - tempFile.renameTo(recentFile); - - /* Add to download history to avoid downloading it again. */ - this.downloadedTpfFiles.add(baseUrl + tpfFileName); - } - - private void writeDownloadedOnionPerfTpfFiles() { - this.onionPerfDownloadedFile.getParentFile().mkdirs(); - try (BufferedWriter bw = new BufferedWriter(new FileWriter( - this.onionPerfDownloadedFile))) { - for (String line : this.downloadedTpfFiles) { - bw.write(line); - bw.newLine(); - } - } catch (IOException e) { - logger.warn("Unable to write download history file '{}'. 
This may " - + "result in ignoring history and downloading all available .tpf " - + "files in the next execution.", - this.onionPerfDownloadedFile.getAbsolutePath(), e); - } - } - - /** Delete all files from the rsync directory that have not been modified - * in the last three days. */ - public void cleanUpRsyncDirectory() throws ConfigurationException { - long cutOffMillis = System.currentTimeMillis() - - 3L * 24L * 60L * 60L * 1000L; - Stack<File> allFiles = new Stack<>(); - allFiles.add(new File(config.getPath(Key.RecentPath).toFile(), TORPERF)); - while (!allFiles.isEmpty()) { - File file = allFiles.pop(); - if (file.isDirectory()) { - allFiles.addAll(Arrays.asList(file.listFiles())); - } else if (file.lastModified() < cutOffMillis) { - file.delete(); - } - } - } -} -