commit 6dd06f3f298ffd3b64abfd28214944f9f3cc01a9 Author: Karsten Loesing <karsten.loesing@gmx.net> Date: Wed Mar 8 16:26:24 2017 +0100
Download .tpf files from OnionPerf hosts.
Implements #21272. --- CHANGELOG.md | 3 + .../java/org/torproject/collector/conf/Key.java | 3 +- .../collector/torperf/TorperfDownloader.java | 237 ++++++++++++++++++++- src/main/resources/collector.properties | 7 + .../collector/conf/ConfigurationTest.java | 2 +- 5 files changed, 249 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md index 2365447..5e1107f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changes in version 1.?.? - 2017-??-??
+ * Major changes + - Download .tpf files from OnionPerf hosts. + * Medium changes - Clean up files in recent/exit-lists/ again.
diff --git a/src/main/java/org/torproject/collector/conf/Key.java b/src/main/java/org/torproject/collector/conf/Key.java index 0274c98..dd35322 100644 --- a/src/main/java/org/torproject/collector/conf/Key.java +++ b/src/main/java/org/torproject/collector/conf/Key.java @@ -57,7 +57,8 @@ public enum Key { ReplaceIpAddressesWithHashes(Boolean.class), BridgeDescriptorMappingsLimit(Integer.class), TorperfFilesLines(String[].class), - TorperfHosts(String[][].class); + TorperfHosts(String[][].class), + OnionPerfHosts(URL[].class);
private Class clazz; private static Set<String> keys; diff --git a/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java b/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java index b09a6d6..2cd99df 100644 --- a/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java +++ b/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java @@ -8,6 +8,11 @@ import org.torproject.collector.conf.Configuration; import org.torproject.collector.conf.ConfigurationException; import org.torproject.collector.conf.Key; import org.torproject.collector.cron.CollecTorMain; +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorParseException; +import org.torproject.descriptor.DescriptorParser; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.TorperfResult;
import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -18,21 +23,34 @@ import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; +import java.net.MalformedURLException; import java.net.URL; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.text.DateFormat; +import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.SortedMap; +import java.util.SortedSet; import java.util.Stack; import java.util.TimeZone; import java.util.TreeMap; +import java.util.TreeSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern;
/* Download possibly truncated Torperf .data and .extradata files from * configured sources, append them to the files we already have, and merge - * the two files into the .tpf format. */ + * the two files into the .tpf format; + * also download .tpf files from OnionPerf hosts. */ public class TorperfDownloader extends CollecTorMain {
private static final Logger logger = LoggerFactory.getLogger( @@ -50,6 +68,24 @@ public class TorperfDownloader extends CollecTorMain { private SimpleDateFormat dateFormat; private File torperfLastMergedFile;
+ /** File containing the download history, which is necessary, because + * OnionPerf does not delete older .tpf files, but which enables us to do + * so. */ + private File onionPerfDownloadedFile; + + /** Full URLs of .tpf files downloaded in the current or in past + * executions. */ + private SortedSet<String> downloadedTpfFiles = new TreeSet<>(); + + /** Base URLs of configured OnionPerf hosts. */ + private URL[] onionPerfHosts = null; + + /** Directory for storing archived .tpf files. */ + private File archiveDirectory = null; + + /** Directory for storing recent .tpf files. */ + private File recentDirectory = null; + @Override public String module() { return TORPERF; @@ -81,6 +117,20 @@ public class TorperfDownloader extends CollecTorMain { } this.writeLastMergedTimestamps();
+ this.onionPerfDownloadedFile = + new File(config.getPath(Key.StatsPath).toFile(), + "onionperf-downloaded"); + this.onionPerfHosts = config.getUrlArray(Key.OnionPerfHosts); + this.readDownloadedOnionPerfTpfFiles(); + this.archiveDirectory = new File(config.getPath(Key.OutputPath).toFile(), + TORPERF); + this.recentDirectory = new File(config.getPath(Key.RecentPath).toFile(), + TORPERF); + for (URL baseUrl : this.onionPerfHosts) { + this.downloadFromOnionPerfHost(baseUrl); + } + this.writeDownloadedOnionPerfTpfFiles(); + this.cleanUpRsyncDirectory(); }
@@ -617,6 +667,191 @@ public class TorperfDownloader extends CollecTorMain { this.cachedTpfLines = null; }
+ private void readDownloadedOnionPerfTpfFiles() { + if (!this.onionPerfDownloadedFile.exists()) { + return; + } + try (BufferedReader br = new BufferedReader(new FileReader( + this.onionPerfDownloadedFile))) { + String line; + while ((line = br.readLine()) != null) { + this.downloadedTpfFiles.add(line); + } + } catch (IOException e) { + logger.info("Unable to read download history file '" + + this.onionPerfDownloadedFile.getAbsolutePath() + "'. Ignoring " + + "download history and downloading all available .tpf files."); + this.downloadedTpfFiles.clear(); + } + } + + private void downloadFromOnionPerfHost(URL baseUrl) { + logger.info("Downloading from OnionPerf host {}", baseUrl); + List<String> tpfFileNames = + this.downloadOnionPerfDirectoryListing(baseUrl); + String source = baseUrl.getHost().split("\\.")[0]; + for (String tpfFileName : tpfFileNames) { + this.downloadAndParseOnionPerfTpfFile(baseUrl, source, tpfFileName); + } + } + + /** Pattern for links contained in directory listings. */ + private static final Pattern TPF_FILE_URL_PATTERN = + Pattern.compile(".*<a href=\"([^\"]+\\.tpf)\">.*"); + + private List<String> downloadOnionPerfDirectoryListing(URL baseUrl) { + List<String> tpfFileUrls = new ArrayList<>(); + try (BufferedReader br = new BufferedReader(new InputStreamReader( + baseUrl.openStream()))) { + String line; + while ((line = br.readLine()) != null) { + Matcher matcher = TPF_FILE_URL_PATTERN.matcher(line); + if (matcher.matches() && !matcher.group(1).startsWith("/")) { + tpfFileUrls.add(matcher.group(1)); + } + } + } catch (IOException e) { + logger.warn("Unable to download directory listing from '{}'. 
Skipping " + + "this OnionPerf host.", baseUrl); + tpfFileUrls.clear(); + } + return tpfFileUrls; + } + + private static final DateFormat DATE_FORMAT; + + static { + DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd"); + DATE_FORMAT.setLenient(false); + DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + private void downloadAndParseOnionPerfTpfFile(URL baseUrl, String source, + String tpfFileName) { + URL tpfFileUrl; + try { + tpfFileUrl = new URL(baseUrl, tpfFileName); + } catch (MalformedURLException e1) { + logger.warn("Unable to put together base URL '{}' and .tpf file path " + + "'{}' to a URL. Skipping.", baseUrl, tpfFileName); + return; + } + + /* Skip if we successfully downloaded this file before. */ + if (this.downloadedTpfFiles.contains(tpfFileUrl.toString())) { + return; + } + + /* Verify file name before downloading: source-filesize-yyyy-MM-dd.tpf */ + String[] tpfFileNameParts = tpfFileName.split("-"); + if (!tpfFileName.startsWith(source + "-") + || tpfFileName.length() < "s-f-yyyy-MM-dd".length() + || tpfFileNameParts.length < 5) { + logger.warn("Invalid .tpf file name '{}{}'. Skipping.", baseUrl, + tpfFileName); + return; + } + int fileSize = 0; + String date = null; + try { + fileSize = Integer.parseInt( + tpfFileNameParts[tpfFileNameParts.length - 4]); + date = tpfFileName.substring(tpfFileName.length() - 14, + tpfFileName.length() - 4); + DATE_FORMAT.parse(date); + } catch (NumberFormatException | ParseException e) { + logger.warn("Invalid .tpf file name '{}{}'. Skipping.", baseUrl, + tpfFileName, e); + return; + } + + /* Download file contents to temporary file. */ + File tempFile = new File(this.recentDirectory, "." + tpfFileName); + tempFile.getParentFile().mkdirs(); + try (InputStream is = new URL(baseUrl + tpfFileName).openStream()) { + Files.copy(is, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + logger.warn("Unable to download '{}{}' to temporary file '{}'. 
" + + "Skipping.", baseUrl, tpfFileName, tempFile, e); + return; + } + + /* Validate contained descriptors. */ + DescriptorParser descriptorParser = + DescriptorSourceFactory.createDescriptorParser(); + List<Descriptor> descriptors; + try { + descriptors = descriptorParser.parseDescriptors( + Files.readAllBytes(tempFile.toPath()), tpfFileName); + } catch (IOException | DescriptorParseException e) { + logger.warn("OnionPerf file '{}{}' could not be parsed. " + + "Skipping.", baseUrl, tpfFileName, e); + tempFile.delete(); + return; + } + String message = null; + for (Descriptor descriptor : descriptors) { + if (!(descriptor instanceof TorperfResult)) { + message = "File contains descriptors other than Torperf results."; + break; + } + TorperfResult torperf = (TorperfResult) descriptor; + if (!source.equals(torperf.getSource())) { + message = "File contains Torperf result from another source."; + break; + } + if (fileSize != torperf.getFileSize()) { + message = "File contains Torperf result from another file size."; + break; + } + if (!date.equals(DATE_FORMAT.format(torperf.getStartMillis()))) { + message = "File contains Torperf result from another date."; + break; + } + } + if (null != message) { + logger.warn("OnionPerf file '{}{}' was found to be invalid: {}. " + + "Skipping.", baseUrl, tpfFileName, message); + tempFile.delete(); + return; + } + + /* Copy/move files in place. */ + File archiveFile = new File(this.archiveDirectory, + date.replaceAll("-", "/") + "/" + tpfFileName); + archiveFile.getParentFile().mkdirs(); + try { + Files.copy(tempFile.toPath(), archiveFile.toPath(), + StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + logger.warn("Unable to copy OnionPerf file {} to {}. Skipping.", + tempFile, archiveFile, e); + tempFile.delete(); + return; + } + File recentFile = new File(this.recentDirectory, tpfFileName); + tempFile.renameTo(recentFile); + + /* Add to download history to avoid downloading it again. 
*/ + this.downloadedTpfFiles.add(baseUrl + tpfFileName); + } + + private void writeDownloadedOnionPerfTpfFiles() { + this.onionPerfDownloadedFile.getParentFile().mkdirs(); + try (BufferedWriter bw = new BufferedWriter(new FileWriter( + this.onionPerfDownloadedFile))) { + for (String line : this.downloadedTpfFiles) { + bw.write(line); + bw.newLine(); + } + } catch (IOException e) { + logger.warn("Unable to write download history file '" + + this.onionPerfDownloadedFile.getAbsolutePath() + "'. This may " + + "result in ignoring history and downloading all available .tpf " + + "files in the next execution.", e); + } + } + /** Delete all files from the rsync directory that have not been modified * in the last three days. */ public void cleanUpRsyncDirectory() throws ConfigurationException { diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties index 593d580..fb43495 100644 --- a/src/main/resources/collector.properties +++ b/src/main/resources/collector.properties @@ -150,3 +150,10 @@ TorperfHosts = torperf, http://torperf.torproject.org/ ## available on a given host (multiple times lists can be given ## TorperfFiles = torperf 51200 50kb.data 50kb.extradata, torperf 1048576 1mb.data 1mb.extradata TorperfFilesLines = torperf 51200 50kb.data 50kb.extradata, torperf 1048576 1mb.data 1mb.extradata, torperf 5242880 5mb.data 5mb.extradata + +## OnionPerf base URLs +## Hosts must be configured to use the first subdomain part of the given URL as +## source name, e.g., SOURCE=first for the first URL below, SOURCE=second for +## the second, etc.: +## OnionPerfHosts = http://first.torproject.org/, http://second.torproject.org/ +OnionPerfHosts = https://op-us.onionperf.torproject.net/ diff --git a/src/test/java/org/torproject/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/collector/conf/ConfigurationTest.java index 287fb11..90065b0 100644 --- a/src/test/java/org/torproject/collector/conf/ConfigurationTest.java +++ 
b/src/test/java/org/torproject/collector/conf/ConfigurationTest.java @@ -40,7 +40,7 @@ public class ConfigurationTest { public void testKeyCount() throws Exception { assertEquals("The number of properties keys in enum Key changed." + "\n This test class should be adapted.", - 44, Key.values().length); + 45, Key.values().length); }
@Test()
tor-commits@lists.torproject.org