commit 7b859f8dc21c801f0bf707ceac6703f84c056497 Author: iwakeh iwakeh@torproject.org Date: Tue Mar 27 07:25:37 2018 +0000
Use the descriptor file name for determining descriptor type.
Initial solution suggested by Karsten. In addition, add a test that would fail without this patch and adapt old test to expect the correct behavior.
Also provide the origin of the descriptor with getDescriptorFile. Include a test for this functionality in log descriptors. Provide proper file name to log descriptor classes, because this filename contains log metadata for storing log files properly.
Solves task-25332. --- CHANGELOG.md | 3 +++ .../descriptor/impl/DescriptorParserImpl.java | 4 ++-- .../descriptor/log/LogDescriptorImpl.java | 19 ++++++++-------- .../descriptor/log/WebServerAccessLogImpl.java | 25 +++++++++++++-------- .../descriptor/impl/DescriptorParserImplTest.java | 18 +++++++++++++++ .../descriptor/log/LogDescriptorTest.java | 4 ++-- .../descriptor/log/WebServerAccessLogTest.java | 2 +- .../descriptor/log/WebServerModuleTest.java | 18 +++++---------- src/test/resources/webstats-2015-02.tar | Bin 0 -> 20480 bytes 9 files changed, 58 insertions(+), 35 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md index d64d58d..5edc519 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ - Replace ServerDescriptor#getHiddenServiceDirVersions with ServerDescriptor#isHiddenServiceDir, because Tor has never supported versions in the hidden-service-dir descriptor line. + - Add support for reading web server logs contained in tarballs by + providing file names of both the tarball and of contained files + to WebServerAccessLog.
* Minor changes - Override logLines() method from LogDescriptor in diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java index be9041f..713170e 100644 --- a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java +++ b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java @@ -130,8 +130,8 @@ public class DescriptorParserImpl implements DescriptorParser { } else if (firstLines.startsWith("@type torperf 1.")) { return TorperfResultImpl.parseTorperfResults(rawDescriptorBytes, sourceFile); - } else if (sourceFile.getName().contains(LogDescriptorImpl.MARKER)) { - return LogDescriptorImpl.parse(rawDescriptorBytes, sourceFile); + } else if (fileName.contains(LogDescriptorImpl.MARKER)) { + return LogDescriptorImpl.parse(rawDescriptorBytes, sourceFile, fileName); } else { throw new DescriptorParseException("Could not detect descriptor " + "type in descriptor starting with '" + firstLines + "'."); diff --git a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java index 3583d26..b730465 100644 --- a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java +++ b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java @@ -64,14 +64,15 @@ public abstract class LogDescriptorImpl * @since 2.2.0 */ protected LogDescriptorImpl(byte[] logBytes, File descriptorFile, - FileType defaultCompression) throws DescriptorParseException { + String logName, FileType defaultCompression) + throws DescriptorParseException { this.logBytes = logBytes; this.descriptorFile = descriptorFile; try { - Matcher mat = filenamePattern.matcher(descriptorFile.getName()); + Matcher mat = filenamePattern.matcher(logName); if (!mat.find()) { throw new DescriptorParseException( - "Log file name doesn't comply to standard: " + descriptorFile); + "Log file name doesn't comply to standard: " + logName); } this.fileType = FileType.findType(mat.group(1).toUpperCase()); if (FileType.PLAIN == this.fileType) { @@ -80,7 +81,7 @@ public abstract class LogDescriptorImpl } } catch (Exception ex) { throw new DescriptorParseException("Cannot parse file " - + descriptorFile.getName(), ex); + + logName + " from file " + descriptorFile.getName(), ex); } }
@@ -112,13 +113,13 @@ public abstract class LogDescriptorImpl * @since 2.2.0 */ public static List<Descriptor> parse(byte[] logBytes, - File descriptorFile) throws DescriptorParseException { - if (descriptorFile.getName().contains(InternalWebServerAccessLog.MARKER)) { + File descriptorFile, String logName) throws DescriptorParseException { + if (logName.contains(InternalWebServerAccessLog.MARKER)) { return Arrays.asList(new Descriptor[]{ - new WebServerAccessLogImpl(logBytes, descriptorFile)}); + new WebServerAccessLogImpl(logBytes, descriptorFile, logName)}); } else { - throw new DescriptorParseException("Cannot parse file " - + descriptorFile.getName()); + throw new DescriptorParseException("Cannot parse file " + logName + + " from file " + descriptorFile.getName()); } }
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java index 3666d5d..ffaef7b 100644 --- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java +++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java @@ -13,6 +13,7 @@ import org.slf4j.LoggerFactory; import java.io.BufferedReader; import java.io.File; import java.io.InputStreamReader; +import java.nio.file.Paths; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -65,28 +66,34 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl * The immediate parent name is taken to be the physical host collecting the * logs.</p> */ - protected WebServerAccessLogImpl(byte[] logBytes, File file) + protected WebServerAccessLogImpl(byte[] logBytes, File file, String logName) throws DescriptorParseException { - this(logBytes, file, FileType.XZ); + this(logBytes, file, logName, FileType.XZ); }
/** For internal use only. */ public WebServerAccessLogImpl(byte[] bytes, String filename, boolean validate) throws DescriptorParseException { - this(bytes, new File(filename), FileType.XZ, validate); + this(bytes, null, filename, FileType.XZ, validate); }
- private WebServerAccessLogImpl(byte[] logBytes, File file, + /** For internal use only. */ + public WebServerAccessLogImpl(byte[] bytes, File sourceFile, String filename, + boolean validate) throws DescriptorParseException { + this(bytes, sourceFile, filename, FileType.XZ, validate); + } + + private WebServerAccessLogImpl(byte[] logBytes, File file, String logName, FileType defaultCompression) throws DescriptorParseException { - this(logBytes, file, defaultCompression, true); + this(logBytes, file, logName, defaultCompression, true); }
- private WebServerAccessLogImpl(byte[] logBytes, File file, + private WebServerAccessLogImpl(byte[] logBytes, File file, String logName, FileType defaultCompression, boolean validate) throws DescriptorParseException { - super(logBytes, file, defaultCompression); + super(logBytes, file, logName, defaultCompression); try { - String fn = file.toPath().getFileName().toString(); + String fn = Paths.get(logName).getFileName().toString(); Matcher mat = filenamePattern.matcher(fn); if (!mat.find()) { throw new DescriptorParseException( @@ -110,7 +117,7 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl throw dpe; // escalate } catch (Exception pe) { throw new DescriptorParseException( - "Cannot parse WebServerAccessLog file: " + file, pe); + "Cannot parse WebServerAccessLog file: " + logName, pe); } }
diff --git a/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java b/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java index d9dd178..70594d8 100644 --- a/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java +++ b/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java @@ -4,9 +4,13 @@ package org.torproject.descriptor.impl;
import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue;
import org.torproject.descriptor.Descriptor; import org.torproject.descriptor.DescriptorParseException; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.WebServerAccessLog;
import org.junit.Rule; import org.junit.Test; @@ -48,6 +52,20 @@ public class DescriptorParserImplTest { }
@Test + public void testParseDescriptorTar() throws Exception { + DescriptorReader dr = DescriptorSourceFactory.createDescriptorReader(); + int count = 0; + for (Descriptor desc : dr.readDescriptors(new File(getClass() + .getClassLoader().getResource("webstats-2015-02.tar").toURI()))) { + assertTrue(desc instanceof WebServerAccessLog); + assertTrue(desc.getDescriptorFile().toString() + .endsWith("webstats-2015-02.tar")); + count++; + } + assertEquals(7, count); + } + + @Test public void testParseDescriptor() throws DescriptorParseException { Constructor<? extends DescriptorImpl> constructor; try { diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java index 0ff3e62..e76566a 100644 --- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java +++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java @@ -149,8 +149,8 @@ public class LogDescriptorTest { File invalidFile = new File(this.reader.getParsedFiles().firstKey() .replace("access", "-")); List<Descriptor> descs = new ArrayList<>(); - for (Descriptor desc // note: only 'invalidFile' is used by LogDescriptor - : dp.parseDescriptors(new byte[]{}, invalidFile, logFile.getName())) { + for (Descriptor desc // note: only 'invalidFile' is used by DescriptorReader + : dp.parseDescriptors(new byte[]{}, logFile, invalidFile.getName())) { descs.add(desc); } assertTrue(dataUsed() + "\nWrong type: " diff --git a/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java b/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java index 3e98f13..b64c4df 100644 --- a/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java +++ b/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java @@ -79,7 +79,7 @@ public class WebServerAccessLogTest { @Test public void testValidation() throws Exception { WebServerAccessLogImpl wsal - = new WebServerAccessLogImpl(real.getBytes(), file); + = new WebServerAccessLogImpl(real.getBytes(), file, fn); wsal.validate(); if (valid) { assertEquals(0, wsal.getUnrecognizedLines().size()); diff --git a/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java b/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java index 66c8f54..c5cf6e5 100644 --- a/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java +++ b/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java @@ -14,7 +14,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException;
-import java.io.File; import java.util.Arrays; import java.util.stream.Collectors;
@@ -31,8 +30,7 @@ public class WebServerModuleTest { thrown.expectMessage(Matchers .containsString("Cannot parse WebServerAccessLog file: " + filename)); - new WebServerAccessLogImpl(new byte[0], - new File(filename)); + new WebServerAccessLogImpl(new byte[0], null, filename); }
@Test @@ -42,8 +40,7 @@ public class WebServerModuleTest { thrown.expectMessage(Matchers .containsString("Cannot parse WebServerAccessLog file: " + filename)); - new WebServerAccessLogImpl(new byte[0], - new File(filename)); + new WebServerAccessLogImpl(new byte[0], null, filename); }
@Test @@ -53,8 +50,7 @@ public class WebServerModuleTest { thrown.expectMessage(Matchers .containsString("WebServerAccessLog " + "file name doesn't comply to standard: " + filename)); - new WebServerAccessLogImpl(new byte[0], - new File(filename)); + new WebServerAccessLogImpl(new byte[0], null, filename); }
@Test @@ -64,8 +60,7 @@ public class WebServerModuleTest { thrown.expectMessage(Matchers .containsString("WebServerAccessLog " + "file name doesn't comply to standard: " + filename)); - new WebServerAccessLogImpl(new byte[0], - new File(filename)); + new WebServerAccessLogImpl(new byte[0], null, filename); }
@Test @@ -75,8 +70,7 @@ public class WebServerModuleTest { thrown.expectMessage(Matchers .containsString("WebServerAccessLog " + "file name doesn't comply to standard: " + filename)); - new WebServerAccessLogImpl(new byte[0], - new File(filename)); + new WebServerAccessLogImpl(new byte[0], null, filename); }
private static String[] logLines = { @@ -103,7 +97,7 @@ public class WebServerModuleTest { @Test public void testBasics() throws Exception { WebServerAccessLogImpl wsal = new WebServerAccessLogImpl(logText.getBytes(), - new File("vhost_host7_access.log_20170530")); + null, "vhost_host7_access.log_20170530"); assertEquals(wsal.getAnnotations().size(), 0); assertEquals(logText, new String(FileType.XZ.decompress(wsal.getRawDescriptorBytes()))); diff --git a/src/test/resources/webstats-2015-02.tar b/src/test/resources/webstats-2015-02.tar new file mode 100644 index 0000000..ffb3fc7 Binary files /dev/null and b/src/test/resources/webstats-2015-02.tar differ
tor-commits@lists.torproject.org