commit 7b859f8dc21c801f0bf707ceac6703f84c056497
Author: iwakeh <iwakeh(a)torproject.org>
Date: Tue Mar 27 07:25:37 2018 +0000
Use the descriptor file name for determining descriptor type.
Initial solution suggested by Karsten.
In addition, add a test that would fail without this patch
and adapt old test to expect the correct behavior.
Also provide the origin of the descriptor with getDescriptorFile.
Include a test for this functionality in log descriptors.
Provide proper file name to log descriptor classes, because this
filename contains log metadata for storing log files properly.
Solves task-25332.
---
CHANGELOG.md | 3 +++
.../descriptor/impl/DescriptorParserImpl.java | 4 ++--
.../descriptor/log/LogDescriptorImpl.java | 19 ++++++++--------
.../descriptor/log/WebServerAccessLogImpl.java | 25 +++++++++++++--------
.../descriptor/impl/DescriptorParserImplTest.java | 18 +++++++++++++++
.../descriptor/log/LogDescriptorTest.java | 4 ++--
.../descriptor/log/WebServerAccessLogTest.java | 2 +-
.../descriptor/log/WebServerModuleTest.java | 18 +++++----------
src/test/resources/webstats-2015-02.tar | Bin 0 -> 20480 bytes
9 files changed, 58 insertions(+), 35 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d64d58d..5edc519 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,9 @@
- Replace ServerDescriptor#getHiddenServiceDirVersions with
ServerDescriptor#isHiddenServiceDir, because Tor has never
supported versions in the hidden-service-dir descriptor line.
+ - Add support for reading web server logs contained in tarballs by
+ providing file names of both the tarball and of contained files
+ to WebServerAccessLog.
* Minor changes
- Override logLines() method from LogDescriptor in
diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
index be9041f..713170e 100644
--- a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
@@ -130,8 +130,8 @@ public class DescriptorParserImpl implements DescriptorParser {
} else if (firstLines.startsWith("@type torperf 1.")) {
return TorperfResultImpl.parseTorperfResults(rawDescriptorBytes,
sourceFile);
- } else if (sourceFile.getName().contains(LogDescriptorImpl.MARKER)) {
- return LogDescriptorImpl.parse(rawDescriptorBytes, sourceFile);
+ } else if (fileName.contains(LogDescriptorImpl.MARKER)) {
+ return LogDescriptorImpl.parse(rawDescriptorBytes, sourceFile, fileName);
} else {
throw new DescriptorParseException("Could not detect descriptor "
+ "type in descriptor starting with '" + firstLines + "'.");
diff --git a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
index 3583d26..b730465 100644
--- a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
@@ -64,14 +64,15 @@ public abstract class LogDescriptorImpl
* @since 2.2.0
*/
protected LogDescriptorImpl(byte[] logBytes, File descriptorFile,
- FileType defaultCompression) throws DescriptorParseException {
+ String logName, FileType defaultCompression)
+ throws DescriptorParseException {
this.logBytes = logBytes;
this.descriptorFile = descriptorFile;
try {
- Matcher mat = filenamePattern.matcher(descriptorFile.getName());
+ Matcher mat = filenamePattern.matcher(logName);
if (!mat.find()) {
throw new DescriptorParseException(
- "Log file name doesn't comply to standard: " + descriptorFile);
+ "Log file name doesn't comply to standard: " + logName);
}
this.fileType = FileType.findType(mat.group(1).toUpperCase());
if (FileType.PLAIN == this.fileType) {
@@ -80,7 +81,7 @@ public abstract class LogDescriptorImpl
}
} catch (Exception ex) {
throw new DescriptorParseException("Cannot parse file "
- + descriptorFile.getName(), ex);
+ + logName + " from file " + descriptorFile.getName(), ex);
}
}
@@ -112,13 +113,13 @@ public abstract class LogDescriptorImpl
* @since 2.2.0
*/
public static List<Descriptor> parse(byte[] logBytes,
- File descriptorFile) throws DescriptorParseException {
- if (descriptorFile.getName().contains(InternalWebServerAccessLog.MARKER)) {
+ File descriptorFile, String logName) throws DescriptorParseException {
+ if (logName.contains(InternalWebServerAccessLog.MARKER)) {
return Arrays.asList(new Descriptor[]{
- new WebServerAccessLogImpl(logBytes, descriptorFile)});
+ new WebServerAccessLogImpl(logBytes, descriptorFile, logName)});
} else {
- throw new DescriptorParseException("Cannot parse file "
- + descriptorFile.getName());
+ throw new DescriptorParseException("Cannot parse file " + logName
+ + " from file " + descriptorFile.getName());
}
}
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
index 3666d5d..ffaef7b 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
@@ -13,6 +13,7 @@ import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
+import java.nio.file.Paths;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
@@ -65,28 +66,34 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
* The immediate parent name is taken to be the physical host collecting the
* logs.</p>
*/
- protected WebServerAccessLogImpl(byte[] logBytes, File file)
+ protected WebServerAccessLogImpl(byte[] logBytes, File file, String logName)
throws DescriptorParseException {
- this(logBytes, file, FileType.XZ);
+ this(logBytes, file, logName, FileType.XZ);
}
/** For internal use only. */
public WebServerAccessLogImpl(byte[] bytes, String filename,
boolean validate) throws DescriptorParseException {
- this(bytes, new File(filename), FileType.XZ, validate);
+ this(bytes, null, filename, FileType.XZ, validate);
}
- private WebServerAccessLogImpl(byte[] logBytes, File file,
+ /** For internal use only. */
+ public WebServerAccessLogImpl(byte[] bytes, File sourceFile, String filename,
+ boolean validate) throws DescriptorParseException {
+ this(bytes, sourceFile, filename, FileType.XZ, validate);
+ }
+
+ private WebServerAccessLogImpl(byte[] logBytes, File file, String logName,
FileType defaultCompression) throws DescriptorParseException {
- this(logBytes, file, defaultCompression, true);
+ this(logBytes, file, logName, defaultCompression, true);
}
- private WebServerAccessLogImpl(byte[] logBytes, File file,
+ private WebServerAccessLogImpl(byte[] logBytes, File file, String logName,
FileType defaultCompression, boolean validate)
throws DescriptorParseException {
- super(logBytes, file, defaultCompression);
+ super(logBytes, file, logName, defaultCompression);
try {
- String fn = file.toPath().getFileName().toString();
+ String fn = Paths.get(logName).getFileName().toString();
Matcher mat = filenamePattern.matcher(fn);
if (!mat.find()) {
throw new DescriptorParseException(
@@ -110,7 +117,7 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
throw dpe; // escalate
} catch (Exception pe) {
throw new DescriptorParseException(
- "Cannot parse WebServerAccessLog file: " + file, pe);
+ "Cannot parse WebServerAccessLog file: " + logName, pe);
}
}
diff --git a/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java b/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java
index d9dd178..70594d8 100644
--- a/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java
+++ b/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java
@@ -4,9 +4,13 @@
package org.torproject.descriptor.impl;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
import org.torproject.descriptor.Descriptor;
import org.torproject.descriptor.DescriptorParseException;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.WebServerAccessLog;
import org.junit.Rule;
import org.junit.Test;
@@ -48,6 +52,20 @@ public class DescriptorParserImplTest {
}
@Test
+ public void testParseDescriptorTar() throws Exception {
+ DescriptorReader dr = DescriptorSourceFactory.createDescriptorReader();
+ int count = 0;
+ for (Descriptor desc : dr.readDescriptors(new File(getClass()
+ .getClassLoader().getResource("webstats-2015-02.tar").toURI()))) {
+ assertTrue(desc instanceof WebServerAccessLog);
+ assertTrue(desc.getDescriptorFile().toString()
+ .endsWith("webstats-2015-02.tar"));
+ count++;
+ }
+ assertEquals(7, count);
+ }
+
+ @Test
public void testParseDescriptor() throws DescriptorParseException {
Constructor<? extends DescriptorImpl> constructor;
try {
diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
index 0ff3e62..e76566a 100644
--- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
+++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
@@ -149,8 +149,8 @@ public class LogDescriptorTest {
File invalidFile = new File(this.reader.getParsedFiles().firstKey()
.replace("access", "-"));
List<Descriptor> descs = new ArrayList<>();
- for (Descriptor desc // note: only 'invalidFile' is used by LogDescriptor
- : dp.parseDescriptors(new byte[]{}, invalidFile, logFile.getName())) {
+ for (Descriptor desc // note: only 'invalidFile' is used by DescriptorReader
+ : dp.parseDescriptors(new byte[]{}, logFile, invalidFile.getName())) {
descs.add(desc);
}
assertTrue(dataUsed() + "\nWrong type: "
diff --git a/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java b/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java
index 3e98f13..b64c4df 100644
--- a/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java
+++ b/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java
@@ -79,7 +79,7 @@ public class WebServerAccessLogTest {
@Test
public void testValidation() throws Exception {
WebServerAccessLogImpl wsal
- = new WebServerAccessLogImpl(real.getBytes(), file);
+ = new WebServerAccessLogImpl(real.getBytes(), file, fn);
wsal.validate();
if (valid) {
assertEquals(0, wsal.getUnrecognizedLines().size());
diff --git a/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java b/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java
index 66c8f54..c5cf6e5 100644
--- a/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java
+++ b/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java
@@ -14,7 +14,6 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
-import java.io.File;
import java.util.Arrays;
import java.util.stream.Collectors;
@@ -31,8 +30,7 @@ public class WebServerModuleTest {
thrown.expectMessage(Matchers
.containsString("Cannot parse WebServerAccessLog file: "
+ filename));
- new WebServerAccessLogImpl(new byte[0],
- new File(filename));
+ new WebServerAccessLogImpl(new byte[0], null, filename);
}
@Test
@@ -42,8 +40,7 @@ public class WebServerModuleTest {
thrown.expectMessage(Matchers
.containsString("Cannot parse WebServerAccessLog file: "
+ filename));
- new WebServerAccessLogImpl(new byte[0],
- new File(filename));
+ new WebServerAccessLogImpl(new byte[0], null, filename);
}
@Test
@@ -53,8 +50,7 @@ public class WebServerModuleTest {
thrown.expectMessage(Matchers
.containsString("WebServerAccessLog "
+ "file name doesn't comply to standard: " + filename));
- new WebServerAccessLogImpl(new byte[0],
- new File(filename));
+ new WebServerAccessLogImpl(new byte[0], null, filename);
}
@Test
@@ -64,8 +60,7 @@ public class WebServerModuleTest {
thrown.expectMessage(Matchers
.containsString("WebServerAccessLog "
+ "file name doesn't comply to standard: " + filename));
- new WebServerAccessLogImpl(new byte[0],
- new File(filename));
+ new WebServerAccessLogImpl(new byte[0], null, filename);
}
@Test
@@ -75,8 +70,7 @@ public class WebServerModuleTest {
thrown.expectMessage(Matchers
.containsString("WebServerAccessLog "
+ "file name doesn't comply to standard: " + filename));
- new WebServerAccessLogImpl(new byte[0],
- new File(filename));
+ new WebServerAccessLogImpl(new byte[0], null, filename);
}
private static String[] logLines = {
@@ -103,7 +97,7 @@ public class WebServerModuleTest {
@Test
public void testBasics() throws Exception {
WebServerAccessLogImpl wsal = new WebServerAccessLogImpl(logText.getBytes(),
- new File("vhost_host7_access.log_20170530"));
+ null, "vhost_host7_access.log_20170530");
assertEquals(wsal.getAnnotations().size(), 0);
assertEquals(logText,
new String(FileType.XZ.decompress(wsal.getRawDescriptorBytes())));
diff --git a/src/test/resources/webstats-2015-02.tar b/src/test/resources/webstats-2015-02.tar
new file mode 100644
index 0000000..ffb3fc7
Binary files /dev/null and b/src/test/resources/webstats-2015-02.tar differ