[tor-commits] [metrics-lib/release] Use the descriptor file name for determining descriptor type.

karsten at torproject.org karsten at torproject.org
Wed Apr 18 14:35:37 UTC 2018


commit 7b859f8dc21c801f0bf707ceac6703f84c056497
Author: iwakeh <iwakeh at torproject.org>
Date:   Tue Mar 27 07:25:37 2018 +0000

    Use the descriptor file name for determining descriptor type.
    
    Initial solution suggested by Karsten.
    In addition, add a test that would fail without this patch
    and adapt old test to expect the correct behavior.
    
    Also provide the origin of the descriptor with getDescriptorFile.
    Include a test for this functionality in log descriptors.
    Provide proper file name to log descriptor classes, because this
    filename contains log metadata for storing log files properly.
    
    Solves task-25332.
---
 CHANGELOG.md                                       |   3 +++
 .../descriptor/impl/DescriptorParserImpl.java      |   4 ++--
 .../descriptor/log/LogDescriptorImpl.java          |  19 ++++++++--------
 .../descriptor/log/WebServerAccessLogImpl.java     |  25 +++++++++++++--------
 .../descriptor/impl/DescriptorParserImplTest.java  |  18 +++++++++++++++
 .../descriptor/log/LogDescriptorTest.java          |   4 ++--
 .../descriptor/log/WebServerAccessLogTest.java     |   2 +-
 .../descriptor/log/WebServerModuleTest.java        |  18 +++++----------
 src/test/resources/webstats-2015-02.tar            | Bin 0 -> 20480 bytes
 9 files changed, 58 insertions(+), 35 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d64d58d..5edc519 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,9 @@
    - Replace ServerDescriptor#getHiddenServiceDirVersions with
      ServerDescriptor#isHiddenServiceDir, because Tor has never
      supported versions in the hidden-service-dir descriptor line.
+   - Add support for reading web server logs contained in tarballs by
+     providing file names of both the tarball and of contained files
+     to WebServerAccessLog.
 
  * Minor changes
    - Override logLines() method from LogDescriptor in
diff --git a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
index be9041f..713170e 100644
--- a/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
+++ b/src/main/java/org/torproject/descriptor/impl/DescriptorParserImpl.java
@@ -130,8 +130,8 @@ public class DescriptorParserImpl implements DescriptorParser {
     } else if (firstLines.startsWith("@type torperf 1.")) {
       return TorperfResultImpl.parseTorperfResults(rawDescriptorBytes,
           sourceFile);
-    } else if (sourceFile.getName().contains(LogDescriptorImpl.MARKER)) {
-      return LogDescriptorImpl.parse(rawDescriptorBytes, sourceFile);
+    } else if (fileName.contains(LogDescriptorImpl.MARKER)) {
+      return LogDescriptorImpl.parse(rawDescriptorBytes, sourceFile, fileName);
     } else {
       throw new DescriptorParseException("Could not detect descriptor "
           + "type in descriptor starting with '" + firstLines + "'.");
diff --git a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
index 3583d26..b730465 100644
--- a/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/LogDescriptorImpl.java
@@ -64,14 +64,15 @@ public abstract class LogDescriptorImpl
    * @since 2.2.0
    */
   protected LogDescriptorImpl(byte[] logBytes, File descriptorFile,
-       FileType defaultCompression) throws DescriptorParseException {
+       String logName, FileType defaultCompression)
+       throws DescriptorParseException {
     this.logBytes = logBytes;
     this.descriptorFile = descriptorFile;
     try {
-      Matcher mat = filenamePattern.matcher(descriptorFile.getName());
+      Matcher mat = filenamePattern.matcher(logName);
       if (!mat.find()) {
         throw new DescriptorParseException(
-            "Log file name doesn't comply to standard: " + descriptorFile);
+            "Log file name doesn't comply to standard: " + logName);
       }
       this.fileType = FileType.findType(mat.group(1).toUpperCase());
       if (FileType.PLAIN == this.fileType) {
@@ -80,7 +81,7 @@ public abstract class LogDescriptorImpl
       }
     } catch (Exception ex) {
       throw new DescriptorParseException("Cannot parse file "
-          + descriptorFile.getName(), ex);
+          + logName + " from file " + descriptorFile.getName(), ex);
     }
   }
 
@@ -112,13 +113,13 @@ public abstract class LogDescriptorImpl
    * @since 2.2.0
    */
   public static List<Descriptor> parse(byte[] logBytes,
-      File descriptorFile) throws DescriptorParseException {
-    if (descriptorFile.getName().contains(InternalWebServerAccessLog.MARKER)) {
+      File descriptorFile, String logName) throws DescriptorParseException {
+    if (logName.contains(InternalWebServerAccessLog.MARKER)) {
       return Arrays.asList(new Descriptor[]{
-          new WebServerAccessLogImpl(logBytes, descriptorFile)});
+          new WebServerAccessLogImpl(logBytes, descriptorFile, logName)});
     } else {
-      throw new DescriptorParseException("Cannot parse file "
-          + descriptorFile.getName());
+      throw new DescriptorParseException("Cannot parse file " + logName
+          + " from file " + descriptorFile.getName());
     }
   }
 
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
index 3666d5d..ffaef7b 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
@@ -13,6 +13,7 @@ import org.slf4j.LoggerFactory;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.InputStreamReader;
+import java.nio.file.Paths;
 import java.time.LocalDate;
 import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
@@ -65,28 +66,34 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
    * The immediate parent name is taken to be the physical host collecting the
    * logs.</p>
    */
-  protected WebServerAccessLogImpl(byte[] logBytes, File file)
+  protected WebServerAccessLogImpl(byte[] logBytes, File file, String logName)
       throws DescriptorParseException {
-    this(logBytes, file, FileType.XZ);
+    this(logBytes, file, logName, FileType.XZ);
   }
 
   /** For internal use only. */
   public WebServerAccessLogImpl(byte[] bytes, String filename,
       boolean validate) throws DescriptorParseException {
-    this(bytes, new File(filename), FileType.XZ, validate);
+    this(bytes, null, filename, FileType.XZ, validate);
   }
 
-  private WebServerAccessLogImpl(byte[] logBytes, File file,
+  /** For internal use only. */
+  public WebServerAccessLogImpl(byte[] bytes, File sourceFile, String filename,
+      boolean validate) throws DescriptorParseException {
+    this(bytes, sourceFile, filename, FileType.XZ, validate);
+  }
+
+  private WebServerAccessLogImpl(byte[] logBytes, File file, String logName,
       FileType defaultCompression) throws DescriptorParseException {
-    this(logBytes, file, defaultCompression, true);
+    this(logBytes, file, logName, defaultCompression, true);
   }
 
-  private WebServerAccessLogImpl(byte[] logBytes, File file,
+  private WebServerAccessLogImpl(byte[] logBytes, File file, String logName,
       FileType defaultCompression, boolean validate)
       throws DescriptorParseException {
-    super(logBytes, file, defaultCompression);
+    super(logBytes, file, logName, defaultCompression);
     try {
-      String fn = file.toPath().getFileName().toString();
+      String fn = Paths.get(logName).getFileName().toString();
       Matcher mat = filenamePattern.matcher(fn);
       if (!mat.find()) {
         throw new DescriptorParseException(
@@ -110,7 +117,7 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
       throw dpe; // escalate
     } catch (Exception pe) {
       throw new DescriptorParseException(
-          "Cannot parse WebServerAccessLog file: " + file, pe);
+          "Cannot parse WebServerAccessLog file: " + logName, pe);
     }
   }
 
diff --git a/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java b/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java
index d9dd178..70594d8 100644
--- a/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java
+++ b/src/test/java/org/torproject/descriptor/impl/DescriptorParserImplTest.java
@@ -4,9 +4,13 @@
 package org.torproject.descriptor.impl;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import org.torproject.descriptor.Descriptor;
 import org.torproject.descriptor.DescriptorParseException;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.WebServerAccessLog;
 
 import org.junit.Rule;
 import org.junit.Test;
@@ -48,6 +52,20 @@ public class DescriptorParserImplTest {
   }
 
   @Test
+  public void testParseDescriptorTar() throws Exception {
+    DescriptorReader dr = DescriptorSourceFactory.createDescriptorReader();
+    int count = 0;
+    for (Descriptor desc : dr.readDescriptors(new File(getClass()
+        .getClassLoader().getResource("webstats-2015-02.tar").toURI()))) {
+      assertTrue(desc instanceof WebServerAccessLog);
+      assertTrue(desc.getDescriptorFile().toString()
+          .endsWith("webstats-2015-02.tar"));
+      count++;
+    }
+    assertEquals(7, count);
+  }
+
+  @Test
   public void testParseDescriptor() throws DescriptorParseException {
     Constructor<? extends DescriptorImpl> constructor;
     try {
diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
index 0ff3e62..e76566a 100644
--- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
+++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
@@ -149,8 +149,8 @@ public class LogDescriptorTest {
     File invalidFile = new File(this.reader.getParsedFiles().firstKey()
         .replace("access", "-"));
     List<Descriptor> descs = new ArrayList<>();
-    for (Descriptor desc // note: only 'invalidFile' is used by LogDescriptor
-        : dp.parseDescriptors(new byte[]{}, invalidFile, logFile.getName())) {
+    for (Descriptor desc // note: only 'invalidFile' is used by DescriptorReader
+        : dp.parseDescriptors(new byte[]{}, logFile, invalidFile.getName())) {
       descs.add(desc);
     }
     assertTrue(dataUsed() + "\nWrong type: "
diff --git a/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java b/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java
index 3e98f13..b64c4df 100644
--- a/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java
+++ b/src/test/java/org/torproject/descriptor/log/WebServerAccessLogTest.java
@@ -79,7 +79,7 @@ public class WebServerAccessLogTest {
   @Test
   public void testValidation() throws Exception {
     WebServerAccessLogImpl wsal
-        = new WebServerAccessLogImpl(real.getBytes(), file);
+        = new WebServerAccessLogImpl(real.getBytes(), file, fn);
     wsal.validate();
     if (valid) {
       assertEquals(0, wsal.getUnrecognizedLines().size());
diff --git a/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java b/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java
index 66c8f54..c5cf6e5 100644
--- a/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java
+++ b/src/test/java/org/torproject/descriptor/log/WebServerModuleTest.java
@@ -14,7 +14,6 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;
 
-import java.io.File;
 import java.util.Arrays;
 import java.util.stream.Collectors;
 
@@ -31,8 +30,7 @@ public class WebServerModuleTest {
     thrown.expectMessage(Matchers
          .containsString("Cannot parse WebServerAccessLog file: "
          + filename));
-    new WebServerAccessLogImpl(new byte[0],
-        new File(filename));
+    new WebServerAccessLogImpl(new byte[0], null, filename);
   }
 
   @Test
@@ -42,8 +40,7 @@ public class WebServerModuleTest {
     thrown.expectMessage(Matchers
          .containsString("Cannot parse WebServerAccessLog file: "
          + filename));
-    new WebServerAccessLogImpl(new byte[0],
-        new File(filename));
+    new WebServerAccessLogImpl(new byte[0], null, filename);
   }
 
   @Test
@@ -53,8 +50,7 @@ public class WebServerModuleTest {
     thrown.expectMessage(Matchers
          .containsString("WebServerAccessLog "
          + "file name doesn't comply to standard: " + filename));
-    new WebServerAccessLogImpl(new byte[0],
-        new File(filename));
+    new WebServerAccessLogImpl(new byte[0], null, filename);
   }
 
   @Test
@@ -64,8 +60,7 @@ public class WebServerModuleTest {
     thrown.expectMessage(Matchers
          .containsString("WebServerAccessLog "
          + "file name doesn't comply to standard: " + filename));
-    new WebServerAccessLogImpl(new byte[0],
-        new File(filename));
+    new WebServerAccessLogImpl(new byte[0], null, filename);
   }
 
   @Test
@@ -75,8 +70,7 @@ public class WebServerModuleTest {
     thrown.expectMessage(Matchers
          .containsString("WebServerAccessLog "
          + "file name doesn't comply to standard: " + filename));
-    new WebServerAccessLogImpl(new byte[0],
-        new File(filename));
+    new WebServerAccessLogImpl(new byte[0], null, filename);
   }
 
   private static String[] logLines = {
@@ -103,7 +97,7 @@ public class WebServerModuleTest {
   @Test
   public void testBasics() throws Exception {
     WebServerAccessLogImpl wsal = new WebServerAccessLogImpl(logText.getBytes(),
-        new File("vhost_host7_access.log_20170530"));
+        null, "vhost_host7_access.log_20170530");
     assertEquals(wsal.getAnnotations().size(), 0);
     assertEquals(logText,
         new String(FileType.XZ.decompress(wsal.getRawDescriptorBytes())));
diff --git a/src/test/resources/webstats-2015-02.tar b/src/test/resources/webstats-2015-02.tar
new file mode 100644
index 0000000..ffb3fc7
Binary files /dev/null and b/src/test/resources/webstats-2015-02.tar differ





More information about the tor-commits mailing list