[or-cvs] [ernie/master] Handle non-ASCII char craziness in descriptors.

karsten at torproject.org karsten at torproject.org
Thu Feb 25 13:53:19 UTC 2010


Author: Karsten Loesing <karsten.loesing at gmx.net>
Date: Thu, 25 Feb 2010 14:52:38 +0100
Subject: Handle non-ASCII char craziness in descriptors.
Commit: 498e096add421fe5139388c71a41bf786db86062

---
 src/ArchiveWriter.java               |   80 ++++++++++++++++------------------
 src/CachedRelayDescriptorReader.java |   79 ++++++++++++++++++++-------------
 src/RelayDescriptorDownloader.java   |   80 ++++++++++++++++++++++++++--------
 3 files changed, 146 insertions(+), 93 deletions(-)

diff --git a/src/ArchiveWriter.java b/src/ArchiveWriter.java
index 996db2e..7fad326 100644
--- a/src/ArchiveWriter.java
+++ b/src/ArchiveWriter.java
@@ -153,8 +153,9 @@ public class ArchiveWriter {
       }
     }
   }
-  public void store(BufferedReader br) throws IOException,
-      ParseException {
+  public void store(byte[] data) throws IOException, ParseException {
+    BufferedReader br = new BufferedReader(new StringReader(new String(
+        data, "US-ASCII")));
     String line = br.readLine();
     if (line == null) {
       this.logger.warning("Someone gave us an empty file for storing!");
@@ -220,23 +221,24 @@ public class ArchiveWriter {
           String publishedTime = line.split(" ")[4] + " "
               + line.split(" ")[5];
           long published = parseFormat.parse(publishedTime).getTime();
-          String digest = Hex.encodeHexString(Base64.decodeBase64(
+          String serverDesc = Hex.encodeHexString(Base64.decodeBase64(
               line.split(" ")[3] + "=")).toLowerCase();
           // TODO are 24 hours okay?
           if (published + 24L * 60L * 60L * 1000L > now &&
               !new File("directory-archive/server-descriptor/"
               + descriptorFormat.format(new Date(published))
-              + digest.substring(0, 1) + "/" + digest.substring(1, 2)
-              + "/" + digest).exists()) {
-            if (!this.missingDescriptors.contains("server," + digest + ","
-                + publishedTime)) {
+              + serverDesc.substring(0, 1) + "/"
+              + serverDesc.substring(1, 2)
+              + "/" + serverDesc).exists()) {
+            if (!this.missingDescriptors.contains("server," + serverDesc
+                + "," + publishedTime)) {
               this.logger.fine("Adding server descriptor to missing list: "
-                  + "digest=" + digest
+                  + "digest=" + serverDesc
                   + ", filename=directory-archive/server-descriptor/"
                   + descriptorFormat.format(new Date(published))
-                  + digest.substring(0, 1) + "/" + digest.substring(1, 2)
-                  + "/" + digest);
-              this.missingDescriptors.add("server," + digest + ","
+                  + serverDesc.substring(0, 1) + "/"
+                  + serverDesc.substring(1, 2) + "/" + serverDesc);
+              this.missingDescriptors.add("server," + serverDesc + ","
                   + publishedTime);
               this.archiveWriterParseHistoryModified = true;
             }
@@ -254,15 +256,10 @@ public class ArchiveWriter {
               + validAfterTime + ", filename=directory-archive/consensus/"
               + printFormat.format(new Date(validAfter)) + "-consensus");
           consensusFile.getParentFile().mkdirs();
-          BufferedReader br2 = new BufferedReader(new StringReader(
-              sb.toString()));
-          BufferedWriter bw = new BufferedWriter(new FileWriter(
-              consensusFile));
-          while ((line = br2.readLine()) != null) {
-              bw.write(line + "\n");
-          }
-          bw.close();
-          br2.close();
+          BufferedOutputStream bos = new BufferedOutputStream(
+              new FileOutputStream(consensusFile));
+          bos.write(data, 0, data.length);
+          bos.close();
           this.logger.fine("Removing consensus from missing list: "
               + "valid-after=" + validAfterTime
               + ", filename=directory-archive/consensus/"
@@ -288,15 +285,10 @@ public class ArchiveWriter {
               + printFormat.format(new Date(validAfter)) + "-vote-"
               + fingerprint);
           voteFile.getParentFile().mkdirs();
-          BufferedReader br2 = new BufferedReader(new StringReader(
-              sb.toString()));
-          BufferedWriter bw = new BufferedWriter(new FileWriter(
-              voteFile));
-          while ((line = br2.readLine()) != null) {
-              bw.write(line + "\n");
-          }
-          bw.close();
-          br2.close();
+          BufferedOutputStream bos = new BufferedOutputStream(
+              new FileOutputStream(voteFile));
+          bos.write(data, 0, data.length);
+          bos.close();
           this.logger.fine("Removing vote from missing list: "
               + "fingerprint=" + fingerprint + ", valid-after="
               + printFormat.format(new Date(validAfter))
@@ -320,9 +312,7 @@ public class ArchiveWriter {
       boolean isServerDescriptor = line.startsWith("router ");
       String publishedTime = null;
       long published = -1L;
-      String digest = null;
       while ((line = br.readLine()) != null) {
-        sb.append(line + "\n");
         if (line.startsWith("published ")) {
           publishedTime = line.substring("published ".length());
           published = parseFormat.parse(publishedTime).getTime();
@@ -353,10 +343,21 @@ public class ArchiveWriter {
               this.archiveWriterParseHistoryModified = true;
             }
           }
-        } else if (line.equals("router-signature")) {
-          digest = DigestUtils.shaHex(sb.toString()).toLowerCase();
         }
       }
+      String ascii = new String(data, "US-ASCII");
+      String startToken = isServerDescriptor ?
+          "router " : "extra-info ";
+      String sigToken = "\nrouter-signature\n";
+      int start = ascii.indexOf(startToken);
+      int sig = ascii.indexOf(sigToken) + sigToken.length();
+      if (start < 0 || sig < 0 || sig < start) {
+        this.logger.info("Cannot determine descriptor digest! Skipping.");
+        return;
+      }
+      byte[] forDigest = new byte[sig - start];
+      System.arraycopy(data, start, forDigest, 0, sig - start);
+      String digest = DigestUtils.shaHex(forDigest);
       SimpleDateFormat printFormat = new SimpleDateFormat("yyyy/MM/");
       printFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
       File descriptorFile = new File("directory-archive/"
@@ -373,15 +374,10 @@ public class ArchiveWriter {
             + digest.substring(0, 1) + "/" + digest.substring(1, 2)
             + "/" + digest);
         descriptorFile.getParentFile().mkdirs();
-        BufferedReader br2 = new BufferedReader(new StringReader(
-            sb.toString()));
-        BufferedWriter bw = new BufferedWriter(new FileWriter(
-            descriptorFile));
-        while ((line = br2.readLine()) != null) {
-          bw.write(line + "\n");
-        }
-        bw.close();
-        br2.close();
+        BufferedOutputStream bos = new BufferedOutputStream(
+            new FileOutputStream(descriptorFile));
+        bos.write(data, 0, data.length);
+        bos.close();
         this.logger.fine("Removing " + (isServerDescriptor ?
             "server descriptor" : "extra-info descriptor")
             + " from missing list: digest=" + digest
diff --git a/src/CachedRelayDescriptorReader.java b/src/CachedRelayDescriptorReader.java
index d865b5c..113f738 100644
--- a/src/CachedRelayDescriptorReader.java
+++ b/src/CachedRelayDescriptorReader.java
@@ -22,10 +22,22 @@ public class CachedRelayDescriptorReader {
       }
       for (File f : cachedDescDir.listFiles()) {
         try {
+          // descriptors may contain non-ASCII chars; read as bytes to
+          // determine digests
+          BufferedInputStream bis =
+              new BufferedInputStream(new FileInputStream(f));
+          ByteArrayOutputStream baos = new ByteArrayOutputStream();
+          int len;
+          byte[] data = new byte[1024];
+          while ((len = bis.read(data, 0, 1024)) >= 0) {
+            baos.write(data, 0, len);
+          }
+          bis.close();
+          byte[] allData = baos.toByteArray();
           if (f.getName().equals("cached-consensus")) {
             BufferedReader br = new BufferedReader(new FileReader(f));
             if (aw != null) {
-              aw.store(br);
+              aw.store(allData);
             }
             br.close();
             br = new BufferedReader(new FileReader(f));
@@ -35,41 +47,44 @@ public class CachedRelayDescriptorReader {
             br.close();
           } else if (f.getName().startsWith("cached-descriptors") ||
               f.getName().startsWith("cached-extrainfo")) {
-            BufferedReader br = new BufferedReader(new FileReader(f));
-            String line = null;
-            StringBuilder sb = new StringBuilder();
-            while ((line = br.readLine()) != null || sb != null) {
-              if (line == null && sb.length() < 1) {
-                continue; // empty file?
+            String ascii = new String(allData, "US-ASCII");
+            int start = -1, sig = -1, end = -1;
+            String startToken =
+                f.getName().startsWith("cached-descriptors") ?
+                "router " : "extra-info ";
+            String sigToken = "\nrouter-signature\n";
+            String endToken = "\n-----END SIGNATURE-----\n";
+            while (end < ascii.length()) {
+              start = ascii.indexOf(startToken, end);
+              if (start < 0) {
+                break;
+              }
+              sig = ascii.indexOf(sigToken, start)
+                  + sigToken.length();
+              if (sig < 0) {
+                break;
               }
-              if (line == null || line.startsWith("router ") ||
-                  line.startsWith("extra-info ")) {
-                if (sb.length() > 0) {
-                  BufferedReader storeBr = new BufferedReader(
-                      new StringReader(sb.toString()));
-                  if (aw != null) {
-                    aw.store(storeBr);
-                  }
-                  storeBr.close();
-                  storeBr = new BufferedReader(
-                      new StringReader(sb.toString()));
-                  if (rdp != null) {
-                    rdp.parse(storeBr);
-                  }
-                  storeBr.close();
-                }
-                if (line == null) {
-                  sb = null;
-                  break;
-                } else {
-                  sb = new StringBuilder();
-                }
+              end = ascii.indexOf(endToken, sig)
+                  + endToken.length();
+              if (end < 0) {
+                break;
               }
-              if (!line.startsWith("@")) {
-                sb.append(line + "\n");
+              String desc = ascii.substring(start, end);
+              byte[] forDigest = new byte[sig - start];
+              System.arraycopy(allData, start, forDigest, 0, sig - start);
+              String digest = DigestUtils.shaHex(forDigest);
+              byte[] descBytes = new byte[end - start];
+              System.arraycopy(allData, start, descBytes, 0, end - start);
+              if (aw != null) {
+                aw.store(descBytes);
+              }
+              if (rdp != null) {
+                BufferedReader storeBr = new BufferedReader(
+                    new StringReader(desc));
+                rdp.parse(storeBr);
+                storeBr.close();
               }
             }
-            br.close();
             logger.info("Finished reading cacheddesc/ directory.");
           }
         } catch (IOException e) {
diff --git a/src/RelayDescriptorDownloader.java b/src/RelayDescriptorDownloader.java
index b0f270d..c9e5ceb 100644
--- a/src/RelayDescriptorDownloader.java
+++ b/src/RelayDescriptorDownloader.java
@@ -3,6 +3,7 @@ import java.net.*;
 import java.text.*;
 import java.util.*;
 import java.util.logging.*;
+import org.apache.commons.codec.digest.*;
 
 /**
  * Download the current consensus and relevant extra-info descriptors and
@@ -33,9 +34,12 @@ public class RelayDescriptorDownloader {
         urls.addAll(aw.getMissingDescriptorUrls());
       }
       urls.removeAll(downloaded);
+      SortedSet<String> sortedAuthorities =
+          new TreeSet<String>(remainingAuthorities);
       SortedSet<String> sortedUrls = new TreeSet<String>(urls);
-      while (!remainingAuthorities.isEmpty() && !sortedUrls.isEmpty()) {
-        String authority = remainingAuthorities.get(0);
+      SortedSet<String> retryUrls = new TreeSet<String>();
+      while (!sortedAuthorities.isEmpty() && !sortedUrls.isEmpty()) {
+        String authority = sortedAuthorities.first();
         String url = sortedUrls.first();
         try {
           URL u = new URL("http://" + authority + url);
@@ -49,35 +53,73 @@ public class RelayDescriptorDownloader {
           if (response == 200) {
             BufferedInputStream in = new BufferedInputStream(
                 huc.getInputStream());
-            StringBuilder sb = new StringBuilder();
+            ByteArrayOutputStream baos = new ByteArrayOutputStream();
             int len;
             byte[] data = new byte[1024];
             while ((len = in.read(data, 0, 1024)) >= 0) {
-              sb.append(new String(data, 0, len));
+              // we need to write the result to a byte array in order
+              // to get a sane digest; otherwise, descriptors with
+              // non-ASCII chars lead to different digests.
+              baos.write(data, 0, len);
             }
             in.close();
-            String result = sb.toString();
-            if (rdp != null) {
-              BufferedReader br = new BufferedReader(new StringReader(
-                  result));
-              rdp.parse(br);
-              br.close();
+            String digest = null;
+            byte[] allData = baos.toByteArray();
+            int beforeSig = new String(allData).indexOf(
+                "\nrouter-signature\n")
+                + "\nrouter-signature\n".length();
+            byte[] noSig = new byte[beforeSig];
+            System.arraycopy(allData, 0, noSig, 0, beforeSig);
+            digest = DigestUtils.shaHex(noSig);
+            // TODO UTF-8 may be wrong, but we don't care about the fields
+            // containing non-ASCII
+            String result = new String(allData, "UTF-8");
+            boolean verified = false;
+            if (url.contains("/tor/server/d/") ||
+                url.contains("/tor/extra/d/")) {
+              if (url.endsWith(digest)) {
+                verified = true;
+              } else {
+                logger.info("Downloaded descriptor digest (" + digest
+                    + " doesn't match what we asked for (" + url + ")! "
+                    + "Retrying.");
+                retryUrls.add(url);
+              }
+            } else {
+              verified = true;
+              // TODO verify downloaded consensuses and votes, too
             }
-            if (aw != null) {
-              BufferedReader br = new BufferedReader(new StringReader(
-                  result));
-              try {
-                aw.store(br);
-              } catch (Exception e) {
-                e.printStackTrace();
-                //TODO find better way to handle this
+            if (verified) {
+              if (rdp != null) {
+                BufferedReader br = new BufferedReader(new StringReader(
+                    result));
+                rdp.parse(br);
+                br.close();
+              }
+              if (aw != null) {
+                BufferedReader br = new BufferedReader(new StringReader(
+                    result));
+                try {
+                  aw.store(allData);
+                } catch (Exception e) {
+                  e.printStackTrace();
+                  //TODO find better way to handle this
+                }
+                br.close();
               }
-              br.close();
             }
+          } else {
+            retryUrls.add(url);
           }
           sortedUrls.remove(url);
+          if (sortedUrls.isEmpty()) {
+            sortedAuthorities.remove(authority);
+            sortedUrls.addAll(retryUrls);
+            retryUrls.clear();
+          }
         } catch (IOException e) {
           remainingAuthorities.remove(authority);
+          sortedAuthorities.remove(authority);
           if (!remainingAuthorities.isEmpty()) {
             logger.log(Level.INFO, "Failed downloading from "
                 + authority + "!", e);
-- 
1.6.5



More information about the tor-commits mailing list