[tor-commits] [metrics-web/master] Cache JSON-formatted graph data for performance reasons.

karsten at torproject.org karsten at torproject.org
Tue Jul 31 10:35:10 UTC 2012


commit c433f5c77e0168aa2c8cbde1cea1de37327c88fc
Author: Karsten Loesing <karsten.loesing at gmx.net>
Date:   Tue Jul 31 12:32:40 2012 +0200

    Cache JSON-formatted graph data for performance reasons.
    
    Previously, we were only caching the .csv contents generated by R, but we
    were converting the .csv to .json on every request.  It turns out we can
    save over a second per request for large .csv files by also caching the
    .json contents.  Let's do it.
---
 src/org/torproject/ernie/web/CsvServlet.java       |   10 ++-
 src/org/torproject/ernie/web/GraphDataServlet.java |   62 +++++++++++-----
 src/org/torproject/ernie/web/RObject.java          |   11 ++-
 src/org/torproject/ernie/web/RObjectGenerator.java |   74 +++++---------------
 4 files changed, 76 insertions(+), 81 deletions(-)

diff --git a/src/org/torproject/ernie/web/CsvServlet.java b/src/org/torproject/ernie/web/CsvServlet.java
index 5e170e3..6fdcaac 100644
--- a/src/org/torproject/ernie/web/CsvServlet.java
+++ b/src/org/torproject/ernie/web/CsvServlet.java
@@ -2,7 +2,10 @@
  * See LICENSE for licensing information */
 package org.torproject.ernie.web;
 
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.util.SortedSet;
 import java.util.logging.Logger;
 
@@ -75,16 +78,17 @@ public class CsvServlet extends HttpServlet {
 
     /* Request CSV file from R object generator, which asks Rserve to
      * generate it. */
-    String csvFileContent = this.rObjectGenerator.generateCsv(
+    RObject csvFile = this.rObjectGenerator.generateCsv(
         requestedCsvFile, true);
 
-    /* Make sure that we have a graph to return. */
-    if (csvFileContent == null) {
+    /* Make sure that we have a .csv file to return. */
+    if (csvFile == null) {
       response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
       return;
     }
 
     /* Write CSV file to response. */
+    String csvFileContent = new String(csvFile.getBytes());
     response.setContentType("text/csv");
     response.setHeader("Content-Length", String.valueOf(
         csvFileContent.length()));
diff --git a/src/org/torproject/ernie/web/GraphDataServlet.java b/src/org/torproject/ernie/web/GraphDataServlet.java
index 209aefe..5b79c1b 100644
--- a/src/org/torproject/ernie/web/GraphDataServlet.java
+++ b/src/org/torproject/ernie/web/GraphDataServlet.java
@@ -7,6 +7,7 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.util.HashMap;
 import java.util.Map;
 import java.util.SortedMap;
 import java.util.SortedSet;
@@ -112,16 +113,49 @@ public class GraphDataServlet extends HttpServlet {
 
     /* Request CSV file from R object generator, which may ask Rserve to
      * generate it. */
-    String csvFileContent = this.rObjectGenerator.generateCsv(
-        requestedCsvFile, true);
+    RObject csvFile = this.rObjectGenerator.generateCsv(requestedCsvFile,
+        true);
 
     /* Make sure that we have a CSV to convert into JSON. */
-    if (csvFileContent == null) {
+    if (csvFile == null) {
       response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
       return;
     }
 
-    /* Convert CSV to JSON format. */
+    /* Look up if we converted this CSV to JSON format before.  If not,
+     * convert it now. */
+    String jsonString;
+    if (!this.lastConvertedCsvFile.containsKey(requestedJsonFile) ||
+        this.lastConvertedCsvFile.get(requestedJsonFile) <
+        csvFile.getLastModified()) {
+      jsonString = this.convertCsvToJson(requestedJsonFile,
+          new String(csvFile.getBytes()));
+      this.lastConvertedCsvFile.put(requestedJsonFile,
+          csvFile.getLastModified());
+      this.convertedCsvFiles.put(requestedJsonFile, jsonString);
+    } else {
+      jsonString = this.convertedCsvFiles.get(requestedJsonFile);
+    }
+
+    /* Make sure we have a JSON string to return. */
+    if (jsonString == null) {
+      response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+      return;
+    }
+
+    /* Write JSON string to response. */
+    response.setHeader("Access-Control-Allow-Origin", "*");
+    response.setContentType("application/json");
+    response.setCharacterEncoding("utf-8");
+    response.getWriter().print(jsonString);
+  }
+
+  private Map<String, Long> lastConvertedCsvFile =
+      new HashMap<String, Long>();
+  private Map<String, String> convertedCsvFiles =
+      new HashMap<String, String>();
+  private String convertCsvToJson(String requestedJsonFile,
+      String csvFileContent) {
     String jsonString = null;
     try {
       BufferedReader br = new BufferedReader(new StringReader(
@@ -148,17 +182,14 @@ public class GraphDataServlet extends HttpServlet {
         }
       }
       if (columns == null || dateCol < 0 || valueCols.isEmpty()) {
-        response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
-        return;
+        return null;
       }
       SortedMap<String, SortedSet<String>> graphs =
           new TreeMap<String, SortedSet<String>>();
       while ((line = br.readLine()) != null) {
         String[] elements = line.split(",");
         if (elements.length != columns.length) {
-          response.sendError(
-              HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
-          return;
+          return null;
         }
         String date = elements[dateCol];
         String variable = "";
@@ -225,18 +256,11 @@ public class GraphDataServlet extends HttpServlet {
       br.close();
       jsonString = "[" + sb.toString().substring(1) + "\n]";
     } catch (IOException e) {
-      response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
-      return;
+      return null;
     } catch (ParseException e) {
-      response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
-      return;
+      return null;
     }
-
-    /* Write JSON file to response. */
-    response.setHeader("Access-Control-Allow-Origin", "*");
-    response.setContentType("application/json");
-    response.setCharacterEncoding("utf-8");
-    response.getWriter().print(jsonString);
+    return jsonString;
   }
 }
 
diff --git a/src/org/torproject/ernie/web/RObject.java b/src/org/torproject/ernie/web/RObject.java
index 4ee5cdc..25dac52 100644
--- a/src/org/torproject/ernie/web/RObject.java
+++ b/src/org/torproject/ernie/web/RObject.java
@@ -5,14 +5,19 @@ package org.torproject.ernie.web;
 public class RObject {
   private byte[] bytes;
   private String fileName;
-  public RObject(byte[] bytes, String fileName) {
+  private long lastModified;
+  public RObject(byte[] bytes, String fileName, long lastModified) {
     this.bytes = bytes;
     this.fileName = fileName;
+    this.lastModified = lastModified;
   }
   public String getFileName() {
-    return fileName;
+    return this.fileName;
   }
   public byte[] getBytes() {
-    return bytes;
+    return this.bytes;
+  }
+  public long getLastModified() {
+    return this.lastModified;
   }
 }
diff --git a/src/org/torproject/ernie/web/RObjectGenerator.java b/src/org/torproject/ernie/web/RObjectGenerator.java
index 1b56f31..aea9389 100644
--- a/src/org/torproject/ernie/web/RObjectGenerator.java
+++ b/src/org/torproject/ernie/web/RObjectGenerator.java
@@ -173,69 +173,27 @@ public class RObjectGenerator implements ServletContextListener {
     String imageFilename = imageFilenameBuilder.toString();
     rQueryBuilder.append("path = '%s')");
     String rQuery = rQueryBuilder.toString();
-    byte[] graphBytes = this.generateGraph(rQuery, imageFilename,
-        checkCache);
-    if (graphBytes != null) {
-      return new RObject(graphBytes, imageFilename);
-    } else {
-      return null;
-    }
-  }
-
-  /* Generate a graph using the given R query that has a placeholder for
-   * the absolute path to the image to be created. */
-  private byte[] generateGraph(String rQuery, String imageFilename,
-      boolean checkCache) {
-
-    /* See if we need to generate this graph. */
     File imageFile = new File(this.cachedGraphsDirectory + "/"
         + imageFilename);
-    byte[] imageBytes = this.generateRObject(rQuery, imageFile,
+    return this.generateRObject(rQuery, imageFile, imageFilename,
         checkCache);
-
-    /* Return the graph bytes. */
-    return imageBytes;
   }
 
   public SortedSet<String> getAvailableCsvFiles() {
     return this.availableCsvFiles;
   }
 
-  public String generateCsv(String requestedCsvFile, boolean checkCache) {
+  public RObject generateCsv(String requestedCsvFile,
+      boolean checkCache) {
     /* Prepare filename and R query string. */
     String rQuery = "export_" + requestedCsvFile.replaceAll("-", "_")
         + "(path = '%s')";
     String csvFilename = requestedCsvFile + ".csv";
-    return this.generateCsv(rQuery, csvFilename, checkCache);
-  }
-
-  /* Generate a comma-separated value file using the given R query that
-   * has a placeholder for the absolute path to the file to be created. */
-  private String generateCsv(String rQuery, String csvFilename,
-      boolean checkCache) {
 
     /* See if we need to generate this .csv file. */
     File csvFile = new File(this.cachedGraphsDirectory + "/"
         + csvFilename);
-    byte[] csvBytes = this.generateRObject(rQuery, csvFile, checkCache);
-
-    /* Read the text file from disk and write it to a string. */
-    String result = null;
-    try {
-      StringBuilder sb = new StringBuilder();
-      BufferedReader br = new BufferedReader(new InputStreamReader(
-          new ByteArrayInputStream(csvBytes)));
-      String line = null;
-      while ((line = br.readLine()) != null) {
-        sb.append(line + "\n");
-      }
-      result = sb.toString();
-    } catch (IOException e) {
-      return null;
-    }
-
-    /* Return the csv file content. */
-    return result;
+    return this.generateRObject(rQuery, csvFile, csvFilename, checkCache);
   }
 
   public List<Map<String, String>> generateTable(String tableName,
@@ -295,7 +253,7 @@ public class RObjectGenerator implements ServletContextListener {
     File tableFile = new File(this.cachedGraphsDirectory + "/"
         + tableFilename);
     byte[] tableBytes = this.generateRObject(rQuery, tableFile,
-        checkCache);
+        tableFilename, checkCache).getBytes();
 
     /* Write the table content to a map. */
     List<Map<String, String>> result = null;
@@ -329,15 +287,15 @@ public class RObjectGenerator implements ServletContextListener {
 
   /* Generate an R object in a separate worker thread, or wait for an
    * already running worker thread to finish and get its result. */
-  private byte[] generateRObject(String rQuery, File rObjectFile,
-      boolean checkCache) {
+  private RObject generateRObject(String rQuery, File rObjectFile,
+      String fileName, boolean checkCache) {
     RObjectGeneratorWorker worker = null;
     synchronized (this.rObjectGeneratorThreads) {
       if (this.rObjectGeneratorThreads.containsKey(rQuery)) {
         worker = this.rObjectGeneratorThreads.get(rQuery);
       } else {
         worker = new RObjectGeneratorWorker(rQuery, rObjectFile,
-            checkCache);
+            fileName, checkCache);
         this.rObjectGeneratorThreads.put(rQuery, worker);
         worker.start();
       }
@@ -352,7 +310,7 @@ public class RObjectGenerator implements ServletContextListener {
         this.rObjectGeneratorThreads.remove(rQuery);
       }
     }
-    return worker.getRObjectBytes();
+    return worker.getRObject();
   }
 
   private Map<String, RObjectGeneratorWorker> rObjectGeneratorThreads =
@@ -362,13 +320,15 @@ public class RObjectGenerator implements ServletContextListener {
 
     private String rQuery;
     private File rObjectFile;
+    private String fileName;
     private boolean checkCache;
-    private byte[] result = null;
+    private RObject result = null;
 
     public RObjectGeneratorWorker(String rQuery, File rObjectFile,
-        boolean checkCache) {
+        String fileName, boolean checkCache) {
       this.rQuery = rQuery;
       this.rObjectFile = rObjectFile;
+      this.fileName = fileName;
       this.checkCache = checkCache;
     }
 
@@ -401,6 +361,7 @@ public class RObjectGenerator implements ServletContextListener {
       }
 
       /* Read the R object from disk and write it to a byte array. */
+      long lastModified = this.rObjectFile.lastModified();
       try {
         BufferedInputStream bis = new BufferedInputStream(
             new FileInputStream(this.rObjectFile), 1024);
@@ -411,14 +372,15 @@ public class RObjectGenerator implements ServletContextListener {
           baos.write(buffer, 0, length);
         }
         bis.close();
-        this.result = baos.toByteArray();
+        this.result = new RObject(baos.toByteArray(), this.fileName,
+            lastModified);
       } catch (IOException e) {
         return;
       }
     }
 
-    public byte[] getRObjectBytes() {
-      return result;
+    public RObject getRObject() {
+      return this.result;
     }
   }
 }



More information about the tor-commits mailing list