[tor-commits] [tor/master] Use openssl's counter mode implementation when we have 1.0.0 or later

nickm at torproject.org nickm at torproject.org
Fri Nov 25 15:41:44 UTC 2011


commit 9814019a54a9a62e27b9e95eec5c9f0e5519fc57
Author: Nick Mathewson <nickm at torproject.org>
Date:   Sun Nov 20 21:43:14 2011 -0500

    Use openssl's counter mode implementation when we have 1.0.0 or later
    
    This shaves about 7% off our per-cell AES crypto time for me; the
    effect for accelerated AES crypto should be even more, since the AES
    calculation itself will make an even smaller portion of the
    counter-mode performance.
    
    (We don't want to do this for pre-1.0.0 OpenSSL, since our AES_CTR
    implementation was actually faster than OpenSSL's there, by about
    10%.)
    
    Fixes issue #4526.
---
 changes/aes_hackery |    8 +++--
 src/common/aes.c    |   88 ++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 72 insertions(+), 24 deletions(-)

diff --git a/changes/aes_hackery b/changes/aes_hackery
index 82aae23..b22cefe 100644
--- a/changes/aes_hackery
+++ b/changes/aes_hackery
@@ -4,11 +4,13 @@
       relatively few servers should still be on any version of OpenSSL
       that doesn't have good optimized assembly AES.
 
-  o Major features:
+  o Major features (AES performance):
     - Use OpenSSL's EVP interface for AES encryption, so that all
       AES operations can use hardware acceleration (if present).
       Resolves issue #4442.
     - But only use the EVP interface when AES acceleration is enabled,
       to avoid a performance regression.  Resolves issue #4525.
-
-
+    - When using OpenSSL 1.0.0 or later, use OpenSSL's counter mode
+      implementation; it makes AES_CTR about 7% faster than our old one
+      (which was about 10% faster than the one OpenSSL used to provide).
+      Resolves issue #4526.
diff --git a/src/common/aes.c b/src/common/aes.c
index 53c80a3..cec6899 100644
--- a/src/common/aes.c
+++ b/src/common/aes.c
@@ -17,6 +17,11 @@
 #include <openssl/aes.h>
 #include <openssl/evp.h>
 #include <openssl/engine.h>
+#if OPENSSL_VERSION_NUMBER >= 0x10000000L
+/* See comments about which counter mode implementation to use below. */
+#include <openssl/modes.h>
+#define USE_OPENSSL_CTR
+#endif
 #include "compat.h"
 #include "aes.h"
 #include "util.h"
@@ -35,7 +40,13 @@
  * faster than indirecting through the EVP layer.
  */
 
-/* Include OpenSSL headers as needed. */
+/* We have 2 strategies for counter mode: use our own, or use OpenSSL's.
+ *
+ * Here we have a counter mode that's faster than the one shipping with
+ * OpenSSL pre-1.0 (by about 10%!).  But OpenSSL 1.0.0 added a counter mode
+ * implementation faster than the one here (by about 7%).  So we pick which
+ * one to use based on the OpenSSL version above.
+ */
 
 /*======================================================================*/
 /* Interface to AES code, and counter implementation */
@@ -48,7 +59,7 @@ struct aes_cnt_cipher {
     AES_KEY aes;
   } key;
 
-#if !defined(WORDS_BIGENDIAN)
+#if !defined(WORDS_BIGENDIAN) && !defined(USE_OPENSSL_CTR)
 #define USING_COUNTER_VARS
   /** These four values, together, implement a 128-bit counter, with
    * counter0 as the low-order word and counter3 as the high-order word. */
@@ -70,7 +81,11 @@ struct aes_cnt_cipher {
   /** The encrypted value of ctr_buf. */
   uint8_t buf[16];
   /** Our current stream position within buf. */
+#ifdef USE_OPENSSL_CTR
+  unsigned int pos;
+#else
   uint8_t pos;
+#endif
 
   /** True iff we're using the evp implementation of this cipher. */
   uint8_t using_evp;
@@ -110,6 +125,7 @@ evaluate_evp_for_aes(int force_val)
   return 0;
 }
 
+#ifndef USE_OPENSSL_CTR
 #if !defined(USING_COUNTER_VARS)
 #define COUNTER(c, n) ((c)->ctr_buf.buf32[3-(n)])
 #else
@@ -138,6 +154,7 @@ _aes_fill_buf(aes_cnt_cipher_t *cipher)
     AES_encrypt(cipher->ctr_buf.buf, cipher->buf, &cipher->key.aes);
   }
 }
+#endif
 
 /**
  * Return a newly allocated counter-mode AES128 cipher implementation.
@@ -171,6 +188,7 @@ aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits)
     AES_set_encrypt_key((const unsigned char *)key, key_bits, &cipher->key.aes);
     cipher->using_evp = 0;
   }
+
 #ifdef USING_COUNTER_VARS
   cipher->counter0 = 0;
   cipher->counter1 = 0;
@@ -181,7 +199,12 @@ aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits)
   memset(cipher->ctr_buf.buf, 0, sizeof(cipher->ctr_buf.buf));
 
   cipher->pos = 0;
+
+#ifdef USE_OPENSSL_CTR
+  memset(cipher->buf, 0, sizeof(cipher->buf));
+#else
   _aes_fill_buf(cipher);
+#endif
 }
 
 /** Release storage held by <b>cipher</b>
@@ -206,6 +229,18 @@ aes_free_cipher(aes_cnt_cipher_t *cipher)
 #define UPDATE_CTR_BUF(c, n)
 #endif
 
+#ifdef USE_OPENSSL_CTR
+/* Helper function to use EVP with openssl's counter-mode wrapper. */
+static void evp_block128_fn(const uint8_t in[16],
+                            uint8_t out[16],
+                            const void *key)
+{
+  EVP_CIPHER_CTX *ctx = (void*)key;
+  int inl=16, outl=16;
+  EVP_EncryptUpdate(ctx, out, &outl, in, inl);
+}
+#endif
+
 /** Encrypt <b>len</b> bytes from <b>input</b>, storing the result in
  * <b>output</b>.  Uses the key in <b>cipher</b>, and advances the counter
  * by <b>len</b> bytes as it encrypts.
@@ -214,20 +249,29 @@ void
 aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
           char *output)
 {
-  /* This function alone is up to 5% of our runtime in some profiles; anything
-   * we could do to make it faster would be great.
-   *
-   * Experimenting suggests that unrolling the inner loop into a switch
-   * statement doesn't help.  What does seem to help is making the input and
-   * output buffers word aligned, and never crypting anything besides an
-   * integer number of words at a time -- it shaves maybe 4-5% of the per-byte
-   * encryption time measured by bench_aes. We can't do that with the current
-   * Tor protocol, though: Tor really likes to crypt things in 509-byte
-   * chunks.
-   *
-   * If we were really ambitous, we'd force len to be a multiple of the block
-   * size, and shave maybe another 4-5% off.
-   */
+#ifdef USE_OPENSSL_CTR
+  if (cipher->using_evp) {
+    /* In openssl 1.0.0, there's an if'd out EVP_aes_128_ctr in evp.h.  If
+     * it weren't disabled, it might be better just to use that.
+     */
+    CRYPTO_ctr128_encrypt((const unsigned char *)input,
+                          (unsigned char *)output,
+                          len,
+                          &cipher->key.evp,
+                          cipher->ctr_buf.buf,
+                          cipher->buf,
+                          &cipher->pos,
+                          evp_block128_fn);
+  } else {
+    AES_ctr128_encrypt((const unsigned char *)input,
+                       (unsigned char *)output,
+                       len,
+                       &cipher->key.aes,
+                       cipher->ctr_buf.buf,
+                       cipher->buf,
+                       &cipher->pos);
+  }
+#else
   int c = cipher->pos;
   if (PREDICT_UNLIKELY(!len)) return;
 
@@ -250,6 +294,7 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
     UPDATE_CTR_BUF(cipher, 0);
     _aes_fill_buf(cipher);
   }
+#endif
 }
 
 /** Encrypt <b>len</b> bytes from <b>input</b>, storing the results in place.
@@ -259,11 +304,9 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
 void
 aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len)
 {
-
-  /* XXXX This function is up to 5% of our runtime in some profiles;
-   * we should look into unrolling some of the loops; taking advantage
-   * of alignment, using a bigger buffer, and so on. Not till after 0.1.2.x,
-   * though. */
+#ifdef USE_OPENSSL_CTR
+  aes_crypt(cipher, data, len, data);
+#else
   int c = cipher->pos;
   if (PREDICT_UNLIKELY(!len)) return;
 
@@ -286,6 +329,7 @@ aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len)
     UPDATE_CTR_BUF(cipher, 0);
     _aes_fill_buf(cipher);
   }
+#endif
 }
 
 /** Reset the 128-bit counter of <b>cipher</b> to the 16-bit big-endian value
@@ -302,6 +346,8 @@ aes_set_iv(aes_cnt_cipher_t *cipher, const char *iv)
   cipher->pos = 0;
   memcpy(cipher->ctr_buf.buf, iv, 16);
 
+#ifndef USE_OPENSSL_CTR
   _aes_fill_buf(cipher);
+#endif
 }
 





More information about the tor-commits mailing list