[tor-bugs] #17799 [Core Tor/Tor]: Hash All PRNG output before use

Mon Apr 25 10:01:31 UTC 2016

#17799: Hash All PRNG output before use
-------------------------------+----------------------------------------
 Reporter:  teor               |          Owner:  nickm
     Type:  defect             |         Status:  needs_review
 Priority:  Medium             |      Milestone:  Tor: 0.2.9.x-final
Component:  Core Tor/Tor       |        Version:  Tor: unspecified
 Severity:  Normal             |     Resolution:
 Keywords:  TorCoreTeam201604  |  Actual Points:
Parent ID:                     |         Points:  small/medium-remaining
 Reviewer:  asn                |        Sponsor:
-------------------------------+----------------------------------------

Comment (by cypherpunks):

 Here are the rest of my comments.  I wrote them inline while reading so
 I'm going to put a diff here.  I hope that's not a problem.  Also, I'm
 embedding it, instead of attaching, so that people can quote, if they want
 to reply. You can comfortably read it in a diff viewer
 (emacs/vim/meld/whatever).

 Remarks:
 * I'm not familiar with Tor's code base, forgive me if I question
 "obvious" things.
 * This is a read-only review.
 * I didn't look too hard into the openssl hookup thingie.
 * Winblows code was ignored.

 {{{
 --- crypto_rng.c        1970-01-01 00:00:00.000000000 +0000
 +++ crypto_rng-review.c 1970-01-01 00:00:00.000000000 +0000
 @@ -60,7 +60,7 @@
  /* Use SHAKE256 */
  #define PRNG_SHAKE_BITS ( 256 )

 -/* Ideal rate to add or remove bytes bytes to avoid needless Keccak-f
 calls */
 +/* Ideal rate to add or remove bytes to avoid needless Keccak-f calls */
  #define PRNG_SHAKE_RATE ( KECCAK_RATE(PRNG_SHAKE_BITS) )

  /* How many Keccak-f calls do we do per buffer fill? Designed to keep the
 @@ -113,7 +113,7 @@ typedef struct shake_prng_t {
    /**
     * How many bytes are left for the user in buf?
     * Invariant: we never return to user code with remaining == 0.
 -   * Invariant: remaining <= sizeof(sh.buf)
 +   * Invariant: remaining <= sizeof(shake_output.buf)
     */
    uint16_t remaining;
    /**
 @@ -140,6 +140,8 @@ typedef struct shake_prng_t {
      uint8_t buf[PRNG_SHAKE_RATE * PRNG_SHAKE_COUNT - PRNG_CARRYFORWARD];
    } shake_output;
  } shake_prng_t;
 +// cpunk: Why is it important for the integer members of shake_prng_t to
 be
 +// fixed-size?

  /**
   * After how many refills of the buffer should we reseed from the OS
 prng?
 @@ -167,10 +169,13 @@ typedef struct shake_prng_t {
   */
  #define PRNG_LIBC_BYTES 64

 -/** This mutex protects the field the_prng, and all members of the_prng.
 */
 -static tor_mutex_t prng_mutex;
 -/** This field is the singleton prng allocated for Tor. */
 +/** This points to the singleton shake_prng_t allocated for Tor. */
 +// -> cpunk: Bitrotted: it's a pointer and to a structure, not field.
  static shake_prng_t *the_prng = NULL;
 +/** This mutex protects all members of the_prng. */
 +// -> cpunk: It clearly isn't being used to protect the pointer itself;
 also,
 +//    moved below the declaration of the_prng, since the comment
 references it.
 +static tor_mutex_t prng_mutex;

  static void shake_prng_reseed(shake_prng_t *prng);
  static void shake_prng_refill(shake_prng_t *prng,
 @@ -194,6 +199,7 @@ static void shake_prng_test_invariants(c
  /**
   * Allocate and return memory for use in a shake PRNG. Set *<b>sz_out</b>
 to
   * the actual number of bytes allocated.
 + * -> cpunk: What sz_out?
   */
  static void *
  new_prng_page(void)
 @@ -235,6 +241,7 @@ free_prng_page(void *page)
    memwipe(page, 0, sz);
  #ifdef HAVE_MLOCK
    munlock(page, sz);
 +  // cpunk: munmap will do this, just saying.
  #endif
    munmap(page, sz);
  }
 @@ -296,6 +303,8 @@ free_prng_page(void *page)
  void
  crypto_init_shake_prng(void)
  {
 +  tor_assert(the_prng == NULL);
 +
    /* Initialize the mutex */
    tor_mutex_init_nonrecursive(&prng_mutex);

 @@ -304,10 +313,10 @@ crypto_init_shake_prng(void)
    /* We're trying to be about one page. */
    tor_assert(sizeof(prng->shake_output) <= 4096);

 -  /* Technically, the C compiler is allowed to pad prng->sh for
 alignment.
 -   * It shouldn't; nobody reasonable would define alignof(char) to
 something
 -   * other than 1.  But let's make sure that the structure is as big as
 we
 -   * want.
 +  /* Technically, the C compiler is allowed to pad prng->shake_output for
 +   * alignment.  It shouldn't; nobody reasonable would define
 alignof(char) to
 +   * something other than 1.  But let's make sure that the structure is
 as big
 +   * as we want.
     */
    tor_assert(sizeof(prng->shake_output) % PRNG_SHAKE_RATE == 0);

 @@ -315,8 +324,17 @@ crypto_init_shake_prng(void)
    shake_prng_reseed(prng);

    shake_prng_test_invariants(prng);
 +  // cpunk: shake_prng_reseed() already checked the invariants before
 +  // returning.  Explicitly stating post- and pre-conditions would help
 in
 +  // reducing redundance (besides being a basic good thing everyone
 should do).

    /* THEN put it in the static variable. */
 +  // -> cpunk: What's so remarkable about THEN setting it?  The functions
 +  //    called above don't look at the_prng; also, this part is obviously
 not
 +  //    thread-safe, so the API must require that this function be called
 and
 +  //    completed _before_ any possible concurrent access, and so it's
 +  //    completely irrelevant at which point in the function you set the
 +  //    variable.
    the_prng = prng;

  #ifdef REPLACE_OPENSSL_RAND
 @@ -340,10 +358,16 @@ shake_prng_reseed(shake_prng_t *prng)
     *
     * (We don't need to worry about threads seeing an unseeded PRNG, since
     * we don't expose it to them till after it's seeded at least once.)
 +   * -> cpunk: I say the reason why threads don't see an unseeded PRNG is
 that
 +   *    _init() has to be called before any other function and before any
 +   *    potential concurrent access, and it will call _reseed(), that's
 it.  In
 +   *    any case, this explanation is probably better in _init() or the
 header
 +   *    file as API documentation.
     */
    if (crypto_strongest_rand_raw(buf, PRNG_OS_BYTES)) {
      log_err(LD_CRYPTO, "Couldn't get os entropy to reseed shake prng.
 Dying.");
      tor_assert(0);
 +    // cpunk: What happens here on "coverage" builds?
    }

    tor_mutex_acquire(&prng_mutex);
 @@ -354,6 +378,11 @@ shake_prng_reseed(shake_prng_t *prng)
    prng->reseeding = 0;
    prng->last_reseeded = time(NULL);
    /* We reseeded from the OS, so now we can forget any old pid we were
 in. */
 +  // -> cpunk: Why?  There's already a dedicated api for doing this:
 +  //    _postfork().  Doing it here prevents _test_invariants() from
 failing
 +  //    (all other invariants holding) if after a fork _reseed() is
 called
 +  //    instead of _postfork(), and that's bad: _postfork() does more
 than
 +  //    _reseed(), for example it also relocks the mmapped region!
  #ifdef HAVE_PID
    prng->pid = getpid();
  #endif
 @@ -364,12 +393,16 @@ shake_prng_reseed(shake_prng_t *prng)
  }

 +// cpunk: Maybe note why this is done: to avoid closing the loop and
 causing
 +// infinite recursion.
  #ifdef REPLACE_OPENSSL_RAND
  #define openssl_RAND_bytes(b,n) RAND_OpenSSL()->bytes((b), (n))
  #define openssl_RAND_add(b,n,e) RAND_OpenSSL()->add((b), (n), (e))
  #else
  #define openssl_RAND_bytes(b,n) RAND_bytes((b),(n))
  #define openssl_RAND_add(b,n,e) do {} while (0)
 +// cpunk: _add() is not used in this case.  Better not to define it, let
 the
 +// compilation fail if it's used when it shouldn't.
  #endif

  /**
 @@ -391,6 +424,7 @@ shake_prng_refill(shake_prng_t *prng, co
  {
    /* Structure for our fixed-length inputs. We leave any unused fields
 set
     * to 0, since that's easier than adding them conditionally. */
 +  // -> cpunk: What do you mean?  You do add one field conditionally.
    struct {
      uint8_t from_ourself[PRNG_CARRYFORWARD];
      uint8_t from_openssl[PRNG_OPENSSL_BYTES];
 @@ -402,6 +436,8 @@ shake_prng_refill(shake_prng_t *prng, co
    const char tweak[] = "shake prng update";

    memset(&input, 0, sizeof(input));
 +  // cpunk: You could do "... input = { {0}, {0}, {0} }" instead,
 especially if
 +  // you're really not going to conditionally add fields.

    ++prng->refill_count;

 @@ -452,7 +488,8 @@ shake_prng_getbytes_raw(shake_prng_t *pr
    /* While we still want any bytes... */
    while (n) {
      /* How many bytes should we extract this time through the loop? */
 -    size_t sz = n > prng->remaining ? prng->remaining : n;
 +    // cpunk: Much clearer:
 +    size_t sz = MIN(n, prng->remaining);

      /* Extract the bytes and clear them from the buffer as we do
       * (for backtracking resistance) */
 @@ -486,6 +523,21 @@ shake_prng_getbytes_raw(shake_prng_t *pr
   */
  static void
  shake_prng_getbytes_large(shake_prng_t *prng, uint8_t *out, size_t n)
 +// cpunk: So, this function has a different behaviour, compared to
 _raw(), in
 +// that it pulls in less external entropy (openssl, libc) for the same
 amount
 +// of bytes.  Is this significant?  Does it change its cryptographic
 profile?
 +// Compare the following two:
 +//
 +//   1)
 +//     crypto_rand(buf, desired_size)
 +//
 +//   2)
 +//     not_large_size = 128
 +//     for (i = 0; i < desired_size/not_large_size; ++i)
 +//       crypto_rand(buf + i*not_large_size, not_large_size)
 +//     crypto_rand(buf + i*not_large_size, desired_size -
 i*not_large_size)
 +//
 +// Where desired_size is large enough to cause refill(s).
  {
    keccak_state shake;
    uint8_t buf[PRNG_CARRYFORWARD];
 @@ -562,6 +614,8 @@ crypto_rand_unmocked(char *to, size_t n)
   * MUST be called before using the PRNG again.  Most likely, Tor will
   * detect that you've messed up and crash.  But if you're unlucky, the
 PRNG
   * output will be (unsecurely!) repeated.
 + * -> cpunk: Oh, I get it, the same state in both processes results in
 the same
 + *    output.  The output is /duplicated/ better than repeated.
   *
   * Callers MUST NOT hold the mutex.
   */
 @@ -571,6 +625,15 @@ crypto_shake_prng_postfork(void)
    tor_mutex_acquire(&prng_mutex);
    shake_prng_t *prng = the_prng;
    /* Prevent anything else touching the PRNG while this is happening. */
 +  // -> cpunk: This is nonsensical.  What's the point of this?  It does
 not
 +  //    prevent anything from touching the_prng as there are plenty of
 unlocked
 +  //    accesses.  But even if you go and fix all of those, what kind of
 API is
 +  //    this?  You can grab the mutex, no problem, but oh, btw, grabbing
 the
 +  //    mutex has no value because you might simply dereference
 +  //    null at any time... Hint: null dereferences can be redeemed for
 ponies.
 +  //    -> I think I see why you did this.  It's because
 shake_prng_reseed()
 +  //       tries to grab the lock itself, right?  Still, this is
 wretchedly
 +  //       broken.
    the_prng = NULL;
    tor_mutex_release(&prng_mutex);

 @@ -602,12 +665,10 @@ crypto_shake_prng_check_reseed(int force
    const time_t now = time(NULL);

    tor_mutex_acquire(&prng_mutex);
 -  int should_reseed = 0;
 -  if (! the_prng->reseeding) {
 -    should_reseed = force ||
 -      (the_prng->refill_count > PRNG_RESEED_AFTER) ||
 -      (the_prng->last_reseeded < now - PRNG_RESEED_AFTER_TIME);
 -  }
 +  // cpunk: This is equivalent and IMO clearer:
 +  int should_reseed = !the_prng->reseeding && (force
 +     || (the_prng->refill_count > PRNG_RESEED_AFTER)
 +     || (the_prng->last_reseeded < now - PRNG_RESEED_AFTER_TIME));
    if (should_reseed) {
      /* We set 'reseeding' here, and check it above, so that we don't
 launch two
       * simultaneous reseeds. */
 @@ -618,10 +679,10 @@ crypto_shake_prng_check_reseed(int force
     */
    tor_mutex_release(&prng_mutex);

 -  if (!should_reseed)
 -    return;
 -
 -  shake_prng_reseed(the_prng);
 +  // cpunk: Single points of exit are nice, especially when they are as
 +  // readable as the alternative:
 +  if (should_reseed)
 +    shake_prng_reseed(the_prng);
  }

  /**
 @@ -630,11 +691,16 @@ crypto_shake_prng_check_reseed(int force
  void
  crypto_teardown_shake_prng(void)
  {
 +  // cpunk: I would tor_assert(the_prng != NULL) here, but maybe you
 would
 +  // if (!the_prng) return;
    tor_mutex_acquire(&prng_mutex);
    shake_prng_t *prng = the_prng;
    the_prng = NULL;
    shake_prng_test_invariants(prng);
    tor_mutex_release(&prng_mutex);
 +  // cpunk: As with the init(), it should be pretty obvious that this
 function
 +  // cannot run concurrently with others in the api, locking or not; so
 maybe
 +  // document that?

    free_prng_page(prng);
  }
 @@ -693,6 +759,7 @@ static void
  ossl_shake_cleanup(void)
  {
    crypto_teardown_shake_prng(); /* ???? */
 +                                // -> cpunk: What is it?
  }

  static void
 @@ -703,8 +770,11 @@ ossl_shake_add(const void *buf, int num,

      /* And feed a little back to openssl. */
      uint8_t b[16];
 -    shake_prng_getbytes(the_prng, b, 16);
 -    openssl_RAND_add(b, 16, entropy > 16 ? 16.0 : entropy);
 +    // cpunk: Using sizeof(b) and MIN() it's clearer, IMO.  (Whatever
 size you
 +    // give b is going to be so small that conversion/comparison to
 double must
 +    // be safe, or we would have much bigger problems.)
 +    shake_prng_getbytes(the_prng, b, sizeof(b));
 +    openssl_RAND_add(b, sizeof(b), MIN(entropy, sizeof(b)));
    } else {
      /* Openssl thinks it's being clever; it just told us what time it is
       * or something like that.  This isn't worth stopping for.
 @@ -734,6 +804,25 @@ static void
  usurp_openssl_rand_method(void)
  {
    RAND_set_rand_method(&ossl_shake_method);
 +  // cpunk: From
 https://www.openssl.org/docs/manmaster/crypto/RAND_set_rand_method.html
 +  //
 +  //   RAND_set_default_method() makes meth the method for PRNG use. NB:
 This
 +  //   is true only whilst no ENGINE has been set as a default for RAND,
 so
 +  //   this function is no longer recommended.
 +  //
 +  // And later (RAND_METHOD is a typedef for rand_meth_st):
 +  //
 +  //   RAND_METHOD implementations are grouped together with other
 algorithmic
 +  //   APIs (eg. RSA_METHOD, EVP_CIPHER, etc) in ENGINE modules.  If a
 default
 +  //   ENGINE is specified for RAND functionality using an ENGINE API
 function,
 +  //   that will override any RAND defaults set using the RAND API (ie.
 +  //   RAND_set_rand_method()). For this reason, the ENGINE API is the
 +  //   recommended way to control default implementations for use in RAND
 and
 +  //   other cryptographic algorithms.
 +  //
 +  // IIUC, openssl seems to be saying this should be implemented as an
 engine
 +  // instead; or maybe it should be compiled with OPENSSL_NO_ENGINE to
 make
 +  // sure?  Just a thought.
  }
  #endif

 }}}

--
Ticket URL: <https://trac.torproject.org/projects/tor/ticket/17799#comment:35>
Tor Bug Tracker & Wiki <https://trac.torproject.org/>
The Tor Project: anonymity online