[tor-commits] [tor/maint-0.3.1] Add an address-set backend using a bloom filter.

nickm at torproject.org nickm at torproject.org
Fri Feb 16 14:56:18 UTC 2018


commit 46bd2aed915f17d520f9ff237262d1510fe25e12
Author: Nick Mathewson <nickm at torproject.org>
Date:   Wed Feb 7 09:49:35 2018 -0500

    Add an address-set backend using a bloom filter.
    
    We're going to need this to make our anti-DoS code (see 24902) more
    robust.
---
 src/common/address.c     |  22 +++++++++
 src/common/address.h     |   2 +
 src/common/address_set.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++
 src/common/address_set.h |  33 +++++++++++++
 src/common/include.am    |   2 +
 5 files changed, 179 insertions(+)

diff --git a/src/common/address.c b/src/common/address.c
index 773e68855..1bd52d24b 100644
--- a/src/common/address.c
+++ b/src/common/address.c
@@ -1200,6 +1200,28 @@ tor_addr_hash(const tor_addr_t *addr)
   }
 }
 
+/** As tor_addr_hash(), but use the caller-supplied siphash <b>key</b>. */
+uint64_t
+tor_addr_keyed_hash(const struct sipkey *key, const tor_addr_t *addr)
+{
+  /* This code is duplicated from tor_addr_hash(), since this function needs
+   * to be backportable all the way to 0.2.9. */
+
+  switch (tor_addr_family(addr)) {
+  case AF_INET:
+    return siphash24(&addr->addr.in_addr.s_addr, 4, key);
+  case AF_UNSPEC:
+    return 0x4e4d5342; /* fixed value: the key is not consulted here */
+  case AF_INET6:
+    return siphash24(&addr->addr.in6_addr.s6_addr, 16, key);
+  default:
+    /* LCOV_EXCL_START */
+    tor_fragile_assert();
+    return 0;
+    /* LCOV_EXCL_END */
+  }
+}
+
 /** Return a newly allocated string with a representation of <b>addr</b>. */
 char *
 tor_addr_to_str_dup(const tor_addr_t *addr)
diff --git a/src/common/address.h b/src/common/address.h
index 51db42c31..d57abd0d9 100644
--- a/src/common/address.h
+++ b/src/common/address.h
@@ -228,6 +228,8 @@ int tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2,
 #define tor_addr_eq(a,b) (0==tor_addr_compare((a),(b),CMP_EXACT))
 
 uint64_t tor_addr_hash(const tor_addr_t *addr);
+struct sipkey;
+uint64_t tor_addr_keyed_hash(const struct sipkey *key, const tor_addr_t *addr);
 int tor_addr_is_v4(const tor_addr_t *addr);
 int tor_addr_is_internal_(const tor_addr_t *ip, int for_listening,
                           const char *filename, int lineno);
diff --git a/src/common/address_set.c b/src/common/address_set.c
new file mode 100644
index 000000000..df7022174
--- /dev/null
+++ b/src/common/address_set.c
@@ -0,0 +1,120 @@
+/* Copyright (c) 2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file address_set.c
+ * \brief Implementation for a set of addresses.
+ *
+ * This module was first written on a semi-emergency basis to improve the
+ * robustness of the anti-DoS module.  As such, it's written in a pretty
+ * conservative way, and should be susceptible to improvement later on.
+ **/
+
+#include "orconfig.h"
+#include "address_set.h"
+#include "address.h"
+#include "compat.h"
+#include "container.h"
+#include "crypto.h"
+#include "util.h"
+#include "siphash.h"
+
+/** How many 64-bit siphash values to extract per address. */
+#define N_HASHES 2
+/** How many bloom-filter bits we set per address. This is twice the N_HASHES
+ * value, since we split the siphash outcome into two 32-bit values. */
+#define N_BITS_PER_ITEM (N_HASHES * 2)
+
+/* XXXX This code is largely duplicated with digestset_t.  We should merge
+ * them together into a common bloom-filter implementation.  I'm keeping
+ * them separate for now, though, since this module needs to be backported
+ * all the way to 0.2.9.
+ *
+ * The main difference between digestset_t and this code is that we use
+ * independent siphashes rather than messing around with bit-shifts.  The
+ * approach here is probably more sound, and we should prefer it if&when we
+ * unify the implementations.
+ **/
+
+struct address_set_t {
+  /** Siphash keys: one per independent hash computed for each address. */
+  struct sipkey key[N_HASHES];
+  int mask; /**< One less than the number of bits in <b>ba</b>; always one less
+             * than a power of two.  ANDed with a hash to get a bit index. */
+  bitarray_t *ba; /**< A bit array to implement the Bloom filter. */
+};
+
+/**
+ * Allocate and return an address_set, suitable for holding up to
+ * <b>max_addresses_guess</b> distinct values.  Free with address_set_free().
+ */
+address_set_t *
+address_set_new(int max_addresses_guess)
+{
+  /* See digestset_new() for rationale on this equation. */
+  int n_bits = 1u << (tor_log2(max_addresses_guess)+5);
+
+  address_set_t *set = tor_malloc_zero(sizeof(address_set_t));
+  set->mask = n_bits - 1; /* n_bits is a power of two */
+  set->ba = bitarray_init_zero(n_bits);
+  crypto_rand((char*) set->key, sizeof(set->key)); /* all N_HASHES keys */
+
+  return set;
+}
+
+/**
+ * Release all storage associated with <b>set</b>.  A NULL set is a no-op.
+ */
+void
+address_set_free(address_set_t *set)
+{
+  if (! set)
+    return;
+
+  bitarray_free(set->ba);
+  tor_free(set);
+}
+
+/** Yield the bit index for 'val' in <b>set</b>: 'val' masked by set->mask. */
+#define BIT(set, val) ((val) & (set)->mask)
+
+/**
+ * Add <b>addr</b> to <b>set</b>, setting two filter bits per keyed hash: one
+ * indexed by the high 32 bits of the hash and one by the low 32 bits.
+ * All future queries for <b>addr</b> in set will return true. Removing
+ * items is not possible.
+ */
+void
+address_set_add(address_set_t *set, const struct tor_addr_t *addr)
+{
+  int i;
+  for (i = 0; i < N_HASHES; ++i) {
+    uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
+    uint32_t high_bits = (uint32_t)(h >> 32);
+    uint32_t low_bits = (uint32_t)(h);
+    bitarray_set(set->ba, BIT(set, high_bits));
+    bitarray_set(set->ba, BIT(set, low_bits));
+  }
+}
+
+/**
+ * Return true if <b>addr</b> is a member of <b>set</b>.  (And probably,
+ * return false if <b>addr</b> is not a member of set.)
+ */
+int
+address_set_probably_contains(address_set_t *set,
+                              const struct tor_addr_t *addr)
+{
+  int i, matches = 0;
+  for (i = 0; i < N_HASHES; ++i) {
+    uint64_t h = tor_addr_keyed_hash(&set->key[i], addr);
+    uint32_t high_bits = (uint32_t)(h >> 32);
+    uint32_t low_bits = (uint32_t)(h);
+    // Note that !! is necessary here, since bitarray_is_set does not
+    // necessarily return 1 on true.
+    matches += !! bitarray_is_set(set->ba, BIT(set, high_bits));
+    matches += !! bitarray_is_set(set->ba, BIT(set, low_bits));
+  }
+  return matches == N_BITS_PER_ITEM;
+}
+
diff --git a/src/common/address_set.h b/src/common/address_set.h
new file mode 100644
index 000000000..568528c89
--- /dev/null
+++ b/src/common/address_set.h
@@ -0,0 +1,33 @@
+/* Copyright (c) 2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file address_set.h
+ * \brief Types to handle sets of addresses.
+ *
+ * This module was first written on a semi-emergency basis to improve the
+ * robustness of the anti-DoS module.  As such, it's written in a pretty
+ * conservative way, and should be susceptible to improvement later on.
+ **/
+
+#ifndef TOR_ADDRESS_SET_H
+#define TOR_ADDRESS_SET_H
+
+#include "orconfig.h"
+
+/**
+ * An address_set_t represents a set of tor_addr_t values. The implementation
+ * is probabilistic: false negatives cannot occur but false positives are
+ * possible.
+ */
+typedef struct address_set_t address_set_t;
+struct tor_addr_t;
+
+address_set_t *address_set_new(int max_addresses_guess);
+void address_set_free(address_set_t *set);
+void address_set_add(address_set_t *set, const struct tor_addr_t *addr);
+int address_set_probably_contains(address_set_t *set,
+                                  const struct tor_addr_t *addr);
+
+#endif
+
diff --git a/src/common/include.am b/src/common/include.am
index 40c463c9d..cb307e9d5 100644
--- a/src/common/include.am
+++ b/src/common/include.am
@@ -80,6 +80,7 @@ src_common_libor_ctime_testing_a_CFLAGS = @CFLAGS_CONSTTIME@ $(TEST_CFLAGS)
 
 LIBOR_A_SRC = \
   src/common/address.c					\
+  src/common/address_set.c				\
   src/common/backtrace.c				\
   src/common/compat.c					\
   src/common/compat_threads.c				\
@@ -135,6 +136,7 @@ src_common_libor_event_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS)
 
 COMMONHEADERS = \
   src/common/address.h				\
+  src/common/address_set.h			\
   src/common/backtrace.h			\
   src/common/aes.h				\
   src/common/ciphers.inc			\





More information about the tor-commits mailing list