[tor-commits] [tor/master] Backend for compact node-family representation.

nickm at torproject.org nickm at torproject.org
Mon Nov 19 13:27:56 UTC 2018


commit 83be4d2bbd7a4ed584f42d043558a4026c4a449d
Author: Nick Mathewson <nickm at torproject.org>
Date:   Tue Aug 28 11:34:31 2018 -0400

    Backend for compact node-family representation.
    
    This representation is meant to save memory in microdescriptors --
    we can't use it in routerinfo_t yet, since those families need to be
    encoded losslessly for directory voting to work.
    
    This representation saves memory in three ways:
       1. It uses only one allocation per family.  (The old way used a
          smartlist (2 allocs) plus one strdup per entry.)
       2. It stores identity digests in binary, not hex.
       3. It keeps families in a canonical format, memoizes, and
          reference-counts them.
    
    Part of #27359.
---
 src/core/include.am                  |   3 +
 src/feature/nodelist/nodefamily.c    | 373 +++++++++++++++++++++++++++++++++++
 src/feature/nodelist/nodefamily.h    |  47 +++++
 src/feature/nodelist/nodefamily_st.h |  48 +++++
 src/test/test_nodelist.c             | 251 ++++++++++++++++++++++-
 5 files changed, 721 insertions(+), 1 deletion(-)

diff --git a/src/core/include.am b/src/core/include.am
index 1b8ef2ac5..003ef8421 100644
--- a/src/core/include.am
+++ b/src/core/include.am
@@ -106,6 +106,7 @@ LIBTOR_APP_A_SOURCES = 				\
 	src/feature/nodelist/microdesc.c	\
 	src/feature/nodelist/networkstatus.c	\
 	src/feature/nodelist/nickname.c		\
+	src/feature/nodelist/nodefamily.c	\
 	src/feature/nodelist/nodelist.c		\
 	src/feature/nodelist/node_select.c	\
 	src/feature/nodelist/routerinfo.c	\
@@ -340,6 +341,8 @@ noinst_HEADERS +=					\
 	src/feature/nodelist/networkstatus_voter_info_st.h	\
 	src/feature/nodelist/nickname.h			\
 	src/feature/nodelist/node_st.h			\
+	src/feature/nodelist/nodefamily.h	        \
+	src/feature/nodelist/nodefamily_st.h    	\
 	src/feature/nodelist/nodelist.h			\
 	src/feature/nodelist/node_select.h		\
 	src/feature/nodelist/routerinfo.h		\
diff --git a/src/feature/nodelist/nodefamily.c b/src/feature/nodelist/nodefamily.c
new file mode 100644
index 000000000..6b504c0ac
--- /dev/null
+++ b/src/feature/nodelist/nodefamily.c
@@ -0,0 +1,373 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file nodefamily.c
+ * \brief Code to manipulate encoded, reference-counted node families.  We
+ *  use these tricks to save space, since these families would otherwise
+ *  require a large number of tiny allocations.
+ **/
+
+#include "core/or/or.h"
+#include "feature/nodelist/nickname.h"
+#include "feature/nodelist/nodefamily.h"
+#include "feature/nodelist/nodefamily_st.h"
+#include "feature/nodelist/nodelist.h"
+#include "feature/relay/router.h"
+#include "feature/nodelist/routerlist.h"
+
+#include "ht.h"
+#include "siphash.h"
+
+#include "lib/container/smartlist.h"
+#include "lib/ctime/di_ops.h"
+#include "lib/defs/digest_sizes.h"
+#include "lib/log/util_bug.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Allocate and return a blank node family with space to hold <b>n_members</b>
+ * members.
+ */
+static nodefamily_t *
+nodefamily_alloc(int n_members)
+{
+  size_t alloc_len = offsetof(nodefamily_t, family_members) +
+    NODEFAMILY_ARRAY_SIZE(n_members);
+  nodefamily_t *nf = tor_malloc_zero(alloc_len);
+  nf->n_members = n_members;
+  return nf;
+}
+
+/**
+ * Hashtable hash implementation.
+ */
+static inline unsigned int
+nodefamily_hash(const nodefamily_t *nf)
+{
+  return (unsigned) siphash24g(nf->family_members,
+                               NODEFAMILY_ARRAY_SIZE(nf->n_members));
+}
+
+/**
+ * Hashtable equality implementation.
+ */
+static inline unsigned int
+nodefamily_eq(const nodefamily_t *a, const nodefamily_t *b)
+{
+  return (a->n_members == b->n_members) &&
+    fast_memeq(a->family_members, b->family_members,
+               NODEFAMILY_ARRAY_SIZE(a->n_members));
+}
+
+static HT_HEAD(nodefamily_map, nodefamily_t) the_node_families
+  = HT_INITIALIZER();
+
+HT_PROTOTYPE(nodefamily_map, nodefamily_t, ht_ent, nodefamily_hash,
+             nodefamily_eq)
+HT_GENERATE2(nodefamily_map, nodefamily_t, ht_ent, nodefamily_hash,
+             node_family_eq, 0.6, tor_reallocarray_, tor_free_)
+
+/**
+ * Parse the family declaration in <b>s</b>, returning the canonical
+ * <b>nodefamily_t</b> for its members.  Return NULL on error.
+ *
+ * If <b>rsa_id_self</b> is provided, it is a DIGEST_LEN-byte digest
+ * for the router that declared this family: insert it into the
+ * family declaration if it is not there already.
+ *
+ * If NF_WARN_MALFORMED is set in <b>flags</b>, warn about any
+ * elements that we can't parse.  (By default, we log at info.)
+ *
+ * If NF_REJECT_MALFORMED is set in <b>flags</b>, treat any unparseable
+ * elements as an error. (By default, we simply omit them.)
+ **/
+nodefamily_t *
+nodefamily_parse(const char *s, const uint8_t *rsa_id_self,
+                 unsigned flags)
+{
+  smartlist_t *sl = smartlist_new();
+  smartlist_split_string(sl, s, NULL, SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0);
+  nodefamily_t *result = nodefamily_from_members(sl, rsa_id_self, flags);
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_free(sl);
+  return result;
+}
+
+/**
+ * qsort helper for encoded nodefamily elements.
+ **/
+static int
+compare_members(const void *a, const void *b)
+{
+  return fast_memcmp(a, b, NODEFAMILY_MEMBER_LEN);
+}
+
+/**
+ * Parse the member strings in <b>members</b>, returning their canonical
+ * <b>nodefamily_t</b>.  Return NULL on error.
+ *
+ * If <b>rsa_id_self</b> is provided, it is a DIGEST_LEN-byte digest
+ * for the router that declared this family: insert it into the
+ * family declaration if it is not there already.
+ *
+ * The <b>flags</b> element is interpreted as in nodefamily_parse().
+ **/
+nodefamily_t *
+nodefamily_from_members(const smartlist_t *members,
+                        const uint8_t *rsa_id_self,
+                        unsigned flags)
+{
+  const int n_self = rsa_id_self ? 1 : 0;
+  int n_bad_elements = 0;
+  int n_members = smartlist_len(members) + n_self;
+  nodefamily_t *tmp = nodefamily_alloc(n_members);
+  uint8_t *ptr = NODEFAMILY_MEMBER_PTR(tmp, 0);
+
+  SMARTLIST_FOREACH_BEGIN(members, const char *, cp) {
+    bool bad_element = true;
+    if (is_legal_nickname(cp)) {
+      ptr[0] = NODEFAMILY_BY_NICKNAME;
+      tor_assert(strlen(cp) < DIGEST_LEN); // guaranteed by is_legal_nickname
+      memcpy(ptr+1, cp, strlen(cp));
+      bad_element = false;
+    } else if (is_legal_hexdigest(cp)) {
+      char digest_buf[DIGEST_LEN];
+      char nn_buf[MAX_NICKNAME_LEN+1];
+      char nn_char=0;
+      if (hex_digest_nickname_decode(cp, digest_buf, &nn_char, nn_buf)==0) {
+        bad_element = false;
+        ptr[0] = NODEFAMILY_BY_RSA_ID;
+        memcpy(ptr+1, digest_buf, DIGEST_LEN);
+      }
+    }
+
+    if (bad_element) {
+      const int severity = (flags & NF_WARN_MALFORMED) ? LOG_WARN : LOG_INFO;
+      log_fn(severity, LD_GENERAL,
+             "Bad element %s while parsing a node family.",
+             escaped(cp));
+      ++n_bad_elements;
+    } else {
+      ptr += NODEFAMILY_MEMBER_LEN;
+    }
+  } SMARTLIST_FOREACH_END(cp);
+
+  if (n_bad_elements && (flags & NF_REJECT_MALFORMED))
+    goto err;
+
+  if (rsa_id_self) {
+    /* Add self. */
+    ptr[0] = NODEFAMILY_BY_RSA_ID;
+    memcpy(ptr+1, rsa_id_self, DIGEST_LEN);
+  }
+
+  n_members -= n_bad_elements;
+
+  /* Sort tmp into canonical order. */
+  qsort(tmp->family_members, n_members, NODEFAMILY_MEMBER_LEN,
+        compare_members);
+
+  /* Remove duplicates. */
+  int i;
+  for (i = 0; i < n_members-1; ++i) {
+    uint8_t *thisptr = NODEFAMILY_MEMBER_PTR(tmp, i);
+    uint8_t *nextptr = NODEFAMILY_MEMBER_PTR(tmp, i+1);
+    if (fast_memeq(thisptr, nextptr, NODEFAMILY_MEMBER_LEN)) {
+      memmove(thisptr, nextptr, (n_members-i-1)*NODEFAMILY_MEMBER_LEN);
+      --n_members;
+      --i;
+    }
+  }
+  int n_members_alloc = tmp->n_members;
+  tmp->n_members = n_members;
+
+  /* See if we already allocated this family. */
+  nodefamily_t *found = HT_FIND(nodefamily_map, &the_node_families, tmp);
+  if (found) {
+    /* If we did, great: incref it and return it. */
+    ++found->refcnt;
+    tor_free(tmp);
+    return found;
+  } else {
+    /* If not, insert it into the hashtable. */
+    if (n_members_alloc != n_members) {
+      /* Compact the family if needed */
+      nodefamily_t *tmp2 = nodefamily_alloc(n_members);
+      memcpy(tmp2->family_members, tmp->family_members,
+             n_members * NODEFAMILY_MEMBER_LEN);
+      tor_free(tmp);
+      tmp = tmp2;
+    }
+
+    tmp->refcnt = 1;
+    HT_INSERT(nodefamily_map, &the_node_families, tmp);
+    return tmp;
+  }
+
+ err:
+  tor_free(tmp);
+  return NULL;
+}
+
+/**
+ * Drop our reference to <b>family</b>, freeing it if there are no more
+ * references.
+ */
+void
+nodefamily_free_(nodefamily_t *family)
+{
+  if (family == NULL)
+    return;
+
+  --family->refcnt;
+
+  if (family->refcnt == 0) {
+    HT_REMOVE(nodefamily_map, &the_node_families, family);
+    tor_free(family);
+  }
+}
+
+/**
+ * Return true iff <b>family</b> contains the SHA1 RSA1024 identity
+ * <b>rsa_id</b>.
+ */
+bool
+nodefamily_contains_rsa_id(const nodefamily_t *family,
+                           const uint8_t *rsa_id)
+{
+  if (family == NULL)
+    return false;
+
+  unsigned i;
+  for (i = 0; i < family->n_members; ++i) {
+    const uint8_t *ptr = NODEFAMILY_MEMBER_PTR(family, i);
+    if (ptr[0] == NODEFAMILY_BY_RSA_ID &&
+        fast_memeq(ptr+1, rsa_id, DIGEST_LEN)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/**
+ * Return true iff <b>family</b> contains the nickname <b>name</b>.
+ */
+bool
+nodefamily_contains_nickname(const nodefamily_t *family,
+                             const char *name)
+{
+  if (family == NULL)
+    return false;
+
+  unsigned i;
+  for (i = 0; i < family->n_members; ++i) {
+    const uint8_t *ptr = NODEFAMILY_MEMBER_PTR(family, i);
+    // note that the strcasecmp() is safe because there is always at least one
+    // NUL in the encoded nickname, because all legal nicknames are less than
+    // DIGEST_LEN bytes long.
+    if (ptr[0] == NODEFAMILY_BY_NICKNAME && !strcasecmp((char*)ptr+1, name)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/**
+ * Return true if <b>family</b> contains the nickname or the RSA ID for
+ * <b>node</b>
+ **/
+bool
+nodefamily_contains_node(const nodefamily_t *family,
+                         const node_t *node)
+{
+  return
+    nodefamily_contains_nickname(family, node_get_nickname(node))
+    ||
+    nodefamily_contains_rsa_id(family, node_get_rsa_id_digest(node));
+}
+
+/**
+ * Look up every entry in <b>family</b>, and add add the corresponding
+ * node_t to <b>out</b>.
+ **/
+void
+nodefamily_add_nodes_to_smartlist(const nodefamily_t *family,
+                                  smartlist_t *out)
+{
+  if (!family)
+    return;
+
+  unsigned i;
+  for (i = 0; i < family->n_members; ++i) {
+    const uint8_t *ptr = NODEFAMILY_MEMBER_PTR(family, i);
+    const node_t *node = NULL;
+    switch (ptr[0]) {
+      case NODEFAMILY_BY_NICKNAME:
+        node = node_get_by_nickname((char*)ptr+1, NNF_NO_WARN_UNNAMED);
+        break;
+      case NODEFAMILY_BY_RSA_ID:
+        node = node_get_by_id((char*)ptr+1);
+        break;
+      default:
+        /* LCOV_EXCL_START */
+        tor_assert_nonfatal_unreached();
+        break;
+        /* LCOV_EXCL_STOP */
+    }
+    if (node)
+      smartlist_add(out, (void *)node);
+  }
+}
+
+/**
+ * Encode <b>family</b> as a space-separated string.
+ */
+char *
+nodefamily_format(const nodefamily_t *family)
+{
+  if (!family)
+    return tor_strdup("");
+
+  unsigned i;
+  smartlist_t *sl = smartlist_new();
+  for (i = 0; i < family->n_members; ++i) {
+    const uint8_t *ptr = NODEFAMILY_MEMBER_PTR(family, i);
+    switch (ptr[0]) {
+      case NODEFAMILY_BY_NICKNAME:
+        smartlist_add_strdup(sl, (char*)ptr+1);
+        break;
+      case NODEFAMILY_BY_RSA_ID: {
+        char buf[HEX_DIGEST_LEN+2];
+        buf[0]='$';
+        base16_encode(buf+1, sizeof(buf)-1, (char*)ptr+1, DIGEST_LEN);
+        smartlist_add_strdup(sl, buf);
+        break;
+      }
+      default:
+        /* LCOV_EXCL_START */
+        tor_assert_nonfatal_unreached();
+        break;
+        /* LCOV_EXCL_STOP */
+    }
+  }
+
+  char *result = smartlist_join_strings(sl, " ", 0, NULL);
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_free(sl);
+  return result;
+}
+
+/**
+ * Free all storage held in the nodefamily map.
+ **/
+void
+nodefamily_free_all(void)
+{
+  HT_CLEAR(nodefamily_map, &the_node_families);
+}
diff --git a/src/feature/nodelist/nodefamily.h b/src/feature/nodelist/nodefamily.h
new file mode 100644
index 000000000..342f161a0
--- /dev/null
+++ b/src/feature/nodelist/nodefamily.h
@@ -0,0 +1,47 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file nodefamily.h
+ * \brief Header file for nodefamily.c.
+ **/
+
+#ifndef TOR_NODEFAMILY_H
+#define TOR_NODEFAMILY_H
+
+#include "lib/malloc/malloc.h"
+#include <stdbool.h>
+
+typedef struct nodefamily_t nodefamily_t;
+struct node_t;
+struct smartlist_t;
+
+#define NF_WARN_MALFORMED    (1u<<0)
+#define NF_REJECT_MALFORMED  (1u<<1)
+
+nodefamily_t *nodefamily_parse(const char *s,
+                               const uint8_t *rsa_id_self,
+                               unsigned flags);
+nodefamily_t *nodefamily_from_members(const struct smartlist_t *members,
+                                      const uint8_t *rsa_id_self,
+                                      unsigned flags);
+void nodefamily_free_(nodefamily_t *family);
+#define nodefamily_free(family) \
+  FREE_AND_NULL(nodefamily_t, nodefamily_free_, (family))
+
+bool nodefamily_contains_rsa_id(const nodefamily_t *family,
+                                const uint8_t *rsa_id);
+bool nodefamily_contains_nickname(const nodefamily_t *family,
+                                  const char *name);
+bool nodefamily_contains_node(const nodefamily_t *family,
+                              const struct node_t *node);
+void nodefamily_add_nodes_to_smartlist(const nodefamily_t *family,
+                                       struct smartlist_t *out);
+char *nodefamily_format(const nodefamily_t *family);
+
+void nodefamily_free_all(void);
+
+#endif
diff --git a/src/feature/nodelist/nodefamily_st.h b/src/feature/nodelist/nodefamily_st.h
new file mode 100644
index 000000000..f88ada494
--- /dev/null
+++ b/src/feature/nodelist/nodefamily_st.h
@@ -0,0 +1,48 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_NODEFAMILY_ST_H
+#define TOR_NODEFAMILY_ST_H
+
+#include "orconfig.h"
+#include "ht.h"
+
+struct nodefamily_t {
+  /** Entry for this nodefamily_t within the hashtable. */
+  HT_ENTRY(nodefamily_t) ht_ent;
+  /** Reference count.  (The hashtable is not treated as a reference */
+  uint32_t refcnt;
+  /** Number of items encoded in <b>family_members</b>. */
+  uint32_t n_members;
+  /* A byte-array encoding the members of this family.  We encode each member
+   * as one byte to indicate whether it's a nickname or a fingerprint, plus
+   * DIGEST_LEN bytes of data.  The entries are lexically sorted.
+   */
+  uint8_t family_members[FLEXIBLE_ARRAY_MEMBER];
+};
+
+#define NODEFAMILY_MEMBER_LEN (1+DIGEST_LEN)
+
+/** Tag byte, indicates that the following bytes are a NUL-padded nickname.
+ */
+#define NODEFAMILY_BY_NICKNAME 0
+/** Tag byte, indicates that the following bytes are a RSA1024 SHA1 ID.
+ */
+#define NODEFAMILY_BY_RSA_ID 1
+
+/**
+ * Number of bytes to allocate in the array for a nodefamily_t with N members.
+ **/
+#define NODEFAMILY_ARRAY_SIZE(n) \
+  ((n) * NODEFAMILY_MEMBER_LEN)
+
+/**
+ * Pointer to the i'th member of <b>nf</b>, as encoded.
+ */
+#define NODEFAMILY_MEMBER_PTR(nf, i) \
+  (&((nf)->family_members[(i) * NODEFAMILY_MEMBER_LEN]))
+
+#endif
diff --git a/src/test/test_nodelist.c b/src/test/test_nodelist.c
index 1af6db68e..2dbd949b7 100644
--- a/src/test/test_nodelist.c
+++ b/src/test/test_nodelist.c
@@ -9,12 +9,14 @@
 #include "core/or/or.h"
 #include "lib/crypt_ops/crypto_rand.h"
 #include "feature/nodelist/networkstatus.h"
+#include "feature/nodelist/nodefamily.h"
 #include "feature/nodelist/nodelist.h"
 #include "feature/nodelist/torcert.h"
 
 #include "feature/nodelist/microdesc_st.h"
 #include "feature/nodelist/networkstatus_st.h"
 #include "feature/nodelist/node_st.h"
+#include "feature/nodelist/nodefamily_st.h"
 #include "feature/nodelist/routerinfo_st.h"
 #include "feature/nodelist/routerstatus_st.h"
 
@@ -231,6 +233,251 @@ test_nodelist_ed_id(void *arg)
 #undef N_NODES
 }
 
+static void
+test_nodelist_nodefamily(void *arg)
+{
+  (void)arg;
+  /* hex ID digests */
+  const char h1[] = "5B435D6869206861206C65207363617270652070";
+  const char h2[] = "75C3B220616E6461726520696E206769726F2061";
+  const char h3[] = "2074726F766172206461206D616E67696172652C";
+  const char h4[] = "206D656E747265206E6F6E2076616C65206C2769";
+  const char h5[] = "6E766572736F2E202D2D5072696D6F204C657669";
+
+  /* binary ID digests */
+  uint8_t d1[DIGEST_LEN], d2[DIGEST_LEN], d3[DIGEST_LEN], d4[DIGEST_LEN],
+    d5[DIGEST_LEN];
+  base16_decode((char*)d1, sizeof(d1), h1, strlen(h1));
+  base16_decode((char*)d2, sizeof(d2), h2, strlen(h2));
+  base16_decode((char*)d3, sizeof(d3), h3, strlen(h3));
+  base16_decode((char*)d4, sizeof(d4), h4, strlen(h4));
+  base16_decode((char*)d5, sizeof(d5), h5, strlen(h5));
+
+  char *enc=NULL, *enc2=NULL;
+
+  nodefamily_t *nf1 = NULL;
+  nodefamily_t *nf2 = NULL;
+  nodefamily_t *nf3 = NULL;
+
+  /* Make sure that sorting and de-duplication work. */
+  tor_asprintf(&enc, "$%s hello", h1);
+  nf1 = nodefamily_parse(enc, NULL, 0);
+  tt_assert(nf1);
+  tor_free(enc);
+
+  tor_asprintf(&enc, "hello hello $%s hello", h1);
+  nf2 = nodefamily_parse(enc, NULL, 0);
+  tt_assert(nf2);
+  tt_ptr_op(nf1, OP_EQ, nf2);
+  tor_free(enc);
+
+  tor_asprintf(&enc, "%s $%s hello", h1, h1);
+  nf3 = nodefamily_parse(enc, NULL, 0);
+  tt_assert(nf3);
+  tt_ptr_op(nf1, OP_EQ, nf3);
+  tor_free(enc);
+
+  tt_assert(nodefamily_contains_rsa_id(nf1, d1));
+  tt_assert(! nodefamily_contains_rsa_id(nf1, d2));
+  tt_assert(nodefamily_contains_nickname(nf1, "hello"));
+  tt_assert(nodefamily_contains_nickname(nf1, "HELLO"));
+  tt_assert(! nodefamily_contains_nickname(nf1, "goodbye"));
+
+  tt_int_op(nf1->refcnt, OP_EQ, 3);
+  nodefamily_free(nf3);
+  tt_int_op(nf1->refcnt, OP_EQ, 2);
+
+  /* Try parsing with a provided self RSA digest. */
+  nf3 = nodefamily_parse("hello ", d1, 0);
+  tt_assert(nf3);
+  tt_ptr_op(nf1, OP_EQ, nf3);
+
+  /* Do we get the expected result when we re-encode? */
+  tor_asprintf(&enc, "hello $%s", h1);
+  enc2 = nodefamily_format(nf1);
+  tt_str_op(enc2, OP_EQ, enc);
+  tor_free(enc2);
+  tor_free(enc);
+
+  /* Make sure that we get a different result if we give a different digest. */
+  nodefamily_free(nf3);
+  tor_asprintf(&enc, "hello $%s hello", h3);
+  nf3 = nodefamily_parse(enc, NULL, 0);
+  tt_assert(nf3);
+  tt_ptr_op(nf1, OP_NE, nf3);
+  tor_free(enc);
+
+  tt_assert(nodefamily_contains_rsa_id(nf3, d3));
+  tt_assert(! nodefamily_contains_rsa_id(nf3, d2));
+  tt_assert(! nodefamily_contains_rsa_id(nf3, d1));
+  tt_assert(nodefamily_contains_nickname(nf3, "hello"));
+  tt_assert(! nodefamily_contains_nickname(nf3, "goodbye"));
+
+  nodefamily_free(nf1);
+  nodefamily_free(nf2);
+  nodefamily_free(nf3);
+
+  /* Try one with several digests, all with nicknames appended, in different
+     formats. */
+  tor_asprintf(&enc, "%s $%s $%s=res $%s~ist", h1, h2, h3, h4);
+  nf1 = nodefamily_parse(enc, d5, 0);
+  tt_assert(nf1);
+  tt_assert(nodefamily_contains_rsa_id(nf1, d1));
+  tt_assert(nodefamily_contains_rsa_id(nf1, d2));
+  tt_assert(nodefamily_contains_rsa_id(nf1, d3));
+  tt_assert(nodefamily_contains_rsa_id(nf1, d4));
+  tt_assert(nodefamily_contains_rsa_id(nf1, d5));
+  /* Nicknames aren't preserved when ids are present, since node naming is
+   * deprecated */
+  tt_assert(! nodefamily_contains_nickname(nf3, "res"));
+  tor_free(enc);
+  tor_asprintf(&enc, "$%s $%s $%s $%s $%s", h4, h3, h1, h5, h2);
+  enc2 = nodefamily_format(nf1);
+  tt_str_op(enc, OP_EQ, enc2);
+  tor_free(enc);
+  tor_free(enc2);
+
+  /* Try ones where we parse the empty string. */
+  nf2 = nodefamily_parse("", NULL, 0);
+  nf3 = nodefamily_parse("", d4, 0);
+  tt_assert(nf2);
+  tt_assert(nf3);
+  tt_ptr_op(nf2, OP_NE, nf3);
+
+  tt_assert(! nodefamily_contains_rsa_id(nf2, d4));
+  tt_assert(nodefamily_contains_rsa_id(nf3, d4));
+  tt_assert(! nodefamily_contains_rsa_id(nf2, d5));
+  tt_assert(! nodefamily_contains_rsa_id(nf3, d5));
+  tt_assert(! nodefamily_contains_nickname(nf2, "fred"));
+  tt_assert(! nodefamily_contains_nickname(nf3, "bosco"));
+
+  /* The NULL family should contain nothing. */
+  tt_assert(! nodefamily_contains_rsa_id(NULL, d4));
+  tt_assert(! nodefamily_contains_rsa_id(NULL, d5));
+
+ done:
+  tor_free(enc);
+  tor_free(enc2);
+  nodefamily_free(nf1);
+  nodefamily_free(nf2);
+  nodefamily_free(nf3);
+}
+
+static void
+test_nodelist_nodefamily_parse_err(void *arg)
+{
+  (void)arg;
+  nodefamily_t *nf1 = NULL;
+  char *enc = NULL;
+  const char *semibogus =
+    "sdakljfdslkfjdsaklfjdkl9sdf " // too long for nickname
+    "$jkASDFLkjsadfjhkl " // not hex
+    "$7468696e67732d696e2d7468656d73656c766573 " // ok
+    "reticulatogranulate "// ok
+    "$73656d69616e7468726f706f6c6f676963616c6c79 " // too long for hex
+    "$616273656e746d696e6465646e6573736573" // too short for hex
+    ;
+
+  setup_capture_of_logs(LOG_WARN);
+
+  // We only get two items when we parse this.
+  for (int reject = 0; reject <= 1; ++reject) {
+    for (int log_at_warn = 0; log_at_warn <= 1; ++log_at_warn) {
+      unsigned flags = log_at_warn ? NF_WARN_MALFORMED : 0;
+      flags |= reject ? NF_REJECT_MALFORMED : 0;
+      nf1 = nodefamily_parse(semibogus, NULL, flags);
+      if (reject) {
+        tt_assert(nf1 == NULL);
+      } else {
+        tt_assert(nf1);
+        enc = nodefamily_format(nf1);
+        tt_str_op(enc, OP_EQ,
+                  "reticulatogranulate "
+                  "$7468696E67732D696E2D7468656D73656C766573");
+        tor_free(enc);
+      }
+
+      if (log_at_warn) {
+        expect_log_msg_containing("$616273656e746d696e6465646e6573736573");
+        expect_log_msg_containing("sdakljfdslkfjdsaklfjdkl9sdf");
+      } else {
+        tt_int_op(mock_saved_log_n_entries(), OP_EQ, 0);
+      }
+      mock_clean_saved_logs();
+    }
+  }
+
+ done:
+  tor_free(enc);
+  nodefamily_free(nf1);
+  teardown_capture_of_logs();
+}
+
+static const node_t *
+mock_node_get_by_id(const char *id)
+{
+  if (fast_memeq(id, "!!!!!!!!!!!!!!!!!!!!", DIGEST_LEN))
+    return NULL;
+
+  // use tor_free, not node_free.
+  node_t *fake_node = tor_malloc_zero(sizeof(node_t));
+  memcpy(fake_node->identity, id, DIGEST_LEN);
+  return fake_node;
+}
+
+static const node_t *
+mock_node_get_by_nickname(const char *nn, unsigned flags)
+{
+  (void)flags;
+  if (!strcmp(nn, "nonesuch"))
+    return NULL;
+
+  // use tor_free, not node_free.
+  node_t *fake_node = tor_malloc_zero(sizeof(node_t));
+  strlcpy(fake_node->identity, nn, DIGEST_LEN);
+  return fake_node;
+}
+
+static void
+test_nodelist_nodefamily_lookup(void *arg)
+{
+  (void)arg;
+  MOCK(node_get_by_nickname, mock_node_get_by_nickname);
+  MOCK(node_get_by_id, mock_node_get_by_id);
+  smartlist_t *sl = smartlist_new();
+  nodefamily_t *nf1 = NULL;
+  char *mem_op_hex_tmp = NULL;
+
+  // 'null' is allowed.
+  nodefamily_add_nodes_to_smartlist(NULL, sl);
+  tt_int_op(smartlist_len(sl), OP_EQ, 0);
+
+  // Try a real family
+  nf1 = nodefamily_parse("$EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE "
+                         "$2121212121212121212121212121212121212121 "
+                         "$3333333333333333333333333333333333333333 "
+                         "erewhon nonesuch", NULL, 0);
+  tt_assert(nf1);
+  nodefamily_add_nodes_to_smartlist(nf1, sl);
+  // There were 5 elements; 2 were dropped because the mocked lookup failed.
+  tt_int_op(smartlist_len(sl), OP_EQ, 3);
+
+  const node_t *n = smartlist_get(sl, 0);
+  tt_str_op(n->identity, OP_EQ, "erewhon");
+  n = smartlist_get(sl, 1);
+  test_memeq_hex(n->identity, "3333333333333333333333333333333333333333");
+  n = smartlist_get(sl, 2);
+  test_memeq_hex(n->identity, "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE");
+
+ done:
+  UNMOCK(node_get_by_nickname);
+  UNMOCK(node_get_by_id);
+  SMARTLIST_FOREACH(sl, node_t *, fake_node, tor_free(fake_node));
+  smartlist_free(sl);
+  nodefamily_free(nf1);
+  tor_free(mem_op_hex_tmp);
+}
+
 #define NODE(name, flags) \
   { #name, test_nodelist_##name, (flags), NULL, NULL }
 
@@ -239,6 +486,8 @@ struct testcase_t nodelist_tests[] = {
   NODE(node_get_verbose_nickname_not_named, TT_FORK),
   NODE(node_is_dir, TT_FORK),
   NODE(ed_id, TT_FORK),
+  NODE(nodefamily, TT_FORK),
+  NODE(nodefamily_parse_err, TT_FORK),
+  NODE(nodefamily_lookup, TT_FORK),
   END_OF_TESTCASES
 };
-





More information about the tor-commits mailing list