[tor-commits] [tor/master] Move token parsing code to parsecommon.{c|h}

nickm at torproject.org nickm at torproject.org
Fri Nov 4 18:48:11 UTC 2016


commit e8c12175fe61e4666d093b1ac85cf9bbae281f48
Author: David Goulet <dgoulet at ev0ke.net>
Date:   Fri Mar 11 13:26:04 2016 -0500

    Move token parsing code to parsecommon.{c|h}
    
    Signed-off-by: David Goulet <dgoulet at torproject.org>
    Signed-off-by: George Kadianakis <desnacked at riseup.net>
---
 src/or/include.am    |   2 +
 src/or/parsecommon.c | 453 ++++++++++++++++++++++++++++++++
 src/or/parsecommon.h | 292 +++++++++++++++++++++
 src/or/routerparse.c | 715 +--------------------------------------------------
 4 files changed, 749 insertions(+), 713 deletions(-)

diff --git a/src/or/include.am b/src/or/include.am
index f9199dd..3ae45cb 100644
--- a/src/or/include.am
+++ b/src/or/include.am
@@ -60,6 +60,7 @@ LIBTOR_A_SOURCES = \
 	src/or/shared_random.c			\
 	src/or/shared_random_state.c		\
 	src/or/transports.c				\
+	src/or/parsecommon.c			\
 	src/or/periodic.c				\
 	src/or/policies.c				\
 	src/or/reasons.c				\
@@ -172,6 +173,7 @@ ORHEADERS = \
 	src/or/shared_random.h			\
 	src/or/shared_random_state.h		\
 	src/or/transports.h				\
+	src/or/parsecommon.h			\
 	src/or/periodic.h				\
 	src/or/policies.h				\
 	src/or/reasons.h				\
diff --git a/src/or/parsecommon.c b/src/or/parsecommon.c
new file mode 100644
index 0000000..72e69e8
--- /dev/null
+++ b/src/or/parsecommon.c
@@ -0,0 +1,453 @@
+/* Copyright (c) 2016, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file parsecommon.c
+ * \brief Common code to parse and validate various types of descriptors.
+ **/
+
+#include "parsecommon.h"
+#include "torlog.h"
+#include "util_format.h"
+
+#define MIN_ANNOTATION A_PURPOSE
+#define MAX_ANNOTATION A_UNKNOWN_
+
+#define ALLOC_ZERO(sz) memarea_alloc_zero(area,sz)
+#define ALLOC(sz) memarea_alloc(area,sz)
+#define STRDUP(str) memarea_strdup(area,str)
+#define STRNDUP(str,n) memarea_strndup(area,(str),(n))
+
+#define RET_ERR(msg)                                               \
+  STMT_BEGIN                                                       \
+    if (tok) token_clear(tok);                                      \
+    tok = ALLOC_ZERO(sizeof(directory_token_t));                   \
+    tok->tp = ERR_;                                                \
+    tok->error = STRDUP(msg);                                      \
+    goto done_tokenizing;                                          \
+  STMT_END
+
+/** Free all resources allocated for <b>tok</b> */
+void
+token_clear(directory_token_t *tok)
+{
+  if (tok->key)
+    crypto_pk_free(tok->key);
+}
+
+/** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
+ * them to <b>out</b>.  Parse according to the token rules in <b>table</b>.
+ * Caller must free tokens in <b>out</b>.  If <b>end</b> is NULL, use the
+ * entire string.
+ */
+int
+tokenize_string(memarea_t *area,
+                const char *start, const char *end, smartlist_t *out,
+                token_rule_t *table, int flags)
+{
+  const char **s;
+  directory_token_t *tok = NULL;
+  int counts[NIL_];
+  int i;
+  int first_nonannotation;
+  int prev_len = smartlist_len(out);
+  tor_assert(area);
+
+  s = &start;
+  if (!end) {
+    end = start+strlen(start);
+  } else {
+    /* it's only meaningful to check for nuls if we got an end-of-string ptr */
+    if (memchr(start, '\0', end-start)) {
+      log_warn(LD_DIR, "parse error: internal NUL character.");
+      return -1;
+    }
+  }
+  for (i = 0; i < NIL_; ++i)
+    counts[i] = 0;
+
+  SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);
+
+  while (*s < end && (!tok || tok->tp != EOF_)) {
+    tok = get_next_token(area, s, end, table);
+    if (tok->tp == ERR_) {
+      log_warn(LD_DIR, "parse error: %s", tok->error);
+      token_clear(tok);
+      return -1;
+    }
+    ++counts[tok->tp];
+    smartlist_add(out, tok);
+    *s = eat_whitespace_eos(*s, end);
+  }
+
+  if (flags & TS_NOCHECK)
+    return 0;
+
+  if ((flags & TS_ANNOTATIONS_OK)) {
+    first_nonannotation = -1;
+    for (i = 0; i < smartlist_len(out); ++i) {
+      tok = smartlist_get(out, i);
+      if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
+        first_nonannotation = i;
+        break;
+      }
+    }
+    if (first_nonannotation < 0) {
+      log_warn(LD_DIR, "parse error: item contains only annotations");
+      return -1;
+    }
+    for (i=first_nonannotation;  i < smartlist_len(out); ++i) {
+      tok = smartlist_get(out, i);
+      if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
+        log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
+        return -1;
+      }
+    }
+    if ((flags & TS_NO_NEW_ANNOTATIONS)) {
+      if (first_nonannotation != prev_len) {
+        log_warn(LD_DIR, "parse error: Unexpected annotations.");
+        return -1;
+      }
+    }
+  } else {
+    for (i=0;  i < smartlist_len(out); ++i) {
+      tok = smartlist_get(out, i);
+      if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
+        log_warn(LD_DIR, "parse error: no annotations allowed.");
+        return -1;
+      }
+    }
+    first_nonannotation = 0;
+  }
+  for (i = 0; table[i].t; ++i) {
+    if (counts[table[i].v] < table[i].min_cnt) {
+      log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
+      return -1;
+    }
+    if (counts[table[i].v] > table[i].max_cnt) {
+      log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
+      return -1;
+    }
+    if (table[i].pos & AT_START) {
+      if (smartlist_len(out) < 1 ||
+          (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
+        log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
+        return -1;
+      }
+    }
+    if (table[i].pos & AT_END) {
+      if (smartlist_len(out) < 1 ||
+          (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
+        log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
+        return -1;
+      }
+    }
+  }
+  return 0;
+}
+
+
+/** Helper: parse space-separated arguments from the string <b>s</b> ending at
+ * <b>eol</b>, and store them in the args field of <b>tok</b>.  Store the
+ * number of parsed elements into the n_args field of <b>tok</b>.  Allocate
+ * all storage in <b>area</b>.  Return the number of arguments parsed, or
+ * return -1 if there was an insanely high number of arguments. */
+static inline int
+get_token_arguments(memarea_t *area, directory_token_t *tok,
+                    const char *s, const char *eol)
+{
+/** Largest number of arguments we'll accept to any token, ever. */
+#define MAX_ARGS 512
+  char *mem = memarea_strndup(area, s, eol-s);
+  char *cp = mem;
+  int j = 0;
+  char *args[MAX_ARGS];
+  while (*cp) {
+    if (j == MAX_ARGS)
+      return -1;
+    args[j++] = cp;
+    cp = (char*)find_whitespace(cp);
+    if (!cp || !*cp)
+      break; /* End of the line. */
+    *cp++ = '\0';
+    cp = (char*)eat_whitespace(cp);
+  }
+  tok->n_args = j;
+  tok->args = memarea_memdup(area, args, j*sizeof(char*));
+  return j;
+#undef MAX_ARGS
+}
+
+/** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys
+ * the object syntax of <b>o_syn</b>.  Allocate all storage in <b>area</b>.
+ * Return <b>tok</b> on success, or a new ERR_ token if the token didn't
+ * conform to the syntax we wanted.
+ **/
+static inline directory_token_t *
+token_check_object(memarea_t *area, const char *kwd,
+                   directory_token_t *tok, obj_syntax o_syn)
+{
+  char ebuf[128];
+  switch (o_syn) {
+    case NO_OBJ:
+      /* No object is allowed for this token. */
+      if (tok->object_body) {
+        tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
+        RET_ERR(ebuf);
+      }
+      if (tok->key) {
+        tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd);
+        RET_ERR(ebuf);
+      }
+      break;
+    case NEED_OBJ:
+      /* There must be a (non-key) object. */
+      if (!tok->object_body) {
+        tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd);
+        RET_ERR(ebuf);
+      }
+      break;
+    case NEED_KEY_1024: /* There must be a 1024-bit public key. */
+    case NEED_SKEY_1024: /* There must be a 1024-bit private key. */
+      if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) {
+        tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits",
+                     kwd, crypto_pk_num_bits(tok->key));
+        RET_ERR(ebuf);
+      }
+      /* fall through */
+    case NEED_KEY: /* There must be some kind of key. */
+      if (!tok->key) {
+        tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd);
+        RET_ERR(ebuf);
+      }
+      if (o_syn != NEED_SKEY_1024) {
+        if (crypto_pk_key_is_private(tok->key)) {
+          tor_snprintf(ebuf, sizeof(ebuf),
+               "Private key given for %s, which wants a public key", kwd);
+          RET_ERR(ebuf);
+        }
+      } else { /* o_syn == NEED_SKEY_1024 */
+        if (!crypto_pk_key_is_private(tok->key)) {
+          tor_snprintf(ebuf, sizeof(ebuf),
+               "Public key given for %s, which wants a private key", kwd);
+          RET_ERR(ebuf);
+        }
+      }
+      break;
+    case OBJ_OK:
+      /* Anything goes with this token. */
+      break;
+  }
+
+ done_tokenizing:
+  return tok;
+}
+
+
+
+/** Helper function: read the next token from *s, advance *s to the end of the
+ * token, and return the parsed token.  Parse *<b>s</b> according to the list
+ * of tokens in <b>table</b>.
+ */
+directory_token_t *
+get_next_token(memarea_t *area,
+               const char **s, const char *eos, token_rule_t *table)
+{
+  /** Reject any object at least this big; it is probably an overflow, an
+   * attack, a bug, or some other nonsense. */
+#define MAX_UNPARSED_OBJECT_SIZE (128*1024)
+  /** Reject any line at least this big; it is probably an overflow, an
+   * attack, a bug, or some other nonsense. */
+#define MAX_LINE_LENGTH (128*1024)
+
+  const char *next, *eol, *obstart;
+  size_t obname_len;
+  int i;
+  directory_token_t *tok;
+  obj_syntax o_syn = NO_OBJ;
+  char ebuf[128];
+  const char *kwd = "";
+
+  tor_assert(area);
+  tok = ALLOC_ZERO(sizeof(directory_token_t));
+  tok->tp = ERR_;
+
+  /* Set *s to first token, eol to end-of-line, next to after first token */
+  *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */
+  tor_assert(eos >= *s);
+  eol = memchr(*s, '\n', eos-*s);
+  if (!eol)
+    eol = eos;
+  if (eol - *s > MAX_LINE_LENGTH) {
+    RET_ERR("Line far too long");
+  }
+
+  next = find_whitespace_eos(*s, eol);
+
+  if (!strcmp_len(*s, "opt", next-*s)) {
+    /* Skip past an "opt" at the start of the line. */
+    *s = eat_whitespace_eos_no_nl(next, eol);
+    next = find_whitespace_eos(*s, eol);
+  } else if (*s == eos) {  /* If no "opt", and end-of-line, line is invalid */
+    RET_ERR("Unexpected EOF");
+  }
+
+  /* Search the table for the appropriate entry.  (I tried a binary search
+   * instead, but it wasn't any faster.) */
+  for (i = 0; table[i].t ; ++i) {
+    if (!strcmp_len(*s, table[i].t, next-*s)) {
+      /* We've found the keyword. */
+      kwd = table[i].t;
+      tok->tp = table[i].v;
+      o_syn = table[i].os;
+      *s = eat_whitespace_eos_no_nl(next, eol);
+      /* We go ahead whether there are arguments or not, so that tok->args is
+       * always set if we want arguments. */
+      if (table[i].concat_args) {
+        /* The keyword takes the line as a single argument */
+        tok->args = ALLOC(sizeof(char*));
+        tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */
+        tok->n_args = 1;
+      } else {
+        /* This keyword takes multiple arguments. */
+        if (get_token_arguments(area, tok, *s, eol)<0) {
+          tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd);
+          RET_ERR(ebuf);
+        }
+        *s = eol;
+      }
+      if (tok->n_args < table[i].min_args) {
+        tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd);
+        RET_ERR(ebuf);
+      } else if (tok->n_args > table[i].max_args) {
+        tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd);
+        RET_ERR(ebuf);
+      }
+      break;
+    }
+  }
+
+  if (tok->tp == ERR_) {
+    /* No keyword matched; call it a K_OPT or an A_UNKNOWN_ token. */
+    if (**s == '@')
+      tok->tp = A_UNKNOWN_;
+    else
+      tok->tp = K_OPT;
+    tok->args = ALLOC(sizeof(char*));
+    tok->args[0] = STRNDUP(*s, eol-*s);
+    tok->n_args = 1;
+    o_syn = OBJ_OK;
+  }
+
+  /* Check whether there's an object present */
+  *s = eat_whitespace_eos(eol, eos);  /* Scan from end of first line */
+  tor_assert(eos >= *s);
+  eol = memchr(*s, '\n', eos-*s);
+  if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */
+    goto check_object;
+
+  obstart = *s; /* Set obstart to start of object spec */
+  if (*s+16 >= eol || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */
+      strcmp_len(eol-5, "-----", 5) ||           /* nuls or invalid endings */
+      (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) {     /* name too long */
+    RET_ERR("Malformed object: bad begin line");
+  }
+  tok->object_type = STRNDUP(*s+11, eol-*s-16);
+  obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */
+  *s = eol+1;    /* Set *s to possible start of object data (could be eos) */
+
+  /* Go to the end of the object */
+  next = tor_memstr(*s, eos-*s, "-----END ");
+  if (!next) {
+    RET_ERR("Malformed object: missing object end line");
+  }
+  tor_assert(eos >= next);
+  eol = memchr(next, '\n', eos-next);
+  if (!eol)  /* end-of-line marker, or eos if there's no '\n' */
+    eol = eos;
+  /* Validate the ending tag, which should be 9 + NAME + 5 + eol */
+  if ((size_t)(eol-next) != 9+obname_len+5 ||
+      strcmp_len(next+9, tok->object_type, obname_len) ||
+      strcmp_len(eol-5, "-----", 5)) {
+    tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s",
+             tok->object_type);
+    ebuf[sizeof(ebuf)-1] = '\0';
+    RET_ERR(ebuf);
+  }
+  if (next - *s > MAX_UNPARSED_OBJECT_SIZE)
+    RET_ERR("Couldn't parse object: missing footer or object much too big.");
+
+  if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */
+    tok->key = crypto_pk_new();
+    if (crypto_pk_read_public_key_from_string(tok->key, obstart, eol-obstart))
+      RET_ERR("Couldn't parse public key.");
+  } else if (!strcmp(tok->object_type, "RSA PRIVATE KEY")) { /* private key */
+    tok->key = crypto_pk_new();
+    if (crypto_pk_read_private_key_from_string(tok->key, obstart, eol-obstart))
+      RET_ERR("Couldn't parse private key.");
+  } else { /* If it's something else, try to base64-decode it */
+    int r;
+    tok->object_body = ALLOC(next-*s); /* really, this is too much RAM. */
+    r = base64_decode(tok->object_body, next-*s, *s, next-*s);
+    if (r<0)
+      RET_ERR("Malformed object: bad base64-encoded data");
+    tok->object_size = r;
+  }
+  *s = eol;
+
+ check_object:
+  tok = token_check_object(area, kwd, tok, o_syn);
+
+ done_tokenizing:
+  return tok;
+
+#undef RET_ERR
+#undef ALLOC
+#undef ALLOC_ZERO
+#undef STRDUP
+#undef STRNDUP
+}
+
+
+/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail
+ * with an assert if no such keyword is found.
+ */
+directory_token_t *
+find_by_keyword_(smartlist_t *s, directory_keyword keyword,
+                 const char *keyword_as_string)
+{
+  directory_token_t *tok = find_opt_by_keyword(s, keyword);
+  if (PREDICT_UNLIKELY(!tok)) {
+    log_err(LD_BUG, "Missing %s [%d] in directory object that should have "
+         "been validated. Internal error.", keyword_as_string, (int)keyword);
+    tor_assert(tok);
+  }
+  return tok;
+}
+
+/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return
+ * NULL if no such keyword is found.
+ */
+directory_token_t *
+find_opt_by_keyword(smartlist_t *s, directory_keyword keyword)
+{
+  SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t);
+  return NULL;
+}
+
+/** If there are any directory_token_t entries in <b>s</b> whose keyword is
+ * <b>k</b>, return a newly allocated smartlist_t containing all such entries,
+ * in the same order in which they occur in <b>s</b>.  Otherwise return
+ * NULL. */
+smartlist_t *
+find_all_by_keyword(smartlist_t *s, directory_keyword k)
+{
+  smartlist_t *out = NULL;
+  SMARTLIST_FOREACH(s, directory_token_t *, t,
+                    if (t->tp == k) {
+                    if (!out)
+                    out = smartlist_new();
+                    smartlist_add(out, t);
+                    });
+  return out;
+}
diff --git a/src/or/parsecommon.h b/src/or/parsecommon.h
new file mode 100644
index 0000000..3bb89ce
--- /dev/null
+++ b/src/or/parsecommon.h
@@ -0,0 +1,292 @@
+/* Copyright (c) 2016, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file parsecommon.h
+ * \brief Header file for parsecommon.c
+ **/
+
+
+#ifndef TOR_PARSECOMMON_H
+#define TOR_PARSECOMMON_H
+
+#include "container.h"
+#include "crypto.h"
+#include "memarea.h"
+
+/** Enumeration of possible token types.  The ones starting with K_ correspond
+ * to directory 'keywords'. A_ is for an annotation, R_ or C_ is related to
+ * hidden services, ERR_ is an error in the tokenizing process, EOF_ is an
+ * end-of-file marker, and NIL_ is used to encode not-a-token.
+ */
+typedef enum {
+  K_ACCEPT = 0,
+  K_ACCEPT6,
+  K_DIRECTORY_SIGNATURE,
+  K_RECOMMENDED_SOFTWARE,
+  K_REJECT,
+  K_REJECT6,
+  K_ROUTER,
+  K_SIGNED_DIRECTORY,
+  K_SIGNING_KEY,
+  K_ONION_KEY,
+  K_ONION_KEY_NTOR,
+  K_ROUTER_SIGNATURE,
+  K_PUBLISHED,
+  K_RUNNING_ROUTERS,
+  K_ROUTER_STATUS,
+  K_PLATFORM,
+  K_OPT,
+  K_BANDWIDTH,
+  K_CONTACT,
+  K_NETWORK_STATUS,
+  K_UPTIME,
+  K_DIR_SIGNING_KEY,
+  K_FAMILY,
+  K_FINGERPRINT,
+  K_HIBERNATING,
+  K_READ_HISTORY,
+  K_WRITE_HISTORY,
+  K_NETWORK_STATUS_VERSION,
+  K_DIR_SOURCE,
+  K_DIR_OPTIONS,
+  K_CLIENT_VERSIONS,
+  K_SERVER_VERSIONS,
+  K_OR_ADDRESS,
+  K_ID,
+  K_P,
+  K_P6,
+  K_R,
+  K_A,
+  K_S,
+  K_V,
+  K_W,
+  K_M,
+  K_EXTRA_INFO,
+  K_EXTRA_INFO_DIGEST,
+  K_CACHES_EXTRA_INFO,
+  K_HIDDEN_SERVICE_DIR,
+  K_ALLOW_SINGLE_HOP_EXITS,
+  K_IPV6_POLICY,
+  K_ROUTER_SIG_ED25519,
+  K_IDENTITY_ED25519,
+  K_MASTER_KEY_ED25519,
+  K_ONION_KEY_CROSSCERT,
+  K_NTOR_ONION_KEY_CROSSCERT,
+
+  K_DIRREQ_END,
+  K_DIRREQ_V2_IPS,
+  K_DIRREQ_V3_IPS,
+  K_DIRREQ_V2_REQS,
+  K_DIRREQ_V3_REQS,
+  K_DIRREQ_V2_SHARE,
+  K_DIRREQ_V3_SHARE,
+  K_DIRREQ_V2_RESP,
+  K_DIRREQ_V3_RESP,
+  K_DIRREQ_V2_DIR,
+  K_DIRREQ_V3_DIR,
+  K_DIRREQ_V2_TUN,
+  K_DIRREQ_V3_TUN,
+  K_ENTRY_END,
+  K_ENTRY_IPS,
+  K_CELL_END,
+  K_CELL_PROCESSED,
+  K_CELL_QUEUED,
+  K_CELL_TIME,
+  K_CELL_CIRCS,
+  K_EXIT_END,
+  K_EXIT_WRITTEN,
+  K_EXIT_READ,
+  K_EXIT_OPENED,
+
+  K_DIR_KEY_CERTIFICATE_VERSION,
+  K_DIR_IDENTITY_KEY,
+  K_DIR_KEY_PUBLISHED,
+  K_DIR_KEY_EXPIRES,
+  K_DIR_KEY_CERTIFICATION,
+  K_DIR_KEY_CROSSCERT,
+  K_DIR_ADDRESS,
+  K_DIR_TUNNELLED,
+
+  K_VOTE_STATUS,
+  K_VALID_AFTER,
+  K_FRESH_UNTIL,
+  K_VALID_UNTIL,
+  K_VOTING_DELAY,
+
+  K_KNOWN_FLAGS,
+  K_PARAMS,
+  K_BW_WEIGHTS,
+  K_VOTE_DIGEST,
+  K_CONSENSUS_DIGEST,
+  K_ADDITIONAL_DIGEST,
+  K_ADDITIONAL_SIGNATURE,
+  K_CONSENSUS_METHODS,
+  K_CONSENSUS_METHOD,
+  K_LEGACY_DIR_KEY,
+  K_DIRECTORY_FOOTER,
+  K_SIGNING_CERT_ED,
+  K_SR_FLAG,
+  K_COMMIT,
+  K_PREVIOUS_SRV,
+  K_CURRENT_SRV,
+  K_PACKAGE,
+
+  A_PURPOSE,
+  A_LAST_LISTED,
+  A_UNKNOWN_,
+
+  R_RENDEZVOUS_SERVICE_DESCRIPTOR,
+  R_VERSION,
+  R_PERMANENT_KEY,
+  R_SECRET_ID_PART,
+  R_PUBLICATION_TIME,
+  R_PROTOCOL_VERSIONS,
+  R_INTRODUCTION_POINTS,
+  R_SIGNATURE,
+
+  R_IPO_IDENTIFIER,
+  R_IPO_IP_ADDRESS,
+  R_IPO_ONION_PORT,
+  R_IPO_ONION_KEY,
+  R_IPO_SERVICE_KEY,
+
+  C_CLIENT_NAME,
+  C_DESCRIPTOR_COOKIE,
+  C_CLIENT_KEY,
+
+  ERR_,
+  EOF_,
+  NIL_
+} directory_keyword;
+
+/** Structure to hold a single directory token.
+ *
+ * We parse a directory by breaking it into "tokens", each consisting
+ * of a keyword, a line full of arguments, and a binary object.  The
+ * arguments and object are both optional, depending on the keyword
+ * type.
+ *
+ * This structure is only allocated in memareas; do not allocate it on
+ * the heap, or token_clear() won't work.
+ */
+typedef struct directory_token_t {
+  directory_keyword tp;        /**< Type of the token. */
+  int n_args:30;               /**< Number of elements in args */
+  char **args;                 /**< Array of arguments from keyword line. */
+
+  char *object_type;           /**< -----BEGIN [object_type]-----*/
+  size_t object_size;          /**< Bytes in object_body */
+  char *object_body;           /**< Contents of object, base64-decoded. */
+
+  crypto_pk_t *key;        /**< For RSA key objects.  Heap-allocated. */
+
+  char *error;                 /**< For ERR_ tokens only. */
+} directory_token_t;
+
+/** We use a table of rules to decide how to parse each token type. */
+
+/** Rules for whether the keyword needs an object. */
+typedef enum {
+  NO_OBJ,        /**< No object, ever. */
+  NEED_OBJ,      /**< Object is required. */
+  NEED_SKEY_1024,/**< Object is required, and must be a 1024 bit private key */
+  NEED_KEY_1024, /**< Object is required, and must be a 1024 bit public key */
+  NEED_KEY,      /**< Object is required, and must be a public key. */
+  OBJ_OK,        /**< Object is optional. */
+} obj_syntax;
+
+#define AT_START 1
+#define AT_END 2
+
+#define TS_ANNOTATIONS_OK 1
+#define TS_NOCHECK 2
+#define TS_NO_NEW_ANNOTATIONS 4
+
+/*
+ * Helper macros to define token tables.  's' is a string, 't' is a
+ * directory_keyword, 'a' is a trio of argument multiplicities, and 'o' is an
+ * object syntax.
+ *
+ */
+
+/** Marks the end of a token table (its keyword string is NULL). */
+#define END_OF_TABLE { NULL, NIL_, 0,0,0, NO_OBJ, 0, INT_MAX, 0, 0 }
+/** An item with no restrictions: used for obsolete document types */
+#define T(s,t,a,o)    { s, t, a, o, 0, INT_MAX, 0, 0 }
+/** An item with no restrictions on multiplicity or location. */
+#define T0N(s,t,a,o)  { s, t, a, o, 0, INT_MAX, 0, 0 }
+/** An item that must appear exactly once */
+#define T1(s,t,a,o)   { s, t, a, o, 1, 1, 0, 0 }
+/** An item that must appear exactly once, at the start of the document */
+#define T1_START(s,t,a,o)   { s, t, a, o, 1, 1, AT_START, 0 }
+/** An item that must appear exactly once, at the end of the document */
+#define T1_END(s,t,a,o)   { s, t, a, o, 1, 1, AT_END, 0 }
+/** An item that must appear one or more times */
+#define T1N(s,t,a,o)  { s, t, a, o, 1, INT_MAX, 0, 0 }
+/** An item that must appear no more than once */
+#define T01(s,t,a,o)  { s, t, a, o, 0, 1, 0, 0 }
+/** An annotation that must appear no more than once */
+#define A01(s,t,a,o)  { s, t, a, o, 0, 1, 0, 1 }
+
+/* Argument multiplicity: any number of arguments. */
+#define ARGS        0,INT_MAX,0
+/* Argument multiplicity: no arguments. */
+#define NO_ARGS     0,0,0
+/* Argument multiplicity: concatenate all arguments. */
+#define CONCAT_ARGS 1,1,1
+/* Argument multiplicity: at least <b>n</b> arguments. */
+#define GE(n)       n,INT_MAX,0
+/* Argument multiplicity: exactly <b>n</b> arguments. */
+#define EQ(n)       n,n,0
+
+/** Determines the parsing rules for a single token type. */
+typedef struct token_rule_t {
+  /** The string value of the keyword identifying the type of item. */
+  const char *t;
+  /** The corresponding directory_keyword enum. */
+  directory_keyword v;
+  /** Minimum number of arguments for this item */
+  int min_args;
+  /** Maximum number of arguments for this item */
+  int max_args;
+  /** If true, we concatenate all arguments for this item into a single
+   * string. */
+  int concat_args;
+  /** Requirements on object syntax for this item. */
+  obj_syntax os;
+  /** Lowest number of times this item may appear in a document. */
+  int min_cnt;
+  /** Highest number of times this item may appear in a document. */
+  int max_cnt;
+  /** One or more of AT_START/AT_END to limit where the item may appear in a
+   * document. */
+  int pos;
+  /** True iff this token is an annotation. */
+  int is_annotation;
+} token_rule_t;
+
+void token_clear(directory_token_t *tok);
+
+int tokenize_string(memarea_t *area,
+                    const char *start, const char *end,
+                    smartlist_t *out,
+                    token_rule_t *table,
+                    int flags);
+directory_token_t *get_next_token(memarea_t *area,
+                                  const char **s,
+                                  const char *eos,
+                                  token_rule_t *table);
+
+directory_token_t *find_by_keyword_(smartlist_t *s,
+                                    directory_keyword keyword,
+                                    const char *keyword_str);
+
+#define find_by_keyword(s, keyword) \
+  find_by_keyword_((s), (keyword), #keyword)
+
+directory_token_t *find_opt_by_keyword(smartlist_t *s,
+                                       directory_keyword keyword);
+smartlist_t * find_all_by_keyword(smartlist_t *s, directory_keyword k);
+
+#endif /* TOR_PARSECOMMON_H */
diff --git a/src/or/routerparse.c b/src/or/routerparse.c
index 03f8f4e..ef6273b 100644
--- a/src/or/routerparse.c
+++ b/src/or/routerparse.c
@@ -16,6 +16,7 @@
 #include "circuitstats.h"
 #include "dirserv.h"
 #include "dirvote.h"
+#include "parsecommon.h"
 #include "policies.h"
 #include "rendcommon.h"
 #include "router.h"
@@ -36,259 +37,6 @@
 
 /****************************************************************************/
 
-/** Enumeration of possible token types.  The ones starting with K_ correspond
- * to directory 'keywords'. A_ is for an annotation, R or C is related to
- * hidden services, ERR_ is an error in the tokenizing process, EOF_ is an
- * end-of-file marker, and NIL_ is used to encode not-a-token.
- */
-typedef enum {
-  K_ACCEPT = 0,
-  K_ACCEPT6,
-  K_DIRECTORY_SIGNATURE,
-  K_RECOMMENDED_SOFTWARE,
-  K_REJECT,
-  K_REJECT6,
-  K_ROUTER,
-  K_SIGNED_DIRECTORY,
-  K_SIGNING_KEY,
-  K_ONION_KEY,
-  K_ONION_KEY_NTOR,
-  K_ROUTER_SIGNATURE,
-  K_PUBLISHED,
-  K_RUNNING_ROUTERS,
-  K_ROUTER_STATUS,
-  K_PLATFORM,
-  K_OPT,
-  K_BANDWIDTH,
-  K_CONTACT,
-  K_NETWORK_STATUS,
-  K_UPTIME,
-  K_DIR_SIGNING_KEY,
-  K_FAMILY,
-  K_FINGERPRINT,
-  K_HIBERNATING,
-  K_READ_HISTORY,
-  K_WRITE_HISTORY,
-  K_NETWORK_STATUS_VERSION,
-  K_DIR_SOURCE,
-  K_DIR_OPTIONS,
-  K_CLIENT_VERSIONS,
-  K_SERVER_VERSIONS,
-  K_OR_ADDRESS,
-  K_ID,
-  K_P,
-  K_P6,
-  K_R,
-  K_A,
-  K_S,
-  K_V,
-  K_W,
-  K_M,
-  K_EXTRA_INFO,
-  K_EXTRA_INFO_DIGEST,
-  K_CACHES_EXTRA_INFO,
-  K_HIDDEN_SERVICE_DIR,
-  K_ALLOW_SINGLE_HOP_EXITS,
-  K_IPV6_POLICY,
-  K_ROUTER_SIG_ED25519,
-  K_IDENTITY_ED25519,
-  K_MASTER_KEY_ED25519,
-  K_ONION_KEY_CROSSCERT,
-  K_NTOR_ONION_KEY_CROSSCERT,
-
-  K_DIRREQ_END,
-  K_DIRREQ_V2_IPS,
-  K_DIRREQ_V3_IPS,
-  K_DIRREQ_V2_REQS,
-  K_DIRREQ_V3_REQS,
-  K_DIRREQ_V2_SHARE,
-  K_DIRREQ_V3_SHARE,
-  K_DIRREQ_V2_RESP,
-  K_DIRREQ_V3_RESP,
-  K_DIRREQ_V2_DIR,
-  K_DIRREQ_V3_DIR,
-  K_DIRREQ_V2_TUN,
-  K_DIRREQ_V3_TUN,
-  K_ENTRY_END,
-  K_ENTRY_IPS,
-  K_CELL_END,
-  K_CELL_PROCESSED,
-  K_CELL_QUEUED,
-  K_CELL_TIME,
-  K_CELL_CIRCS,
-  K_EXIT_END,
-  K_EXIT_WRITTEN,
-  K_EXIT_READ,
-  K_EXIT_OPENED,
-
-  K_DIR_KEY_CERTIFICATE_VERSION,
-  K_DIR_IDENTITY_KEY,
-  K_DIR_KEY_PUBLISHED,
-  K_DIR_KEY_EXPIRES,
-  K_DIR_KEY_CERTIFICATION,
-  K_DIR_KEY_CROSSCERT,
-  K_DIR_ADDRESS,
-  K_DIR_TUNNELLED,
-
-  K_VOTE_STATUS,
-  K_VALID_AFTER,
-  K_FRESH_UNTIL,
-  K_VALID_UNTIL,
-  K_VOTING_DELAY,
-
-  K_KNOWN_FLAGS,
-  K_PARAMS,
-  K_BW_WEIGHTS,
-  K_VOTE_DIGEST,
-  K_CONSENSUS_DIGEST,
-  K_ADDITIONAL_DIGEST,
-  K_ADDITIONAL_SIGNATURE,
-  K_CONSENSUS_METHODS,
-  K_CONSENSUS_METHOD,
-  K_LEGACY_DIR_KEY,
-  K_DIRECTORY_FOOTER,
-  K_SIGNING_CERT_ED,
-  K_SR_FLAG,
-  K_COMMIT,
-  K_PREVIOUS_SRV,
-  K_CURRENT_SRV,
-  K_PACKAGE,
-
-  A_PURPOSE,
-  A_LAST_LISTED,
-  A_UNKNOWN_,
-
-  R_RENDEZVOUS_SERVICE_DESCRIPTOR,
-  R_VERSION,
-  R_PERMANENT_KEY,
-  R_SECRET_ID_PART,
-  R_PUBLICATION_TIME,
-  R_PROTOCOL_VERSIONS,
-  R_INTRODUCTION_POINTS,
-  R_SIGNATURE,
-
-  R_IPO_IDENTIFIER,
-  R_IPO_IP_ADDRESS,
-  R_IPO_ONION_PORT,
-  R_IPO_ONION_KEY,
-  R_IPO_SERVICE_KEY,
-
-  C_CLIENT_NAME,
-  C_DESCRIPTOR_COOKIE,
-  C_CLIENT_KEY,
-
-  ERR_,
-  EOF_,
-  NIL_
-} directory_keyword;
-
-#define MIN_ANNOTATION A_PURPOSE
-#define MAX_ANNOTATION A_UNKNOWN_
-
-/** Structure to hold a single directory token.
- *
- * We parse a directory by breaking it into "tokens", each consisting
- * of a keyword, a line full of arguments, and a binary object.  The
- * arguments and object are both optional, depending on the keyword
- * type.
- *
- * This structure is only allocated in memareas; do not allocate it on
- * the heap, or token_clear() won't work.
- */
-typedef struct directory_token_t {
-  directory_keyword tp;        /**< Type of the token. */
-  int n_args:30;               /**< Number of elements in args */
-  char **args;                 /**< Array of arguments from keyword line. */
-
-  char *object_type;           /**< -----BEGIN [object_type]-----*/
-  size_t object_size;          /**< Bytes in object_body */
-  char *object_body;           /**< Contents of object, base64-decoded. */
-
-  crypto_pk_t *key;        /**< For public keys only.  Heap-allocated. */
-
-  char *error;                 /**< For ERR_ tokens only. */
-} directory_token_t;
-
-/* ********************************************************************** */
-
-/** We use a table of rules to decide how to parse each token type. */
-
-/** Rules for whether the keyword needs an object. */
-typedef enum {
-  NO_OBJ,        /**< No object, ever. */
-  NEED_OBJ,      /**< Object is required. */
-  NEED_SKEY_1024,/**< Object is required, and must be a 1024 bit private key */
-  NEED_KEY_1024, /**< Object is required, and must be a 1024 bit public key */
-  NEED_KEY,      /**< Object is required, and must be a public key. */
-  OBJ_OK,        /**< Object is optional. */
-} obj_syntax;
-
-#define AT_START 1
-#define AT_END 2
-
-/** Determines the parsing rules for a single token type. */
-typedef struct token_rule_t {
-  /** The string value of the keyword identifying the type of item. */
-  const char *t;
-  /** The corresponding directory_keyword enum. */
-  directory_keyword v;
-  /** Minimum number of arguments for this item */
-  int min_args;
-  /** Maximum number of arguments for this item */
-  int max_args;
-  /** If true, we concatenate all arguments for this item into a single
-   * string. */
-  int concat_args;
-  /** Requirements on object syntax for this item. */
-  obj_syntax os;
-  /** Lowest number of times this item may appear in a document. */
-  int min_cnt;
-  /** Highest number of times this item may appear in a document. */
-  int max_cnt;
-  /** One or more of AT_START/AT_END to limit where the item may appear in a
-   * document. */
-  int pos;
-  /** True iff this token is an annotation. */
-  int is_annotation;
-} token_rule_t;
-
-/*
- * Helper macros to define token tables.  's' is a string, 't' is a
- * directory_keyword, 'a' is a trio of argument multiplicities, and 'o' is an
- * object syntax.
- *
- */
-
-/** Appears to indicate the end of a table. */
-#define END_OF_TABLE { NULL, NIL_, 0,0,0, NO_OBJ, 0, INT_MAX, 0, 0 }
-/** An item with no restrictions: used for obsolete document types */
-#define T(s,t,a,o)    { s, t, a, o, 0, INT_MAX, 0, 0 }
-/** An item with no restrictions on multiplicity or location. */
-#define T0N(s,t,a,o)  { s, t, a, o, 0, INT_MAX, 0, 0 }
-/** An item that must appear exactly once */
-#define T1(s,t,a,o)   { s, t, a, o, 1, 1, 0, 0 }
-/** An item that must appear exactly once, at the start of the document */
-#define T1_START(s,t,a,o)   { s, t, a, o, 1, 1, AT_START, 0 }
-/** An item that must appear exactly once, at the end of the document */
-#define T1_END(s,t,a,o)   { s, t, a, o, 1, 1, AT_END, 0 }
-/** An item that must appear one or more times */
-#define T1N(s,t,a,o)  { s, t, a, o, 1, INT_MAX, 0, 0 }
-/** An item that must appear no more than once */
-#define T01(s,t,a,o)  { s, t, a, o, 0, 1, 0, 0 }
-/** An annotation that must appear no more than once */
-#define A01(s,t,a,o)  { s, t, a, o, 0, 1, 0, 1 }
-
-/* Argument multiplicity: any number of arguments. */
-#define ARGS        0,INT_MAX,0
-/* Argument multiplicity: no arguments. */
-#define NO_ARGS     0,0,0
-/* Argument multiplicity: concatenate all arguments. */
-#define CONCAT_ARGS 1,1,1
-/* Argument multiplicity: at least <b>n</b> arguments. */
-#define GE(n)       n,INT_MAX,0
-/* Argument multiplicity: exactly <b>n</b> arguments. */
-#define EQ(n)       n,n,0
-
 /** List of tokens recognized in router descriptors */
 static token_rule_t routerdesc_token_table[] = {
   T0N("reject",              K_REJECT,              ARGS,    NO_OBJ ),
@@ -556,28 +304,8 @@ static int router_get_hashes_impl(const char *s, size_t s_len,
                                   common_digests_t *digests,
                                   const char *start_str, const char *end_str,
                                   char end_char);
-static void token_clear(directory_token_t *tok);
-static smartlist_t *find_all_by_keyword(smartlist_t *s, directory_keyword k);
 static smartlist_t *find_all_exitpolicy(smartlist_t *s);
-static directory_token_t *find_by_keyword_(smartlist_t *s,
-                                           directory_keyword keyword,
-                                           const char *keyword_str);
-#define find_by_keyword(s, keyword) find_by_keyword_((s), (keyword), #keyword)
-static directory_token_t *find_opt_by_keyword(smartlist_t *s,
-                                              directory_keyword keyword);
-
-#define TS_ANNOTATIONS_OK 1
-#define TS_NOCHECK 2
-#define TS_NO_NEW_ANNOTATIONS 4
-static int tokenize_string(memarea_t *area,
-                           const char *start, const char *end,
-                           smartlist_t *out,
-                           token_rule_t *table,
-                           int flags);
-static directory_token_t *get_next_token(memarea_t *area,
-                                         const char **s,
-                                         const char *eos,
-                                         token_rule_t *table);
+
 #define CST_CHECK_AUTHORITY   (1<<0)
 #define CST_NO_CHECK_OBJTYPE  (1<<1)
 static int check_signature_token(const char *digest,
@@ -4636,445 +4364,6 @@ assert_addr_policy_ok(smartlist_t *lst)
   });
 }
 
-/*
- * Low-level tokenizer for router descriptors and directories.
- */
-
-/** Free all resources allocated for <b>tok</b> */
-static void
-token_clear(directory_token_t *tok)
-{
-  if (tok->key)
-    crypto_pk_free(tok->key);
-}
-
-#define ALLOC_ZERO(sz) memarea_alloc_zero(area,sz)
-#define ALLOC(sz) memarea_alloc(area,sz)
-#define STRDUP(str) memarea_strdup(area,str)
-#define STRNDUP(str,n) memarea_strndup(area,(str),(n))
-
-#define RET_ERR(msg)                                               \
-  STMT_BEGIN                                                       \
-    if (tok) token_clear(tok);                                      \
-    tok = ALLOC_ZERO(sizeof(directory_token_t));                   \
-    tok->tp = ERR_;                                                \
-    tok->error = STRDUP(msg);                                      \
-    goto done_tokenizing;                                          \
-  STMT_END
-
-/** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys
- * the object syntax of <b>o_syn</b>.  Allocate all storage in <b>area</b>.
- * Return <b>tok</b> on success, or a new ERR_ token if the token didn't
- * conform to the syntax we wanted.
- **/
-static inline directory_token_t *
-token_check_object(memarea_t *area, const char *kwd,
-                   directory_token_t *tok, obj_syntax o_syn)
-{
-  char ebuf[128];
-  switch (o_syn) {
-    case NO_OBJ:
-      /* No object is allowed for this token. */
-      if (tok->object_body) {
-        tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
-        RET_ERR(ebuf);
-      }
-      if (tok->key) {
-        tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd);
-        RET_ERR(ebuf);
-      }
-      break;
-    case NEED_OBJ:
-      /* There must be a (non-key) object. */
-      if (!tok->object_body) {
-        tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd);
-        RET_ERR(ebuf);
-      }
-      break;
-    case NEED_KEY_1024: /* There must be a 1024-bit public key. */
-    case NEED_SKEY_1024: /* There must be a 1024-bit private key. */
-      if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) {
-        tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits",
-                     kwd, crypto_pk_num_bits(tok->key));
-        RET_ERR(ebuf);
-      }
-      /* fall through */
-    case NEED_KEY: /* There must be some kind of key. */
-      if (!tok->key) {
-        tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd);
-        RET_ERR(ebuf);
-      }
-      if (o_syn != NEED_SKEY_1024) {
-        if (crypto_pk_key_is_private(tok->key)) {
-          tor_snprintf(ebuf, sizeof(ebuf),
-               "Private key given for %s, which wants a public key", kwd);
-          RET_ERR(ebuf);
-        }
-      } else { /* o_syn == NEED_SKEY_1024 */
-        if (!crypto_pk_key_is_private(tok->key)) {
-          tor_snprintf(ebuf, sizeof(ebuf),
-               "Public key given for %s, which wants a private key", kwd);
-          RET_ERR(ebuf);
-        }
-      }
-      break;
-    case OBJ_OK:
-      /* Anything goes with this token. */
-      break;
-  }
-
- done_tokenizing:
-  return tok;
-}
-
-/** Helper: parse space-separated arguments from the string <b>s</b> ending at
- * <b>eol</b>, and store them in the args field of <b>tok</b>.  Store the
- * number of parsed elements into the n_args field of <b>tok</b>.  Allocate
- * all storage in <b>area</b>.  Return the number of arguments parsed, or
- * return -1 if there was an insanely high number of arguments. */
-static inline int
-get_token_arguments(memarea_t *area, directory_token_t *tok,
-                    const char *s, const char *eol)
-{
-/** Largest number of arguments we'll accept to any token, ever. */
-#define MAX_ARGS 512
-  char *mem = memarea_strndup(area, s, eol-s);
-  char *cp = mem;
-  int j = 0;
-  char *args[MAX_ARGS];
-  while (*cp) {
-    if (j == MAX_ARGS)
-      return -1;
-    args[j++] = cp;
-    cp = (char*)find_whitespace(cp);
-    if (!cp || !*cp)
-      break; /* End of the line. */
-    *cp++ = '\0';
-    cp = (char*)eat_whitespace(cp);
-  }
-  tok->n_args = j;
-  tok->args = memarea_memdup(area, args, j*sizeof(char*));
-  return j;
-#undef MAX_ARGS
-}
-
-/** Helper function: read the next token from *s, advance *s to the end of the
- * token, and return the parsed token.  Parse *<b>s</b> according to the list
- * of tokens in <b>table</b>.
- */
-static directory_token_t *
-get_next_token(memarea_t *area,
-               const char **s, const char *eos, token_rule_t *table)
-{
-  /** Reject any object at least this big; it is probably an overflow, an
-   * attack, a bug, or some other nonsense. */
-#define MAX_UNPARSED_OBJECT_SIZE (128*1024)
-  /** Reject any line at least this big; it is probably an overflow, an
-   * attack, a bug, or some other nonsense. */
-#define MAX_LINE_LENGTH (128*1024)
-
-  const char *next, *eol, *obstart;
-  size_t obname_len;
-  int i;
-  directory_token_t *tok;
-  obj_syntax o_syn = NO_OBJ;
-  char ebuf[128];
-  const char *kwd = "";
-
-  tor_assert(area);
-  tok = ALLOC_ZERO(sizeof(directory_token_t));
-  tok->tp = ERR_;
-
-  /* Set *s to first token, eol to end-of-line, next to after first token */
-  *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */
-  tor_assert(eos >= *s);
-  eol = memchr(*s, '\n', eos-*s);
-  if (!eol)
-    eol = eos;
-  if (eol - *s > MAX_LINE_LENGTH) {
-    RET_ERR("Line far too long");
-  }
-
-  next = find_whitespace_eos(*s, eol);
-
-  if (!strcmp_len(*s, "opt", next-*s)) {
-    /* Skip past an "opt" at the start of the line. */
-    *s = eat_whitespace_eos_no_nl(next, eol);
-    next = find_whitespace_eos(*s, eol);
-  } else if (*s == eos) {  /* If no "opt", and end-of-line, line is invalid */
-    RET_ERR("Unexpected EOF");
-  }
-
-  /* Search the table for the appropriate entry.  (I tried a binary search
-   * instead, but it wasn't any faster.) */
-  for (i = 0; table[i].t ; ++i) {
-    if (!strcmp_len(*s, table[i].t, next-*s)) {
-      /* We've found the keyword. */
-      kwd = table[i].t;
-      tok->tp = table[i].v;
-      o_syn = table[i].os;
-      *s = eat_whitespace_eos_no_nl(next, eol);
-      /* We go ahead whether there are arguments or not, so that tok->args is
-       * always set if we want arguments. */
-      if (table[i].concat_args) {
-        /* The keyword takes the line as a single argument */
-        tok->args = ALLOC(sizeof(char*));
-        tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */
-        tok->n_args = 1;
-      } else {
-        /* This keyword takes multiple arguments. */
-        if (get_token_arguments(area, tok, *s, eol)<0) {
-          tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd);
-          RET_ERR(ebuf);
-        }
-        *s = eol;
-      }
-      if (tok->n_args < table[i].min_args) {
-        tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd);
-        RET_ERR(ebuf);
-      } else if (tok->n_args > table[i].max_args) {
-        tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd);
-        RET_ERR(ebuf);
-      }
-      break;
-    }
-  }
-
-  if (tok->tp == ERR_) {
-    /* No keyword matched; call it an "K_opt" or "A_unrecognized" */
-    if (**s == '@')
-      tok->tp = A_UNKNOWN_;
-    else
-      tok->tp = K_OPT;
-    tok->args = ALLOC(sizeof(char*));
-    tok->args[0] = STRNDUP(*s, eol-*s);
-    tok->n_args = 1;
-    o_syn = OBJ_OK;
-  }
-
-  /* Check whether there's an object present */
-  *s = eat_whitespace_eos(eol, eos);  /* Scan from end of first line */
-  tor_assert(eos >= *s);
-  eol = memchr(*s, '\n', eos-*s);
-  if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */
-    goto check_object;
-
-  obstart = *s; /* Set obstart to start of object spec */
-  if (*s+16 >= eol || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */
-      strcmp_len(eol-5, "-----", 5) ||           /* nuls or invalid endings */
-      (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) {     /* name too long */
-    RET_ERR("Malformed object: bad begin line");
-  }
-  tok->object_type = STRNDUP(*s+11, eol-*s-16);
-  obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */
-  *s = eol+1;    /* Set *s to possible start of object data (could be eos) */
-
-  /* Go to the end of the object */
-  next = tor_memstr(*s, eos-*s, "-----END ");
-  if (!next) {
-    RET_ERR("Malformed object: missing object end line");
-  }
-  tor_assert(eos >= next);
-  eol = memchr(next, '\n', eos-next);
-  if (!eol)  /* end-of-line marker, or eos if there's no '\n' */
-    eol = eos;
-  /* Validate the ending tag, which should be 9 + NAME + 5 + eol */
-  if ((size_t)(eol-next) != 9+obname_len+5 ||
-      strcmp_len(next+9, tok->object_type, obname_len) ||
-      strcmp_len(eol-5, "-----", 5)) {
-    tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s",
-             tok->object_type);
-    ebuf[sizeof(ebuf)-1] = '\0';
-    RET_ERR(ebuf);
-  }
-  if (next - *s > MAX_UNPARSED_OBJECT_SIZE)
-    RET_ERR("Couldn't parse object: missing footer or object much too big.");
-
-  if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */
-    tok->key = crypto_pk_new();
-    if (crypto_pk_read_public_key_from_string(tok->key, obstart, eol-obstart))
-      RET_ERR("Couldn't parse public key.");
-  } else if (!strcmp(tok->object_type, "RSA PRIVATE KEY")) { /* private key */
-    tok->key = crypto_pk_new();
-    if (crypto_pk_read_private_key_from_string(tok->key, obstart, eol-obstart))
-      RET_ERR("Couldn't parse private key.");
-  } else { /* If it's something else, try to base64-decode it */
-    int r;
-    tok->object_body = ALLOC(next-*s); /* really, this is too much RAM. */
-    r = base64_decode(tok->object_body, next-*s, *s, next-*s);
-    if (r<0)
-      RET_ERR("Malformed object: bad base64-encoded data");
-    tok->object_size = r;
-  }
-  *s = eol;
-
- check_object:
-  tok = token_check_object(area, kwd, tok, o_syn);
-
- done_tokenizing:
-  return tok;
-
-#undef RET_ERR
-#undef ALLOC
-#undef ALLOC_ZERO
-#undef STRDUP
-#undef STRNDUP
-}
-
-/** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
- * them to <b>out</b>.  Parse according to the token rules in <b>table</b>.
- * Caller must free tokens in <b>out</b>.  If <b>end</b> is NULL, use the
- * entire string.
- */
-static int
-tokenize_string(memarea_t *area,
-                const char *start, const char *end, smartlist_t *out,
-                token_rule_t *table, int flags)
-{
-  const char **s;
-  directory_token_t *tok = NULL;
-  int counts[NIL_];
-  int i;
-  int first_nonannotation;
-  int prev_len = smartlist_len(out);
-  tor_assert(area);
-
-  s = &start;
-  if (!end) {
-    end = start+strlen(start);
-  } else {
-    /* it's only meaningful to check for nuls if we got an end-of-string ptr */
-    if (memchr(start, '\0', end-start)) {
-      log_warn(LD_DIR, "parse error: internal NUL character.");
-      return -1;
-    }
-  }
-  for (i = 0; i < NIL_; ++i)
-    counts[i] = 0;
-
-  SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);
-
-  while (*s < end && (!tok || tok->tp != EOF_)) {
-    tok = get_next_token(area, s, end, table);
-    if (tok->tp == ERR_) {
-      log_warn(LD_DIR, "parse error: %s", tok->error);
-      token_clear(tok);
-      return -1;
-    }
-    ++counts[tok->tp];
-    smartlist_add(out, tok);
-    *s = eat_whitespace_eos(*s, end);
-  }
-
-  if (flags & TS_NOCHECK)
-    return 0;
-
-  if ((flags & TS_ANNOTATIONS_OK)) {
-    first_nonannotation = -1;
-    for (i = 0; i < smartlist_len(out); ++i) {
-      tok = smartlist_get(out, i);
-      if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
-        first_nonannotation = i;
-        break;
-      }
-    }
-    if (first_nonannotation < 0) {
-      log_warn(LD_DIR, "parse error: item contains only annotations");
-      return -1;
-    }
-    for (i=first_nonannotation;  i < smartlist_len(out); ++i) {
-      tok = smartlist_get(out, i);
-      if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
-        log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
-        return -1;
-      }
-    }
-    if ((flags & TS_NO_NEW_ANNOTATIONS)) {
-      if (first_nonannotation != prev_len) {
-        log_warn(LD_DIR, "parse error: Unexpected annotations.");
-        return -1;
-      }
-    }
-  } else {
-    for (i=0;  i < smartlist_len(out); ++i) {
-      tok = smartlist_get(out, i);
-      if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
-        log_warn(LD_DIR, "parse error: no annotations allowed.");
-        return -1;
-      }
-    }
-    first_nonannotation = 0;
-  }
-  for (i = 0; table[i].t; ++i) {
-    if (counts[table[i].v] < table[i].min_cnt) {
-      log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
-      return -1;
-    }
-    if (counts[table[i].v] > table[i].max_cnt) {
-      log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
-      return -1;
-    }
-    if (table[i].pos & AT_START) {
-      if (smartlist_len(out) < 1 ||
-          (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
-        log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
-        return -1;
-      }
-    }
-    if (table[i].pos & AT_END) {
-      if (smartlist_len(out) < 1 ||
-          (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
-        log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
-        return -1;
-      }
-    }
-  }
-  return 0;
-}
-
-/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return
- * NULL if no such keyword is found.
- */
-static directory_token_t *
-find_opt_by_keyword(smartlist_t *s, directory_keyword keyword)
-{
-  SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t);
-  return NULL;
-}
-
-/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail
- * with an assert if no such keyword is found.
- */
-static directory_token_t *
-find_by_keyword_(smartlist_t *s, directory_keyword keyword,
-                 const char *keyword_as_string)
-{
-  directory_token_t *tok = find_opt_by_keyword(s, keyword);
-  if (PREDICT_UNLIKELY(!tok)) {
-    log_err(LD_BUG, "Missing %s [%d] in directory object that should have "
-         "been validated. Internal error.", keyword_as_string, (int)keyword);
-    tor_assert(tok);
-  }
-  return tok;
-}
-
-/** If there are any directory_token_t entries in <b>s</b> whose keyword is
- * <b>k</b>, return a newly allocated smartlist_t containing all such entries,
- * in the same order in which they occur in <b>s</b>.  Otherwise return
- * NULL. */
-static smartlist_t *
-find_all_by_keyword(smartlist_t *s, directory_keyword k)
-{
-  smartlist_t *out = NULL;
-  SMARTLIST_FOREACH(s, directory_token_t *, t,
-                    if (t->tp == k) {
-                      if (!out)
-                        out = smartlist_new();
-                      smartlist_add(out, t);
-                    });
-  return out;
-}
-
 /** Return a newly allocated smartlist of all accept or reject tokens in
  * <b>s</b>.
  */





More information about the tor-commits mailing list