commit e8c12175fe61e4666d093b1ac85cf9bbae281f48 Author: David Goulet dgoulet@ev0ke.net Date: Fri Mar 11 13:26:04 2016 -0500
Move token parsing code to parsecommon.{c|h}
Signed-off-by: David Goulet dgoulet@torproject.org Signed-off-by: George Kadianakis desnacked@riseup.net --- src/or/include.am | 2 + src/or/parsecommon.c | 453 ++++++++++++++++++++++++++++++++ src/or/parsecommon.h | 292 +++++++++++++++++++++ src/or/routerparse.c | 715 +-------------------------------------------------- 4 files changed, 749 insertions(+), 713 deletions(-)
diff --git a/src/or/include.am b/src/or/include.am index f9199dd..3ae45cb 100644 --- a/src/or/include.am +++ b/src/or/include.am @@ -60,6 +60,7 @@ LIBTOR_A_SOURCES = \ src/or/shared_random.c \ src/or/shared_random_state.c \ src/or/transports.c \ + src/or/parsecommon.c \ src/or/periodic.c \ src/or/policies.c \ src/or/reasons.c \ @@ -172,6 +173,7 @@ ORHEADERS = \ src/or/shared_random.h \ src/or/shared_random_state.h \ src/or/transports.h \ + src/or/parsecommon.h \ src/or/periodic.h \ src/or/policies.h \ src/or/reasons.h \ diff --git a/src/or/parsecommon.c b/src/or/parsecommon.c new file mode 100644 index 0000000..72e69e8 --- /dev/null +++ b/src/or/parsecommon.c @@ -0,0 +1,453 @@ +/* Copyright (c) 2016, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file parsecommon.c + * \brief Common code to parse and validate various type of descriptors. + **/ + +#include "parsecommon.h" +#include "torlog.h" +#include "util_format.h" + +#define MIN_ANNOTATION A_PURPOSE +#define MAX_ANNOTATION A_UNKNOWN_ + +#define ALLOC_ZERO(sz) memarea_alloc_zero(area,sz) +#define ALLOC(sz) memarea_alloc(area,sz) +#define STRDUP(str) memarea_strdup(area,str) +#define STRNDUP(str,n) memarea_strndup(area,(str),(n)) + +#define RET_ERR(msg) \ + STMT_BEGIN \ + if (tok) token_clear(tok); \ + tok = ALLOC_ZERO(sizeof(directory_token_t)); \ + tok->tp = ERR_; \ + tok->error = STRDUP(msg); \ + goto done_tokenizing; \ + STMT_END + +/** Free all resources allocated for <b>tok</b> */ +void +token_clear(directory_token_t *tok) +{ + if (tok->key) + crypto_pk_free(tok->key); +} + +/** Read all tokens from a string between <b>start</b> and <b>end</b>, and add + * them to <b>out</b>. Parse according to the token rules in <b>table</b>. + * Caller must free tokens in <b>out</b>. If <b>end</b> is NULL, use the + * entire string. + */ +int +tokenize_string(memarea_t *area, + const char *start, const char *end, smartlist_t *out, + token_rule_t *table, int flags) +{ + const char **s; + directory_token_t *tok = NULL; + int counts[NIL_]; + int i; + int first_nonannotation; + int prev_len = smartlist_len(out); + tor_assert(area); + + s = &start; + if (!end) { + end = start+strlen(start); + } else { + /* it's only meaningful to check for nuls if we got an end-of-string ptr */ + if (memchr(start, '\0', end-start)) { + log_warn(LD_DIR, "parse error: internal NUL character."); + return -1; + } + } + for (i = 0; i < NIL_; ++i) + counts[i] = 0; + + SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]); + + while (*s < end && (!tok || tok->tp != EOF_)) { + tok = get_next_token(area, s, end, table); + if (tok->tp == ERR_) { + log_warn(LD_DIR, "parse error: %s", tok->error); + token_clear(tok); + return -1; + } + ++counts[tok->tp]; + smartlist_add(out, tok); + *s = eat_whitespace_eos(*s, end); + } + + if (flags & TS_NOCHECK) + return 0; + + if ((flags & TS_ANNOTATIONS_OK)) { + first_nonannotation = -1; + for (i = 0; i < smartlist_len(out); ++i) { + tok = smartlist_get(out, i); + if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) { + first_nonannotation = i; + break; + } + } + if (first_nonannotation < 0) { + log_warn(LD_DIR, "parse error: item contains only annotations"); + return -1; + } + for (i=first_nonannotation; i < smartlist_len(out); ++i) { + tok = smartlist_get(out, i); + if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) { + log_warn(LD_DIR, "parse error: Annotations mixed with keywords"); + return -1; + } + } + if ((flags & TS_NO_NEW_ANNOTATIONS)) { + if (first_nonannotation != prev_len) { + log_warn(LD_DIR, "parse error: Unexpected annotations."); + return -1; + } + } + } else { + for (i=0; i < smartlist_len(out); ++i) { + tok = smartlist_get(out, i); + if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) { + log_warn(LD_DIR, "parse error: no annotations allowed."); + return -1; + } + } + first_nonannotation = 0; + } + for (i = 0; table[i].t; ++i) { + if (counts[table[i].v] < table[i].min_cnt) { + log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t); + return -1; + } + if (counts[table[i].v] > table[i].max_cnt) { + log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t); + return -1; + } + if (table[i].pos & AT_START) { + if (smartlist_len(out) < 1 || + (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) { + log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t); + return -1; + } + } + if (table[i].pos & AT_END) { + if (smartlist_len(out) < 1 || + (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) { + log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t); + return -1; + } + } + } + return 0; +} + + +/** Helper: parse space-separated arguments from the string <b>s</b> ending at + * <b>eol</b>, and store them in the args field of <b>tok</b>. Store the + * number of parsed elements into the n_args field of <b>tok</b>. Allocate + * all storage in <b>area</b>. Return the number of arguments parsed, or + * return -1 if there was an insanely high number of arguments. */ +static inline int +get_token_arguments(memarea_t *area, directory_token_t *tok, + const char *s, const char *eol) +{ +/** Largest number of arguments we'll accept to any token, ever. */ +#define MAX_ARGS 512 + char *mem = memarea_strndup(area, s, eol-s); + char *cp = mem; + int j = 0; + char *args[MAX_ARGS]; + while (*cp) { + if (j == MAX_ARGS) + return -1; + args[j++] = cp; + cp = (char*)find_whitespace(cp); + if (!cp || !*cp) + break; /* End of the line. */ + *cp++ = '\0'; + cp = (char*)eat_whitespace(cp); + } + tok->n_args = j; + tok->args = memarea_memdup(area, args, j*sizeof(char*)); + return j; +#undef MAX_ARGS +} + +/** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys + * the object syntax of <b>o_syn</b>. Allocate all storage in <b>area</b>. + * Return <b>tok</b> on success, or a new ERR_ token if the token didn't + * conform to the syntax we wanted. + **/ +static inline directory_token_t * +token_check_object(memarea_t *area, const char *kwd, + directory_token_t *tok, obj_syntax o_syn) +{ + char ebuf[128]; + switch (o_syn) { + case NO_OBJ: + /* No object is allowed for this token. */ + if (tok->object_body) { + tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd); + RET_ERR(ebuf); + } + if (tok->key) { + tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd); + RET_ERR(ebuf); + } + break; + case NEED_OBJ: + /* There must be a (non-key) object. */ + if (!tok->object_body) { + tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd); + RET_ERR(ebuf); + } + break; + case NEED_KEY_1024: /* There must be a 1024-bit public key. */ + case NEED_SKEY_1024: /* There must be a 1024-bit private key. */ + if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) { + tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits", + kwd, crypto_pk_num_bits(tok->key)); + RET_ERR(ebuf); + } + /* fall through */ + case NEED_KEY: /* There must be some kind of key. */ + if (!tok->key) { + tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd); + RET_ERR(ebuf); + } + if (o_syn != NEED_SKEY_1024) { + if (crypto_pk_key_is_private(tok->key)) { + tor_snprintf(ebuf, sizeof(ebuf), + "Private key given for %s, which wants a public key", kwd); + RET_ERR(ebuf); + } + } else { /* o_syn == NEED_SKEY_1024 */ + if (!crypto_pk_key_is_private(tok->key)) { + tor_snprintf(ebuf, sizeof(ebuf), + "Public key given for %s, which wants a private key", kwd); + RET_ERR(ebuf); + } + } + break; + case OBJ_OK: + /* Anything goes with this token. */ + break; + } + + done_tokenizing: + return tok; +} + + + +/** Helper function: read the next token from *s, advance *s to the end of the + * token, and return the parsed token. Parse *<b>s</b> according to the list + * of tokens in <b>table</b>. + */ +directory_token_t * +get_next_token(memarea_t *area, + const char **s, const char *eos, token_rule_t *table) +{ + /** Reject any object at least this big; it is probably an overflow, an + * attack, a bug, or some other nonsense. */ +#define MAX_UNPARSED_OBJECT_SIZE (128*1024) + /** Reject any line at least this big; it is probably an overflow, an + * attack, a bug, or some other nonsense. */ +#define MAX_LINE_LENGTH (128*1024) + + const char *next, *eol, *obstart; + size_t obname_len; + int i; + directory_token_t *tok; + obj_syntax o_syn = NO_OBJ; + char ebuf[128]; + const char *kwd = ""; + + tor_assert(area); + tok = ALLOC_ZERO(sizeof(directory_token_t)); + tok->tp = ERR_; + + /* Set *s to first token, eol to end-of-line, next to after first token */ + *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */ + tor_assert(eos >= *s); + eol = memchr(*s, '\n', eos-*s); + if (!eol) + eol = eos; + if (eol - *s > MAX_LINE_LENGTH) { + RET_ERR("Line far too long"); + } + + next = find_whitespace_eos(*s, eol); + + if (!strcmp_len(*s, "opt", next-*s)) { + /* Skip past an "opt" at the start of the line. */ + *s = eat_whitespace_eos_no_nl(next, eol); + next = find_whitespace_eos(*s, eol); + } else if (*s == eos) { /* If no "opt", and end-of-line, line is invalid */ + RET_ERR("Unexpected EOF"); + } + + /* Search the table for the appropriate entry. (I tried a binary search + * instead, but it wasn't any faster.) */ + for (i = 0; table[i].t ; ++i) { + if (!strcmp_len(*s, table[i].t, next-*s)) { + /* We've found the keyword. */ + kwd = table[i].t; + tok->tp = table[i].v; + o_syn = table[i].os; + *s = eat_whitespace_eos_no_nl(next, eol); + /* We go ahead whether there are arguments or not, so that tok->args is + * always set if we want arguments. */ + if (table[i].concat_args) { + /* The keyword takes the line as a single argument */ + tok->args = ALLOC(sizeof(char*)); + tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */ + tok->n_args = 1; + } else { + /* This keyword takes multiple arguments. */ + if (get_token_arguments(area, tok, *s, eol)<0) { + tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd); + RET_ERR(ebuf); + } + *s = eol; + } + if (tok->n_args < table[i].min_args) { + tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd); + RET_ERR(ebuf); + } else if (tok->n_args > table[i].max_args) { + tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd); + RET_ERR(ebuf); + } + break; + } + } + + if (tok->tp == ERR_) { + /* No keyword matched; call it an "K_opt" or "A_unrecognized" */ + if (**s == '@') + tok->tp = A_UNKNOWN_; + else + tok->tp = K_OPT; + tok->args = ALLOC(sizeof(char*)); + tok->args[0] = STRNDUP(*s, eol-*s); + tok->n_args = 1; + o_syn = OBJ_OK; + } + + /* Check whether there's an object present */ + *s = eat_whitespace_eos(eol, eos); /* Scan from end of first line */ + tor_assert(eos >= *s); + eol = memchr(*s, '\n', eos-*s); + if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */ + goto check_object; + + obstart = *s; /* Set obstart to start of object spec */ + if (*s+16 >= eol || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */ + strcmp_len(eol-5, "-----", 5) || /* nuls or invalid endings */ + (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) { /* name too long */ + RET_ERR("Malformed object: bad begin line"); + } + tok->object_type = STRNDUP(*s+11, eol-*s-16); + obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */ + *s = eol+1; /* Set *s to possible start of object data (could be eos) */ + + /* Go to the end of the object */ + next = tor_memstr(*s, eos-*s, "-----END "); + if (!next) { + RET_ERR("Malformed object: missing object end line"); + } + tor_assert(eos >= next); + eol = memchr(next, '\n', eos-next); + if (!eol) /* end-of-line marker, or eos if there's no '\n' */ + eol = eos; + /* Validate the ending tag, which should be 9 + NAME + 5 + eol */ + if ((size_t)(eol-next) != 9+obname_len+5 || + strcmp_len(next+9, tok->object_type, obname_len) || + strcmp_len(eol-5, "-----", 5)) { + tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s", + tok->object_type); + ebuf[sizeof(ebuf)-1] = '\0'; + RET_ERR(ebuf); + } + if (next - *s > MAX_UNPARSED_OBJECT_SIZE) + RET_ERR("Couldn't parse object: missing footer or object much too big."); + + if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */ + tok->key = crypto_pk_new(); + if (crypto_pk_read_public_key_from_string(tok->key, obstart, eol-obstart)) + RET_ERR("Couldn't parse public key."); + } else if (!strcmp(tok->object_type, "RSA PRIVATE KEY")) { /* private key */ + tok->key = crypto_pk_new(); + if (crypto_pk_read_private_key_from_string(tok->key, obstart, eol-obstart)) + RET_ERR("Couldn't parse private key."); + } else { /* If it's something else, try to base64-decode it */ + int r; + tok->object_body = ALLOC(next-*s); /* really, this is too much RAM. */ + r = base64_decode(tok->object_body, next-*s, *s, next-*s); + if (r<0) + RET_ERR("Malformed object: bad base64-encoded data"); + tok->object_size = r; + } + *s = eol; + + check_object: + tok = token_check_object(area, kwd, tok, o_syn); + + done_tokenizing: + return tok; + +#undef RET_ERR +#undef ALLOC +#undef ALLOC_ZERO +#undef STRDUP +#undef STRNDUP +} + + +/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail + * with an assert if no such keyword is found. + */ +directory_token_t * +find_by_keyword_(smartlist_t *s, directory_keyword keyword, + const char *keyword_as_string) +{ + directory_token_t *tok = find_opt_by_keyword(s, keyword); + if (PREDICT_UNLIKELY(!tok)) { + log_err(LD_BUG, "Missing %s [%d] in directory object that should have " + "been validated. Internal error.", keyword_as_string, (int)keyword); + tor_assert(tok); + } + return tok; +} + +/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return + * NULL if no such keyword is found. + */ +directory_token_t * +find_opt_by_keyword(smartlist_t *s, directory_keyword keyword) +{ + SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t); + return NULL; +} + +/** If there are any directory_token_t entries in <b>s</b> whose keyword is + * <b>k</b>, return a newly allocated smartlist_t containing all such entries, + * in the same order in which they occur in <b>s</b>. Otherwise return + * NULL. */ +smartlist_t * +find_all_by_keyword(smartlist_t *s, directory_keyword k) +{ + smartlist_t *out = NULL; + SMARTLIST_FOREACH(s, directory_token_t *, t, + if (t->tp == k) { + if (!out) + out = smartlist_new(); + smartlist_add(out, t); + }); + return out; +} diff --git a/src/or/parsecommon.h b/src/or/parsecommon.h new file mode 100644 index 0000000..3bb89ce --- /dev/null +++ b/src/or/parsecommon.h @@ -0,0 +1,292 @@ +/* Copyright (c) 2016, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file parsecommon.h + * \brief Header file for parsecommon.c + **/ + + +#ifndef TOR_PARSECOMMON_H +#define TOR_PARSECOMMON_H + +#include "container.h" +#include "crypto.h" +#include "memarea.h" + +/** Enumeration of possible token types. The ones starting with K_ correspond +* to directory 'keywords'. A_ is for an annotation, R or C is related to +* hidden services, ERR_ is an error in the tokenizing process, EOF_ is an +* end-of-file marker, and NIL_ is used to encode not-a-token. +*/ +typedef enum { + K_ACCEPT = 0, + K_ACCEPT6, + K_DIRECTORY_SIGNATURE, + K_RECOMMENDED_SOFTWARE, + K_REJECT, + K_REJECT6, + K_ROUTER, + K_SIGNED_DIRECTORY, + K_SIGNING_KEY, + K_ONION_KEY, + K_ONION_KEY_NTOR, + K_ROUTER_SIGNATURE, + K_PUBLISHED, + K_RUNNING_ROUTERS, + K_ROUTER_STATUS, + K_PLATFORM, + K_OPT, + K_BANDWIDTH, + K_CONTACT, + K_NETWORK_STATUS, + K_UPTIME, + K_DIR_SIGNING_KEY, + K_FAMILY, + K_FINGERPRINT, + K_HIBERNATING, + K_READ_HISTORY, + K_WRITE_HISTORY, + K_NETWORK_STATUS_VERSION, + K_DIR_SOURCE, + K_DIR_OPTIONS, + K_CLIENT_VERSIONS, + K_SERVER_VERSIONS, + K_OR_ADDRESS, + K_ID, + K_P, + K_P6, + K_R, + K_A, + K_S, + K_V, + K_W, + K_M, + K_EXTRA_INFO, + K_EXTRA_INFO_DIGEST, + K_CACHES_EXTRA_INFO, + K_HIDDEN_SERVICE_DIR, + K_ALLOW_SINGLE_HOP_EXITS, + K_IPV6_POLICY, + K_ROUTER_SIG_ED25519, + K_IDENTITY_ED25519, + K_MASTER_KEY_ED25519, + K_ONION_KEY_CROSSCERT, + K_NTOR_ONION_KEY_CROSSCERT, + + K_DIRREQ_END, + K_DIRREQ_V2_IPS, + K_DIRREQ_V3_IPS, + K_DIRREQ_V2_REQS, + K_DIRREQ_V3_REQS, + K_DIRREQ_V2_SHARE, + K_DIRREQ_V3_SHARE, + K_DIRREQ_V2_RESP, + K_DIRREQ_V3_RESP, + K_DIRREQ_V2_DIR, + K_DIRREQ_V3_DIR, + K_DIRREQ_V2_TUN, + K_DIRREQ_V3_TUN, + K_ENTRY_END, + K_ENTRY_IPS, + K_CELL_END, + K_CELL_PROCESSED, + K_CELL_QUEUED, + K_CELL_TIME, + K_CELL_CIRCS, + K_EXIT_END, + K_EXIT_WRITTEN, + K_EXIT_READ, + K_EXIT_OPENED, + + K_DIR_KEY_CERTIFICATE_VERSION, + K_DIR_IDENTITY_KEY, + K_DIR_KEY_PUBLISHED, + K_DIR_KEY_EXPIRES, + K_DIR_KEY_CERTIFICATION, + K_DIR_KEY_CROSSCERT, + K_DIR_ADDRESS, + K_DIR_TUNNELLED, + + K_VOTE_STATUS, + K_VALID_AFTER, + K_FRESH_UNTIL, + K_VALID_UNTIL, + K_VOTING_DELAY, + + K_KNOWN_FLAGS, + K_PARAMS, + K_BW_WEIGHTS, + K_VOTE_DIGEST, + K_CONSENSUS_DIGEST, + K_ADDITIONAL_DIGEST, + K_ADDITIONAL_SIGNATURE, + K_CONSENSUS_METHODS, + K_CONSENSUS_METHOD, + K_LEGACY_DIR_KEY, + K_DIRECTORY_FOOTER, + K_SIGNING_CERT_ED, + K_SR_FLAG, + K_COMMIT, + K_PREVIOUS_SRV, + K_CURRENT_SRV, + K_PACKAGE, + + A_PURPOSE, + A_LAST_LISTED, + A_UNKNOWN_, + + R_RENDEZVOUS_SERVICE_DESCRIPTOR, + R_VERSION, + R_PERMANENT_KEY, + R_SECRET_ID_PART, + R_PUBLICATION_TIME, + R_PROTOCOL_VERSIONS, + R_INTRODUCTION_POINTS, + R_SIGNATURE, + + R_IPO_IDENTIFIER, + R_IPO_IP_ADDRESS, + R_IPO_ONION_PORT, + R_IPO_ONION_KEY, + R_IPO_SERVICE_KEY, + + C_CLIENT_NAME, + C_DESCRIPTOR_COOKIE, + C_CLIENT_KEY, + + ERR_, + EOF_, + NIL_ +} directory_keyword; + +/** Structure to hold a single directory token. + * + * We parse a directory by breaking it into "tokens", each consisting + * of a keyword, a line full of arguments, and a binary object. The + * arguments and object are both optional, depending on the keyword + * type. + * + * This structure is only allocated in memareas; do not allocate it on + * the heap, or token_clear() won't work. + */ +typedef struct directory_token_t { + directory_keyword tp; /**< Type of the token. */ + int n_args:30; /**< Number of elements in args */ + char **args; /**< Array of arguments from keyword line. */ + + char *object_type; /**< -----BEGIN [object_type]-----*/ + size_t object_size; /**< Bytes in object_body */ + char *object_body; /**< Contents of object, base64-decoded. */ + + crypto_pk_t *key; /**< For public keys only. Heap-allocated. */ + + char *error; /**< For ERR_ tokens only. */ +} directory_token_t; + +/** We use a table of rules to decide how to parse each token type. */ + +/** Rules for whether the keyword needs an object. */ +typedef enum { + NO_OBJ, /**< No object, ever. */ + NEED_OBJ, /**< Object is required. */ + NEED_SKEY_1024,/**< Object is required, and must be a 1024 bit private key */ + NEED_KEY_1024, /**< Object is required, and must be a 1024 bit public key */ + NEED_KEY, /**< Object is required, and must be a public key. */ + OBJ_OK, /**< Object is optional. */ +} obj_syntax; + +#define AT_START 1 +#define AT_END 2 + +#define TS_ANNOTATIONS_OK 1 +#define TS_NOCHECK 2 +#define TS_NO_NEW_ANNOTATIONS 4 + +/* + * Helper macros to define token tables. 's' is a string, 't' is a + * directory_keyword, 'a' is a trio of argument multiplicities, and 'o' is an + * object syntax. + * + */ + +/** Appears to indicate the end of a table. */ +#define END_OF_TABLE { NULL, NIL_, 0,0,0, NO_OBJ, 0, INT_MAX, 0, 0 } +/** An item with no restrictions: used for obsolete document types */ +#define T(s,t,a,o) { s, t, a, o, 0, INT_MAX, 0, 0 } +/** An item with no restrictions on multiplicity or location. */ +#define T0N(s,t,a,o) { s, t, a, o, 0, INT_MAX, 0, 0 } +/** An item that must appear exactly once */ +#define T1(s,t,a,o) { s, t, a, o, 1, 1, 0, 0 } +/** An item that must appear exactly once, at the start of the document */ +#define T1_START(s,t,a,o) { s, t, a, o, 1, 1, AT_START, 0 } +/** An item that must appear exactly once, at the end of the document */ +#define T1_END(s,t,a,o) { s, t, a, o, 1, 1, AT_END, 0 } +/** An item that must appear one or more times */ +#define T1N(s,t,a,o) { s, t, a, o, 1, INT_MAX, 0, 0 } +/** An item that must appear no more than once */ +#define T01(s,t,a,o) { s, t, a, o, 0, 1, 0, 0 } +/** An annotation that must appear no more than once */ +#define A01(s,t,a,o) { s, t, a, o, 0, 1, 0, 1 } + +/* Argument multiplicity: any number of arguments. */ +#define ARGS 0,INT_MAX,0 +/* Argument multiplicity: no arguments. */ +#define NO_ARGS 0,0,0 +/* Argument multiplicity: concatenate all arguments. */ +#define CONCAT_ARGS 1,1,1 +/* Argument multiplicity: at least <b>n</b> arguments. */ +#define GE(n) n,INT_MAX,0 +/* Argument multiplicity: exactly <b>n</b> arguments. */ +#define EQ(n) n,n,0 + +/** Determines the parsing rules for a single token type. */ +typedef struct token_rule_t { + /** The string value of the keyword identifying the type of item. */ + const char *t; + /** The corresponding directory_keyword enum. */ + directory_keyword v; + /** Minimum number of arguments for this item */ + int min_args; + /** Maximum number of arguments for this item */ + int max_args; + /** If true, we concatenate all arguments for this item into a single + * string. */ + int concat_args; + /** Requirements on object syntax for this item. */ + obj_syntax os; + /** Lowest number of times this item may appear in a document. */ + int min_cnt; + /** Highest number of times this item may appear in a document. */ + int max_cnt; + /** One or more of AT_START/AT_END to limit where the item may appear in a + * document. */ + int pos; + /** True iff this token is an annotation. */ + int is_annotation; +} token_rule_t; + +void token_clear(directory_token_t *tok); + +int tokenize_string(memarea_t *area, + const char *start, const char *end, + smartlist_t *out, + token_rule_t *table, + int flags); +directory_token_t *get_next_token(memarea_t *area, + const char **s, + const char *eos, + token_rule_t *table); + +directory_token_t *find_by_keyword_(smartlist_t *s, + directory_keyword keyword, + const char *keyword_str); + +#define find_by_keyword(s, keyword) \ + find_by_keyword_((s), (keyword), #keyword) + +directory_token_t *find_opt_by_keyword(smartlist_t *s, + directory_keyword keyword); +smartlist_t * find_all_by_keyword(smartlist_t *s, directory_keyword k); + +#endif /* TOR_PARSECOMMON_H */ diff --git a/src/or/routerparse.c b/src/or/routerparse.c index 03f8f4e..ef6273b 100644 --- a/src/or/routerparse.c +++ b/src/or/routerparse.c @@ -16,6 +16,7 @@ #include "circuitstats.h" #include "dirserv.h" #include "dirvote.h" +#include "parsecommon.h" #include "policies.h" #include "rendcommon.h" #include "router.h" @@ -36,259 +37,6 @@
/****************************************************************************/
-/** Enumeration of possible token types. The ones starting with K_ correspond - * to directory 'keywords'. A_ is for an annotation, R or C is related to - * hidden services, ERR_ is an error in the tokenizing process, EOF_ is an - * end-of-file marker, and NIL_ is used to encode not-a-token. - */ -typedef enum { - K_ACCEPT = 0, - K_ACCEPT6, - K_DIRECTORY_SIGNATURE, - K_RECOMMENDED_SOFTWARE, - K_REJECT, - K_REJECT6, - K_ROUTER, - K_SIGNED_DIRECTORY, - K_SIGNING_KEY, - K_ONION_KEY, - K_ONION_KEY_NTOR, - K_ROUTER_SIGNATURE, - K_PUBLISHED, - K_RUNNING_ROUTERS, - K_ROUTER_STATUS, - K_PLATFORM, - K_OPT, - K_BANDWIDTH, - K_CONTACT, - K_NETWORK_STATUS, - K_UPTIME, - K_DIR_SIGNING_KEY, - K_FAMILY, - K_FINGERPRINT, - K_HIBERNATING, - K_READ_HISTORY, - K_WRITE_HISTORY, - K_NETWORK_STATUS_VERSION, - K_DIR_SOURCE, - K_DIR_OPTIONS, - K_CLIENT_VERSIONS, - K_SERVER_VERSIONS, - K_OR_ADDRESS, - K_ID, - K_P, - K_P6, - K_R, - K_A, - K_S, - K_V, - K_W, - K_M, - K_EXTRA_INFO, - K_EXTRA_INFO_DIGEST, - K_CACHES_EXTRA_INFO, - K_HIDDEN_SERVICE_DIR, - K_ALLOW_SINGLE_HOP_EXITS, - K_IPV6_POLICY, - K_ROUTER_SIG_ED25519, - K_IDENTITY_ED25519, - K_MASTER_KEY_ED25519, - K_ONION_KEY_CROSSCERT, - K_NTOR_ONION_KEY_CROSSCERT, - - K_DIRREQ_END, - K_DIRREQ_V2_IPS, - K_DIRREQ_V3_IPS, - K_DIRREQ_V2_REQS, - K_DIRREQ_V3_REQS, - K_DIRREQ_V2_SHARE, - K_DIRREQ_V3_SHARE, - K_DIRREQ_V2_RESP, - K_DIRREQ_V3_RESP, - K_DIRREQ_V2_DIR, - K_DIRREQ_V3_DIR, - K_DIRREQ_V2_TUN, - K_DIRREQ_V3_TUN, - K_ENTRY_END, - K_ENTRY_IPS, - K_CELL_END, - K_CELL_PROCESSED, - K_CELL_QUEUED, - K_CELL_TIME, - K_CELL_CIRCS, - K_EXIT_END, - K_EXIT_WRITTEN, - K_EXIT_READ, - K_EXIT_OPENED, - - K_DIR_KEY_CERTIFICATE_VERSION, - K_DIR_IDENTITY_KEY, - K_DIR_KEY_PUBLISHED, - K_DIR_KEY_EXPIRES, - K_DIR_KEY_CERTIFICATION, - K_DIR_KEY_CROSSCERT, - K_DIR_ADDRESS, - K_DIR_TUNNELLED, - - K_VOTE_STATUS, - K_VALID_AFTER, - K_FRESH_UNTIL, - K_VALID_UNTIL, - K_VOTING_DELAY, - - K_KNOWN_FLAGS, - K_PARAMS, - K_BW_WEIGHTS, - K_VOTE_DIGEST, - K_CONSENSUS_DIGEST, - K_ADDITIONAL_DIGEST, - K_ADDITIONAL_SIGNATURE, - K_CONSENSUS_METHODS, - K_CONSENSUS_METHOD, - K_LEGACY_DIR_KEY, - K_DIRECTORY_FOOTER, - K_SIGNING_CERT_ED, - K_SR_FLAG, - K_COMMIT, - K_PREVIOUS_SRV, - K_CURRENT_SRV, - K_PACKAGE, - - A_PURPOSE, - A_LAST_LISTED, - A_UNKNOWN_, - - R_RENDEZVOUS_SERVICE_DESCRIPTOR, - R_VERSION, - R_PERMANENT_KEY, - R_SECRET_ID_PART, - R_PUBLICATION_TIME, - R_PROTOCOL_VERSIONS, - R_INTRODUCTION_POINTS, - R_SIGNATURE, - - R_IPO_IDENTIFIER, - R_IPO_IP_ADDRESS, - R_IPO_ONION_PORT, - R_IPO_ONION_KEY, - R_IPO_SERVICE_KEY, - - C_CLIENT_NAME, - C_DESCRIPTOR_COOKIE, - C_CLIENT_KEY, - - ERR_, - EOF_, - NIL_ -} directory_keyword; - -#define MIN_ANNOTATION A_PURPOSE -#define MAX_ANNOTATION A_UNKNOWN_ - -/** Structure to hold a single directory token. - * - * We parse a directory by breaking it into "tokens", each consisting - * of a keyword, a line full of arguments, and a binary object. The - * arguments and object are both optional, depending on the keyword - * type. - * - * This structure is only allocated in memareas; do not allocate it on - * the heap, or token_clear() won't work. - */ -typedef struct directory_token_t { - directory_keyword tp; /**< Type of the token. */ - int n_args:30; /**< Number of elements in args */ - char **args; /**< Array of arguments from keyword line. */ - - char *object_type; /**< -----BEGIN [object_type]-----*/ - size_t object_size; /**< Bytes in object_body */ - char *object_body; /**< Contents of object, base64-decoded. */ - - crypto_pk_t *key; /**< For public keys only. Heap-allocated. */ - - char *error; /**< For ERR_ tokens only. */ -} directory_token_t; - -/* ********************************************************************** */ - -/** We use a table of rules to decide how to parse each token type. */ - -/** Rules for whether the keyword needs an object. */ -typedef enum { - NO_OBJ, /**< No object, ever. */ - NEED_OBJ, /**< Object is required. */ - NEED_SKEY_1024,/**< Object is required, and must be a 1024 bit private key */ - NEED_KEY_1024, /**< Object is required, and must be a 1024 bit public key */ - NEED_KEY, /**< Object is required, and must be a public key. */ - OBJ_OK, /**< Object is optional. */ -} obj_syntax; - -#define AT_START 1 -#define AT_END 2 - -/** Determines the parsing rules for a single token type. */ -typedef struct token_rule_t { - /** The string value of the keyword identifying the type of item. */ - const char *t; - /** The corresponding directory_keyword enum. */ - directory_keyword v; - /** Minimum number of arguments for this item */ - int min_args; - /** Maximum number of arguments for this item */ - int max_args; - /** If true, we concatenate all arguments for this item into a single - * string. */ - int concat_args; - /** Requirements on object syntax for this item. */ - obj_syntax os; - /** Lowest number of times this item may appear in a document. */ - int min_cnt; - /** Highest number of times this item may appear in a document. */ - int max_cnt; - /** One or more of AT_START/AT_END to limit where the item may appear in a - * document. */ - int pos; - /** True iff this token is an annotation. */ - int is_annotation; -} token_rule_t; - -/* - * Helper macros to define token tables. 's' is a string, 't' is a - * directory_keyword, 'a' is a trio of argument multiplicities, and 'o' is an - * object syntax. - * - */ - -/** Appears to indicate the end of a table. */ -#define END_OF_TABLE { NULL, NIL_, 0,0,0, NO_OBJ, 0, INT_MAX, 0, 0 } -/** An item with no restrictions: used for obsolete document types */ -#define T(s,t,a,o) { s, t, a, o, 0, INT_MAX, 0, 0 } -/** An item with no restrictions on multiplicity or location. */ -#define T0N(s,t,a,o) { s, t, a, o, 0, INT_MAX, 0, 0 } -/** An item that must appear exactly once */ -#define T1(s,t,a,o) { s, t, a, o, 1, 1, 0, 0 } -/** An item that must appear exactly once, at the start of the document */ -#define T1_START(s,t,a,o) { s, t, a, o, 1, 1, AT_START, 0 } -/** An item that must appear exactly once, at the end of the document */ -#define T1_END(s,t,a,o) { s, t, a, o, 1, 1, AT_END, 0 } -/** An item that must appear one or more times */ -#define T1N(s,t,a,o) { s, t, a, o, 1, INT_MAX, 0, 0 } -/** An item that must appear no more than once */ -#define T01(s,t,a,o) { s, t, a, o, 0, 1, 0, 0 } -/** An annotation that must appear no more than once */ -#define A01(s,t,a,o) { s, t, a, o, 0, 1, 0, 1 } - -/* Argument multiplicity: any number of arguments. */ -#define ARGS 0,INT_MAX,0 -/* Argument multiplicity: no arguments. */ -#define NO_ARGS 0,0,0 -/* Argument multiplicity: concatenate all arguments. */ -#define CONCAT_ARGS 1,1,1 -/* Argument multiplicity: at least <b>n</b> arguments. */ -#define GE(n) n,INT_MAX,0 -/* Argument multiplicity: exactly <b>n</b> arguments. */ -#define EQ(n) n,n,0 - /** List of tokens recognized in router descriptors */ static token_rule_t routerdesc_token_table[] = { T0N("reject", K_REJECT, ARGS, NO_OBJ ), @@ -556,28 +304,8 @@ static int router_get_hashes_impl(const char *s, size_t s_len, common_digests_t *digests, const char *start_str, const char *end_str, char end_char); -static void token_clear(directory_token_t *tok); -static smartlist_t *find_all_by_keyword(smartlist_t *s, directory_keyword k); static smartlist_t *find_all_exitpolicy(smartlist_t *s); -static directory_token_t *find_by_keyword_(smartlist_t *s, - directory_keyword keyword, - const char *keyword_str); -#define find_by_keyword(s, keyword) find_by_keyword_((s), (keyword), #keyword) -static directory_token_t *find_opt_by_keyword(smartlist_t *s, - directory_keyword keyword); - -#define TS_ANNOTATIONS_OK 1 -#define TS_NOCHECK 2 -#define TS_NO_NEW_ANNOTATIONS 4 -static int tokenize_string(memarea_t *area, - const char *start, const char *end, - smartlist_t *out, - token_rule_t *table, - int flags); -static directory_token_t *get_next_token(memarea_t *area, - const char **s, - const char *eos, - token_rule_t *table); + #define CST_CHECK_AUTHORITY (1<<0) #define CST_NO_CHECK_OBJTYPE (1<<1) static int check_signature_token(const char *digest, @@ -4636,445 +4364,6 @@ assert_addr_policy_ok(smartlist_t *lst) }); }
-/* - * Low-level tokenizer for router descriptors and directories. - */ - -/** Free all resources allocated for <b>tok</b> */ -static void -token_clear(directory_token_t *tok) -{ - if (tok->key) - crypto_pk_free(tok->key); -} - -#define ALLOC_ZERO(sz) memarea_alloc_zero(area,sz) -#define ALLOC(sz) memarea_alloc(area,sz) -#define STRDUP(str) memarea_strdup(area,str) -#define STRNDUP(str,n) memarea_strndup(area,(str),(n)) - -#define RET_ERR(msg) \ - STMT_BEGIN \ - if (tok) token_clear(tok); \ - tok = ALLOC_ZERO(sizeof(directory_token_t)); \ - tok->tp = ERR_; \ - tok->error = STRDUP(msg); \ - goto done_tokenizing; \ - STMT_END - -/** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys - * the object syntax of <b>o_syn</b>. Allocate all storage in <b>area</b>. - * Return <b>tok</b> on success, or a new ERR_ token if the token didn't - * conform to the syntax we wanted. - **/ -static inline directory_token_t * -token_check_object(memarea_t *area, const char *kwd, - directory_token_t *tok, obj_syntax o_syn) -{ - char ebuf[128]; - switch (o_syn) { - case NO_OBJ: - /* No object is allowed for this token. */ - if (tok->object_body) { - tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd); - RET_ERR(ebuf); - } - if (tok->key) { - tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd); - RET_ERR(ebuf); - } - break; - case NEED_OBJ: - /* There must be a (non-key) object. */ - if (!tok->object_body) { - tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd); - RET_ERR(ebuf); - } - break; - case NEED_KEY_1024: /* There must be a 1024-bit public key. */ - case NEED_SKEY_1024: /* There must be a 1024-bit private key. */ - if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) { - tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits", - kwd, crypto_pk_num_bits(tok->key)); - RET_ERR(ebuf); - } - /* fall through */ - case NEED_KEY: /* There must be some kind of key. */ - if (!tok->key) { - tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd); - RET_ERR(ebuf); - } - if (o_syn != NEED_SKEY_1024) { - if (crypto_pk_key_is_private(tok->key)) { - tor_snprintf(ebuf, sizeof(ebuf), - "Private key given for %s, which wants a public key", kwd); - RET_ERR(ebuf); - } - } else { /* o_syn == NEED_SKEY_1024 */ - if (!crypto_pk_key_is_private(tok->key)) { - tor_snprintf(ebuf, sizeof(ebuf), - "Public key given for %s, which wants a private key", kwd); - RET_ERR(ebuf); - } - } - break; - case OBJ_OK: - /* Anything goes with this token. */ - break; - } - - done_tokenizing: - return tok; -} - -/** Helper: parse space-separated arguments from the string <b>s</b> ending at - * <b>eol</b>, and store them in the args field of <b>tok</b>. Store the - * number of parsed elements into the n_args field of <b>tok</b>. Allocate - * all storage in <b>area</b>. Return the number of arguments parsed, or - * return -1 if there was an insanely high number of arguments. */ -static inline int -get_token_arguments(memarea_t *area, directory_token_t *tok, - const char *s, const char *eol) -{ -/** Largest number of arguments we'll accept to any token, ever. */ -#define MAX_ARGS 512 - char *mem = memarea_strndup(area, s, eol-s); - char *cp = mem; - int j = 0; - char *args[MAX_ARGS]; - while (*cp) { - if (j == MAX_ARGS) - return -1; - args[j++] = cp; - cp = (char*)find_whitespace(cp); - if (!cp || !*cp) - break; /* End of the line. */ - *cp++ = '\0'; - cp = (char*)eat_whitespace(cp); - } - tok->n_args = j; - tok->args = memarea_memdup(area, args, j*sizeof(char*)); - return j; -#undef MAX_ARGS -} - -/** Helper function: read the next token from *s, advance *s to the end of the - * token, and return the parsed token. Parse *<b>s</b> according to the list - * of tokens in <b>table</b>. - */ -static directory_token_t * -get_next_token(memarea_t *area, - const char **s, const char *eos, token_rule_t *table) -{ - /** Reject any object at least this big; it is probably an overflow, an - * attack, a bug, or some other nonsense. */ -#define MAX_UNPARSED_OBJECT_SIZE (128*1024) - /** Reject any line at least this big; it is probably an overflow, an - * attack, a bug, or some other nonsense. */ -#define MAX_LINE_LENGTH (128*1024) - - const char *next, *eol, *obstart; - size_t obname_len; - int i; - directory_token_t *tok; - obj_syntax o_syn = NO_OBJ; - char ebuf[128]; - const char *kwd = ""; - - tor_assert(area); - tok = ALLOC_ZERO(sizeof(directory_token_t)); - tok->tp = ERR_; - - /* Set *s to first token, eol to end-of-line, next to after first token */ - *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */ - tor_assert(eos >= *s); - eol = memchr(*s, '\n', eos-*s); - if (!eol) - eol = eos; - if (eol - *s > MAX_LINE_LENGTH) { - RET_ERR("Line far too long"); - } - - next = find_whitespace_eos(*s, eol); - - if (!strcmp_len(*s, "opt", next-*s)) { - /* Skip past an "opt" at the start of the line. */ - *s = eat_whitespace_eos_no_nl(next, eol); - next = find_whitespace_eos(*s, eol); - } else if (*s == eos) { /* If no "opt", and end-of-line, line is invalid */ - RET_ERR("Unexpected EOF"); - } - - /* Search the table for the appropriate entry. (I tried a binary search - * instead, but it wasn't any faster.) */ - for (i = 0; table[i].t ; ++i) { - if (!strcmp_len(*s, table[i].t, next-*s)) { - /* We've found the keyword. */ - kwd = table[i].t; - tok->tp = table[i].v; - o_syn = table[i].os; - *s = eat_whitespace_eos_no_nl(next, eol); - /* We go ahead whether there are arguments or not, so that tok->args is - * always set if we want arguments. */ - if (table[i].concat_args) { - /* The keyword takes the line as a single argument */ - tok->args = ALLOC(sizeof(char*)); - tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */ - tok->n_args = 1; - } else { - /* This keyword takes multiple arguments. */ - if (get_token_arguments(area, tok, *s, eol)<0) { - tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd); - RET_ERR(ebuf); - } - *s = eol; - } - if (tok->n_args < table[i].min_args) { - tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd); - RET_ERR(ebuf); - } else if (tok->n_args > table[i].max_args) { - tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd); - RET_ERR(ebuf); - } - break; - } - } - - if (tok->tp == ERR_) { - /* No keyword matched; call it an "K_opt" or "A_unrecognized" */ - if (**s == '@') - tok->tp = A_UNKNOWN_; - else - tok->tp = K_OPT; - tok->args = ALLOC(sizeof(char*)); - tok->args[0] = STRNDUP(*s, eol-*s); - tok->n_args = 1; - o_syn = OBJ_OK; - } - - /* Check whether there's an object present */ - *s = eat_whitespace_eos(eol, eos); /* Scan from end of first line */ - tor_assert(eos >= *s); - eol = memchr(*s, '\n', eos-*s); - if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */ - goto check_object; - - obstart = *s; /* Set obstart to start of object spec */ - if (*s+16 >= eol || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */ - strcmp_len(eol-5, "-----", 5) || /* nuls or invalid endings */ - (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) { /* name too long */ - RET_ERR("Malformed object: bad begin line"); - } - tok->object_type = STRNDUP(*s+11, eol-*s-16); - obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */ - *s = eol+1; /* Set *s to possible start of object data (could be eos) */ - - /* Go to the end of the object */ - next = tor_memstr(*s, eos-*s, "-----END "); - if (!next) { - RET_ERR("Malformed object: missing object end line"); - } - tor_assert(eos >= next); - eol = memchr(next, '\n', eos-next); - if (!eol) /* end-of-line marker, or eos if there's no '\n' */ - eol = eos; - /* Validate the ending tag, which should be 9 + NAME + 5 + eol */ - if ((size_t)(eol-next) != 9+obname_len+5 || - strcmp_len(next+9, tok->object_type, obname_len) || - strcmp_len(eol-5, "-----", 5)) { - tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s", - tok->object_type); - ebuf[sizeof(ebuf)-1] = '\0'; - RET_ERR(ebuf); - } - if (next - *s > MAX_UNPARSED_OBJECT_SIZE) - RET_ERR("Couldn't parse object: missing footer or object much too big."); - - if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */ - tok->key = crypto_pk_new(); - if (crypto_pk_read_public_key_from_string(tok->key, obstart, eol-obstart)) - RET_ERR("Couldn't parse public key."); - } else if (!strcmp(tok->object_type, "RSA PRIVATE KEY")) { /* private key */ - tok->key = crypto_pk_new(); - if (crypto_pk_read_private_key_from_string(tok->key, obstart, eol-obstart)) - RET_ERR("Couldn't parse private key."); - } else { /* If it's something else, try to base64-decode it */ - int r; - tok->object_body = ALLOC(next-*s); /* really, this is too much RAM. */ - r = base64_decode(tok->object_body, next-*s, *s, next-*s); - if (r<0) - RET_ERR("Malformed object: bad base64-encoded data"); - tok->object_size = r; - } - *s = eol; - - check_object: - tok = token_check_object(area, kwd, tok, o_syn); - - done_tokenizing: - return tok; - -#undef RET_ERR -#undef ALLOC -#undef ALLOC_ZERO -#undef STRDUP -#undef STRNDUP -} - -/** Read all tokens from a string between <b>start</b> and <b>end</b>, and add - * them to <b>out</b>. Parse according to the token rules in <b>table</b>. - * Caller must free tokens in <b>out</b>. If <b>end</b> is NULL, use the - * entire string. - */ -static int -tokenize_string(memarea_t *area, - const char *start, const char *end, smartlist_t *out, - token_rule_t *table, int flags) -{ - const char **s; - directory_token_t *tok = NULL; - int counts[NIL_]; - int i; - int first_nonannotation; - int prev_len = smartlist_len(out); - tor_assert(area); - - s = &start; - if (!end) { - end = start+strlen(start); - } else { - /* it's only meaningful to check for nuls if we got an end-of-string ptr */ - if (memchr(start, '\0', end-start)) { - log_warn(LD_DIR, "parse error: internal NUL character."); - return -1; - } - } - for (i = 0; i < NIL_; ++i) - counts[i] = 0; - - SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]); - - while (*s < end && (!tok || tok->tp != EOF_)) { - tok = get_next_token(area, s, end, table); - if (tok->tp == ERR_) { - log_warn(LD_DIR, "parse error: %s", tok->error); - token_clear(tok); - return -1; - } - ++counts[tok->tp]; - smartlist_add(out, tok); - *s = eat_whitespace_eos(*s, end); - } - - if (flags & TS_NOCHECK) - return 0; - - if ((flags & TS_ANNOTATIONS_OK)) { - first_nonannotation = -1; - for (i = 0; i < smartlist_len(out); ++i) { - tok = smartlist_get(out, i); - if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) { - first_nonannotation = i; - break; - } - } - if (first_nonannotation < 0) { - log_warn(LD_DIR, "parse error: item contains only annotations"); - return -1; - } - for (i=first_nonannotation; i < smartlist_len(out); ++i) { - tok = smartlist_get(out, i); - if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) { - log_warn(LD_DIR, "parse error: Annotations mixed with keywords"); - return -1; - } - } - if ((flags & TS_NO_NEW_ANNOTATIONS)) { - if (first_nonannotation != prev_len) { - log_warn(LD_DIR, "parse error: Unexpected annotations."); - return -1; - } - } - } else { - for (i=0; i < smartlist_len(out); ++i) { - tok = smartlist_get(out, i); - if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) { - log_warn(LD_DIR, "parse error: no annotations allowed."); - return -1; - } - } - first_nonannotation = 0; - } - for (i = 0; table[i].t; ++i) { - if (counts[table[i].v] < table[i].min_cnt) { - log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t); - return -1; - } - if (counts[table[i].v] > table[i].max_cnt) { - log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t); - return -1; - } - if (table[i].pos & AT_START) { - if (smartlist_len(out) < 1 || - (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) { - log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t); - return -1; - } - } - if (table[i].pos & AT_END) { - if (smartlist_len(out) < 1 || - (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) { - log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t); - return -1; - } - } - } - return 0; -} - -/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return - * NULL if no such keyword is found. - */ -static directory_token_t * -find_opt_by_keyword(smartlist_t *s, directory_keyword keyword) -{ - SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t); - return NULL; -} - -/** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail - * with an assert if no such keyword is found. - */ -static directory_token_t * -find_by_keyword_(smartlist_t *s, directory_keyword keyword, - const char *keyword_as_string) -{ - directory_token_t *tok = find_opt_by_keyword(s, keyword); - if (PREDICT_UNLIKELY(!tok)) { - log_err(LD_BUG, "Missing %s [%d] in directory object that should have " - "been validated. Internal error.", keyword_as_string, (int)keyword); - tor_assert(tok); - } - return tok; -} - -/** If there are any directory_token_t entries in <b>s</b> whose keyword is - * <b>k</b>, return a newly allocated smartlist_t containing all such entries, - * in the same order in which they occur in <b>s</b>. Otherwise return - * NULL. */ -static smartlist_t * -find_all_by_keyword(smartlist_t *s, directory_keyword k) -{ - smartlist_t *out = NULL; - SMARTLIST_FOREACH(s, directory_token_t *, t, - if (t->tp == k) { - if (!out) - out = smartlist_new(); - smartlist_add(out, t); - }); - return out; -} - /** Return a newly allocated smartlist of all accept or reject tokens in * <b>s</b>. */