commit 8a83c4b61397a5923257746f5b06418f8aef9959 Merge: aebc98d58 ad10cafd9 Author: Mike Perry mikeperry-git@torproject.org Date: Tue Sep 18 00:17:14 2018 +0000
Merge branch 'bug23512-v4-033' into bug23512-v4-master
changes/bug23512 | 6 +++ src/core/or/channeltls.h | 2 + src/core/or/circuitlist.c | 59 +++++++++++++++++++++++ src/core/or/circuitlist.h | 2 + src/core/or/or.h | 12 +++++ src/core/or/relay.c | 1 + src/core/or/scheduler_kist.c | 2 - src/feature/stats/rephist.c | 21 ++++---- src/feature/stats/rephist.h | 10 ++-- src/test/test_relay.c | 111 +++++++++++++++++++++++++++++++++++++++++-- 10 files changed, 207 insertions(+), 19 deletions(-)
diff --cc src/core/or/channeltls.h index 4a10d51d9,000000000..12715450b mode 100644,000000..100644 --- a/src/core/or/channeltls.h +++ b/src/core/or/channeltls.h @@@ -1,77 -1,0 +1,79 @@@ +/* * Copyright (c) 2012-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file channeltls.h + * \brief Header file for channeltls.c + **/ + +#ifndef TOR_CHANNELTLS_H +#define TOR_CHANNELTLS_H + +#include "core/or/or.h" +#include "core/or/channel.h" + +struct ed25519_public_key_t; +struct curve25519_public_key_t; + ++#define TLS_PER_CELL_OVERHEAD 29 ++ +#define BASE_CHAN_TO_TLS(c) (channel_tls_from_base((c))) +#define TLS_CHAN_TO_BASE(c) (channel_tls_to_base((c))) + +#define TLS_CHAN_MAGIC 0x8a192427U + +#ifdef TOR_CHANNEL_INTERNAL_ + +struct channel_tls_s { + /* Base channel_t struct */ + channel_t base_; + /* or_connection_t pointer */ + or_connection_t *conn; +}; + +#endif /* defined(TOR_CHANNEL_INTERNAL_) */ + +channel_t * channel_tls_connect(const tor_addr_t *addr, uint16_t port, + const char *id_digest, + const struct ed25519_public_key_t *ed_id); +channel_listener_t * channel_tls_get_listener(void); +channel_listener_t * channel_tls_start_listener(void); +channel_t * channel_tls_handle_incoming(or_connection_t *orconn); + +/* Casts */ + +channel_t * channel_tls_to_base(channel_tls_t *tlschan); +channel_tls_t * channel_tls_from_base(channel_t *chan); + +/* Things for connection_or.c to call back into */ +void channel_tls_handle_cell(cell_t *cell, or_connection_t *conn); +void channel_tls_handle_state_change_on_orconn(channel_tls_t *chan, + or_connection_t *conn, + uint8_t old_state, + uint8_t state); +void channel_tls_handle_var_cell(var_cell_t *var_cell, + or_connection_t *conn); +void channel_tls_update_marks(or_connection_t *conn); + +/* Cleanup at shutdown */ +void channel_tls_free_all(void); + +extern uint64_t stats_n_authorize_cells_processed; +extern uint64_t stats_n_authenticate_cells_processed; +extern uint64_t 
stats_n_versions_cells_processed; +extern uint64_t stats_n_netinfo_cells_processed; +extern uint64_t stats_n_vpadding_cells_processed; +extern uint64_t stats_n_certs_cells_processed; +extern uint64_t stats_n_auth_challenge_cells_processed; + +#ifdef CHANNELTLS_PRIVATE +STATIC void channel_tls_process_certs_cell(var_cell_t *cell, + channel_tls_t *tlschan); +STATIC void channel_tls_process_auth_challenge_cell(var_cell_t *cell, + channel_tls_t *tlschan); +STATIC void channel_tls_common_init(channel_tls_t *tlschan); +STATIC void channel_tls_process_authenticate_cell(var_cell_t *cell, + channel_tls_t *tlschan); +#endif /* defined(CHANNELTLS_PRIVATE) */ + +#endif /* !defined(TOR_CHANNELTLS_H) */ diff --cc src/core/or/circuitlist.c index 78ecadab7,000000000..7cf5e3445 mode 100644,000000..100644 --- a/src/core/or/circuitlist.c +++ b/src/core/or/circuitlist.c @@@ -1,2751 -1,0 +1,2810 @@@ +/* Copyright 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file circuitlist.c + * + * \brief Manage global structures that list and index circuits, and + * look up circuits within them. + * + * One of the most frequent operations in Tor occurs every time that + * a relay cell arrives on a channel. When that happens, we need to + * find which circuit it is associated with, based on the channel and the + * circuit ID in the relay cell. + * + * To handle that, we maintain a global list of circuits, and a hashtable + * mapping [channel,circID] pairs to circuits. Circuits are added to and + * removed from this mapping using circuit_set_p_circid_chan() and + * circuit_set_n_circid_chan(). To look up a circuit from this map, most + * callers should use circuit_get_by_circid_channel(), though + * circuit_get_by_circid_channel_even_if_marked() is appropriate under some + * circumstances. 
+ * + * We also need to allow for the possibility that we have blocked use of a + * circuit ID (because we are waiting to send a DESTROY cell), but the + * circuit is not there any more. For that case, we allow placeholder + * entries in the table, using channel_mark_circid_unusable(). + * + * To efficiently handle a channel that has just opened, we also maintain a + * list of the circuits waiting for channels, so we can attach them as + * needed without iterating through the whole list of circuits, using + * circuit_get_all_pending_on_channel(). + * + * In this module, we also handle the list of circuits that have been + * marked for close elsewhere, and close them as needed. (We use this + * "mark now, close later" pattern here and elsewhere to avoid + * unpredictable recursion if we closed every circuit immediately upon + * realizing it needed to close.) See circuit_mark_for_close() for the + * mark function, and circuit_close_all_marked() for the close function. + * + * For hidden services, we need to be able to look up introduction point + * circuits and rendezvous circuits by cookie, key, etc. These are + * currently handled with linear searches in + * circuit_get_ready_rend_circuit_by_rend_data(), + * circuit_get_next_by_pk_and_purpose(), and with hash lookups in + * circuit_get_rendezvous() and circuit_get_intro_point(). + * + * This module is also the entry point for our out-of-memory handler + * logic, which was originally circuit-focused. 
+ **/ +#define CIRCUITLIST_PRIVATE +#include "lib/cc/torint.h" /* TOR_PRIuSZ */ + +#include "core/or/or.h" +#include "core/or/channel.h" ++#include "core/or/channeltls.h" +#include "feature/client/circpathbias.h" +#include "core/or/circuitbuild.h" +#include "core/or/circuitlist.h" +#include "core/or/circuituse.h" +#include "core/or/circuitstats.h" +#include "core/mainloop/connection.h" +#include "app/config/config.h" +#include "core/or/connection_edge.h" +#include "core/or/connection_or.h" +#include "feature/control/control.h" +#include "lib/crypt_ops/crypto_rand.h" +#include "lib/crypt_ops/crypto_util.h" +#include "lib/crypt_ops/crypto_dh.h" +#include "feature/dircache/directory.h" +#include "feature/client/entrynodes.h" +#include "core/mainloop/main.h" +#include "feature/hs/hs_circuit.h" +#include "feature/hs/hs_circuitmap.h" +#include "feature/hs/hs_ident.h" +#include "feature/nodelist/networkstatus.h" +#include "feature/nodelist/nodelist.h" +#include "core/crypto/onion.h" +#include "core/crypto/onion_fast.h" +#include "core/or/policies.h" +#include "core/or/relay.h" +#include "core/crypto/relay_crypto.h" +#include "feature/rend/rendclient.h" +#include "feature/rend/rendcommon.h" +#include "feature/stats/rephist.h" +#include "feature/nodelist/routerlist.h" +#include "feature/nodelist/routerset.h" +#include "core/or/channelpadding.h" +#include "lib/compress/compress.h" +#include "lib/compress/compress_lzma.h" +#include "lib/compress/compress_zlib.h" +#include "lib/compress/compress_zstd.h" +#include "lib/container/buffers.h" + +#include "ht.h" + +#include "core/or/cpath_build_state_st.h" +#include "core/or/crypt_path_reference_st.h" +#include "feature/dircommon/dir_connection_st.h" +#include "core/or/edge_connection_st.h" +#include "core/or/half_edge_st.h" +#include "core/or/extend_info_st.h" +#include "core/or/or_circuit_st.h" +#include "core/or/origin_circuit_st.h" + +/********* START VARIABLES **********/ + +/** A global list of all circuits at this hop. 
*/ +static smartlist_t *global_circuitlist = NULL; + +/** A global list of all origin circuits. Every element of this is also + * an element of global_circuitlist. */ +static smartlist_t *global_origin_circuit_list = NULL; + +/** A list of all the circuits in CIRCUIT_STATE_CHAN_WAIT. */ +static smartlist_t *circuits_pending_chans = NULL; + +/** List of all the (origin) circuits whose state is + * CIRCUIT_STATE_GUARD_WAIT. */ +static smartlist_t *circuits_pending_other_guards = NULL; + +/** A list of all the circuits that have been marked with + * circuit_mark_for_close and which are waiting for circuit_about_to_free. */ +static smartlist_t *circuits_pending_close = NULL; + +static void circuit_free_cpath_node(crypt_path_t *victim); +static void cpath_ref_decref(crypt_path_reference_t *cpath_ref); +static void circuit_about_to_free_atexit(circuit_t *circ); +static void circuit_about_to_free(circuit_t *circ); + +/** + * A cached value of the current state of the origin circuit list. Has the + * value 1 if we saw any opened circuits recently (since the last call to + * circuit_any_opened_circuits(), which gets called around once a second by + * circuit_expire_building). 0 otherwise. + */ +static int any_opened_circs_cached_val = 0; + +/********* END VARIABLES ************/ + +or_circuit_t * +TO_OR_CIRCUIT(circuit_t *x) +{ + tor_assert(x->magic == OR_CIRCUIT_MAGIC); + return DOWNCAST(or_circuit_t, x); +} +const or_circuit_t * +CONST_TO_OR_CIRCUIT(const circuit_t *x) +{ + tor_assert(x->magic == OR_CIRCUIT_MAGIC); + return DOWNCAST(or_circuit_t, x); +} +origin_circuit_t * +TO_ORIGIN_CIRCUIT(circuit_t *x) +{ + tor_assert(x->magic == ORIGIN_CIRCUIT_MAGIC); + return DOWNCAST(origin_circuit_t, x); +} +const origin_circuit_t * +CONST_TO_ORIGIN_CIRCUIT(const circuit_t *x) +{ + tor_assert(x->magic == ORIGIN_CIRCUIT_MAGIC); + return DOWNCAST(origin_circuit_t, x); +} + +/** A map from channel and circuit ID to circuit. 
(Lookup performance is + * very important here, since we need to do it every time a cell arrives.) */ +typedef struct chan_circid_circuit_map_t { + HT_ENTRY(chan_circid_circuit_map_t) node; + channel_t *chan; + circid_t circ_id; + circuit_t *circuit; + /* For debugging 12184: when was this placeholder item added? Stays 0 for + * an entry that has a circuit attached. */ + time_t made_placeholder_at; +} chan_circid_circuit_map_t; + +/** Helper for hash tables: compare the channel and circuit ID of <b>a</b> + * and <b>b</b>. Return nonzero iff both fields match, and zero otherwise. + * (This is an equality test, not a three-way comparison.) + */ +static inline int +chan_circid_entries_eq_(chan_circid_circuit_map_t *a, + chan_circid_circuit_map_t *b) +{ + return a->chan == b->chan && a->circ_id == b->circ_id; +} + +/** Helper: return a hash based on circuit ID and the pointer value of + * chan in <b>a</b>. */ +static inline unsigned int +chan_circid_entry_hash_(chan_circid_circuit_map_t *a) +{ + /* Try to squeeze the siphash input into 8 bytes to save any extra siphash + * rounds. This hash function is in the critical path. */ + uintptr_t chan = (uintptr_t) (void*) a->chan; + uint32_t array[2]; + array[0] = a->circ_id; + /* The low bits of the channel pointer are uninteresting, since the channel + * is a pretty big structure. */ + array[1] = (uint32_t) (chan >> 6); + return (unsigned) siphash24g(array, sizeof(array)); +} + +/** Map from [chan,circid] to circuit. An entry whose circuit is NULL is a + * placeholder that only keeps the circuit ID reserved. */ +static HT_HEAD(chan_circid_map, chan_circid_circuit_map_t) + chan_circid_map = HT_INITIALIZER(); +HT_PROTOTYPE(chan_circid_map, chan_circid_circuit_map_t, node, + chan_circid_entry_hash_, chan_circid_entries_eq_) +HT_GENERATE2(chan_circid_map, chan_circid_circuit_map_t, node, + chan_circid_entry_hash_, chan_circid_entries_eq_, 0.6, + tor_reallocarray_, tor_free_) + +/** The most recently returned entry from circuit_get_by_circid_chan; + * used to improve performance when many cells arrive in a row from the + * same circuit. 
+ */ +static chan_circid_circuit_map_t *_last_circid_chan_ent = NULL; + +/** Implementation helper for circuit_set_{p,n}_circid_channel: A circuit ID + * and/or channel for circ has just changed from <b>old_chan, old_id</b> + * to <b>chan, id</b>. Adjust the chan,circid map as appropriate, removing + * the old entry (if any) and adding a new one. */ +static void +circuit_set_circid_chan_helper(circuit_t *circ, int direction, + circid_t id, + channel_t *chan) +{ + chan_circid_circuit_map_t search; + chan_circid_circuit_map_t *found; + channel_t *old_chan, **chan_ptr; + circid_t old_id, *circid_ptr; + int make_active, attached = 0; + + if (direction == CELL_DIRECTION_OUT) { + chan_ptr = &circ->n_chan; + circid_ptr = &circ->n_circ_id; + make_active = circ->n_chan_cells.n > 0; + } else { + or_circuit_t *c = TO_OR_CIRCUIT(circ); + chan_ptr = &c->p_chan; + circid_ptr = &c->p_circ_id; + make_active = c->p_chan_cells.n > 0; + } + old_chan = *chan_ptr; + old_id = *circid_ptr; + + if (id == old_id && chan == old_chan) + return; + + if (_last_circid_chan_ent && + ((old_id == _last_circid_chan_ent->circ_id && + old_chan == _last_circid_chan_ent->chan) || + (id == _last_circid_chan_ent->circ_id && + chan == _last_circid_chan_ent->chan))) { + _last_circid_chan_ent = NULL; + } + + if (old_chan) { + /* + * If we're changing channels or ID and had an old channel and a non + * zero old ID and weren't marked for close (i.e., we should have been + * attached), detach the circuit. ID changes require this because + * circuitmux hashes on (channel_id, circuit_id). 
+ */ + if (old_id != 0 && (old_chan != chan || old_id != id) && + !(circ->marked_for_close)) { + tor_assert(old_chan->cmux); + circuitmux_detach_circuit(old_chan->cmux, circ); + } + + /* we may need to remove it from the conn-circid map */ + search.circ_id = old_id; + search.chan = old_chan; + found = HT_REMOVE(chan_circid_map, &chan_circid_map, &search); + if (found) { + tor_free(found); + if (direction == CELL_DIRECTION_OUT) { + /* One fewer circuits use old_chan as n_chan */ + --(old_chan->num_n_circuits); + } else { + /* One fewer circuits use old_chan as p_chan */ + --(old_chan->num_p_circuits); + } + } + } + + /* Change the values only after we have possibly made the circuit inactive + * on the previous chan. */ + *chan_ptr = chan; + *circid_ptr = id; + + if (chan == NULL) + return; + + /* now add the new one to the conn-circid map */ + search.circ_id = id; + search.chan = chan; + found = HT_FIND(chan_circid_map, &chan_circid_map, &search); + if (found) { + found->circuit = circ; + found->made_placeholder_at = 0; + } else { + found = tor_malloc_zero(sizeof(chan_circid_circuit_map_t)); + found->circ_id = id; + found->chan = chan; + found->circuit = circ; + HT_INSERT(chan_circid_map, &chan_circid_map, found); + } + + /* + * Attach to the circuitmux if we're changing channels or IDs and + * have a new channel and ID to use and the circuit is not marked for + * close. 
+ */ + if (chan && id != 0 && (old_chan != chan || old_id != id) && + !(circ->marked_for_close)) { + tor_assert(chan->cmux); + circuitmux_attach_circuit(chan->cmux, circ, direction); + attached = 1; + } + + /* + * This is a no-op if we have no cells, but if we do it marks us active to + * the circuitmux + */ + if (make_active && attached) + update_circuit_on_cmux(circ, direction); + + /* Adjust circuit counts on new channel */ + if (direction == CELL_DIRECTION_OUT) { + ++chan->num_n_circuits; + } else { + ++chan->num_p_circuits; + } +} + +/** Mark that circuit id <b>id</b> shouldn't be used on channel <b>chan</b>, + * even if there is no circuit on the channel. We use this to keep the + * circuit id from getting re-used while we have queued but not yet sent + * a destroy cell. */ +void +channel_mark_circid_unusable(channel_t *chan, circid_t id) +{ + chan_circid_circuit_map_t search; + chan_circid_circuit_map_t *ent; + + /* See if there's an entry there. That wouldn't be good. */ + memset(&search, 0, sizeof(search)); + search.chan = chan; + search.circ_id = id; + ent = HT_FIND(chan_circid_map, &chan_circid_map, &search); + + if (ent && ent->circuit) { + /* we have a problem. */ + log_warn(LD_BUG, "Tried to mark %u unusable on %p, but there was already " + "a circuit there.", (unsigned)id, chan); + } else if (ent) { + /* It's already marked. */ + if (!ent->made_placeholder_at) + ent->made_placeholder_at = approx_time(); + } else { + ent = tor_malloc_zero(sizeof(chan_circid_circuit_map_t)); + ent->chan = chan; + ent->circ_id = id; + /* leave circuit at NULL. */ + ent->made_placeholder_at = approx_time(); + HT_INSERT(chan_circid_map, &chan_circid_map, ent); + } +} + +/** Mark that a circuit id <b>id</b> can be used again on <b>chan</b>. + * We use this to re-enable the circuit ID after we've sent a destroy cell. 
+ */ +void +channel_mark_circid_usable(channel_t *chan, circid_t id) +{ + chan_circid_circuit_map_t search; + chan_circid_circuit_map_t *ent; + + /* Unlink whatever entry is registered for (chan, id). A placeholder (NULL + * circuit) is what we expect to find; an entry that still has a circuit + * attached is a bug. + * NOTE(review): on that bug path the entry has already been unlinked by + * HT_REMOVE and is neither re-inserted nor freed before the early return. + * Confirm whether dropping the mapping there is intended. */ + memset(&search, 0, sizeof(search)); + search.chan = chan; + search.circ_id = id; + ent = HT_REMOVE(chan_circid_map, &chan_circid_map, &search); + if (ent && ent->circuit) { + log_warn(LD_BUG, "Tried to mark %u usable on %p, but there was already " + "a circuit there.", (unsigned)id, chan); + return; + } + if (_last_circid_chan_ent == ent) + _last_circid_chan_ent = NULL; + tor_free(ent); +} + +/** Called to indicate that a DESTROY is pending on <b>chan</b> with + * circuit ID <b>id</b>, but hasn't been sent yet. + * + * If a circuit is still registered for (chan, id), set its n_delete_pending + * or p_delete_pending flag, whichever side matches; otherwise reserve the + * ID with channel_mark_circid_unusable(). */ +void +channel_note_destroy_pending(channel_t *chan, circid_t id) +{ + circuit_t *circ = circuit_get_by_circid_channel_even_if_marked(id,chan); + if (circ) { + if (circ->n_chan == chan && circ->n_circ_id == id) { + circ->n_delete_pending = 1; + } else { + or_circuit_t *orcirc = TO_OR_CIRCUIT(circ); + if (orcirc->p_chan == chan && orcirc->p_circ_id == id) { + circ->p_delete_pending = 1; + } + } + return; + } + channel_mark_circid_unusable(chan, id); +} + +/** Called to indicate that a DESTROY is no longer pending on <b>chan</b> with + * circuit ID <b>id</b> -- typically, because it has been sent. + * + * If a circuit is still registered for (chan, id), clear the matching + * n_delete_pending or p_delete_pending flag; otherwise release the ID with + * channel_mark_circid_usable(). */ +MOCK_IMPL(void, +channel_note_destroy_not_pending,(channel_t *chan, circid_t id)) +{ + circuit_t *circ = circuit_get_by_circid_channel_even_if_marked(id,chan); + if (circ) { + if (circ->n_chan == chan && circ->n_circ_id == id) { + circ->n_delete_pending = 0; + } else { + or_circuit_t *orcirc = TO_OR_CIRCUIT(circ); + if (orcirc->p_chan == chan && orcirc->p_circ_id == id) { + circ->p_delete_pending = 0; + } + } + /* XXXX this shouldn't happen; log a bug here. 
*/ + return; + } + channel_mark_circid_usable(chan, id); +} + +/** Set the p_chan field of <b>or_circ</b> to <b>chan</b>, along + * with the corresponding circuit ID <b>id</b>, and add the circuit as + * appropriate to the (chan,id)->circuit map. + * + * If a DESTROY was still pending on the previous (chan, id) pair, mark that + * ID unusable on the old channel and clear p_delete_pending. */ +void +circuit_set_p_circid_chan(or_circuit_t *or_circ, circid_t id, + channel_t *chan) +{ + circuit_t *circ = TO_CIRCUIT(or_circ); + channel_t *old_chan = or_circ->p_chan; + circid_t old_id = or_circ->p_circ_id; + + circuit_set_circid_chan_helper(circ, CELL_DIRECTION_IN, id, chan); + + if (chan) { + chan->timestamp_last_had_circuits = approx_time(); + } + + if (circ->p_delete_pending && old_chan) { + channel_mark_circid_unusable(old_chan, old_id); + circ->p_delete_pending = 0; + } +} + +/** Set the n_chan field of <b>circ</b> to <b>chan</b>, along + * with the corresponding circuit ID <b>id</b>, and add the circuit as + * appropriate to the (chan,id)->circuit map. + * + * If a DESTROY was still pending on the previous (chan, id) pair, mark that + * ID unusable on the old channel and clear n_delete_pending. */ +void +circuit_set_n_circid_chan(circuit_t *circ, circid_t id, + channel_t *chan) +{ + channel_t *old_chan = circ->n_chan; + circid_t old_id = circ->n_circ_id; + + circuit_set_circid_chan_helper(circ, CELL_DIRECTION_OUT, id, chan); + + if (chan) { + chan->timestamp_last_had_circuits = approx_time(); + } + + if (circ->n_delete_pending && old_chan) { + channel_mark_circid_unusable(old_chan, old_id); + circ->n_delete_pending = 0; + } +} + +/** Change the state of <b>circ</b> to <b>state</b>, adding it to or removing + * it from lists as appropriate. Does nothing if <b>circ</b> is already in + * <b>state</b>. */ +void +circuit_set_state(circuit_t *circ, uint8_t state) +{ + tor_assert(circ); + if (state == circ->state) + return; + if (PREDICT_UNLIKELY(!circuits_pending_chans)) + circuits_pending_chans = smartlist_new(); + if (PREDICT_UNLIKELY(!circuits_pending_other_guards)) + circuits_pending_other_guards = smartlist_new(); + if (circ->state == CIRCUIT_STATE_CHAN_WAIT) { + /* remove from waiting-circuit list. */ + smartlist_remove(circuits_pending_chans, circ); + } + if (state == CIRCUIT_STATE_CHAN_WAIT) { + /* add to waiting-circuit list. 
*/ + smartlist_add(circuits_pending_chans, circ); + } + if (circ->state == CIRCUIT_STATE_GUARD_WAIT) { + smartlist_remove(circuits_pending_other_guards, circ); + } + if (state == CIRCUIT_STATE_GUARD_WAIT) { + smartlist_add(circuits_pending_other_guards, circ); + } + if (state == CIRCUIT_STATE_GUARD_WAIT || state == CIRCUIT_STATE_OPEN) + tor_assert(!circ->n_chan_create_cell); + circ->state = state; +} + +/** Append to <b>out</b> all circuits in state CHAN_WAIT waiting for + * the given connection. */ +void +circuit_get_all_pending_on_channel(smartlist_t *out, channel_t *chan) +{ + tor_assert(out); + tor_assert(chan); + + if (!circuits_pending_chans) + return; + + SMARTLIST_FOREACH_BEGIN(circuits_pending_chans, circuit_t *, circ) { + if (circ->marked_for_close) + continue; + if (!circ->n_hop) + continue; + tor_assert(circ->state == CIRCUIT_STATE_CHAN_WAIT); + if (tor_digest_is_zero(circ->n_hop->identity_digest)) { + /* Look at addr/port. This is an unkeyed connection. */ + if (!channel_matches_extend_info(chan, circ->n_hop)) + continue; + } else { + /* We expected a key. See if it's the right one. */ + if (tor_memneq(chan->identity_digest, + circ->n_hop->identity_digest, DIGEST_LEN)) + continue; + } + smartlist_add(out, circ); + } SMARTLIST_FOREACH_END(circ); +} + +/** Return the number of circuits in state CHAN_WAIT, waiting for the given + * channel. */ +int +circuit_count_pending_on_channel(channel_t *chan) +{ + int cnt; + smartlist_t *sl = smartlist_new(); + + tor_assert(chan); + + circuit_get_all_pending_on_channel(sl, chan); + cnt = smartlist_len(sl); + smartlist_free(sl); + log_debug(LD_CIRC,"or_conn to %s, %d pending circs", + channel_get_canonical_remote_descr(chan), + cnt); + return cnt; +} + +/** Remove <b>origin_circ</b> from the global list of origin circuits. + * Called when we are freeing a circuit. 
+ */ +static void +circuit_remove_from_origin_circuit_list(origin_circuit_t *origin_circ) +{ + int origin_idx = origin_circ->global_origin_circuit_list_idx; + if (origin_idx < 0) + return; + origin_circuit_t *c2; + tor_assert(origin_idx <= smartlist_len(global_origin_circuit_list)); /* NOTE(review): a valid index is strictly below the list length, so this bound presumably wants to be < rather than <=; confirm against smartlist_get(). */ + c2 = smartlist_get(global_origin_circuit_list, origin_idx); + tor_assert(origin_circ == c2); + smartlist_del(global_origin_circuit_list, origin_idx); + if (origin_idx < smartlist_len(global_origin_circuit_list)) { + origin_circuit_t *replacement = + smartlist_get(global_origin_circuit_list, origin_idx); + replacement->global_origin_circuit_list_idx = origin_idx; + } + origin_circ->global_origin_circuit_list_idx = -1; +} + +/** Add <b>origin_circ</b> to the global list of origin circuits. Called + * when creating the circuit. Asserts that the circuit is not already + * listed (its cached index must be -1), then records its new index. */ +static void +circuit_add_to_origin_circuit_list(origin_circuit_t *origin_circ) +{ + tor_assert(origin_circ->global_origin_circuit_list_idx == -1); + smartlist_t *lst = circuit_get_global_origin_circuit_list(); + smartlist_add(lst, origin_circ); + origin_circ->global_origin_circuit_list_idx = smartlist_len(lst) - 1; +} + +/** Detach from the global circuit list, and deallocate, all + * circuits that have been marked for close. Does nothing if the + * pending-close list was never created. + */ +void +circuit_close_all_marked(void) +{ + if (circuits_pending_close == NULL) + return; + + smartlist_t *lst = circuit_get_global_list(); + SMARTLIST_FOREACH_BEGIN(circuits_pending_close, circuit_t *, circ) { + tor_assert(circ->marked_for_close); + + /* Remove it from the circuit list. If another element now occupies + * slot idx, refresh that element's cached index. */ + int idx = circ->global_circuitlist_idx; + smartlist_del(lst, idx); + if (idx < smartlist_len(lst)) { + circuit_t *replacement = smartlist_get(lst, idx); + replacement->global_circuitlist_idx = idx; + } + circ->global_circuitlist_idx = -1; + + /* Remove it from the origin circuit list, if appropriate. 
*/ + if (CIRCUIT_IS_ORIGIN(circ)) { + circuit_remove_from_origin_circuit_list(TO_ORIGIN_CIRCUIT(circ)); + } + + circuit_about_to_free(circ); + circuit_free(circ); + } SMARTLIST_FOREACH_END(circ); + + smartlist_clear(circuits_pending_close); +} + +/** Return a pointer to the global list of circuits. */ +MOCK_IMPL(smartlist_t *, +circuit_get_global_list,(void)) +{ + if (NULL == global_circuitlist) + global_circuitlist = smartlist_new(); + return global_circuitlist; +} + +/** Return a pointer to the global list of origin circuits. */ +smartlist_t * +circuit_get_global_origin_circuit_list(void) +{ + if (NULL == global_origin_circuit_list) + global_origin_circuit_list = smartlist_new(); + return global_origin_circuit_list; +} + +/** + * Return true if we have any opened general-purpose 3 hop + * origin circuits. + * + * The result from this function is cached for use by + * circuit_any_opened_circuits_cached(). + */ +int +circuit_any_opened_circuits(void) +{ + SMARTLIST_FOREACH_BEGIN(circuit_get_global_origin_circuit_list(), + const origin_circuit_t *, next_circ) { + if (!TO_CIRCUIT(next_circ)->marked_for_close && + next_circ->has_opened && + TO_CIRCUIT(next_circ)->state == CIRCUIT_STATE_OPEN && + TO_CIRCUIT(next_circ)->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT && + next_circ->build_state && + next_circ->build_state->desired_path_len == DEFAULT_ROUTE_LEN) { + circuit_cache_opened_circuit_state(1); + return 1; + } + } SMARTLIST_FOREACH_END(next_circ); + + circuit_cache_opened_circuit_state(0); + return 0; +} + +/** + * Cache the "any circuits opened" state, as specified in param + * circuits_are_opened. This is a helper function to update + * the circuit opened status whenever we happen to look at the + * circuit list. 
+ */ +void +circuit_cache_opened_circuit_state(int circuits_are_opened) +{ + any_opened_circs_cached_val = circuits_are_opened; +} + +/** + * Return true if there were any opened circuits since the last call to + * circuit_any_opened_circuits(), or since circuit_expire_building() last + * ran (it runs roughly once per second). + */ +int +circuit_any_opened_circuits_cached(void) +{ + return any_opened_circs_cached_val; +} + +/** Function to make circ->state human-readable */ +const char * +circuit_state_to_string(int state) +{ + static char buf[64]; + switch (state) { + case CIRCUIT_STATE_BUILDING: return "doing handshakes"; + case CIRCUIT_STATE_ONIONSKIN_PENDING: return "processing the onion"; + case CIRCUIT_STATE_CHAN_WAIT: return "connecting to server"; + case CIRCUIT_STATE_GUARD_WAIT: return "waiting to see how other " + "guards perform"; + case CIRCUIT_STATE_OPEN: return "open"; + default: + log_warn(LD_BUG, "Unknown circuit state %d", state); + tor_snprintf(buf, sizeof(buf), "unknown state [%d]", state); + return buf; + } +} + +/** Map a circuit purpose to a string suitable to be displayed to a + * controller. */ +const char * +circuit_purpose_to_controller_string(uint8_t purpose) +{ + static char buf[32]; + switch (purpose) { + case CIRCUIT_PURPOSE_OR: + case CIRCUIT_PURPOSE_INTRO_POINT: + case CIRCUIT_PURPOSE_REND_POINT_WAITING: + case CIRCUIT_PURPOSE_REND_ESTABLISHED: + return "SERVER"; /* A controller should never see these, actually. 
*/ + + case CIRCUIT_PURPOSE_C_GENERAL: + return "GENERAL"; + + case CIRCUIT_PURPOSE_C_HSDIR_GET: + return "HS_CLIENT_HSDIR"; + + case CIRCUIT_PURPOSE_C_INTRODUCING: + case CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT: + case CIRCUIT_PURPOSE_C_INTRODUCE_ACKED: + return "HS_CLIENT_INTRO"; + + case CIRCUIT_PURPOSE_C_ESTABLISH_REND: + case CIRCUIT_PURPOSE_C_REND_READY: + case CIRCUIT_PURPOSE_C_REND_READY_INTRO_ACKED: + case CIRCUIT_PURPOSE_C_REND_JOINED: + return "HS_CLIENT_REND"; + + case CIRCUIT_PURPOSE_S_HSDIR_POST: + return "HS_SERVICE_HSDIR"; + + case CIRCUIT_PURPOSE_S_ESTABLISH_INTRO: + case CIRCUIT_PURPOSE_S_INTRO: + return "HS_SERVICE_INTRO"; + + case CIRCUIT_PURPOSE_S_CONNECT_REND: + case CIRCUIT_PURPOSE_S_REND_JOINED: + return "HS_SERVICE_REND"; + + case CIRCUIT_PURPOSE_TESTING: + return "TESTING"; + case CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT: + return "MEASURE_TIMEOUT"; + case CIRCUIT_PURPOSE_CONTROLLER: + return "CONTROLLER"; + case CIRCUIT_PURPOSE_PATH_BIAS_TESTING: + return "PATH_BIAS_TESTING"; + case CIRCUIT_PURPOSE_HS_VANGUARDS: + return "HS_VANGUARDS"; + + default: + tor_snprintf(buf, sizeof(buf), "UNKNOWN_%d", (int)purpose); + return buf; + } +} + +/** Return a string specifying the state of the hidden-service circuit + * purpose <b>purpose</b>, or NULL if <b>purpose</b> is not a + * hidden-service-related circuit purpose. 
*/ +const char * +circuit_purpose_to_controller_hs_state_string(uint8_t purpose) +{ + switch (purpose) + { + default: + log_fn(LOG_WARN, LD_BUG, + "Unrecognized circuit purpose: %d", + (int)purpose); + tor_fragile_assert(); + /* fall through */ + + case CIRCUIT_PURPOSE_OR: + case CIRCUIT_PURPOSE_C_GENERAL: + case CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT: + case CIRCUIT_PURPOSE_TESTING: + case CIRCUIT_PURPOSE_CONTROLLER: + case CIRCUIT_PURPOSE_PATH_BIAS_TESTING: + case CIRCUIT_PURPOSE_HS_VANGUARDS: + return NULL; + + case CIRCUIT_PURPOSE_INTRO_POINT: + return "OR_HSSI_ESTABLISHED"; + case CIRCUIT_PURPOSE_REND_POINT_WAITING: + return "OR_HSCR_ESTABLISHED"; + case CIRCUIT_PURPOSE_REND_ESTABLISHED: + return "OR_HS_R_JOINED"; + + case CIRCUIT_PURPOSE_C_HSDIR_GET: + case CIRCUIT_PURPOSE_C_INTRODUCING: + return "HSCI_CONNECTING"; + case CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT: + return "HSCI_INTRO_SENT"; + case CIRCUIT_PURPOSE_C_INTRODUCE_ACKED: + return "HSCI_DONE"; + + case CIRCUIT_PURPOSE_C_ESTABLISH_REND: + return "HSCR_CONNECTING"; + case CIRCUIT_PURPOSE_C_REND_READY: + return "HSCR_ESTABLISHED_IDLE"; + case CIRCUIT_PURPOSE_C_REND_READY_INTRO_ACKED: + return "HSCR_ESTABLISHED_WAITING"; + case CIRCUIT_PURPOSE_C_REND_JOINED: + return "HSCR_JOINED"; + + case CIRCUIT_PURPOSE_S_HSDIR_POST: + case CIRCUIT_PURPOSE_S_ESTABLISH_INTRO: + return "HSSI_CONNECTING"; + case CIRCUIT_PURPOSE_S_INTRO: + return "HSSI_ESTABLISHED"; + + case CIRCUIT_PURPOSE_S_CONNECT_REND: + return "HSSR_CONNECTING"; + case CIRCUIT_PURPOSE_S_REND_JOINED: + return "HSSR_JOINED"; + } +} + +/** Return a human-readable string for the circuit purpose <b>purpose</b>. 
*/ +const char * +circuit_purpose_to_string(uint8_t purpose) +{ + static char buf[32]; + + switch (purpose) + { + case CIRCUIT_PURPOSE_OR: + return "Circuit at relay"; + case CIRCUIT_PURPOSE_INTRO_POINT: + return "Acting as intro point"; + case CIRCUIT_PURPOSE_REND_POINT_WAITING: + return "Acting as rendezvous (pending)"; + case CIRCUIT_PURPOSE_REND_ESTABLISHED: + return "Acting as rendezvous (established)"; + case CIRCUIT_PURPOSE_C_GENERAL: + return "General-purpose client"; + case CIRCUIT_PURPOSE_C_INTRODUCING: + return "Hidden service client: Connecting to intro point"; + case CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT: + return "Hidden service client: Waiting for ack from intro point"; + case CIRCUIT_PURPOSE_C_INTRODUCE_ACKED: + return "Hidden service client: Received ack from intro point"; + case CIRCUIT_PURPOSE_C_ESTABLISH_REND: + return "Hidden service client: Establishing rendezvous point"; + case CIRCUIT_PURPOSE_C_REND_READY: + return "Hidden service client: Pending rendezvous point"; + case CIRCUIT_PURPOSE_C_REND_READY_INTRO_ACKED: + return "Hidden service client: Pending rendezvous point (ack received)"; + case CIRCUIT_PURPOSE_C_REND_JOINED: + return "Hidden service client: Active rendezvous point"; + case CIRCUIT_PURPOSE_C_HSDIR_GET: + return "Hidden service client: Fetching HS descriptor"; + + case CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT: + return "Measuring circuit timeout"; + + case CIRCUIT_PURPOSE_S_ESTABLISH_INTRO: + return "Hidden service: Establishing introduction point"; + case CIRCUIT_PURPOSE_S_INTRO: + return "Hidden service: Introduction point"; + case CIRCUIT_PURPOSE_S_CONNECT_REND: + return "Hidden service: Connecting to rendezvous point"; + case CIRCUIT_PURPOSE_S_REND_JOINED: + return "Hidden service: Active rendezvous point"; + case CIRCUIT_PURPOSE_S_HSDIR_POST: + return "Hidden service: Uploading HS descriptor"; + + case CIRCUIT_PURPOSE_TESTING: + return "Testing circuit"; + + case CIRCUIT_PURPOSE_CONTROLLER: + return "Circuit made by controller"; + 
+ case CIRCUIT_PURPOSE_PATH_BIAS_TESTING: + return "Path-bias testing circuit"; + + case CIRCUIT_PURPOSE_HS_VANGUARDS: + return "Hidden service: Pre-built vanguard circuit"; + + default: + tor_snprintf(buf, sizeof(buf), "UNKNOWN_%d", (int)purpose); + return buf; + } +} + +/** Pick a reasonable package_window to start out for our circuits. + * Originally this was hard-coded at 1000, but now the consensus votes + * on the answer. See proposal 168. */ +int32_t +circuit_initial_package_window(void) +{ + int32_t num = networkstatus_get_param(NULL, "circwindow", CIRCWINDOW_START, + CIRCWINDOW_START_MIN, + CIRCWINDOW_START_MAX); + /* If the consensus tells us a negative number, we'd assert. */ + if (num < 0) + num = CIRCWINDOW_START; + return num; +} + +/** Initialize the common elements in a circuit_t, and add it to the global + * list. */ +static void +init_circuit_base(circuit_t *circ) +{ + tor_gettimeofday(&circ->timestamp_created); + + // Gets reset when we send CREATE_FAST. + // circuit_expire_building() expects these to be equal + // until the orconn is built. + circ->timestamp_began = circ->timestamp_created; + + circ->package_window = circuit_initial_package_window(); + circ->deliver_window = CIRCWINDOW_START; + cell_queue_init(&circ->n_chan_cells); + + smartlist_add(circuit_get_global_list(), circ); + circ->global_circuitlist_idx = smartlist_len(circuit_get_global_list()) - 1; +} + +/** If we haven't yet decided on a good timeout value for circuit + * building, we close idle circuits aggressively so we can get more + * data points. These are the default, min, and max consensus values */ +#define DFLT_IDLE_TIMEOUT_WHILE_LEARNING (3*60) +#define MIN_IDLE_TIMEOUT_WHILE_LEARNING (10) +#define MAX_IDLE_TIMEOUT_WHILE_LEARNING (1000*60) + +/** Allocate space for a new circuit, initializing with <b>p_circ_id</b> + * and <b>p_conn</b>. Add it to the global circuit list. 
+ */ +origin_circuit_t * +origin_circuit_new(void) +{ + origin_circuit_t *circ; + /* never zero, since a global ID of 0 is treated specially by the + * controller */ + static uint32_t n_circuits_allocated = 1; + + circ = tor_malloc_zero(sizeof(origin_circuit_t)); + circ->base_.magic = ORIGIN_CIRCUIT_MAGIC; + + circ->next_stream_id = crypto_rand_int(1<<16); + circ->global_identifier = n_circuits_allocated++; + circ->remaining_relay_early_cells = MAX_RELAY_EARLY_CELLS_PER_CIRCUIT; + circ->remaining_relay_early_cells -= crypto_rand_int(2); + + init_circuit_base(TO_CIRCUIT(circ)); + + /* Add to origin-list. */ + circ->global_origin_circuit_list_idx = -1; + circuit_add_to_origin_circuit_list(circ); + + circuit_build_times_update_last_circ(get_circuit_build_times_mutable()); + + if (! circuit_build_times_disabled(get_options()) && + circuit_build_times_needs_circuits(get_circuit_build_times())) { + /* Circuits should be shorter lived if we need more of them + * for learning a good build timeout */ + circ->circuit_idle_timeout = + networkstatus_get_param(NULL, "cbtlearntimeout", + DFLT_IDLE_TIMEOUT_WHILE_LEARNING, + MIN_IDLE_TIMEOUT_WHILE_LEARNING, + MAX_IDLE_TIMEOUT_WHILE_LEARNING); + } else { + // This should always be larger than the current port prediction time + // remaining, or else we'll end up with the case where a circuit times out + // and another one is built, effectively doubling the timeout window. + // + // We also randomize it by up to 5% more (ie 5% of 0 to 3600 seconds, + // depending on how much circuit prediction time is remaining) so that + // we don't close a bunch of unused circuits all at the same time. 
+ int prediction_time_remaining = + predicted_ports_prediction_time_remaining(time(NULL)); + circ->circuit_idle_timeout = prediction_time_remaining+1+ + crypto_rand_int(1+prediction_time_remaining/20); + + if (circ->circuit_idle_timeout <= 0) { + log_warn(LD_BUG, + "Circuit chose a negative idle timeout of %d based on " + "%d seconds of predictive building remaining.", + circ->circuit_idle_timeout, + prediction_time_remaining); + circ->circuit_idle_timeout = + networkstatus_get_param(NULL, "cbtlearntimeout", + DFLT_IDLE_TIMEOUT_WHILE_LEARNING, + MIN_IDLE_TIMEOUT_WHILE_LEARNING, + MAX_IDLE_TIMEOUT_WHILE_LEARNING); + } + + log_info(LD_CIRC, + "Circuit %"PRIu32" chose an idle timeout of %d based on " + "%d seconds of predictive building remaining.", + (circ->global_identifier), + circ->circuit_idle_timeout, + prediction_time_remaining); + } + + return circ; +} + +/** Allocate a new or_circuit_t, connected to <b>p_chan</b> as + * <b>p_circ_id</b>. If <b>p_chan</b> is NULL, the circuit is unattached. */ +or_circuit_t * +or_circuit_new(circid_t p_circ_id, channel_t *p_chan) +{ + /* CircIDs */ + or_circuit_t *circ; + + circ = tor_malloc_zero(sizeof(or_circuit_t)); + circ->base_.magic = OR_CIRCUIT_MAGIC; + + if (p_chan) + circuit_set_p_circid_chan(circ, p_circ_id, p_chan); + + circ->remaining_relay_early_cells = MAX_RELAY_EARLY_CELLS_PER_CIRCUIT; + cell_queue_init(&circ->p_chan_cells); + + init_circuit_base(TO_CIRCUIT(circ)); + + return circ; +} + +/** Free all storage held in circ->testing_cell_stats */ +void +circuit_clear_testing_cell_stats(circuit_t *circ) +{ + if (!circ || !circ->testing_cell_stats) + return; + SMARTLIST_FOREACH(circ->testing_cell_stats, testing_cell_stats_entry_t *, + ent, tor_free(ent)); + smartlist_free(circ->testing_cell_stats); + circ->testing_cell_stats = NULL; +} + +/** Deallocate space associated with circ. 
+ */ +STATIC void +circuit_free_(circuit_t *circ) +{ + circid_t n_circ_id = 0; + void *mem; + size_t memlen; + int should_free = 1; + if (!circ) + return; + + /* We keep a copy of this so we can log its value before it gets unset. */ + n_circ_id = circ->n_circ_id; + + circuit_clear_testing_cell_stats(circ); + + /* Cleanup circuit from anything HS v3 related. We also do this when the + * circuit is closed. This is to avoid any code path that free registered + * circuits without closing them before. This needs to be done before the + * hs identifier is freed. */ + hs_circ_cleanup(circ); + + if (CIRCUIT_IS_ORIGIN(circ)) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + mem = ocirc; + memlen = sizeof(origin_circuit_t); + tor_assert(circ->magic == ORIGIN_CIRCUIT_MAGIC); + + circuit_remove_from_origin_circuit_list(ocirc); + + if (ocirc->half_streams) { + SMARTLIST_FOREACH_BEGIN(ocirc->half_streams, half_edge_t*, + half_conn) { + tor_free(half_conn); + } SMARTLIST_FOREACH_END(half_conn); + smartlist_free(ocirc->half_streams); + } + + if (ocirc->build_state) { + extend_info_free(ocirc->build_state->chosen_exit); + circuit_free_cpath_node(ocirc->build_state->pending_final_cpath); + cpath_ref_decref(ocirc->build_state->service_pending_final_cpath_ref); + } + tor_free(ocirc->build_state); + + /* Cancel before freeing, if we haven't already succeeded or failed. */ + if (ocirc->guard_state) { + entry_guard_cancel(ô->guard_state); + } + circuit_guard_state_free(ocirc->guard_state); + + circuit_clear_cpath(ocirc); + + crypto_pk_free(ocirc->intro_key); + rend_data_free(ocirc->rend_data); + + /* Finally, free the identifier of the circuit and nullify it so multiple + * cleanup will work. 
*/ + hs_ident_circuit_free(ocirc->hs_ident); + ocirc->hs_ident = NULL; + + tor_free(ocirc->dest_address); + if (ocirc->socks_username) { + memwipe(ocirc->socks_username, 0x12, ocirc->socks_username_len); + tor_free(ocirc->socks_username); + } + if (ocirc->socks_password) { + memwipe(ocirc->socks_password, 0x06, ocirc->socks_password_len); + tor_free(ocirc->socks_password); + } + addr_policy_list_free(ocirc->prepend_policy); + } else { + or_circuit_t *ocirc = TO_OR_CIRCUIT(circ); + /* Remember cell statistics for this circuit before deallocating. */ + if (get_options()->CellStatistics) + rep_hist_buffer_stats_add_circ(circ, time(NULL)); + mem = ocirc; + memlen = sizeof(or_circuit_t); + tor_assert(circ->magic == OR_CIRCUIT_MAGIC); + + should_free = (ocirc->workqueue_entry == NULL); + + relay_crypto_clear(ô->crypto); + + if (ocirc->rend_splice) { + or_circuit_t *other = ocirc->rend_splice; + tor_assert(other->base_.magic == OR_CIRCUIT_MAGIC); + other->rend_splice = NULL; + } + + /* remove from map. */ + circuit_set_p_circid_chan(ocirc, 0, NULL); + + /* Clear cell queue _after_ removing it from the map. Otherwise our + * "active" checks will be violated. */ + cell_queue_clear(ô->p_chan_cells); + } + + extend_info_free(circ->n_hop); + tor_free(circ->n_chan_create_cell); + + if (circ->global_circuitlist_idx != -1) { + int idx = circ->global_circuitlist_idx; + circuit_t *c2 = smartlist_get(global_circuitlist, idx); + tor_assert(c2 == circ); + smartlist_del(global_circuitlist, idx); + if (idx < smartlist_len(global_circuitlist)) { + c2 = smartlist_get(global_circuitlist, idx); + c2->global_circuitlist_idx = idx; + } + } + + /* Remove from map. */ + circuit_set_n_circid_chan(circ, 0, NULL); + + /* Clear cell queue _after_ removing it from the map. Otherwise our + * "active" checks will be violated. */ + cell_queue_clear(&circ->n_chan_cells); + + log_info(LD_CIRC, "Circuit %u (id: %" PRIu32 ") has been freed.", + n_circ_id, + CIRCUIT_IS_ORIGIN(circ) ? 
+ TO_ORIGIN_CIRCUIT(circ)->global_identifier : 0); + + if (should_free) { + memwipe(mem, 0xAA, memlen); /* poison memory */ + tor_free(mem); + } else { + /* If we made it here, this is an or_circuit_t that still has a pending + * cpuworker request which we weren't able to cancel. Instead, set up + * the magic value so that when the reply comes back, we'll know to discard + * the reply and free this structure. + */ + memwipe(mem, 0xAA, memlen); + circ->magic = DEAD_CIRCUIT_MAGIC; + } +} + +/** Deallocate the linked list circ-><b>cpath</b>, and remove the cpath from + * <b>circ</b>. */ +void +circuit_clear_cpath(origin_circuit_t *circ) +{ + crypt_path_t *victim, *head, *cpath; + + head = cpath = circ->cpath; + + if (!cpath) + return; + + /* it's a circular list, so we have to notice when we've + * gone through it once. */ + while (cpath->next && cpath->next != head) { + victim = cpath; + cpath = victim->next; + circuit_free_cpath_node(victim); + } + + circuit_free_cpath_node(cpath); + + circ->cpath = NULL; +} + +/** Release all storage held by circuits. */ +void +circuit_free_all(void) +{ + smartlist_t *lst = circuit_get_global_list(); + + SMARTLIST_FOREACH_BEGIN(lst, circuit_t *, tmp) { + if (! 
CIRCUIT_IS_ORIGIN(tmp)) { + or_circuit_t *or_circ = TO_OR_CIRCUIT(tmp); + while (or_circ->resolving_streams) { + edge_connection_t *next_conn; + next_conn = or_circ->resolving_streams->next_stream; + connection_free_(TO_CONN(or_circ->resolving_streams)); + or_circ->resolving_streams = next_conn; + } + } + tmp->global_circuitlist_idx = -1; + circuit_about_to_free_atexit(tmp); + circuit_free(tmp); + SMARTLIST_DEL_CURRENT(lst, tmp); + } SMARTLIST_FOREACH_END(tmp); + + smartlist_free(lst); + global_circuitlist = NULL; + + smartlist_free(global_origin_circuit_list); + global_origin_circuit_list = NULL; + + smartlist_free(circuits_pending_chans); + circuits_pending_chans = NULL; + + smartlist_free(circuits_pending_close); + circuits_pending_close = NULL; + + smartlist_free(circuits_pending_other_guards); + circuits_pending_other_guards = NULL; + + { + chan_circid_circuit_map_t **elt, **next, *c; + for (elt = HT_START(chan_circid_map, &chan_circid_map); + elt; + elt = next) { + c = *elt; + next = HT_NEXT_RMV(chan_circid_map, &chan_circid_map, elt); + + tor_assert(c->circuit == NULL); + tor_free(c); + } + } + HT_CLEAR(chan_circid_map, &chan_circid_map); +} + +/** Deallocate space associated with the cpath node <b>victim</b>. */ +static void +circuit_free_cpath_node(crypt_path_t *victim) +{ + if (!victim) + return; + + relay_crypto_clear(&victim->crypto); + onion_handshake_state_release(&victim->handshake_state); + crypto_dh_free(victim->rend_dh_handshake_state); + extend_info_free(victim->extend_info); + + memwipe(victim, 0xBB, sizeof(crypt_path_t)); /* poison memory */ + tor_free(victim); +} + +/** Release a crypt_path_reference_t*, which may be NULL. */ +static void +cpath_ref_decref(crypt_path_reference_t *cpath_ref) +{ + if (cpath_ref != NULL) { + if (--(cpath_ref->refcount) == 0) { + circuit_free_cpath_node(cpath_ref->cpath); + tor_free(cpath_ref); + } + } +} + +/** A helper function for circuit_dump_by_conn() below. 
Log a bunch + * of information about circuit <b>circ</b>. + */ +static void +circuit_dump_conn_details(int severity, + circuit_t *circ, + int conn_array_index, + const char *type, + circid_t this_circid, + circid_t other_circid) +{ + tor_log(severity, LD_CIRC, "Conn %d has %s circuit: circID %u " + "(other side %u), state %d (%s), born %ld:", + conn_array_index, type, (unsigned)this_circid, (unsigned)other_circid, + circ->state, circuit_state_to_string(circ->state), + (long)circ->timestamp_began.tv_sec); + if (CIRCUIT_IS_ORIGIN(circ)) { /* circ starts at this node */ + circuit_log_path(severity, LD_CIRC, TO_ORIGIN_CIRCUIT(circ)); + } +} + +/** Log, at severity <b>severity</b>, information about each circuit + * that is connected to <b>conn</b>. + */ +void +circuit_dump_by_conn(connection_t *conn, int severity) +{ + edge_connection_t *tmpconn; + + SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, circ) { + circid_t n_circ_id = circ->n_circ_id, p_circ_id = 0; + + if (circ->marked_for_close) { + continue; + } + + if (!CIRCUIT_IS_ORIGIN(circ)) { + p_circ_id = TO_OR_CIRCUIT(circ)->p_circ_id; + } + + if (CIRCUIT_IS_ORIGIN(circ)) { + for (tmpconn=TO_ORIGIN_CIRCUIT(circ)->p_streams; tmpconn; + tmpconn=tmpconn->next_stream) { + if (TO_CONN(tmpconn) == conn) { + circuit_dump_conn_details(severity, circ, conn->conn_array_index, + "App-ward", p_circ_id, n_circ_id); + } + } + } + + if (! CIRCUIT_IS_ORIGIN(circ)) { + for (tmpconn=TO_OR_CIRCUIT(circ)->n_streams; tmpconn; + tmpconn=tmpconn->next_stream) { + if (TO_CONN(tmpconn) == conn) { + circuit_dump_conn_details(severity, circ, conn->conn_array_index, + "Exit-ward", n_circ_id, p_circ_id); + } + } + } + } + SMARTLIST_FOREACH_END(circ); +} + +/** Return the circuit whose global ID is <b>id</b>, or NULL if no + * such circuit exists. 
*/ +origin_circuit_t * +circuit_get_by_global_id(uint32_t id) +{ + SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, circ) { + if (CIRCUIT_IS_ORIGIN(circ) && + TO_ORIGIN_CIRCUIT(circ)->global_identifier == id) { + if (circ->marked_for_close) + return NULL; + else + return TO_ORIGIN_CIRCUIT(circ); + } + } + SMARTLIST_FOREACH_END(circ); + return NULL; +} + +/** Return a circ such that: + * - circ->n_circ_id or circ->p_circ_id is equal to <b>circ_id</b>, and + * - circ is attached to <b>chan</b>, either as p_chan or n_chan. + * Return NULL if no such circuit exists. + * + * If <b>found_entry_out</b> is provided, set it to true if we have a + * placeholder entry for circid/chan, and leave it unset otherwise. + */ +static inline circuit_t * +circuit_get_by_circid_channel_impl(circid_t circ_id, channel_t *chan, + int *found_entry_out) +{ + chan_circid_circuit_map_t search; + chan_circid_circuit_map_t *found; + + if (_last_circid_chan_ent && + circ_id == _last_circid_chan_ent->circ_id && + chan == _last_circid_chan_ent->chan) { + found = _last_circid_chan_ent; + } else { + search.circ_id = circ_id; + search.chan = chan; + found = HT_FIND(chan_circid_map, &chan_circid_map, &search); + _last_circid_chan_ent = found; + } + if (found && found->circuit) { + log_debug(LD_CIRC, + "circuit_get_by_circid_channel_impl() returning circuit %p for" + " circ_id %u, channel ID %"PRIu64 " (%p)", + found->circuit, (unsigned)circ_id, + (chan->global_identifier), chan); + if (found_entry_out) + *found_entry_out = 1; + return found->circuit; + } + + log_debug(LD_CIRC, + "circuit_get_by_circid_channel_impl() found %s for" + " circ_id %u, channel ID %"PRIu64 " (%p)", + found ? "placeholder" : "nothing", + (unsigned)circ_id, + (chan->global_identifier), chan); + + if (found_entry_out) + *found_entry_out = found ? 1 : 0; + + return NULL; + /* The rest of this checks for bugs. Disabled by default. */ + /* We comment it out because coverity complains otherwise. 
+ { + circuit_t *circ; + TOR_LIST_FOREACH(circ, &global_circuitlist, head) { + if (! CIRCUIT_IS_ORIGIN(circ)) { + or_circuit_t *or_circ = TO_OR_CIRCUIT(circ); + if (or_circ->p_chan == chan && or_circ->p_circ_id == circ_id) { + log_warn(LD_BUG, + "circuit matches p_chan, but not in hash table (Bug!)"); + return circ; + } + } + if (circ->n_chan == chan && circ->n_circ_id == circ_id) { + log_warn(LD_BUG, + "circuit matches n_chan, but not in hash table (Bug!)"); + return circ; + } + } + return NULL; + } */ +} + +/** Return a circ such that: + * - circ->n_circ_id or circ->p_circ_id is equal to <b>circ_id</b>, and + * - circ is attached to <b>chan</b>, either as p_chan or n_chan. + * - circ is not marked for close. + * Return NULL if no such circuit exists. + */ +circuit_t * +circuit_get_by_circid_channel(circid_t circ_id, channel_t *chan) +{ + circuit_t *circ = circuit_get_by_circid_channel_impl(circ_id, chan, NULL); + if (!circ || circ->marked_for_close) + return NULL; + else + return circ; +} + +/** Return a circ such that: + * - circ->n_circ_id or circ->p_circ_id is equal to <b>circ_id</b>, and + * - circ is attached to <b>chan</b>, either as p_chan or n_chan. + * Return NULL if no such circuit exists. + */ +circuit_t * +circuit_get_by_circid_channel_even_if_marked(circid_t circ_id, + channel_t *chan) +{ + return circuit_get_by_circid_channel_impl(circ_id, chan, NULL); +} + +/** Return true iff the circuit ID <b>circ_id</b> is currently used by a + * circuit, marked or not, on <b>chan</b>, or if the circ ID is reserved until + * a queued destroy cell can be sent. + * + * (Return 1 if the circuit is present, marked or not; Return 2 + * if the circuit ID is pending a destroy.) + **/ +int +circuit_id_in_use_on_channel(circid_t circ_id, channel_t *chan) +{ + int found = 0; + if (circuit_get_by_circid_channel_impl(circ_id, chan, &found) != NULL) + return 1; + if (found) + return 2; + return 0; +} + +/** Helper for debugging 12184. 
Returns the time since which 'circ_id' has + * been marked unusable on 'chan'. */ +time_t +circuit_id_when_marked_unusable_on_channel(circid_t circ_id, channel_t *chan) +{ + chan_circid_circuit_map_t search; + chan_circid_circuit_map_t *found; + + memset(&search, 0, sizeof(search)); + search.circ_id = circ_id; + search.chan = chan; + + found = HT_FIND(chan_circid_map, &chan_circid_map, &search); + + if (! found || found->circuit) + return 0; + + return found->made_placeholder_at; +} + +/** Return the circuit that a given edge connection is using. */ +circuit_t * +circuit_get_by_edge_conn(edge_connection_t *conn) +{ + circuit_t *circ; + + circ = conn->on_circuit; + tor_assert(!circ || + (CIRCUIT_IS_ORIGIN(circ) ? circ->magic == ORIGIN_CIRCUIT_MAGIC + : circ->magic == OR_CIRCUIT_MAGIC)); + + return circ; +} + +/** For each circuit that has <b>chan</b> as n_chan or p_chan, unlink the + * circuit from the chan,circid map, and mark it for close if it hasn't + * been marked already. + */ +void +circuit_unlink_all_from_channel(channel_t *chan, int reason) +{ + smartlist_t *detached = smartlist_new(); + +/* #define DEBUG_CIRCUIT_UNLINK_ALL */ + + channel_unlink_all_circuits(chan, detached); + +#ifdef DEBUG_CIRCUIT_UNLINK_ALL + { + smartlist_t *detached_2 = smartlist_new(); + int mismatch = 0, badlen = 0; + + SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, circ) { + if (circ->n_chan == chan || + (!CIRCUIT_IS_ORIGIN(circ) && + TO_OR_CIRCUIT(circ)->p_chan == chan)) { + smartlist_add(detached_2, circ); + } + } + SMARTLIST_FOREACH_END(circ); + + if (smartlist_len(detached) != smartlist_len(detached_2)) { + log_warn(LD_BUG, "List of detached circuits had the wrong length! 
" + "(got %d, should have gotten %d)", + (int)smartlist_len(detached), + (int)smartlist_len(detached_2)); + badlen = 1; + } + smartlist_sort_pointers(detached); + smartlist_sort_pointers(detached_2); + + SMARTLIST_FOREACH(detached, circuit_t *, c, + if (c != smartlist_get(detached_2, c_sl_idx)) + mismatch = 1; + ); + + if (mismatch) + log_warn(LD_BUG, "Mismatch in list of detached circuits."); + + if (badlen || mismatch) { + smartlist_free(detached); + detached = detached_2; + } else { + log_notice(LD_CIRC, "List of %d circuits was as expected.", + (int)smartlist_len(detached)); + smartlist_free(detached_2); + } + } +#endif /* defined(DEBUG_CIRCUIT_UNLINK_ALL) */ + + SMARTLIST_FOREACH_BEGIN(detached, circuit_t *, circ) { + int mark = 0; + if (circ->n_chan == chan) { + + circuit_set_n_circid_chan(circ, 0, NULL); + mark = 1; + + /* If we didn't request this closure, pass the remote + * bit to mark_for_close. */ + if (chan->reason_for_closing != CHANNEL_CLOSE_REQUESTED) + reason |= END_CIRC_REASON_FLAG_REMOTE; + } + if (! CIRCUIT_IS_ORIGIN(circ)) { + or_circuit_t *or_circ = TO_OR_CIRCUIT(circ); + if (or_circ->p_chan == chan) { + circuit_set_p_circid_chan(or_circ, 0, NULL); + mark = 1; + } + } + if (!mark) { + log_warn(LD_BUG, "Circuit on detached list which I had no reason " + "to mark"); + continue; + } + if (!circ->marked_for_close) + circuit_mark_for_close(circ, reason); + } SMARTLIST_FOREACH_END(circ); + + smartlist_free(detached); +} + +/** Return a circ such that + * - circ->rend_data->onion_address is equal to + * <b>rend_data</b>->onion_address, + * - circ->rend_data->rend_cookie is equal to + * <b>rend_data</b>->rend_cookie, and + * - circ->purpose is equal to CIRCUIT_PURPOSE_C_REND_READY. + * + * Return NULL if no such circuit exists. 
+ */ +origin_circuit_t * +circuit_get_ready_rend_circ_by_rend_data(const rend_data_t *rend_data) +{ + SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, circ) { + if (!circ->marked_for_close && + circ->purpose == CIRCUIT_PURPOSE_C_REND_READY) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + if (ocirc->rend_data == NULL) { + continue; + } + if (!rend_cmp_service_ids(rend_data_get_address(rend_data), + rend_data_get_address(ocirc->rend_data)) && + tor_memeq(ocirc->rend_data->rend_cookie, + rend_data->rend_cookie, + REND_COOKIE_LEN)) + return ocirc; + } + } + SMARTLIST_FOREACH_END(circ); + return NULL; +} + +/** Return the first service introduction circuit originating from the global + * circuit list after <b>start</b> or at the start of the list if <b>start</b> + * is NULL. Return NULL if no circuit is found. + * + * A service introduction point circuit has a purpose of either + * CIRCUIT_PURPOSE_S_ESTABLISH_INTRO or CIRCUIT_PURPOSE_S_INTRO. This does not + * return a circuit marked for close and its state must be open. */ +origin_circuit_t * +circuit_get_next_service_intro_circ(origin_circuit_t *start) +{ + int idx = 0; + smartlist_t *lst = circuit_get_global_list(); + + if (start) { + idx = TO_CIRCUIT(start)->global_circuitlist_idx + 1; + } + + for ( ; idx < smartlist_len(lst); ++idx) { + circuit_t *circ = smartlist_get(lst, idx); + + /* Ignore a marked for close circuit or purpose not matching a service + * intro point or if the state is not open. */ + if (circ->marked_for_close || circ->state != CIRCUIT_STATE_OPEN || + (circ->purpose != CIRCUIT_PURPOSE_S_ESTABLISH_INTRO && + circ->purpose != CIRCUIT_PURPOSE_S_INTRO)) { + continue; + } + /* The purposes we are looking for are only for origin circuits so the + * following is valid. */ + return TO_ORIGIN_CIRCUIT(circ); + } + /* Not found. 
*/ + return NULL; +} + +/** Return the first service rendezvous circuit originating from the global + * circuit list after <b>start</b> or at the start of the list if <b>start</b> + * is NULL. Return NULL if no circuit is found. + * + * A service rendezvous point circuit has a purpose of either + * CIRCUIT_PURPOSE_S_CONNECT_REND or CIRCUIT_PURPOSE_S_REND_JOINED. This does + * not return a circuit marked for close and its state must be open. */ +origin_circuit_t * +circuit_get_next_service_rp_circ(origin_circuit_t *start) +{ + int idx = 0; + smartlist_t *lst = circuit_get_global_list(); + + if (start) { + idx = TO_CIRCUIT(start)->global_circuitlist_idx + 1; + } + + for ( ; idx < smartlist_len(lst); ++idx) { + circuit_t *circ = smartlist_get(lst, idx); + + /* Ignore a marked for close circuit or purpose not matching a service + * intro point or if the state is not open. */ + if (circ->marked_for_close || circ->state != CIRCUIT_STATE_OPEN || + (circ->purpose != CIRCUIT_PURPOSE_S_CONNECT_REND && + circ->purpose != CIRCUIT_PURPOSE_S_REND_JOINED)) { + continue; + } + /* The purposes we are looking for are only for origin circuits so the + * following is valid. */ + return TO_ORIGIN_CIRCUIT(circ); + } + /* Not found. */ + return NULL; +} + +/** Return the first circuit originating here in global_circuitlist after + * <b>start</b> whose purpose is <b>purpose</b>, and where <b>digest</b> (if + * set) matches the private key digest of the rend data associated with the + * circuit. Return NULL if no circuit is found. If <b>start</b> is NULL, + * begin at the start of the list. 
+ */ +origin_circuit_t * +circuit_get_next_by_pk_and_purpose(origin_circuit_t *start, + const uint8_t *digest, uint8_t purpose) +{ + int idx; + smartlist_t *lst = circuit_get_global_list(); + tor_assert(CIRCUIT_PURPOSE_IS_ORIGIN(purpose)); + if (start == NULL) + idx = 0; + else + idx = TO_CIRCUIT(start)->global_circuitlist_idx + 1; + + for ( ; idx < smartlist_len(lst); ++idx) { + circuit_t *circ = smartlist_get(lst, idx); + origin_circuit_t *ocirc; + + if (circ->marked_for_close) + continue; + if (circ->purpose != purpose) + continue; + /* At this point we should be able to get a valid origin circuit because + * the origin purpose we are looking for matches this circuit. */ + if (BUG(!CIRCUIT_PURPOSE_IS_ORIGIN(circ->purpose))) { + break; + } + ocirc = TO_ORIGIN_CIRCUIT(circ); + if (!digest) + return ocirc; + if (rend_circuit_pk_digest_eq(ocirc, digest)) { + return ocirc; + } + } + return NULL; +} + +/** We might cannibalize this circuit: Return true if its last hop can be used + * as a v3 rendezvous point. */ +static int +circuit_can_be_cannibalized_for_v3_rp(const origin_circuit_t *circ) +{ + if (!circ->build_state) { + return 0; + } + + extend_info_t *chosen_exit = circ->build_state->chosen_exit; + if (BUG(!chosen_exit)) { + return 0; + } + + const node_t *rp_node = node_get_by_id(chosen_exit->identity_digest); + if (rp_node) { + if (node_supports_v3_rendezvous_point(rp_node)) { + return 1; + } + } + + return 0; +} + +/** We are trying to create a circuit of purpose <b>purpose</b> and we are + * looking for cannibalizable circuits. Return the circuit purpose we would be + * willing to cannibalize. */ +static uint8_t +get_circuit_purpose_needed_to_cannibalize(uint8_t purpose) +{ + if (circuit_should_use_vanguards(purpose)) { + /* If we are using vanguards, then we should only cannibalize vanguard + * circuits so that we get the same path construction logic. 
*/ + return CIRCUIT_PURPOSE_HS_VANGUARDS; + } else { + /* If no vanguards are used just get a general circuit! */ + return CIRCUIT_PURPOSE_C_GENERAL; + } +} + +/** Return a circuit that is open, is CIRCUIT_PURPOSE_C_GENERAL, + * has a timestamp_dirty value of 0, has flags matching the CIRCLAUNCH_* + * flags in <b>flags</b>, and if info is defined, does not already use info + * as any of its hops; or NULL if no circuit fits this description. + * + * The <b>purpose</b> argument refers to the purpose of the circuit we want to + * create, not the purpose of the circuit we want to cannibalize. + * + * If !CIRCLAUNCH_NEED_UPTIME, prefer returning non-uptime circuits. + * + * To "cannibalize" a circuit means to extend it an extra hop, and use it + * for some other purpose than we had originally intended. We do this when + * we want to perform some low-bandwidth task at a specific relay, and we + * would like the circuit to complete as soon as possible. (If we were going + * to use a lot of bandwidth, we wouldn't want a circuit with an extra hop. + * If we didn't care about circuit completion latency, we would just build + * a new circuit.) + */ +origin_circuit_t * +circuit_find_to_cannibalize(uint8_t purpose_to_produce, extend_info_t *info, + int flags) +{ + origin_circuit_t *best=NULL; + int need_uptime = (flags & CIRCLAUNCH_NEED_UPTIME) != 0; + int need_capacity = (flags & CIRCLAUNCH_NEED_CAPACITY) != 0; + int internal = (flags & CIRCLAUNCH_IS_INTERNAL) != 0; + const or_options_t *options = get_options(); + /* We want the circuit we are trying to cannibalize to have this purpose */ + int purpose_to_search_for; + + /* Make sure we're not trying to create a onehop circ by + * cannibalization. 
*/ + tor_assert(!(flags & CIRCLAUNCH_ONEHOP_TUNNEL)); + + purpose_to_search_for = get_circuit_purpose_needed_to_cannibalize( + purpose_to_produce); + + tor_assert_nonfatal(purpose_to_search_for == CIRCUIT_PURPOSE_C_GENERAL || + purpose_to_search_for == CIRCUIT_PURPOSE_HS_VANGUARDS); + + log_debug(LD_CIRC, + "Hunting for a circ to cannibalize: purpose %d, uptime %d, " + "capacity %d, internal %d", + purpose_to_produce, need_uptime, need_capacity, internal); + + SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, circ_) { + if (CIRCUIT_IS_ORIGIN(circ_) && + circ_->state == CIRCUIT_STATE_OPEN && + !circ_->marked_for_close && + circ_->purpose == purpose_to_search_for && + !circ_->timestamp_dirty) { + origin_circuit_t *circ = TO_ORIGIN_CIRCUIT(circ_); + + /* Only cannibalize from reasonable length circuits. If we + * want C_GENERAL, then only choose 3 hop circs. If we want + * HS_VANGUARDS, only choose 4 hop circs. + */ + if (circ->build_state->desired_path_len != + route_len_for_purpose(purpose_to_search_for, NULL)) { + goto next; + } + + /* Ignore any circuits for which we can't use the Guard. It is possible + * that the Guard was removed from the samepled set after the circuit + * was created so avoid using it. 
*/ + if (!entry_guard_could_succeed(circ->guard_state)) { + goto next; + } + + if ((!need_uptime || circ->build_state->need_uptime) && + (!need_capacity || circ->build_state->need_capacity) && + (internal == circ->build_state->is_internal) && + !circ->unusable_for_new_conns && + circ->remaining_relay_early_cells && + !circ->build_state->onehop_tunnel && + !circ->isolation_values_set) { + if (info) { + /* need to make sure we don't duplicate hops */ + crypt_path_t *hop = circ->cpath; + const node_t *ri1 = node_get_by_id(info->identity_digest); + do { + const node_t *ri2; + if (tor_memeq(hop->extend_info->identity_digest, + info->identity_digest, DIGEST_LEN)) + goto next; + if (ri1 && + (ri2 = node_get_by_id(hop->extend_info->identity_digest)) + && nodes_in_same_family(ri1, ri2)) + goto next; + hop=hop->next; + } while (hop!=circ->cpath); + } + if (options->ExcludeNodes) { + /* Make sure no existing nodes in the circuit are excluded for + * general use. (This may be possible if StrictNodes is 0, and we + * thought we needed to use an otherwise excluded node for, say, a + * directory operation.) */ + crypt_path_t *hop = circ->cpath; + do { + if (routerset_contains_extendinfo(options->ExcludeNodes, + hop->extend_info)) + goto next; + hop = hop->next; + } while (hop != circ->cpath); + } + + if ((flags & CIRCLAUNCH_IS_V3_RP) && + !circuit_can_be_cannibalized_for_v3_rp(circ)) { + log_debug(LD_GENERAL, "Skipping uncannibalizable circuit for v3 " + "rendezvous point."); + goto next; + } + + if (!best || (best->build_state->need_uptime && !need_uptime)) + best = circ; + next: ; + } + } + } + SMARTLIST_FOREACH_END(circ_); + return best; +} + +/** + * Check whether any of the origin circuits that are waiting to see if + * their guard is good enough to use can be upgraded to "ready". If so, + * return a new smartlist containing them. Otherwise return NULL. 
+ */ +smartlist_t * +circuit_find_circuits_to_upgrade_from_guard_wait(void) +{ + /* Only if some circuit is actually waiting on an upgrade should we + * run the algorithm. */ + if (! circuits_pending_other_guards || + smartlist_len(circuits_pending_other_guards)==0) + return NULL; + /* Only if we have some origin circuits should we run the algorithm. */ + if (!global_origin_circuit_list) + return NULL; + + /* Okay; we can pass our circuit list to entrynodes.c.*/ + smartlist_t *result = smartlist_new(); + int circuits_upgraded = entry_guards_upgrade_waiting_circuits( + get_guard_selection_info(), + global_origin_circuit_list, + result); + if (circuits_upgraded && smartlist_len(result)) { + return result; + } else { + smartlist_free(result); + return NULL; + } +} + +/** Return the number of hops in circuit's path. If circ has no entries, + * or is NULL, returns 0. */ +int +circuit_get_cpath_len(origin_circuit_t *circ) +{ + int n = 0; + if (circ && circ->cpath) { + crypt_path_t *cpath, *cpath_next = NULL; + for (cpath = circ->cpath; cpath_next != circ->cpath; cpath = cpath_next) { + cpath_next = cpath->next; + ++n; + } + } + return n; +} + +/** Return the number of opened hops in circuit's path. + * If circ has no entries, or is NULL, returns 0. */ +int +circuit_get_cpath_opened_len(const origin_circuit_t *circ) +{ + int n = 0; + if (circ && circ->cpath) { + crypt_path_t *cpath, *cpath_next = NULL; + for (cpath = circ->cpath; + cpath->state == CPATH_STATE_OPEN + && cpath_next != circ->cpath; + cpath = cpath_next) { + cpath_next = cpath->next; + ++n; + } + } + return n; +} + +/** Return the <b>hopnum</b>th hop in <b>circ</b>->cpath, or NULL if there + * aren't that many hops in the list. <b>hopnum</b> starts at 1. + * Returns NULL if <b>hopnum</b> is 0 or negative. 
*/ +crypt_path_t * +circuit_get_cpath_hop(origin_circuit_t *circ, int hopnum) +{ + if (circ && circ->cpath && hopnum > 0) { + crypt_path_t *cpath, *cpath_next = NULL; + for (cpath = circ->cpath; cpath_next != circ->cpath; cpath = cpath_next) { + cpath_next = cpath->next; + if (--hopnum <= 0) + return cpath; + } + } + return NULL; +} + +/** Go through the circuitlist; mark-for-close each circuit that starts + * at us but has not yet been used. */ +void +circuit_mark_all_unused_circs(void) +{ + SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, circ) { + if (CIRCUIT_IS_ORIGIN(circ) && + !circ->marked_for_close && + !circ->timestamp_dirty) + circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED); + } + SMARTLIST_FOREACH_END(circ); +} + +/** Go through the circuitlist; for each circuit that starts at us + * and is dirty, frob its timestamp_dirty so we won't use it for any + * new streams. + * + * This is useful for letting the user change pseudonyms, so new + * streams will not be linkable to old streams. + */ +void +circuit_mark_all_dirty_circs_as_unusable(void) +{ + SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, circ) { + if (CIRCUIT_IS_ORIGIN(circ) && + !circ->marked_for_close && + circ->timestamp_dirty) { + mark_circuit_unusable_for_new_conns(TO_ORIGIN_CIRCUIT(circ)); + } + } + SMARTLIST_FOREACH_END(circ); +} + ++/** ++ * Report any queued cells on or_circuits as written in our bandwidth ++ * totals, for the specified channel direction. ++ * ++ * When we close a circuit or clear its cell queues, we've read ++ * data and recorded those bytes in our read statistics, but we're ++ * not going to write it. This discrepancy can be used by an adversary ++ * to infer information from our public relay statistics and perform ++ * attacks such as guard discovery. ++ * ++ * This function is in the critical path of circuit_mark_for_close(). ++ * It must be (and is) O(1)! ++ * ++ * See https://trac.torproject.org/projects/tor/ticket/23512. 
++ */ ++void ++circuit_synchronize_written_or_bandwidth(const circuit_t *c, ++ circuit_channel_direction_t dir) ++{ ++ uint64_t cells; ++ uint64_t cell_size; ++ uint64_t written_sync; ++ const channel_t *chan = NULL; ++ const or_circuit_t *or_circ; ++ ++ if (!CIRCUIT_IS_ORCIRC(c)) ++ return; ++ ++ or_circ = CONST_TO_OR_CIRCUIT(c); ++ ++ if (dir == CIRCUIT_N_CHAN) { ++ chan = c->n_chan; ++ cells = c->n_chan_cells.n; ++ } else { ++ chan = or_circ->p_chan; ++ cells = or_circ->p_chan_cells.n; ++ } ++ ++ /* If we still know the chan, determine real cell size. Otherwise, ++ * assume it's a wide circid channel */ ++ if (chan) ++ cell_size = get_cell_network_size(chan->wide_circ_ids); ++ else ++ cell_size = CELL_MAX_NETWORK_SIZE; ++ ++ /* The missing written bytes are the cell counts times their cell ++ * size plus TLS per cell overhead */ ++ written_sync = cells*(cell_size+TLS_PER_CELL_OVERHEAD); ++ ++ /* Report the missing bytes as written, to avoid asymmetry. ++ * We must use time() for consistency with rephist, even though on ++ * some very old rare platforms, approx_time() may be faster. */ ++ rep_hist_note_bytes_written(written_sync, time(NULL)); ++} ++ +/** Mark <b>circ</b> to be closed next time we call + * circuit_close_all_marked(). Do any cleanup needed: + * - If state is onionskin_pending, remove circ from the onion_pending + * list. + * - If circ isn't open yet: call circuit_build_failed() if we're + * the origin. + * - If purpose is C_INTRODUCE_ACK_WAIT, report the intro point + * failure we just had to the hidden service client module. + * - If purpose is C_INTRODUCING and <b>reason</b> isn't TIMEOUT, + * report to the hidden service client module that the intro point + * we just tried may be unreachable. + * - Send appropriate destroys and edge_destroys for conns and + * streams attached to circ. + * - If circ->rend_splice is set (we are the midpoint of a joined + * rendezvous stream), then mark the other circuit to close as well. 
+ */ +MOCK_IMPL(void, +circuit_mark_for_close_, (circuit_t *circ, int reason, int line, + const char *file)) +{ + int orig_reason = reason; /* Passed to the controller */ + assert_circuit_ok(circ); + tor_assert(line); + tor_assert(file); + + if (circ->marked_for_close) { + log_warn(LD_BUG, + "Duplicate call to circuit_mark_for_close at %s:%d" + " (first at %s:%d)", file, line, + circ->marked_for_close_file, circ->marked_for_close); + return; + } + if (reason == END_CIRC_AT_ORIGIN) { + if (!CIRCUIT_IS_ORIGIN(circ)) { + log_warn(LD_BUG, "Specified 'at-origin' non-reason for ending circuit, " + "but circuit was not at origin. (called %s:%d, purpose=%d)", + file, line, circ->purpose); + } + reason = END_CIRC_REASON_NONE; + } + + if (CIRCUIT_IS_ORIGIN(circ)) { + if (pathbias_check_close(TO_ORIGIN_CIRCUIT(circ), reason) == -1) { + /* Don't close it yet, we need to test it first */ + return; + } + + /* We don't send reasons when closing circuits at the origin. */ + reason = END_CIRC_REASON_NONE; + } + ++ circuit_synchronize_written_or_bandwidth(circ, CIRCUIT_N_CHAN); ++ circuit_synchronize_written_or_bandwidth(circ, CIRCUIT_P_CHAN); ++ + if (reason & END_CIRC_REASON_FLAG_REMOTE) + reason &= ~END_CIRC_REASON_FLAG_REMOTE; + + if (reason < END_CIRC_REASON_MIN_ || reason > END_CIRC_REASON_MAX_) { + if (!(orig_reason & END_CIRC_REASON_FLAG_REMOTE)) + log_warn(LD_BUG, "Reason %d out of range at %s:%d", reason, file, line); + reason = END_CIRC_REASON_NONE; + } + + circ->marked_for_close = line; + circ->marked_for_close_file = file; + circ->marked_for_close_reason = reason; + circ->marked_for_close_orig_reason = orig_reason; + + if (!CIRCUIT_IS_ORIGIN(circ)) { + or_circuit_t *or_circ = TO_OR_CIRCUIT(circ); + if (or_circ->rend_splice) { + if (!or_circ->rend_splice->base_.marked_for_close) { + /* do this after marking this circuit, to avoid infinite recursion. 
*/ + circuit_mark_for_close(TO_CIRCUIT(or_circ->rend_splice), reason); + } + or_circ->rend_splice = NULL; + } + } + + /* Notify the HS subsystem that this circuit is closing. */ + hs_circ_cleanup(circ); + + if (circuits_pending_close == NULL) + circuits_pending_close = smartlist_new(); + + smartlist_add(circuits_pending_close, circ); + mainloop_schedule_postloop_cleanup(); + + log_info(LD_GENERAL, "Circuit %u (id: %" PRIu32 ") marked for close at " + "%s:%d (orig reason: %d, new reason: %d)", + circ->n_circ_id, + CIRCUIT_IS_ORIGIN(circ) ? + TO_ORIGIN_CIRCUIT(circ)->global_identifier : 0, + file, line, orig_reason, reason); +} + +/** Called immediately before freeing a marked circuit <b>circ</b> from + * circuit_free_all() while shutting down Tor; this is a safe-at-shutdown + * version of circuit_about_to_free(). It's important that it at least + * do circuitmux_detach_circuit() when appropriate. + */ +static void +circuit_about_to_free_atexit(circuit_t *circ) +{ + + if (circ->n_chan) { + circuit_clear_cell_queue(circ, circ->n_chan); + circuitmux_detach_circuit(circ->n_chan->cmux, circ); + circuit_set_n_circid_chan(circ, 0, NULL); + } + + if (! CIRCUIT_IS_ORIGIN(circ)) { + or_circuit_t *or_circ = TO_OR_CIRCUIT(circ); + + if (or_circ->p_chan) { + circuit_clear_cell_queue(circ, or_circ->p_chan); + circuitmux_detach_circuit(or_circ->p_chan->cmux, circ); + circuit_set_p_circid_chan(or_circ, 0, NULL); + } + } +} + +/** Called immediately before freeing a marked circuit <b>circ</b>. + * Disconnects the circuit from other data structures, launches events + * as appropriate, and performs other housekeeping. + */ +static void +circuit_about_to_free(circuit_t *circ) +{ + + int reason = circ->marked_for_close_reason; + int orig_reason = circ->marked_for_close_orig_reason; + + if (circ->state == CIRCUIT_STATE_ONIONSKIN_PENDING) { + onion_pending_remove(TO_OR_CIRCUIT(circ)); + } + /* If the circuit ever became OPEN, we sent it to the reputation history + * module then. 
If it isn't OPEN, we send it there now to remember which + * links worked and which didn't. + */ + if (circ->state != CIRCUIT_STATE_OPEN && + circ->state != CIRCUIT_STATE_GUARD_WAIT) { + if (CIRCUIT_IS_ORIGIN(circ)) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + circuit_build_failed(ocirc); /* take actions if necessary */ + } + } + if (circ->state == CIRCUIT_STATE_CHAN_WAIT) { + if (circuits_pending_chans) + smartlist_remove(circuits_pending_chans, circ); + } + if (circuits_pending_other_guards) { + smartlist_remove(circuits_pending_other_guards, circ); + } + if (CIRCUIT_IS_ORIGIN(circ)) { + control_event_circuit_status(TO_ORIGIN_CIRCUIT(circ), + (circ->state == CIRCUIT_STATE_OPEN || + circ->state == CIRCUIT_STATE_GUARD_WAIT) ? + CIRC_EVENT_CLOSED:CIRC_EVENT_FAILED, + orig_reason); + } + + if (circ->purpose == CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + int timed_out = (reason == END_CIRC_REASON_TIMEOUT); + tor_assert(circ->state == CIRCUIT_STATE_OPEN); + tor_assert(ocirc->build_state->chosen_exit); + if (orig_reason != END_CIRC_REASON_IP_NOW_REDUNDANT && + ocirc->rend_data) { + /* treat this like getting a nack from it */ + log_info(LD_REND, "Failed intro circ %s to %s (awaiting ack). %s", + safe_str_client(rend_data_get_address(ocirc->rend_data)), + safe_str_client(build_state_get_exit_nickname(ocirc->build_state)), + timed_out ? "Recording timeout." : "Removing from descriptor."); + rend_client_report_intro_point_failure(ocirc->build_state->chosen_exit, + ocirc->rend_data, + timed_out ? 
+ INTRO_POINT_FAILURE_TIMEOUT : + INTRO_POINT_FAILURE_GENERIC); + } + } else if (circ->purpose == CIRCUIT_PURPOSE_C_INTRODUCING && + reason != END_CIRC_REASON_TIMEOUT) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + if (ocirc->build_state->chosen_exit && ocirc->rend_data) { + if (orig_reason != END_CIRC_REASON_IP_NOW_REDUNDANT && + ocirc->rend_data) { + log_info(LD_REND, "Failed intro circ %s to %s " + "(building circuit to intro point). " + "Marking intro point as possibly unreachable.", + safe_str_client(rend_data_get_address(ocirc->rend_data)), + safe_str_client(build_state_get_exit_nickname( + ocirc->build_state))); + rend_client_report_intro_point_failure(ocirc->build_state->chosen_exit, + ocirc->rend_data, + INTRO_POINT_FAILURE_UNREACHABLE); + } + } + } + + if (circ->n_chan) { + circuit_clear_cell_queue(circ, circ->n_chan); + /* Only send destroy if the channel isn't closing anyway */ + if (!CHANNEL_CONDEMNED(circ->n_chan)) { + channel_send_destroy(circ->n_circ_id, circ->n_chan, reason); + } + circuitmux_detach_circuit(circ->n_chan->cmux, circ); + circuit_set_n_circid_chan(circ, 0, NULL); + } + + if (! CIRCUIT_IS_ORIGIN(circ)) { + or_circuit_t *or_circ = TO_OR_CIRCUIT(circ); + edge_connection_t *conn; + for (conn=or_circ->n_streams; conn; conn=conn->next_stream) + connection_edge_destroy(or_circ->p_circ_id, conn); + or_circ->n_streams = NULL; + + while (or_circ->resolving_streams) { + conn = or_circ->resolving_streams; + or_circ->resolving_streams = conn->next_stream; + if (!conn->base_.marked_for_close) { + /* The client will see a DESTROY, and infer that the connections + * are closing because the circuit is getting torn down. No need + * to send an end cell. 
*/ + conn->edge_has_sent_end = 1; + conn->end_reason = END_STREAM_REASON_DESTROY; + conn->end_reason |= END_STREAM_REASON_FLAG_ALREADY_SENT_CLOSED; + connection_mark_for_close(TO_CONN(conn)); + } + conn->on_circuit = NULL; + } + + if (or_circ->p_chan) { + circuit_clear_cell_queue(circ, or_circ->p_chan); + /* Only send destroy if the channel isn't closing anyway */ + if (!CHANNEL_CONDEMNED(or_circ->p_chan)) { + channel_send_destroy(or_circ->p_circ_id, or_circ->p_chan, reason); + } + circuitmux_detach_circuit(or_circ->p_chan->cmux, circ); + circuit_set_p_circid_chan(or_circ, 0, NULL); + } + } else { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + edge_connection_t *conn; + for (conn=ocirc->p_streams; conn; conn=conn->next_stream) + connection_edge_destroy(circ->n_circ_id, conn); + ocirc->p_streams = NULL; + } +} + +/** Given a marked circuit <b>circ</b>, aggressively free its cell queues to + * recover memory. */ +static void +marked_circuit_free_cells(circuit_t *circ) +{ + if (!circ->marked_for_close) { + log_warn(LD_BUG, "Called on non-marked circuit"); + return; + } + cell_queue_clear(&circ->n_chan_cells); + if (circ->n_mux) + circuitmux_clear_num_cells(circ->n_mux, circ); + if (! 
CIRCUIT_IS_ORIGIN(circ)) { + or_circuit_t *orcirc = TO_OR_CIRCUIT(circ); + cell_queue_clear(&orcirc->p_chan_cells); + if (orcirc->p_mux) + circuitmux_clear_num_cells(orcirc->p_mux, circ); + } +} + +static size_t +single_conn_free_bytes(connection_t *conn) +{ + size_t result = 0; + if (conn->inbuf) { + result += buf_allocation(conn->inbuf); + buf_clear(conn->inbuf); + } + if (conn->outbuf) { + result += buf_allocation(conn->outbuf); + buf_clear(conn->outbuf); + conn->outbuf_flushlen = 0; + } + if (conn->type == CONN_TYPE_DIR) { + dir_connection_t *dir_conn = TO_DIR_CONN(conn); + if (dir_conn->compress_state) { + result += tor_compress_state_size(dir_conn->compress_state); + tor_compress_free(dir_conn->compress_state); + dir_conn->compress_state = NULL; + } + } + return result; +} + +/** Aggressively free buffer contents on all the buffers of all streams in the + * list starting at <b>stream</b>. Return the number of bytes recovered. */ +static size_t +marked_circuit_streams_free_bytes(edge_connection_t *stream) +{ + size_t result = 0; + for ( ; stream; stream = stream->next_stream) { + connection_t *conn = TO_CONN(stream); + result += single_conn_free_bytes(conn); + if (conn->linked_conn) { + result += single_conn_free_bytes(conn->linked_conn); + } + } + return result; +} + +/** Aggressively free buffer contents on all the buffers of all streams on + * circuit <b>c</b>. Return the number of bytes recovered. */ +static size_t +marked_circuit_free_stream_bytes(circuit_t *c) +{ + if (CIRCUIT_IS_ORIGIN(c)) { + return marked_circuit_streams_free_bytes(TO_ORIGIN_CIRCUIT(c)->p_streams); + } else { + return marked_circuit_streams_free_bytes(TO_OR_CIRCUIT(c)->n_streams); + } +} + +/** Return the number of cells used by the circuit <b>c</b>'s cell queues. */ +STATIC size_t +n_cells_in_circ_queues(const circuit_t *c) +{ + size_t n = c->n_chan_cells.n; + if (! 
CIRCUIT_IS_ORIGIN(c)) { + circuit_t *cc = (circuit_t *) c; + n += TO_OR_CIRCUIT(cc)->p_chan_cells.n; + } + return n; +} + +/** + * Return the age of the oldest cell queued on <b>c</b>, in timestamp units. + * Return 0 if there are no cells queued on c. Requires that <b>now</b> be + * the current coarse timestamp. + * + * This function will return incorrect results if the oldest cell queued on + * the circuit is older than about 2**32 msec (about 49 days) old. + */ +STATIC uint32_t +circuit_max_queued_cell_age(const circuit_t *c, uint32_t now) +{ + uint32_t age = 0; + packed_cell_t *cell; + + if (NULL != (cell = TOR_SIMPLEQ_FIRST(&c->n_chan_cells.head))) + age = now - cell->inserted_timestamp; + + if (! CIRCUIT_IS_ORIGIN(c)) { + const or_circuit_t *orcirc = CONST_TO_OR_CIRCUIT(c); + if (NULL != (cell = TOR_SIMPLEQ_FIRST(&orcirc->p_chan_cells.head))) { + uint32_t age2 = now - cell->inserted_timestamp; + if (age2 > age) + return age2; + } + } + return age; +} + +/** Return the age of the oldest buffer chunk on <b>conn</b>, where age is + * taken in timestamp units before the time <b>now</b>. If the connection has + * no data, treat it as having age zero. + **/ +static uint32_t +conn_get_buffer_age(const connection_t *conn, uint32_t now_ts) +{ + uint32_t age = 0, age2; + if (conn->outbuf) { + age2 = buf_get_oldest_chunk_timestamp(conn->outbuf, now_ts); + if (age2 > age) + age = age2; + } + if (conn->inbuf) { + age2 = buf_get_oldest_chunk_timestamp(conn->inbuf, now_ts); + if (age2 > age) + age = age2; + } + return age; +} + +/** Return the age in timestamp units of the oldest buffer chunk on any stream + * in the linked list <b>stream</b>, where age is taken in timestamp units + * before the timestamp <b>now</b>. 
*/ +static uint32_t +circuit_get_streams_max_data_age(const edge_connection_t *stream, uint32_t now) +{ + uint32_t age = 0, age2; + for (; stream; stream = stream->next_stream) { + const connection_t *conn = TO_CONN(stream); + age2 = conn_get_buffer_age(conn, now); + if (age2 > age) + age = age2; + if (conn->linked_conn) { + age2 = conn_get_buffer_age(conn->linked_conn, now); + if (age2 > age) + age = age2; + } + } + return age; +} + +/** Return the age in timestamp units of the oldest buffer chunk on any stream + * attached to the circuit <b>c</b>, where age is taken before the timestamp + * <b>now</b>. */ +STATIC uint32_t +circuit_max_queued_data_age(const circuit_t *c, uint32_t now) +{ + if (CIRCUIT_IS_ORIGIN(c)) { + return circuit_get_streams_max_data_age( + CONST_TO_ORIGIN_CIRCUIT(c)->p_streams, now); + } else { + return circuit_get_streams_max_data_age( + CONST_TO_OR_CIRCUIT(c)->n_streams, now); + } +} + +/** Return the age of the oldest cell or stream buffer chunk on the circuit + * <b>c</b>, where age is taken in timestamp units before the timestamp + * <b>now</b> */ +STATIC uint32_t +circuit_max_queued_item_age(const circuit_t *c, uint32_t now) +{ + uint32_t cell_age = circuit_max_queued_cell_age(c, now); + uint32_t data_age = circuit_max_queued_data_age(c, now); + if (cell_age > data_age) + return cell_age; + else + return data_age; +} + +/** Helper to sort a list of circuit_t by age of oldest item, in descending + * order. */ +static int +circuits_compare_by_oldest_queued_item_(const void **a_, const void **b_) +{ + const circuit_t *a = *a_; + const circuit_t *b = *b_; + uint32_t age_a = a->age_tmp; + uint32_t age_b = b->age_tmp; + + if (age_a < age_b) + return 1; + else if (age_a == age_b) + return 0; + else + return -1; +} + +static uint32_t now_ts_for_buf_cmp; + +/** Helper to sort a list of circuit_t by age of oldest item, in descending + * order. 
*/ +static int +conns_compare_by_buffer_age_(const void **a_, const void **b_) +{ + const connection_t *a = *a_; + const connection_t *b = *b_; + time_t age_a = conn_get_buffer_age(a, now_ts_for_buf_cmp); + time_t age_b = conn_get_buffer_age(b, now_ts_for_buf_cmp); + + if (age_a < age_b) + return 1; + else if (age_a == age_b) + return 0; + else + return -1; +} + +#define FRACTION_OF_DATA_TO_RETAIN_ON_OOM 0.90 + +/** We're out of memory for cells, having allocated <b>current_allocation</b> + * bytes' worth. Kill the 'worst' circuits until we're under + * FRACTION_OF_DATA_TO_RETAIN_ON_OOM of our maximum usage. */ +void +circuits_handle_oom(size_t current_allocation) +{ + smartlist_t *circlist; + smartlist_t *connection_array = get_connection_array(); + int conn_idx; + size_t mem_to_recover; + size_t mem_recovered=0; + int n_circuits_killed=0; + int n_dirconns_killed=0; + uint32_t now_ts; + log_notice(LD_GENERAL, "We're low on memory (cell queues total alloc:" + " %"TOR_PRIuSZ" buffer total alloc: %" TOR_PRIuSZ "," + " tor compress total alloc: %" TOR_PRIuSZ + " (zlib: %" TOR_PRIuSZ ", zstd: %" TOR_PRIuSZ "," + " lzma: %" TOR_PRIuSZ ")," + " rendezvous cache total alloc: %" TOR_PRIuSZ "). Killing" + " circuits withover-long queues. 
(This behavior is controlled by" + " MaxMemInQueues.)", + cell_queues_get_total_allocation(), + buf_get_total_allocation(), + tor_compress_get_total_allocation(), + tor_zlib_get_total_allocation(), + tor_zstd_get_total_allocation(), + tor_lzma_get_total_allocation(), + rend_cache_get_total_allocation()); + + { + size_t mem_target = (size_t)(get_options()->MaxMemInQueues * + FRACTION_OF_DATA_TO_RETAIN_ON_OOM); + if (current_allocation <= mem_target) + return; + mem_to_recover = current_allocation - mem_target; + } + + now_ts = monotime_coarse_get_stamp(); + + circlist = circuit_get_global_list(); + SMARTLIST_FOREACH_BEGIN(circlist, circuit_t *, circ) { + circ->age_tmp = circuit_max_queued_item_age(circ, now_ts); + } SMARTLIST_FOREACH_END(circ); + + /* This is O(n log n); there are faster algorithms we could use instead. + * Let's hope this doesn't happen enough to be in the critical path. */ + smartlist_sort(circlist, circuits_compare_by_oldest_queued_item_); + + /* Fix up the indices before we run into trouble */ + SMARTLIST_FOREACH_BEGIN(circlist, circuit_t *, circ) { + circ->global_circuitlist_idx = circ_sl_idx; + } SMARTLIST_FOREACH_END(circ); + + /* Now sort the connection array ... */ + now_ts_for_buf_cmp = now_ts; + smartlist_sort(connection_array, conns_compare_by_buffer_age_); + now_ts_for_buf_cmp = 0; + + /* Fix up the connection array to its new order. */ + SMARTLIST_FOREACH_BEGIN(connection_array, connection_t *, conn) { + conn->conn_array_index = conn_sl_idx; + } SMARTLIST_FOREACH_END(conn); + + /* Okay, now the worst circuits and connections are at the front of their + * respective lists. Let's mark them, and reclaim their storage + * aggressively. */ + conn_idx = 0; + SMARTLIST_FOREACH_BEGIN(circlist, circuit_t *, circ) { + size_t n; + size_t freed; + + /* Free storage in any non-linked directory connections that have buffered + * data older than this circuit. 
*/ + while (conn_idx < smartlist_len(connection_array)) { + connection_t *conn = smartlist_get(connection_array, conn_idx); + uint32_t conn_age = conn_get_buffer_age(conn, now_ts); + if (conn_age < circ->age_tmp) { + break; + } + if (conn->type == CONN_TYPE_DIR && conn->linked_conn == NULL) { + if (!conn->marked_for_close) + connection_mark_for_close(conn); + mem_recovered += single_conn_free_bytes(conn); + + ++n_dirconns_killed; + + if (mem_recovered >= mem_to_recover) + goto done_recovering_mem; + } + ++conn_idx; + } + + /* Now, kill the circuit. */ + n = n_cells_in_circ_queues(circ); + if (! circ->marked_for_close) { + circuit_mark_for_close(circ, END_CIRC_REASON_RESOURCELIMIT); + } + marked_circuit_free_cells(circ); + freed = marked_circuit_free_stream_bytes(circ); + + ++n_circuits_killed; + + mem_recovered += n * packed_cell_mem_cost(); + mem_recovered += freed; + + if (mem_recovered >= mem_to_recover) + goto done_recovering_mem; + } SMARTLIST_FOREACH_END(circ); + + done_recovering_mem: + + log_notice(LD_GENERAL, "Removed %"TOR_PRIuSZ" bytes by killing %d circuits; " + "%d circuits remain alive. Also killed %d non-linked directory " + "connections.", + mem_recovered, + n_circuits_killed, + smartlist_len(circlist) - n_circuits_killed, + n_dirconns_killed); +} + +/** Verify that cpath layer <b>cp</b> has all of its invariants + * correct. Trigger an assert if anything is invalid. + */ +void +assert_cpath_layer_ok(const crypt_path_t *cp) +{ +// tor_assert(cp->addr); /* these are zero for rendezvous extra-hops */ +// tor_assert(cp->port); + tor_assert(cp); + tor_assert(cp->magic == CRYPT_PATH_MAGIC); + switch (cp->state) + { + case CPATH_STATE_OPEN: + relay_crypto_assert_ok(&cp->crypto); + /* fall through */ + case CPATH_STATE_CLOSED: + /*XXXX Assert that there's no handshake_state either. 
*/ + tor_assert(!cp->rend_dh_handshake_state); + break; + case CPATH_STATE_AWAITING_KEYS: + /* tor_assert(cp->dh_handshake_state); */ + break; + default: + log_fn(LOG_ERR, LD_BUG, "Unexpected state %d", cp->state); + tor_assert(0); + } + tor_assert(cp->package_window >= 0); + tor_assert(cp->deliver_window >= 0); +} + +/** Verify that cpath <b>cp</b> has all of its invariants + * correct. Trigger an assert if anything is invalid. + */ +static void +assert_cpath_ok(const crypt_path_t *cp) +{ + const crypt_path_t *start = cp; + + do { + assert_cpath_layer_ok(cp); + /* layers must be in sequence of: "open* awaiting? closed*" */ + if (cp != start) { + if (cp->state == CPATH_STATE_AWAITING_KEYS) { + tor_assert(cp->prev->state == CPATH_STATE_OPEN); + } else if (cp->state == CPATH_STATE_OPEN) { + tor_assert(cp->prev->state == CPATH_STATE_OPEN); + } + } + cp = cp->next; + tor_assert(cp); + } while (cp != start); +} + +/** Verify that circuit <b>c</b> has all of its invariants + * correct. Trigger an assert if anything is invalid. + */ +MOCK_IMPL(void, +assert_circuit_ok,(const circuit_t *c)) +{ + edge_connection_t *conn; + const or_circuit_t *or_circ = NULL; + const origin_circuit_t *origin_circ = NULL; + + tor_assert(c); + tor_assert(c->magic == ORIGIN_CIRCUIT_MAGIC || c->magic == OR_CIRCUIT_MAGIC); + tor_assert(c->purpose >= CIRCUIT_PURPOSE_MIN_ && + c->purpose <= CIRCUIT_PURPOSE_MAX_); + + if (CIRCUIT_IS_ORIGIN(c)) + origin_circ = CONST_TO_ORIGIN_CIRCUIT(c); + else + or_circ = CONST_TO_OR_CIRCUIT(c); + + if (c->n_chan) { + tor_assert(!c->n_hop); + + if (c->n_circ_id) { + /* We use the _impl variant here to make sure we don't fail on marked + * circuits, which would not be returned by the regular function. 
*/ + circuit_t *c2 = circuit_get_by_circid_channel_impl(c->n_circ_id, + c->n_chan, NULL); + tor_assert(c == c2); + } + } + if (or_circ && or_circ->p_chan) { + if (or_circ->p_circ_id) { + /* ibid */ + circuit_t *c2 = + circuit_get_by_circid_channel_impl(or_circ->p_circ_id, + or_circ->p_chan, NULL); + tor_assert(c == c2); + } + } + if (or_circ) + for (conn = or_circ->n_streams; conn; conn = conn->next_stream) + tor_assert(conn->base_.type == CONN_TYPE_EXIT); + + tor_assert(c->deliver_window >= 0); + tor_assert(c->package_window >= 0); + if (c->state == CIRCUIT_STATE_OPEN || + c->state == CIRCUIT_STATE_GUARD_WAIT) { + tor_assert(!c->n_chan_create_cell); + if (or_circ) { + relay_crypto_assert_ok(&or_circ->crypto); + } + } + if (c->state == CIRCUIT_STATE_CHAN_WAIT && !c->marked_for_close) { + tor_assert(circuits_pending_chans && + smartlist_contains(circuits_pending_chans, c)); + } else { + tor_assert(!circuits_pending_chans || + !smartlist_contains(circuits_pending_chans, c)); + } + if (origin_circ && origin_circ->cpath) { + assert_cpath_ok(origin_circ->cpath); + } + if (c->purpose == CIRCUIT_PURPOSE_REND_ESTABLISHED) { + tor_assert(or_circ); + if (!c->marked_for_close) { + tor_assert(or_circ->rend_splice); + tor_assert(or_circ->rend_splice->rend_splice == or_circ); + } + tor_assert(or_circ->rend_splice != or_circ); + } else { + tor_assert(!or_circ || !or_circ->rend_splice); + } +} diff --cc src/core/or/circuitlist.h index b069604a1,000000000..dac11431c mode 100644,000000..100644 --- a/src/core/or/circuitlist.h +++ b/src/core/or/circuitlist.h @@@ -1,247 -1,0 +1,249 @@@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file circuitlist.h + * \brief Header file for circuitlist.c. 
+ **/ + +#ifndef TOR_CIRCUITLIST_H +#define TOR_CIRCUITLIST_H + +#include "lib/testsupport/testsupport.h" +#include "feature/hs/hs_ident.h" + +/** Circuit state: I'm the origin, still haven't done all my handshakes. */ +#define CIRCUIT_STATE_BUILDING 0 +/** Circuit state: Waiting to process the onionskin. */ +#define CIRCUIT_STATE_ONIONSKIN_PENDING 1 +/** Circuit state: I'd like to deliver a create, but my n_chan is still + * connecting. */ +#define CIRCUIT_STATE_CHAN_WAIT 2 +/** Circuit state: the circuit is open but we don't want to actually use it + * until we find out if a better guard will be available. + */ +#define CIRCUIT_STATE_GUARD_WAIT 3 +/** Circuit state: onionskin(s) processed, ready to send/receive cells. */ +#define CIRCUIT_STATE_OPEN 4 + +#define CIRCUIT_PURPOSE_MIN_ 1 + +/* these circuits were initiated elsewhere */ +#define CIRCUIT_PURPOSE_OR_MIN_ 1 +/** OR-side circuit purpose: normal circuit, at OR. */ +#define CIRCUIT_PURPOSE_OR 1 +/** OR-side circuit purpose: At OR, from the service, waiting for intro from + * clients. */ +#define CIRCUIT_PURPOSE_INTRO_POINT 2 +/** OR-side circuit purpose: At OR, from the client, waiting for the service. + */ +#define CIRCUIT_PURPOSE_REND_POINT_WAITING 3 +/** OR-side circuit purpose: At OR, both circuits have this purpose. */ +#define CIRCUIT_PURPOSE_REND_ESTABLISHED 4 +#define CIRCUIT_PURPOSE_OR_MAX_ 4 + +/* these circuits originate at this node */ + +/* here's how circ client-side purposes work: + * normal circuits are C_GENERAL. + * circuits that are c_introducing are either on their way to + * becoming open, or they are open and waiting for a + * suitable rendcirc before they send the intro. + * circuits that are c_introduce_ack_wait have sent the intro, + * but haven't gotten a response yet. + * circuits that are c_establish_rend are either on their way + * to becoming open, or they are open and have sent the + * establish_rendezvous cell but haven't received an ack. 
+ * circuits that are c_rend_ready are open and have received a + * rend ack, but haven't heard from the service yet. if they have a + * buildstate->pending_final_cpath then they're expecting a + * cell from the service, else they're not. + * circuits that are c_rend_ready_intro_acked are open, and + * some intro circ has sent its intro and received an ack. + * circuits that are c_rend_joined are open, have heard from + * the service, and are talking to it. + */ +/** Client-side circuit purpose: Normal circuit, with cpath. */ +#define CIRCUIT_PURPOSE_C_GENERAL 5 +#define CIRCUIT_PURPOSE_C_HS_MIN_ 6 +/** Client-side circuit purpose: at the client, connecting to intro point. */ +#define CIRCUIT_PURPOSE_C_INTRODUCING 6 +/** Client-side circuit purpose: at the client, sent INTRODUCE1 to intro point, + * waiting for ACK/NAK. */ +#define CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT 7 +/** Client-side circuit purpose: at the client, introduced and acked, closing. + */ +#define CIRCUIT_PURPOSE_C_INTRODUCE_ACKED 8 +/** Client-side circuit purpose: at the client, waiting for ack. */ +#define CIRCUIT_PURPOSE_C_ESTABLISH_REND 9 +/** Client-side circuit purpose: at the client, waiting for the service. */ +#define CIRCUIT_PURPOSE_C_REND_READY 10 +/** Client-side circuit purpose: at the client, waiting for the service, + * INTRODUCE has been acknowledged. */ +#define CIRCUIT_PURPOSE_C_REND_READY_INTRO_ACKED 11 +/** Client-side circuit purpose: at the client, rendezvous established. */ +#define CIRCUIT_PURPOSE_C_REND_JOINED 12 +/** This circuit is used for getting hsdirs */ +#define CIRCUIT_PURPOSE_C_HSDIR_GET 13 +#define CIRCUIT_PURPOSE_C_HS_MAX_ 13 +/** This circuit is used for build time measurement only */ +#define CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT 14 +#define CIRCUIT_PURPOSE_C_MAX_ 14 + +#define CIRCUIT_PURPOSE_S_HS_MIN_ 15 +/** Hidden-service-side circuit purpose: at the service, waiting for + * introductions. 
*/ +#define CIRCUIT_PURPOSE_S_ESTABLISH_INTRO 15 +/** Hidden-service-side circuit purpose: at the service, successfully + * established intro. */ +#define CIRCUIT_PURPOSE_S_INTRO 16 +/** Hidden-service-side circuit purpose: at the service, connecting to rend + * point. */ +#define CIRCUIT_PURPOSE_S_CONNECT_REND 17 +/** Hidden-service-side circuit purpose: at the service, rendezvous + * established. */ +#define CIRCUIT_PURPOSE_S_REND_JOINED 18 +/** This circuit is used for uploading hsdirs */ +#define CIRCUIT_PURPOSE_S_HSDIR_POST 19 +#define CIRCUIT_PURPOSE_S_HS_MAX_ 19 + +/** A testing circuit; not meant to be used for actual traffic. */ +#define CIRCUIT_PURPOSE_TESTING 20 +/** A controller made this circuit and Tor should not use it. */ +#define CIRCUIT_PURPOSE_CONTROLLER 21 +/** This circuit is used for path bias probing only */ +#define CIRCUIT_PURPOSE_PATH_BIAS_TESTING 22 + +/** This circuit is used for vanguards/restricted paths. + * + * This type of circuit is *only* created preemptively and never + * on-demand. When an HS operation needs to take place (e.g. connect to an + * intro point), these circuits are then cannibalized and repurposed to the + * actual needed HS purpose. */ +#define CIRCUIT_PURPOSE_HS_VANGUARDS 23 + +#define CIRCUIT_PURPOSE_MAX_ 23 +/** A catch-all for unrecognized purposes. Currently we don't expect + * to make or see any circuits with this purpose. */ +#define CIRCUIT_PURPOSE_UNKNOWN 255 + +/** True iff the circuit purpose <b>p</b> is for a circuit that + * originated at this node. */ +#define CIRCUIT_PURPOSE_IS_ORIGIN(p) ((p)>CIRCUIT_PURPOSE_OR_MAX_) +/** True iff the circuit purpose <b>p</b> is for a circuit that originated + * here to serve as a client. (Hidden services don't count here.) */ +#define CIRCUIT_PURPOSE_IS_CLIENT(p) \ + ((p)> CIRCUIT_PURPOSE_OR_MAX_ && \ + (p)<=CIRCUIT_PURPOSE_C_MAX_) +/** True iff the circuit_t <b>c</b> is actually an origin_circuit_t. 
*/ +#define CIRCUIT_IS_ORIGIN(c) (CIRCUIT_PURPOSE_IS_ORIGIN((c)->purpose)) +/** True iff the circuit purpose <b>p</b> is for an established rendezvous + * circuit. */ +#define CIRCUIT_PURPOSE_IS_ESTABLISHED_REND(p) \ + ((p) == CIRCUIT_PURPOSE_C_REND_JOINED || \ + (p) == CIRCUIT_PURPOSE_S_REND_JOINED) +/** True iff the circuit_t c is actually an or_circuit_t */ +#define CIRCUIT_IS_ORCIRC(c) (((circuit_t *)(c))->magic == OR_CIRCUIT_MAGIC) + +/** True iff this circuit purpose should count towards the global + * pending rate limit (set by MaxClientCircuitsPending). We count all + * general purpose circuits, as well as the first step of client onion + * service connections (HSDir gets). */ +#define CIRCUIT_PURPOSE_COUNTS_TOWARDS_MAXPENDING(p) \ + ((p) == CIRCUIT_PURPOSE_C_GENERAL || \ + (p) == CIRCUIT_PURPOSE_C_HSDIR_GET) + +/** Convert a circuit_t* to a pointer to the enclosing or_circuit_t. Assert + * if the cast is impossible. */ +or_circuit_t *TO_OR_CIRCUIT(circuit_t *); +const or_circuit_t *CONST_TO_OR_CIRCUIT(const circuit_t *); +/** Convert a circuit_t* to a pointer to the enclosing origin_circuit_t. + * Assert if the cast is impossible. 
*/ +origin_circuit_t *TO_ORIGIN_CIRCUIT(circuit_t *); +const origin_circuit_t *CONST_TO_ORIGIN_CIRCUIT(const circuit_t *); + +MOCK_DECL(smartlist_t *, circuit_get_global_list, (void)); +smartlist_t *circuit_get_global_origin_circuit_list(void); +int circuit_any_opened_circuits(void); +int circuit_any_opened_circuits_cached(void); +void circuit_cache_opened_circuit_state(int circuits_are_opened); + +const char *circuit_state_to_string(int state); +const char *circuit_purpose_to_controller_string(uint8_t purpose); +const char *circuit_purpose_to_controller_hs_state_string(uint8_t purpose); +const char *circuit_purpose_to_string(uint8_t purpose); +void circuit_dump_by_conn(connection_t *conn, int severity); +void circuit_set_p_circid_chan(or_circuit_t *circ, circid_t id, + channel_t *chan); +void circuit_set_n_circid_chan(circuit_t *circ, circid_t id, + channel_t *chan); +void channel_mark_circid_unusable(channel_t *chan, circid_t id); +void channel_mark_circid_usable(channel_t *chan, circid_t id); +time_t circuit_id_when_marked_unusable_on_channel(circid_t circ_id, + channel_t *chan); +void circuit_set_state(circuit_t *circ, uint8_t state); +void circuit_close_all_marked(void); +int32_t circuit_initial_package_window(void); +origin_circuit_t *origin_circuit_new(void); +or_circuit_t *or_circuit_new(circid_t p_circ_id, channel_t *p_chan); +circuit_t *circuit_get_by_circid_channel(circid_t circ_id, + channel_t *chan); +circuit_t * +circuit_get_by_circid_channel_even_if_marked(circid_t circ_id, + channel_t *chan); +int circuit_id_in_use_on_channel(circid_t circ_id, channel_t *chan); +circuit_t *circuit_get_by_edge_conn(edge_connection_t *conn); +void circuit_unlink_all_from_channel(channel_t *chan, int reason); +origin_circuit_t *circuit_get_by_global_id(uint32_t id); +origin_circuit_t *circuit_get_ready_rend_circ_by_rend_data( + const rend_data_t *rend_data); +origin_circuit_t *circuit_get_next_by_pk_and_purpose(origin_circuit_t *start, + const uint8_t *digest, uint8_t 
purpose); +origin_circuit_t *circuit_get_next_service_intro_circ(origin_circuit_t *start); +origin_circuit_t *circuit_get_next_service_rp_circ(origin_circuit_t *start); +origin_circuit_t *circuit_get_next_service_hsdir_circ(origin_circuit_t *start); +origin_circuit_t *circuit_find_to_cannibalize(uint8_t purpose, + extend_info_t *info, int flags); +void circuit_mark_all_unused_circs(void); +void circuit_mark_all_dirty_circs_as_unusable(void); ++void circuit_synchronize_written_or_bandwidth(const circuit_t *c, ++ circuit_channel_direction_t dir); +MOCK_DECL(void, circuit_mark_for_close_, (circuit_t *circ, int reason, + int line, const char *file)); +int circuit_get_cpath_len(origin_circuit_t *circ); +int circuit_get_cpath_opened_len(const origin_circuit_t *); +void circuit_clear_cpath(origin_circuit_t *circ); +crypt_path_t *circuit_get_cpath_hop(origin_circuit_t *circ, int hopnum); +void circuit_get_all_pending_on_channel(smartlist_t *out, + channel_t *chan); +int circuit_count_pending_on_channel(channel_t *chan); + +#define circuit_mark_for_close(c, reason) \ + circuit_mark_for_close_((c), (reason), __LINE__, SHORT_FILE__) + +void assert_cpath_layer_ok(const crypt_path_t *cp); +MOCK_DECL(void, assert_circuit_ok,(const circuit_t *c)); +void circuit_free_all(void); +void circuits_handle_oom(size_t current_allocation); + +void circuit_clear_testing_cell_stats(circuit_t *circ); + +void channel_note_destroy_pending(channel_t *chan, circid_t id); +MOCK_DECL(void, channel_note_destroy_not_pending, + (channel_t *chan, circid_t id)); + +smartlist_t *circuit_find_circuits_to_upgrade_from_guard_wait(void); + +#ifdef CIRCUITLIST_PRIVATE +STATIC void circuit_free_(circuit_t *circ); +#define circuit_free(circ) FREE_AND_NULL(circuit_t, circuit_free_, (circ)) +STATIC size_t n_cells_in_circ_queues(const circuit_t *c); +STATIC uint32_t circuit_max_queued_data_age(const circuit_t *c, uint32_t now); +STATIC uint32_t circuit_max_queued_cell_age(const circuit_t *c, uint32_t now); +STATIC 
uint32_t circuit_max_queued_item_age(const circuit_t *c, uint32_t now); +#endif /* defined(CIRCUITLIST_PRIVATE) */ + +#endif /* !defined(TOR_CIRCUITLIST_H) */ diff --cc src/core/or/or.h index eae027012,000000000..4d8b6d787 mode 100644,000000..100644 --- a/src/core/or/or.h +++ b/src/core/or/or.h @@@ -1,1083 -1,0 +1,1095 @@@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file or.h + * \brief Master header file for Tor-specific functionality. + **/ + +#ifndef TOR_OR_H +#define TOR_OR_H + +#include "orconfig.h" +#include "lib/cc/torint.h" + +#ifdef HAVE_SIGNAL_H +#include <signal.h> +#endif +#ifdef HAVE_TIME_H +#include <time.h> +#endif + +#include "lib/arch/bytes.h" +#include "lib/cc/compat_compiler.h" +#include "lib/cc/torint.h" +#include "lib/container/map.h" +#include "lib/container/smartlist.h" +#include "lib/crypt_ops/crypto_cipher.h" +#include "lib/crypt_ops/crypto_rsa.h" +#include "lib/ctime/di_ops.h" +#include "lib/defs/dh_sizes.h" +#include "lib/encoding/binascii.h" +#include "lib/encoding/cstring.h" +#include "lib/encoding/time_fmt.h" +#include "lib/err/torerr.h" +#include "lib/fs/dir.h" +#include "lib/fs/files.h" +#include "lib/fs/mmap.h" +#include "lib/fs/path.h" +#include "lib/fs/userdb.h" +#include "lib/intmath/addsub.h" +#include "lib/intmath/bits.h" +#include "lib/intmath/cmp.h" +#include "lib/intmath/logic.h" +#include "lib/intmath/muldiv.h" +#include "lib/log/escape.h" +#include "lib/log/ratelim.h" +#include "lib/log/util_bug.h" +#include "lib/malloc/malloc.h" +#include "lib/net/address.h" +#include "lib/net/inaddr.h" +#include "lib/net/socket.h" +#include "lib/string/compat_ctype.h" +#include "lib/string/compat_string.h" +#include "lib/string/parse_int.h" +#include "lib/string/printf.h" +#include "lib/string/scanf.h" +#include 
"lib/string/util_string.h" +#include "lib/testsupport/testsupport.h" +#include "lib/thread/threads.h" +#include "lib/time/compat_time.h" +#include "lib/wallclock/approx_time.h" +#include "lib/wallclock/timeval.h" + +#include "ht.h" + +// These, more than other includes, are for keeping the other struct +// definitions working. We should remove them when we minimize our includes. +#include "core/or/entry_port_cfg_st.h" + +struct ed25519_public_key_t; +struct curve25519_public_key_t; + +/* These signals are defined to help handle_control_signal work. + */ +#ifndef SIGHUP +#define SIGHUP 1 +#endif +#ifndef SIGINT +#define SIGINT 2 +#endif +#ifndef SIGUSR1 +#define SIGUSR1 10 +#endif +#ifndef SIGUSR2 +#define SIGUSR2 12 +#endif +#ifndef SIGTERM +#define SIGTERM 15 +#endif +/* Controller signals start at a high number so we don't + * conflict with system-defined signals. */ +#define SIGNEWNYM 129 +#define SIGCLEARDNSCACHE 130 +#define SIGHEARTBEAT 131 + +#if (SIZEOF_CELL_T != 0) +/* On Irix, stdlib.h defines a cell_t type, so we need to make sure + * that our stuff always calls cell_t something different. */ +#define cell_t tor_cell_t +#endif + +/** Helper macro: Given a pointer to to.base_, of type from*, return &to. */ +#define DOWNCAST(to, ptr) ((to*)SUBTYPE_P(ptr, to, base_)) + +/** Length of longest allowable configured nickname. */ +#define MAX_NICKNAME_LEN 19 +/** Length of a router identity encoded as a hexadecimal digest, plus + * possible dollar sign. */ +#define MAX_HEX_NICKNAME_LEN (HEX_DIGEST_LEN+1) +/** Maximum length of verbose router identifier: dollar sign, hex ID digest, + * equal sign or tilde, nickname. */ +#define MAX_VERBOSE_NICKNAME_LEN (1+HEX_DIGEST_LEN+1+MAX_NICKNAME_LEN) + +/** For HTTP parsing: Maximum number of bytes we'll accept in the headers + * of an HTTP request or response. */ +#define MAX_HEADERS_SIZE 50000 + +/** Maximum size, in bytes, of a single router descriptor uploaded to us + * as a directory authority. 
Caches and clients fetch whatever descriptors + * the authorities tell them to fetch, and don't care about size. */ +#define MAX_DESCRIPTOR_UPLOAD_SIZE 20000 + +/** Maximum size of a single extrainfo document, as above. */ +#define MAX_EXTRAINFO_UPLOAD_SIZE 50000 + +/** Minimum lifetime for an onion key in days. */ +#define MIN_ONION_KEY_LIFETIME_DAYS (1) + +/** Maximum lifetime for an onion key in days. */ +#define MAX_ONION_KEY_LIFETIME_DAYS (90) + +/** Default lifetime for an onion key in days. */ +#define DEFAULT_ONION_KEY_LIFETIME_DAYS (28) + +/** Minimum grace period for acceptance of an onion key in days. + * The maximum value is defined in proposal #274 as being the current network + * consensus parameter for "onion-key-rotation-days". */ +#define MIN_ONION_KEY_GRACE_PERIOD_DAYS (1) + +/** Default grace period for acceptance of an onion key in days. */ +#define DEFAULT_ONION_KEY_GRACE_PERIOD_DAYS (7) + +/** How often we should check the network consensus if it is time to rotate or + * expire onion keys. */ +#define ONION_KEY_CONSENSUS_CHECK_INTERVAL (60*60) + +/** How often do we rotate TLS contexts? */ +#define MAX_SSL_KEY_LIFETIME_INTERNAL (2*60*60) + +/** How old do we allow a router to get before removing it + * from the router list? In seconds. */ +#define ROUTER_MAX_AGE (60*60*48) +/** How old can a router get before we (as a server) will no longer + * consider it live? In seconds. */ +#define ROUTER_MAX_AGE_TO_PUBLISH (60*60*24) +/** How old do we let a saved descriptor get before force-removing it? */ +#define OLD_ROUTER_DESC_MAX_AGE (60*60*24*5) + +/* Proxy client types */ +#define PROXY_NONE 0 +#define PROXY_CONNECT 1 +#define PROXY_SOCKS4 2 +#define PROXY_SOCKS5 3 +/* !!!! If there is ever a PROXY_* type over 3, we must grow the proxy_type + * field in or_connection_t */ + +/* Pluggable transport proxy type. Don't use this in or_connection_t, + * instead use the actual underlying proxy type (see above). 
*/ +#define PROXY_PLUGGABLE 4 + +/** How many circuits do we want simultaneously in-progress to handle + * a given stream? */ +#define MIN_CIRCUITS_HANDLING_STREAM 2 + +/* These RELAY_COMMAND constants define values for relay cell commands, and +* must match those defined in tor-spec.txt. */ +#define RELAY_COMMAND_BEGIN 1 +#define RELAY_COMMAND_DATA 2 +#define RELAY_COMMAND_END 3 +#define RELAY_COMMAND_CONNECTED 4 +#define RELAY_COMMAND_SENDME 5 +#define RELAY_COMMAND_EXTEND 6 +#define RELAY_COMMAND_EXTENDED 7 +#define RELAY_COMMAND_TRUNCATE 8 +#define RELAY_COMMAND_TRUNCATED 9 +#define RELAY_COMMAND_DROP 10 +#define RELAY_COMMAND_RESOLVE 11 +#define RELAY_COMMAND_RESOLVED 12 +#define RELAY_COMMAND_BEGIN_DIR 13 +#define RELAY_COMMAND_EXTEND2 14 +#define RELAY_COMMAND_EXTENDED2 15 + +#define RELAY_COMMAND_ESTABLISH_INTRO 32 +#define RELAY_COMMAND_ESTABLISH_RENDEZVOUS 33 +#define RELAY_COMMAND_INTRODUCE1 34 +#define RELAY_COMMAND_INTRODUCE2 35 +#define RELAY_COMMAND_RENDEZVOUS1 36 +#define RELAY_COMMAND_RENDEZVOUS2 37 +#define RELAY_COMMAND_INTRO_ESTABLISHED 38 +#define RELAY_COMMAND_RENDEZVOUS_ESTABLISHED 39 +#define RELAY_COMMAND_INTRODUCE_ACK 40 + +/* Reasons why an OR connection is closed. */ +#define END_OR_CONN_REASON_DONE 1 +#define END_OR_CONN_REASON_REFUSED 2 /* connection refused */ +#define END_OR_CONN_REASON_OR_IDENTITY 3 +#define END_OR_CONN_REASON_CONNRESET 4 /* connection reset by peer */ +#define END_OR_CONN_REASON_TIMEOUT 5 +#define END_OR_CONN_REASON_NO_ROUTE 6 /* no route to host/net */ +#define END_OR_CONN_REASON_IO_ERROR 7 /* read/write error */ +#define END_OR_CONN_REASON_RESOURCE_LIMIT 8 /* sockets, buffers, etc */ +#define END_OR_CONN_REASON_PT_MISSING 9 /* PT failed or not available */ +#define END_OR_CONN_REASON_MISC 10 + +/* Reasons why we (or a remote OR) might close a stream. See tor-spec.txt for + * documentation of these. The values must match. 
*/ +#define END_STREAM_REASON_MISC 1 +#define END_STREAM_REASON_RESOLVEFAILED 2 +#define END_STREAM_REASON_CONNECTREFUSED 3 +#define END_STREAM_REASON_EXITPOLICY 4 +#define END_STREAM_REASON_DESTROY 5 +#define END_STREAM_REASON_DONE 6 +#define END_STREAM_REASON_TIMEOUT 7 +#define END_STREAM_REASON_NOROUTE 8 +#define END_STREAM_REASON_HIBERNATING 9 +#define END_STREAM_REASON_INTERNAL 10 +#define END_STREAM_REASON_RESOURCELIMIT 11 +#define END_STREAM_REASON_CONNRESET 12 +#define END_STREAM_REASON_TORPROTOCOL 13 +#define END_STREAM_REASON_NOTDIRECTORY 14 +#define END_STREAM_REASON_ENTRYPOLICY 15 + +/* These high-numbered end reasons are not part of the official spec, + * and are not intended to be put in relay end cells. They are here + * to be more informative when sending back socks replies to the + * application. */ +/* XXXX 256 is no longer used; feel free to reuse it. */ +/** We were unable to attach the connection to any circuit at all. */ +/* XXXX the ways we use this one don't make a lot of sense. */ +#define END_STREAM_REASON_CANT_ATTACH 257 +/** We can't connect to any directories at all, so we killed our streams + * before they can time out. */ +#define END_STREAM_REASON_NET_UNREACHABLE 258 +/** This is a SOCKS connection, and the client used (or misused) the SOCKS + * protocol in a way we couldn't handle. */ +#define END_STREAM_REASON_SOCKSPROTOCOL 259 +/** This is a transparent proxy connection, but we can't extract the original + * target address:port. */ +#define END_STREAM_REASON_CANT_FETCH_ORIG_DEST 260 +/** This is a connection on the NATD port, and the destination IP:Port was + * either ill-formed or out-of-range. */ +#define END_STREAM_REASON_INVALID_NATD_DEST 261 +/** The target address is in a private network (like 127.0.0.1 or 10.0.0.1); + * you don't want to do that over a randomly chosen exit */ +#define END_STREAM_REASON_PRIVATE_ADDR 262 +/** This is an HTTP tunnel connection and the client used or misused HTTP in a + * way we can't handle. 
+ */ +#define END_STREAM_REASON_HTTPPROTOCOL 263 + +/** Bitwise-and this value with endreason to mask out all flags. */ +#define END_STREAM_REASON_MASK 511 + +/** Bitwise-or this with the argument to control_event_stream_status + * to indicate that the reason came from an END cell. */ +#define END_STREAM_REASON_FLAG_REMOTE 512 +/** Bitwise-or this with the argument to control_event_stream_status + * to indicate that we already sent a CLOSED stream event. */ +#define END_STREAM_REASON_FLAG_ALREADY_SENT_CLOSED 1024 +/** Bitwise-or this with endreason to indicate that we already sent + * a socks reply, and no further reply needs to be sent from + * connection_mark_unattached_ap(). */ +#define END_STREAM_REASON_FLAG_ALREADY_SOCKS_REPLIED 2048 + +/* 'type' values to use in RESOLVED cells. Specified in tor-spec.txt. */ +#define RESOLVED_TYPE_HOSTNAME 0 +#define RESOLVED_TYPE_IPV4 4 +#define RESOLVED_TYPE_IPV6 6 +#define RESOLVED_TYPE_ERROR_TRANSIENT 0xF0 +#define RESOLVED_TYPE_ERROR 0xF1 + +/* Negative reasons are internal: we never send them in a DESTROY or TRUNCATE + * cell; they only go to the controller for tracking. */ + +/* Closing introduction points that were opened in parallel. */ +#define END_CIRC_REASON_IP_NOW_REDUNDANT -4 + +/** Our post-timeout circuit time measurement period expired. + * We must give up now. */ +#define END_CIRC_REASON_MEASUREMENT_EXPIRED -3 + +/** We couldn't build a path for this circuit. */ +#define END_CIRC_REASON_NOPATH -2 +/** Catch-all "other" reason for closing origin circuits. */ +#define END_CIRC_AT_ORIGIN -1 + +/* Reasons why we (or a remote OR) might close a circuit. See tor-spec.txt + * section 5.4 for documentation of these. 
*/ +#define END_CIRC_REASON_MIN_ 0 +#define END_CIRC_REASON_NONE 0 +#define END_CIRC_REASON_TORPROTOCOL 1 +#define END_CIRC_REASON_INTERNAL 2 +#define END_CIRC_REASON_REQUESTED 3 +#define END_CIRC_REASON_HIBERNATING 4 +#define END_CIRC_REASON_RESOURCELIMIT 5 +#define END_CIRC_REASON_CONNECTFAILED 6 +#define END_CIRC_REASON_OR_IDENTITY 7 +#define END_CIRC_REASON_CHANNEL_CLOSED 8 +#define END_CIRC_REASON_FINISHED 9 +#define END_CIRC_REASON_TIMEOUT 10 +#define END_CIRC_REASON_DESTROYED 11 +#define END_CIRC_REASON_NOSUCHSERVICE 12 +#define END_CIRC_REASON_MAX_ 12 + +/** Bitwise-OR this with the argument to circuit_mark_for_close() or + * control_event_circuit_status() to indicate that the reason was + * passed through from a destroy or truncate cell. */ +#define END_CIRC_REASON_FLAG_REMOTE 512 + +/** Length of 'y' portion of 'y.onion' URL. */ +#define REND_SERVICE_ID_LEN_BASE32 16 + +/** Length of 'y.onion' including '.onion' URL. */ +#define REND_SERVICE_ADDRESS_LEN (16+1+5) + +/** Length of a binary-encoded rendezvous service ID. */ +#define REND_SERVICE_ID_LEN 10 + +/** Time period for which a v2 descriptor will be valid. */ +#define REND_TIME_PERIOD_V2_DESC_VALIDITY (24*60*60) + +/** Time period within which two sets of v2 descriptors will be uploaded in + * parallel. */ +#define REND_TIME_PERIOD_OVERLAPPING_V2_DESCS (60*60) + +/** Number of non-consecutive replicas (i.e. distributed somewhere + * in the ring) for a descriptor. */ +#define REND_NUMBER_OF_NON_CONSECUTIVE_REPLICAS 2 + +/** Number of consecutive replicas for a descriptor. */ +#define REND_NUMBER_OF_CONSECUTIVE_REPLICAS 3 + +/** Length of v2 descriptor ID (32 base32 chars = 160 bits). */ +#define REND_DESC_ID_V2_LEN_BASE32 BASE32_DIGEST_LEN + +/** Length of the base32-encoded secret ID part of versioned hidden service + * descriptors. */ +#define REND_SECRET_ID_PART_LEN_BASE32 BASE32_DIGEST_LEN + +/** Length of the base32-encoded hash of an introduction point's + * identity key. 
*/ +#define REND_INTRO_POINT_ID_LEN_BASE32 BASE32_DIGEST_LEN + +/** Length of the descriptor cookie that is used for client authorization + * to hidden services. */ +#define REND_DESC_COOKIE_LEN 16 + +/** Length of the base64-encoded descriptor cookie that is used for + * exchanging client authorization between hidden service and client. */ +#define REND_DESC_COOKIE_LEN_BASE64 22 + +/** Length of client identifier in encrypted introduction points for hidden + * service authorization type 'basic'. */ +#define REND_BASIC_AUTH_CLIENT_ID_LEN 4 + +/** Multiple of the number of clients to which the real number of clients + * is padded with fake clients for hidden service authorization type + * 'basic'. */ +#define REND_BASIC_AUTH_CLIENT_MULTIPLE 16 + +/** Length of client entry consisting of client identifier and encrypted + * session key for hidden service authorization type 'basic'. */ +#define REND_BASIC_AUTH_CLIENT_ENTRY_LEN (REND_BASIC_AUTH_CLIENT_ID_LEN \ + + CIPHER_KEY_LEN) + +/** Maximum size of v2 hidden service descriptors. */ +#define REND_DESC_MAX_SIZE (20 * 1024) + +/** Legal characters for use in authorized client names for a hidden + * service. */ +#define REND_LEGAL_CLIENTNAME_CHARACTERS \ + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+-_" + +/** Maximum length of authorized client names for a hidden service. */ +#define REND_CLIENTNAME_MAX_LEN 16 + +/** Length of the rendezvous cookie that is used to connect circuits at the + * rendezvous point. */ +#define REND_COOKIE_LEN DIGEST_LEN + +/** Client authorization type that a hidden service performs. */ +typedef enum rend_auth_type_t { + REND_NO_AUTH = 0, + REND_BASIC_AUTH = 1, + REND_STEALTH_AUTH = 2, +} rend_auth_type_t; + +/** Client-side configuration of authorization for a hidden service. 
*/ +typedef struct rend_service_authorization_t { + uint8_t descriptor_cookie[REND_DESC_COOKIE_LEN]; + char onion_address[REND_SERVICE_ADDRESS_LEN+1]; + rend_auth_type_t auth_type; +} rend_service_authorization_t; + +/** Client- and server-side data that is used for hidden service connection + * establishment. Not all fields contain data depending on where this struct + * is used. */ +typedef struct rend_data_t { + /* Hidden service protocol version of this base object. */ + uint32_t version; + + /** List of HSDir fingerprints to which this request has been sent. This + * contains binary identity digest of the directory of size DIGEST_LEN. */ + smartlist_t *hsdirs_fp; + + /** Rendezvous cookie used by both client and service. */ + char rend_cookie[REND_COOKIE_LEN]; + + /** Number of streams associated with this rendezvous circuit. */ + int nr_streams; +} rend_data_t; + +/** Version 2 rendezvous data: embeds a rend_data_t as its first member + * (base_) and adds the v2-specific fields below. */ +typedef struct rend_data_v2_t { + /* Rendezvous base data. */ + rend_data_t base_; + + /** Onion address (without the .onion part) that a client requests. */ + char onion_address[REND_SERVICE_ID_LEN_BASE32+1]; + + /** Descriptor ID for each replica, computed from the onion address. If + * the onion address is empty, this array MUST be empty. We keep them so + * we know when to purge our entry in the last hsdir request table. */ + char descriptor_id[REND_NUMBER_OF_NON_CONSECUTIVE_REPLICAS][DIGEST_LEN]; + + /** (Optional) descriptor cookie that is used by a client. */ + char descriptor_cookie[REND_DESC_COOKIE_LEN]; + + /** Authorization type for accessing a service used by a client. */ + rend_auth_type_t auth_type; + + /** Descriptor ID for a client request. The control port command HSFETCH + * uses this. It's set if the descriptor query should only use this + * descriptor ID. */ + char desc_id_fetch[DIGEST_LEN]; + + /** Hash of the hidden service's PK used by a service. */ + char rend_pk_digest[DIGEST_LEN]; +} rend_data_v2_t; + +/* From a base rend_data_t object <b>d</b>, return the v2 object. 
*/ +/* NOTE: this asserts that <b>d</b> is non-NULL and that d->version is 2. The + * returned pointer is not const-qualified even though <b>d</b> is. */ +static inline +rend_data_v2_t *TO_REND_DATA_V2(const rend_data_t *d) +{ + tor_assert(d); + tor_assert(d->version == 2); + return DOWNCAST(rend_data_v2_t, d); +} + +/* Stub because we can't include hs_ident.h. */ +struct hs_ident_edge_conn_t; +struct hs_ident_dir_conn_t; +struct hs_ident_circuit_t; + +typedef struct hsdir_index_t hsdir_index_t; + +/** Time interval for tracking replays of DH public keys received in + * INTRODUCE2 cells. Used only to avoid launching multiple + * simultaneous attempts to connect to the same rendezvous point. */ +#define REND_REPLAY_TIME_INTERVAL (5 * 60) + +/** Used to indicate which way a cell is going on a circuit. */ +typedef enum { + CELL_DIRECTION_IN=1, /**< The cell is moving towards the origin. */ + CELL_DIRECTION_OUT=2, /**< The cell is moving away from the origin. */ +} cell_direction_t; + ++/** ++ * An enum to allow us to specify which channel in a circuit ++ * we're interested in. ++ * ++ * This is needed because our data structures and other fields ++ * for channel delivery are disassociated from the channel. ++ */ ++typedef enum { ++ CIRCUIT_N_CHAN = 0, ++ CIRCUIT_P_CHAN = 1 ++} circuit_channel_direction_t; ++ +/** Initial value for both sides of a circuit transmission window when the + * circuit is initialized. Measured in cells. */ +#define CIRCWINDOW_START 1000 +#define CIRCWINDOW_START_MIN 100 +#define CIRCWINDOW_START_MAX 1000 +/** Amount to increment a circuit window when we get a circuit SENDME. */ +#define CIRCWINDOW_INCREMENT 100 +/** Initial value on both sides of a stream transmission window when the + * stream is initialized. Measured in cells. */ +#define STREAMWINDOW_START 500 +#define STREAMWINDOW_START_MAX 500 +/** Amount to increment a stream window when we get a stream SENDME. */ +#define STREAMWINDOW_INCREMENT 50 + +/** Maximum number of queued cells on a circuit for which we are the + * midpoint before we give up and kill it. 
This must be >= circwindow + * to avoid killing innocent circuits, and >= circwindow*2 to give + * leaky-pipe a chance of working someday. The ORCIRC_MAX_MIDDLE_KILL_THRESH + * ratio controls the margin of error between emitting a warning and + * killing the circuit. + */ +#define ORCIRC_MAX_MIDDLE_CELLS (CIRCWINDOW_START_MAX*2) +/** Ratio of hard (circuit kill) to soft (warning) thresholds for the + * ORCIRC_MAX_MIDDLE_CELLS tests. + */ +#define ORCIRC_MAX_MIDDLE_KILL_THRESH (1.1f) + +/* Cell commands. These values are defined in tor-spec.txt. */ +#define CELL_PADDING 0 +#define CELL_CREATE 1 +#define CELL_CREATED 2 +#define CELL_RELAY 3 +#define CELL_DESTROY 4 +#define CELL_CREATE_FAST 5 +#define CELL_CREATED_FAST 6 +#define CELL_VERSIONS 7 +#define CELL_NETINFO 8 +#define CELL_RELAY_EARLY 9 +#define CELL_CREATE2 10 +#define CELL_CREATED2 11 +#define CELL_PADDING_NEGOTIATE 12 + +#define CELL_VPADDING 128 +#define CELL_CERTS 129 +#define CELL_AUTH_CHALLENGE 130 +#define CELL_AUTHENTICATE 131 +#define CELL_AUTHORIZE 132 +#define CELL_COMMAND_MAX_ 132 + +/** How long to test reachability before complaining to the user. */ +#define TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT (20*60) + +/** Legal characters in a nickname. */ +#define LEGAL_NICKNAME_CHARACTERS \ + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + +/** Name to use in client TLS certificates if no nickname is given. Once + * Tor 0.1.2.x is obsolete, we can remove this. */ +#define DEFAULT_CLIENT_NICKNAME "client" + +/** Name chosen by routers that don't configure nicknames */ +#define UNNAMED_ROUTER_NICKNAME "Unnamed" + +/** Number of bytes in a SOCKS4 header. */ +#define SOCKS4_NETWORK_LEN 8 + +/* + * Relay payload: + * Relay command [1 byte] + * Recognized [2 bytes] + * Stream ID [2 bytes] + * Partial SHA-1 [4 bytes] + * Length [2 bytes] + * Relay payload [498 bytes] + */ + +/** Number of bytes in a cell, minus cell header. 
*/ +#define CELL_PAYLOAD_SIZE 509 +/** Number of bytes in a cell transmitted over the network, in the longest + * form */ +#define CELL_MAX_NETWORK_SIZE 514 + +/** Maximum length of a header on a variable-length cell. */ +#define VAR_CELL_MAX_HEADER_SIZE 7 + +/** Return the number of bytes in a cell transmitted over the network: + * CELL_MAX_NETWORK_SIZE if <b>wide_circ_ids</b> is nonzero, and two bytes + * fewer otherwise. */ +static int get_cell_network_size(int wide_circ_ids); +static inline int get_cell_network_size(int wide_circ_ids) +{ + return wide_circ_ids ? CELL_MAX_NETWORK_SIZE : CELL_MAX_NETWORK_SIZE - 2; +} +/** Return the length of the header on a variable-length cell: + * VAR_CELL_MAX_HEADER_SIZE if <b>wide_circ_ids</b> is nonzero, and two + * bytes fewer otherwise. */ +static int get_var_cell_header_size(int wide_circ_ids); +static inline int get_var_cell_header_size(int wide_circ_ids) +{ + return wide_circ_ids ? VAR_CELL_MAX_HEADER_SIZE : + VAR_CELL_MAX_HEADER_SIZE - 2; +} +/** Return the size of a circuit ID in bytes: 4 if <b>wide_circ_ids</b> is + * nonzero, and 2 otherwise. */ +static int get_circ_id_size(int wide_circ_ids); +static inline int get_circ_id_size(int wide_circ_ids) +{ + return wide_circ_ids ? 4 : 2; +} + +/** Number of bytes in a relay cell's header (not including general cell + * header): command (1), recognized (2), stream ID (2), integrity (4), + * length (2). */ +#define RELAY_HEADER_SIZE (1+2+2+4+2) +/** Largest number of bytes that can fit in a relay cell payload. */ +#define RELAY_PAYLOAD_SIZE (CELL_PAYLOAD_SIZE-RELAY_HEADER_SIZE) + +/** Identifies a circuit on an or_connection */ +typedef uint32_t circid_t; +/** Identifies a stream on a circuit */ +typedef uint16_t streamid_t; + +/* channel_t typedef; struct channel_s is in channel.h */ + +typedef struct channel_s channel_t; + +/* channel_listener_t typedef; struct channel_listener_s is in channel.h */ + +typedef struct channel_listener_s channel_listener_t; + +/* TLS channel stuff */ + +typedef struct channel_tls_s channel_tls_t; + +/* circuitmux_t typedef; struct circuitmux_s is in circuitmux.h */ + +typedef struct circuitmux_s circuitmux_t; + +typedef struct cell_t cell_t; +typedef struct var_cell_t var_cell_t; +typedef struct packed_cell_t packed_cell_t; +typedef struct cell_queue_t cell_queue_t; +typedef struct destroy_cell_t destroy_cell_t; +typedef struct destroy_cell_queue_t destroy_cell_queue_t; +typedef struct ext_or_cmd_t ext_or_cmd_t; + +/** Beginning of a RELAY cell payload. 
*/ +typedef struct { + uint8_t command; /**< The end-to-end relay command. */ + uint16_t recognized; /**< Used to tell whether cell is for us. */ + streamid_t stream_id; /**< Which stream is this cell associated with? */ + char integrity[4]; /**< Used to tell whether cell is corrupted. */ + uint16_t length; /**< How long is the payload body? */ +} relay_header_t; + +typedef struct socks_request_t socks_request_t; +typedef struct entry_port_cfg_t entry_port_cfg_t; +typedef struct server_port_cfg_t server_port_cfg_t; + +/** Minimum length of the random part of an AUTH_CHALLENGE cell. */ +#define OR_AUTH_CHALLENGE_LEN 32 + +/** + * @name Certificate types for CERTS cells. + * + * These values are defined by the protocol, and affect how an X509 + * certificate in a CERTS cell is interpreted and used. + * + * @{ */ +/** A certificate that authenticates a TLS link key. The subject key + * must match the key used in the TLS handshake; it must be signed by + * the identity key. */ +#define OR_CERT_TYPE_TLS_LINK 1 +/** A self-signed identity certificate. The subject key must be a + * 1024-bit RSA key. */ +#define OR_CERT_TYPE_ID_1024 2 +/** A certificate that authenticates a key used in an AUTHENTICATE cell + * in the v3 handshake. The subject key must be a 1024-bit RSA key; it + * must be signed by the identity key */ +#define OR_CERT_TYPE_AUTH_1024 3 +/* DOCDOC */ +#define OR_CERT_TYPE_RSA_ED_CROSSCERT 7 +/**@}*/ + +/** The first supported type of AUTHENTICATE cell. It contains + * a bunch of structures signed with an RSA1024 key. The signed + * structures include a HMAC using negotiated TLS secrets, and a digest + * of all cells sent or received before the AUTHENTICATE cell (including + * the random server-generated AUTH_CHALLENGE cell). + */ +#define AUTHTYPE_RSA_SHA256_TLSSECRET 1 +/** As AUTHTYPE_RSA_SHA256_TLSSECRET, but instead of using the + * negotiated TLS secrets, uses exported keying material from the TLS + * session as described in RFC 5705. 
+ * + * Not used by today's tors, since everything that supports this + * also supports ED25519_SHA256_5705, which is better. + **/ +#define AUTHTYPE_RSA_SHA256_RFC5705 2 +/** As AUTHTYPE_RSA_SHA256_RFC5705, but uses an Ed25519 identity key to + * authenticate. */ +#define AUTHTYPE_ED25519_SHA256_RFC5705 3 +/* + * NOTE: authchallenge_type_is_better() relies on these AUTHTYPE codes + * being sorted in order of preference. If we someday add one with + * a higher numerical value that we don't like as much, we should revise + * authchallenge_type_is_better(). + */ + +/** The length of the part of the AUTHENTICATE cell body that the client and + * server can generate independently (when using RSA_SHA256_TLSSECRET). It + * contains everything except the client's timestamp, the client's randomly + * generated nonce, and the signature. */ +#define V3_AUTH_FIXED_PART_LEN (8+(32*6)) +/** The length of the part of the AUTHENTICATE cell body that the client + * signs. */ +#define V3_AUTH_BODY_LEN (V3_AUTH_FIXED_PART_LEN + 8 + 16) + +typedef struct or_handshake_certs_t or_handshake_certs_t; +typedef struct or_handshake_state_t or_handshake_state_t; + +/** Length of Extended ORPort connection identifier. */ +#define EXT_OR_CONN_ID_LEN DIGEST_LEN /* 20 */ +/* + * OR_CONN_HIGHWATER and OR_CONN_LOWWATER moved from connection_or.c so + * channeltls.c can see them too. + */ + +/** When adding cells to an OR connection's outbuf, keep adding until the + * outbuf is at least this long, or we run out of cells. */ +#define OR_CONN_HIGHWATER (32*1024) + +/** Add cells to an OR connection's outbuf whenever the outbuf's data length + * drops below this size. 
*/ +#define OR_CONN_LOWWATER (16*1024) + +typedef struct connection_t connection_t; +typedef struct control_connection_t control_connection_t; +typedef struct dir_connection_t dir_connection_t; +typedef struct edge_connection_t edge_connection_t; +typedef struct entry_connection_t entry_connection_t; +typedef struct listener_connection_t listener_connection_t; +typedef struct or_connection_t or_connection_t; + +/** Cast a connection_t subtype pointer to a connection_t **/ +#define TO_CONN(c) (&(((c)->base_))) + +/** Cast a entry_connection_t subtype pointer to a connection_t **/ +#define ENTRY_TO_CONN(c) (TO_CONN(ENTRY_TO_EDGE_CONN(c))) + +typedef struct addr_policy_t addr_policy_t; + +typedef struct cached_dir_t cached_dir_t; + +/** Enum used to remember where a signed_descriptor_t is stored and how to + * manage the memory for signed_descriptor_body. */ +typedef enum { + /** The descriptor isn't stored on disk at all: the copy in memory is + * canonical; the saved_offset field is meaningless. */ + SAVED_NOWHERE=0, + /** The descriptor is stored in the cached_routers file: the + * signed_descriptor_body is meaningless; the signed_descriptor_len and + * saved_offset are used to index into the mmaped cache file. */ + SAVED_IN_CACHE, + /** The descriptor is stored in the cached_routers.new file: the + * signed_descriptor_body and saved_offset fields are both set. */ + /* FFFF (We could also mmap the file and grow the mmap as needed, or + * lazy-load the descriptor text by using seek and read. We don't, for + * now.) + */ + SAVED_IN_JOURNAL +} saved_location_t; +#define saved_location_bitfield_t ENUM_BF(saved_location_t) + +/** Enumeration: what directory object is being downloaded? + * This determines which schedule is selected to perform the download. 
*/ +typedef enum { + DL_SCHED_GENERIC = 0, + DL_SCHED_CONSENSUS = 1, + DL_SCHED_BRIDGE = 2, +} download_schedule_t; +#define download_schedule_bitfield_t ENUM_BF(download_schedule_t) + +/** Enumeration: is the download schedule for downloading from an authority, + * or from any available directory mirror? + * During bootstrap, "any" means a fallback (or an authority, if there + * are no fallbacks). + * When we have a valid consensus, "any" means any directory server. */ +typedef enum { + DL_WANT_ANY_DIRSERVER = 0, + DL_WANT_AUTHORITY = 1, +} download_want_authority_t; +#define download_want_authority_bitfield_t \ + ENUM_BF(download_want_authority_t) + +/** Enumeration: do we want to increment the schedule position each time a + * connection is attempted (these attempts can be concurrent), or do we want + * to increment the schedule position after a connection fails? */ +typedef enum { + DL_SCHED_INCREMENT_FAILURE = 0, + DL_SCHED_INCREMENT_ATTEMPT = 1, +} download_schedule_increment_t; +#define download_schedule_increment_bitfield_t \ + ENUM_BF(download_schedule_increment_t) + +typedef struct download_status_t download_status_t; + +/** If n_download_failures is this high, the download can never happen. */ +#define IMPOSSIBLE_TO_DOWNLOAD 255 + +/** The max size we expect router descriptor annotations we create to + * be. We'll accept larger ones if we see them on disk, but we won't + * create any that are larger than this. */ +#define ROUTER_ANNOTATION_BUF_LEN 256 + +typedef struct signed_descriptor_t signed_descriptor_t; + +/** A signed integer representing a country code. */ +typedef int16_t country_t; + +/** Flags used to summarize the declared protocol versions of a relay, + * so we don't need to parse them again and again. */ +typedef struct protover_summary_flags_t { + /** True iff we have a proto line for this router, or a versions line + * from which we could infer the protocols. 
*/ + unsigned int protocols_known:1; + + /** True iff this router has a version or protocol list that allows it to + * accept EXTEND2 cells. This requires Relay=2. */ + unsigned int supports_extend2_cells:1; + + /** True iff this router has a protocol list that allows it to negotiate + * ed25519 identity keys on a link handshake with us. This + * requires LinkAuth=3. */ + unsigned int supports_ed25519_link_handshake_compat:1; + + /** True iff this router has a protocol list that allows it to negotiate + * ed25519 identity keys on a link handshake, at all. This requires some + * LinkAuth=X for X >= 3. */ + unsigned int supports_ed25519_link_handshake_any:1; + + /** True iff this router has a protocol list that allows it to be an + * introduction point supporting the ed25519 authentication key which is + * part of the v3 protocol detailed in proposal 224. This requires + * HSIntro=4. */ + unsigned int supports_ed25519_hs_intro : 1; + + /** True iff this router has a protocol list that allows it to be a hidden + * service directory supporting version 3 as seen in proposal 224. This + * requires HSDir=2. */ + unsigned int supports_v3_hsdir : 1; + + /** True iff this router has a protocol list that allows it to be a hidden + * service rendezvous point supporting version 3 as seen in proposal 224. + * This requires HSRend=2. 
*/ + unsigned int supports_v3_rendezvous_point: 1; +} protover_summary_flags_t; + +typedef struct routerinfo_t routerinfo_t; +typedef struct extrainfo_t extrainfo_t; +typedef struct routerstatus_t routerstatus_t; + +typedef struct microdesc_t microdesc_t; +typedef struct node_t node_t; +typedef struct vote_microdesc_hash_t vote_microdesc_hash_t; +typedef struct vote_routerstatus_t vote_routerstatus_t; +typedef struct document_signature_t document_signature_t; +typedef struct networkstatus_voter_info_t networkstatus_voter_info_t; +typedef struct networkstatus_sr_info_t networkstatus_sr_info_t; + +/** Enumerates recognized flavors of a consensus networkstatus document. All + * flavors of a consensus are generated from the same set of votes, but they + * present different types of information to different versions of Tor. */ +typedef enum { + FLAV_NS = 0, + FLAV_MICRODESC = 1, +} consensus_flavor_t; + +/** How many different consensus flavors are there? */ +#define N_CONSENSUS_FLAVORS ((int)(FLAV_MICRODESC)+1) + +typedef struct networkstatus_t networkstatus_t; +typedef struct ns_detached_signatures_t ns_detached_signatures_t; +typedef struct desc_store_t desc_store_t; +typedef struct routerlist_t routerlist_t; +typedef struct extend_info_t extend_info_t; +typedef struct authority_cert_t authority_cert_t; + +/** Bitfield enum type listing types of information that directory authorities + * can be authoritative about, and that directory caches may or may not cache. + * + * Note that the granularity here is based on authority granularity and on + * cache capabilities. Thus, one particular bit may correspond in practice to + * a few types of directory info, so long as every authority that pronounces + * officially about one of the types pronounces officially about all of them, + * and so long as every cache that caches one of them caches all of them. 
+ */ +typedef enum { + NO_DIRINFO = 0, + /** Serves/signs v3 directory information: votes, consensuses, certs */ + V3_DIRINFO = 1 << 2, + /** Serves bridge descriptors. */ + BRIDGE_DIRINFO = 1 << 4, + /** Serves extrainfo documents. */ + EXTRAINFO_DIRINFO=1 << 5, + /** Serves microdescriptors. */ + MICRODESC_DIRINFO=1 << 6, +} dirinfo_type_t; + +#define ALL_DIRINFO ((dirinfo_type_t)((1<<7)-1)) + +#define ONION_HANDSHAKE_TYPE_TAP 0x0000 +#define ONION_HANDSHAKE_TYPE_FAST 0x0001 +#define ONION_HANDSHAKE_TYPE_NTOR 0x0002 +#define MAX_ONION_HANDSHAKE_TYPE 0x0002 + +typedef struct onion_handshake_state_t onion_handshake_state_t; +typedef struct relay_crypto_t relay_crypto_t; +typedef struct crypt_path_t crypt_path_t; +typedef struct crypt_path_reference_t crypt_path_reference_t; + +#define CPATH_KEY_MATERIAL_LEN (20*2+16*2) + +typedef struct cpath_build_state_t cpath_build_state_t; + +struct create_cell_t; + +/** Entry in the cell stats list of a circuit; used only if CELL_STATS + * events are enabled. */ +typedef struct testing_cell_stats_entry_t { + uint8_t command; /**< cell command number. */ + /** Waiting time in centiseconds if this event is for a removed cell, + * or 0 if this event is for adding a cell to the queue. 22 bits can + * store more than 11 hours, enough to assume that a circuit with this + * delay would long have been closed. */ + unsigned int waiting_time:22; + unsigned int removed:1; /**< 0 for added to, 1 for removed from queue. */ + unsigned int exitward:1; /**< 0 for app-ward, 1 for exit-ward. */ +} testing_cell_stats_entry_t; + +typedef struct circuit_t circuit_t; +typedef struct origin_circuit_t origin_circuit_t; +typedef struct or_circuit_t or_circuit_t; + +/** Largest number of relay_early cells that we can send on a given + * circuit. 
*/ +#define MAX_RELAY_EARLY_CELLS_PER_CIRCUIT 8 + +typedef enum path_state_t path_state_t; +#define path_state_bitfield_t ENUM_BF(path_state_t) + +#if REND_COOKIE_LEN != DIGEST_LEN +#error "The REND_TOKEN_LEN macro assumes REND_COOKIE_LEN == DIGEST_LEN" +#endif +#define REND_TOKEN_LEN DIGEST_LEN + +/** Convert a circuit subtype to a circuit_t. */ +#define TO_CIRCUIT(x) (&((x)->base_)) + +/** @name Isolation flags + + Ways to isolate client streams + + @{ +*/ +/** Isolate based on destination port */ +#define ISO_DESTPORT (1u<<0) +/** Isolate based on destination address */ +#define ISO_DESTADDR (1u<<1) +/** Isolate based on SOCKS authentication */ +#define ISO_SOCKSAUTH (1u<<2) +/** Isolate based on client protocol choice */ +#define ISO_CLIENTPROTO (1u<<3) +/** Isolate based on client address */ +#define ISO_CLIENTADDR (1u<<4) +/** Isolate based on session group (always on). */ +#define ISO_SESSIONGRP (1u<<5) +/** Isolate based on newnym epoch (always on). */ +#define ISO_NYM_EPOCH (1u<<6) +/** Isolate all streams (Internal only). */ +#define ISO_STREAM (1u<<7) +/**@}*/ + +/** Default isolation level for ports. */ +#define ISO_DEFAULT (ISO_CLIENTADDR|ISO_SOCKSAUTH|ISO_SESSIONGRP|ISO_NYM_EPOCH) + +/** Indicates that we haven't yet set a session group on a port_cfg_t. */ +#define SESSION_GROUP_UNSET -1 +/** Session group reserved for directory connections */ +#define SESSION_GROUP_DIRCONN -2 +/** Session group reserved for resolve requests launched by a controller */ +#define SESSION_GROUP_CONTROL_RESOLVE -3 +/** First automatically allocated session group number */ +#define SESSION_GROUP_FIRST_AUTO -4 + +typedef struct port_cfg_t port_cfg_t; +typedef struct routerset_t routerset_t; + +/** A magic value for the (Socks|OR|...)Port options below, telling Tor + * to pick its own port. 
*/ +#define CFG_AUTO_PORT 0xc4005e + +typedef struct or_options_t or_options_t; + +#define LOG_PROTOCOL_WARN (get_protocol_warning_severity_level()) + +typedef struct or_state_t or_state_t; + +#define MAX_SOCKS_ADDR_LEN 256 + +/********************************* circuitbuild.c **********************/ + +/** How many hops does a general-purpose circuit have by default? */ +#define DEFAULT_ROUTE_LEN 3 + +/* Circuit Build Timeout "public" structures. */ + +/** Precision multiplier for the Bw weights */ +#define BW_WEIGHT_SCALE 10000 +#define BW_MIN_WEIGHT_SCALE 1 +#define BW_MAX_WEIGHT_SCALE INT32_MAX + +typedef struct circuit_build_times_s circuit_build_times_t; + +/********************************* config.c ***************************/ + +/********************************* connection_edge.c *************************/ + +/** Enumerates possible origins of a client-side address mapping. */ +typedef enum { + /** We're remapping this address because the controller told us to. */ + ADDRMAPSRC_CONTROLLER, + /** We're remapping this address because of an AutomapHostsOnResolve + * configuration. */ + ADDRMAPSRC_AUTOMAP, + /** We're remapping this address because our configuration (via torrc, the + * command line, or a SETCONF command) told us to. */ + ADDRMAPSRC_TORRC, + /** We're remapping this address because we have TrackHostExit configured, + * and we want to remember to use the same exit next time. */ + ADDRMAPSRC_TRACKEXIT, + /** We're remapping this address because we got a DNS resolution from a + * Tor server that told us what its value was. */ + ADDRMAPSRC_DNS, + + /** No remapping has occurred. This isn't a possible value for an + * addrmap_entry_t; it's used as a null value when we need to answer "Why + * did this remapping happen." 
*/ + ADDRMAPSRC_NONE +} addressmap_entry_source_t; +#define addressmap_entry_source_bitfield_t ENUM_BF(addressmap_entry_source_t) + +#define WRITE_STATS_INTERVAL (24*60*60) + +/********************************* dirvote.c ************************/ + +typedef struct vote_timing_t vote_timing_t; + +/********************************* microdesc.c *************************/ + +typedef struct microdesc_cache_t microdesc_cache_t; + +/********************************* rendcommon.c ***************************/ + +typedef struct rend_authorized_client_t rend_authorized_client_t; +typedef struct rend_encoded_v2_service_descriptor_t + rend_encoded_v2_service_descriptor_t; + +/** The maximum number of non-circuit-build-timeout failures a hidden + * service client will tolerate while trying to build a circuit to an + * introduction point. See also rend_intro_point_t.unreachable_count. */ +#define MAX_INTRO_POINT_REACHABILITY_FAILURES 5 + +/** The minimum and maximum number of distinct INTRODUCE2 cells which a + * hidden service's introduction point will receive before it begins to + * expire. */ +#define INTRO_POINT_MIN_LIFETIME_INTRODUCTIONS 16384 +/* Double the minimum value so the interval is [min, min * 2]. */ +#define INTRO_POINT_MAX_LIFETIME_INTRODUCTIONS \ + (INTRO_POINT_MIN_LIFETIME_INTRODUCTIONS * 2) + +/** The minimum number of seconds that an introduction point will last + * before expiring due to old age. (If it receives between + * INTRO_POINT_MIN_LIFETIME_INTRODUCTIONS and + * INTRO_POINT_MAX_LIFETIME_INTRODUCTIONS INTRODUCE2 cells, it may expire + * sooner.) + * + * XXX Should this be configurable? */ +#define INTRO_POINT_LIFETIME_MIN_SECONDS (18*60*60) +/** The maximum number of seconds that an introduction point will last + * before expiring due to old age. + * + * XXX Should this be configurable? */ +#define INTRO_POINT_LIFETIME_MAX_SECONDS (24*60*60) + +/** The maximum number of circuit creation retries we make to an intro point + * before giving up. 
We try to reuse intro points that fail during their + * lifetime, so this is a hard limit on the number of times we do that. */ +#define MAX_INTRO_POINT_CIRCUIT_RETRIES 3 + +typedef struct rend_intro_point_t rend_intro_point_t; +typedef struct rend_service_descriptor_t rend_service_descriptor_t; + +/********************************* routerlist.c ***************************/ + +typedef struct dir_server_t dir_server_t; + +#define RELAY_REQUIRED_MIN_BANDWIDTH (75*1024) +#define BRIDGE_REQUIRED_MIN_BANDWIDTH (50*1024) + +#define ROUTER_MAX_DECLARED_BANDWIDTH INT32_MAX + +typedef struct tor_version_t tor_version_t; + +#endif /* !defined(TOR_OR_H) */ diff --cc src/core/or/relay.c index 407873e2a,000000000..260d84797 mode 100644,000000..100644 --- a/src/core/or/relay.c +++ b/src/core/or/relay.c @@@ -1,3168 -1,0 +1,3169 @@@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file relay.c + * \brief Handle relay cell encryption/decryption, plus packaging and + * receiving from circuits, plus queuing on circuits. + * + * This is a core module that makes Tor work. It's responsible for + * dealing with RELAY cells (the ones that travel more than one hop along a + * circuit), by: + * <ul> + * <li>constructing relay cells, + * <li>encrypting relay cells, + * <li>decrypting relay cells, + * <li>demultiplexing relay cells as they arrive on a connection, + * <li>queueing relay cells for retransmission, + * <li>or handling relay cells that are for us to receive (as an exit or a + * client). + * </ul> + * + * RELAY cells are generated throughout the code at the client or relay side, + * using relay_send_command_from_edge() or one of the functions like + * connection_edge_send_command() that calls it. 
Of particular interest is + * connection_edge_package_raw_inbuf(), which takes information that has + * arrived on an edge connection socket, and packages it as a RELAY_DATA cell + * -- this is how information is actually sent across the Tor network. The + * cryptography for these functions is handled deep in + * circuit_package_relay_cell(), which either adds a single layer of + * encryption (if we're an exit), or multiple layers (if we're the origin of + * the circuit). After construction and encryption, the RELAY cells are + * passed to append_cell_to_circuit_queue(), which queues them for + * transmission and tells the circuitmux (see circuitmux.c) that the circuit + * is waiting to send something. + * + * Incoming RELAY cells arrive at circuit_receive_relay_cell(), called from + * command.c. There they are decrypted and, if they are for us, are passed to + * connection_edge_process_relay_cell(). If they're not for us, they're + * re-queued for retransmission again with append_cell_to_circuit_queue(). + * + * The connection_edge_process_relay_cell() function handles all the different + * types of relay cells, launching requests or transmitting data as needed. 
+ **/ + +#define RELAY_PRIVATE +#include "core/or/or.h" +#include "feature/client/addressmap.h" +#include "lib/err/backtrace.h" +#include "lib/container/buffers.h" +#include "core/or/channel.h" +#include "feature/client/circpathbias.h" +#include "core/or/circuitbuild.h" +#include "core/or/circuitlist.h" +#include "core/or/circuituse.h" +#include "lib/compress/compress.h" +#include "app/config/config.h" +#include "core/mainloop/connection.h" +#include "core/or/connection_edge.h" +#include "core/or/connection_or.h" +#include "feature/control/control.h" +#include "lib/crypt_ops/crypto_rand.h" +#include "lib/crypt_ops/crypto_util.h" +#include "feature/dircache/directory.h" +#include "feature/relay/dns.h" +#include "feature/stats/geoip.h" +#include "feature/hs/hs_cache.h" +#include "core/mainloop/main.h" +#include "feature/nodelist/networkstatus.h" +#include "feature/nodelist/nodelist.h" +#include "core/crypto/onion.h" +#include "core/or/policies.h" +#include "core/or/reasons.h" +#include "core/or/relay.h" +#include "core/crypto/relay_crypto.h" +#include "feature/rend/rendcache.h" +#include "feature/rend/rendcommon.h" +#include "feature/relay/router.h" +#include "feature/nodelist/routerlist.h" +#include "feature/nodelist/routerparse.h" +#include "core/or/scheduler.h" +#include "feature/stats/rephist.h" + +#include "core/or/cell_st.h" +#include "core/or/cell_queue_st.h" +#include "core/or/cpath_build_state_st.h" +#include "feature/dircommon/dir_connection_st.h" +#include "core/or/destroy_cell_queue_st.h" +#include "core/or/entry_connection_st.h" +#include "core/or/extend_info_st.h" +#include "core/or/or_circuit_st.h" +#include "core/or/origin_circuit_st.h" +#include "feature/nodelist/routerinfo_st.h" +#include "core/or/socks_request_st.h" + +#include "lib/intmath/weakrng.h" + +static edge_connection_t *relay_lookup_conn(circuit_t *circ, cell_t *cell, + cell_direction_t cell_direction, + crypt_path_t *layer_hint); + +static void circuit_consider_sending_sendme(circuit_t 
*circ, + crypt_path_t *layer_hint); +static void circuit_resume_edge_reading(circuit_t *circ, + crypt_path_t *layer_hint); +static int circuit_resume_edge_reading_helper(edge_connection_t *conn, + circuit_t *circ, + crypt_path_t *layer_hint); +static int circuit_consider_stop_edge_reading(circuit_t *circ, + crypt_path_t *layer_hint); +static int circuit_queue_streams_are_blocked(circuit_t *circ); +static void adjust_exit_policy_from_exitpolicy_failure(origin_circuit_t *circ, + entry_connection_t *conn, + node_t *node, + const tor_addr_t *addr); + +/** Stop reading on edge connections when we have this many cells + * waiting on the appropriate queue. */ +#define CELL_QUEUE_HIGHWATER_SIZE 256 +/** Start reading from edge connections again when we get down to this many + * cells. */ +#define CELL_QUEUE_LOWWATER_SIZE 64 + +/** Stats: how many relay cells have originated at this hop, or have + * been relayed onward (not recognized at this hop)? + */ +uint64_t stats_n_relay_cells_relayed = 0; +/** Stats: how many relay cells have been delivered to streams at this + * hop? + */ +uint64_t stats_n_relay_cells_delivered = 0; +/** Stats: how many circuits have we closed due to the cell queue limit being + * reached (see append_cell_to_circuit_queue()) */ +uint64_t stats_n_circ_max_cell_reached = 0; + +/** Used to tell which stream to read from first on a circuit. */ +static tor_weak_rng_t stream_choice_rng = TOR_WEAK_RNG_INIT; + +/** + * Update channel usage state based on the type of relay cell and + * circuit properties. + * + * This is needed to determine if a client channel is being + * used for application traffic, and if a relay channel is being + * used for multihop circuits and application traffic. The decision + * to pad in channelpadding.c depends upon this info (as well as + * consensus parameters) to decide what channels to pad. 
+ *
+ * <b>circ</b> is the circuit the cell belongs to and <b>cell</b> is the
+ * relay cell itself; only cell->command is inspected. Nothing is returned:
+ * the sole effect is to update channel_usage on circ->n_chan (origin
+ * circuits) or on the previous-hop channel p_chan (all other circuits).
+ */
+static void
+circuit_update_channel_usage(circuit_t *circ, cell_t *cell)
+{
+  if (CIRCUIT_IS_ORIGIN(circ)) {
+    /*
+     * The client state was first set much earlier in
+     * circuit_send_next_onion_skin(), so we can start padding as early as
+     * possible.
+     *
+     * However, if padding turns out to be expensive, we may want to not do
+     * it until actual application traffic starts flowing (which is controlled
+     * via consensus param nf_pad_before_usage).
+     *
+     * So: If we're an origin circuit and we've created a full length circuit,
+     * then any CELL_RELAY cell means application data. Increase the usage
+     * state of the channel to indicate this.
+     *
+     * We want to wait for CELL_RELAY specifically here, so we know that
+     * the channel was definitely being used for data and not for extends.
+     * By default, we pad as soon as a channel has been used for *any*
+     * circuits, so this state is irrelevant to the padding decision in
+     * the default case. However, if padding turns out to be expensive,
+     * we would like the ability to avoid padding until we're absolutely
+     * sure that a channel is used for enough application data to be worth
+     * padding.
+     *
+     * (So it does not matter that CELL_RELAY_EARLY can actually contain
+     * application data. This is only a load reducing option and that edge
+     * case does not matter if we're desperately trying to reduce overhead
+     * anyway. See also consensus parameter nf_pad_before_usage).
+     */
+    /* No next-hop channel to update; BUG() flags this as unexpected. */
+    if (BUG(!circ->n_chan))
+      return;
+
+    /* Only promote FULL_CIRCS -> USER_TRAFFIC; other states are left alone. */
+    if (circ->n_chan->channel_usage == CHANNEL_USED_FOR_FULL_CIRCS &&
+        cell->command == CELL_RELAY) {
+      circ->n_chan->channel_usage = CHANNEL_USED_FOR_USER_TRAFFIC;
+    }
+  } else {
+    /* If we're a relay circuit, the question is more complicated. Basically:
+     * we only want to pad connections that carry multihop (anonymous)
+     * circuits.
+     *
+     * We assume we're more than one hop if either the previous hop
+     * is not a client, or if the previous hop is a client and there's
+     * a next hop. Then, circuit traffic starts at RELAY_EARLY, and
+     * user application traffic starts when we see RELAY cells.
+     */
+    or_circuit_t *or_circ = TO_OR_CIRCUIT(circ);
+
+    /* No previous-hop channel to update; BUG() flags this as unexpected. */
+    if (BUG(!or_circ->p_chan))
+      return;
+
+    /* NOTE(review): the second channel_is_client() test is implied by the
+     * short-circuit of ||, so this reads as (!is_client || circ->n_chan),
+     * assuming channel_is_client() is a pure query -- confirm. */
+    if (!channel_is_client(or_circ->p_chan) ||
+        (channel_is_client(or_circ->p_chan) && circ->n_chan)) {
+      if (cell->command == CELL_RELAY_EARLY) {
+        /* Never lower the usage level: only raise it to FULL_CIRCS. */
+        if (or_circ->p_chan->channel_usage < CHANNEL_USED_FOR_FULL_CIRCS) {
+          or_circ->p_chan->channel_usage = CHANNEL_USED_FOR_FULL_CIRCS;
+        }
+      } else if (cell->command == CELL_RELAY) {
+        or_circ->p_chan->channel_usage = CHANNEL_USED_FOR_USER_TRAFFIC;
+      }
+    }
+  }
+}
+
+/** Receive a relay cell:
+ *  - Crypt it (encrypt if headed toward the origin or if we <b>are</b> the
+ *    origin; decrypt if we're headed toward the exit).
+ *  - Check if recognized (if exitward).
+ *  - If recognized and the digest checks out, then find if there's a stream
+ *    that the cell is intended for, and deliver it to the right
+ *    connection_edge.
+ *  - If not recognized, then we need to relay it: append it to the appropriate
+ *    cell_queue on <b>circ</b>.
+ *
+ * <b>cell</b> and <b>circ</b> must be non-NULL, and <b>cell_direction</b>
+ * must be CELL_DIRECTION_OUT or CELL_DIRECTION_IN (all asserted below).
+ * The cell's circ_id (and, across a rendezvous splice, its command) may be
+ * rewritten in place before it is queued.
+ *
+ * Return 0 on success, and also when the cell is dropped without error
+ * (circuit already marked for close, recognized cell on a path-bias testing
+ * circuit, or an unrecognized inbound cell on any other origin circuit).
+ * Return -<b>reason</b> on failure.
+ */
+int
+circuit_receive_relay_cell(cell_t *cell, circuit_t *circ,
+                           cell_direction_t cell_direction)
+{
+  channel_t *chan = NULL;
+  crypt_path_t *layer_hint=NULL;
+  char recognized=0;
+  int reason;
+
+  tor_assert(cell);
+  tor_assert(circ);
+  tor_assert(cell_direction == CELL_DIRECTION_OUT ||
+             cell_direction == CELL_DIRECTION_IN);
+  if (circ->marked_for_close)
+    return 0;
+
+  /* Sets layer_hint and recognized as out-parameters. */
+  if (relay_decrypt_cell(circ, cell, cell_direction, &layer_hint, &recognized)
+      < 0) {
+    log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL,
+           "relay crypt failed. Dropping connection.");
+    return -END_CIRC_REASON_INTERNAL;
+  }
+
+  circuit_update_channel_usage(circ, cell);
+
+  if (recognized) {
+    edge_connection_t *conn = NULL;
+
+    if (circ->purpose == CIRCUIT_PURPOSE_PATH_BIAS_TESTING) {
+      if (pathbias_check_probe_response(circ, cell) == -1) {
+        pathbias_count_valid_cells(circ, cell);
+      }
+
+      /* We need to drop this cell no matter what to avoid code that expects
+       * a certain purpose (such as the hidserv code). */
+      return 0;
+    }
+
+    /* conn stays NULL when no attached stream matches the cell's stream
+     * ID; it is passed on to connection_edge_process_relay_cell() as-is. */
+    conn = relay_lookup_conn(circ, cell, cell_direction, layer_hint);
+    if (cell_direction == CELL_DIRECTION_OUT) {
+      ++stats_n_relay_cells_delivered;
+      log_debug(LD_OR,"Sending away from origin.");
+      if ((reason=connection_edge_process_relay_cell(cell, circ, conn, NULL))
+          < 0) {
+        log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL,
+               "connection_edge_process_relay_cell (away from origin) "
+               "failed.");
+        return reason;
+      }
+    }
+    if (cell_direction == CELL_DIRECTION_IN) {
+      ++stats_n_relay_cells_delivered;
+      log_debug(LD_OR,"Sending to origin.");
+      if ((reason = connection_edge_process_relay_cell(cell, circ, conn,
+                                                       layer_hint)) < 0) {
+        /* If a client is trying to connect to unknown hidden service port,
+         * END_CIRC_AT_ORIGIN is sent back so we can then close the circuit.
+         * Do not log warn as this is an expected behavior for a service. */
+        if (reason != END_CIRC_AT_ORIGIN) {
+          log_warn(LD_OR,
+                   "connection_edge_process_relay_cell (at origin) failed.");
+        }
+        return reason;
+      }
+    }
+    return 0;
+  }
+
+  /* not recognized. pass it on. */
+  if (cell_direction == CELL_DIRECTION_OUT) {
+    cell->circ_id = circ->n_circ_id; /* switch it */
+    chan = circ->n_chan;
+  } else if (! CIRCUIT_IS_ORIGIN(circ)) {
+    cell->circ_id = TO_OR_CIRCUIT(circ)->p_circ_id; /* switch it */
+    chan = TO_OR_CIRCUIT(circ)->p_chan;
+  } else {
+    log_fn(LOG_PROTOCOL_WARN, LD_OR,
+           "Dropping unrecognized inbound cell on origin circuit.");
+    /* If we see unrecognized cells on path bias testing circs,
+     * it's bad mojo. Those circuits need to die.
+     * XXX: Shouldn't they always die? */
+    if (circ->purpose == CIRCUIT_PURPOSE_PATH_BIAS_TESTING) {
+      TO_ORIGIN_CIRCUIT(circ)->path_state = PATH_STATE_USE_FAILED;
+      return -END_CIRC_REASON_TORPROTOCOL;
+    } else {
+      return 0;
+    }
+  }
+
+  if (!chan) {
+    // XXXX Can this splice stuff be done more cleanly?
+    if (! CIRCUIT_IS_ORIGIN(circ) &&
+        TO_OR_CIRCUIT(circ)->rend_splice &&
+        cell_direction == CELL_DIRECTION_OUT) {
+      /* No next hop, but this circuit is spliced to another one: re-enter
+       * this function on the spliced circuit with the direction flipped
+       * to CELL_DIRECTION_IN. */
+      or_circuit_t *splice_ = TO_OR_CIRCUIT(circ)->rend_splice;
+      tor_assert(circ->purpose == CIRCUIT_PURPOSE_REND_ESTABLISHED);
+      tor_assert(splice_->base_.purpose == CIRCUIT_PURPOSE_REND_ESTABLISHED);
+      cell->circ_id = splice_->p_circ_id;
+      cell->command = CELL_RELAY; /* can't be relay_early anyway */
+      if ((reason = circuit_receive_relay_cell(cell, TO_CIRCUIT(splice_),
+                                               CELL_DIRECTION_IN)) < 0) {
+        log_warn(LD_REND, "Error relaying cell across rendezvous; closing "
+                 "circuits");
+        /* XXXX Do this here, or just return -1? */
+        /* reason is negative here, so -reason is the positive code. */
+        circuit_mark_for_close(circ, -reason);
+        return reason;
+      }
+      return 0;
+    }
+    log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL,
+           "Didn't recognize cell, but circ stops here! Closing circ.");
+    return -END_CIRC_REASON_TORPROTOCOL;
+  }
+
+  log_debug(LD_OR,"Passing on unrecognized cell.");
+
+  ++stats_n_relay_cells_relayed; /* XXXX no longer quite accurate {cells}
+                                  * we might kill the circ before we relay
+                                  * the cells. */
+
+  append_cell_to_circuit_queue(circ, chan, cell, cell_direction, 0);
+  return 0;
+}
+
+/** Package a relay cell from an edge:
+ *  - Encrypt it to the right layer
+ *  - Append it to the appropriate cell_queue on <b>circ</b>.
+ *
+ * <b>cell_direction</b> selects the path: CELL_DIRECTION_OUT cells are
+ * encrypted with relay_encrypt_cell_outbound() using <b>layer_hint</b> and
+ * queued toward circ->n_chan; any other direction is treated as incoming,
+ * encrypted with relay_encrypt_cell_inbound() and queued toward the
+ * previous hop's p_chan. <b>on_stream</b> is passed through to
+ * append_cell_to_circuit_queue(). <b>filename</b> and <b>lineno</b>
+ * identify the caller in the warnings logged here.
+ *
+ * Always returns 0, including when the cell is dropped (circuit marked for
+ * close, no n_chan, or a direction that does not fit the circuit type).
+ */
+static int
+circuit_package_relay_cell(cell_t *cell, circuit_t *circ,
+                           cell_direction_t cell_direction,
+                           crypt_path_t *layer_hint, streamid_t on_stream,
+                           const char *filename, int lineno)
+{
+  channel_t *chan; /* where to send the cell */
+
+  if (circ->marked_for_close) {
+    /* Circuit is marked; send nothing. */
+    return 0;
+  }
+
+  if (cell_direction == CELL_DIRECTION_OUT) {
+    chan = circ->n_chan;
+    if (!chan) {
+      /* NOTE(review): marked_for_close was already checked above, so the
+       * "%smarked for close" slot always receives "not " on this path. */
+      log_warn(LD_BUG,"outgoing relay cell sent from %s:%d has n_chan==NULL."
+               " Dropping. Circuit is in state %s (%d), and is "
+               "%smarked for close. (%s:%d, %d)", filename, lineno,
+               circuit_state_to_string(circ->state), circ->state,
+               circ->marked_for_close ? "" : "not ",
+               circ->marked_for_close_file?circ->marked_for_close_file:"",
+               circ->marked_for_close, circ->marked_for_close_reason);
+      if (CIRCUIT_IS_ORIGIN(circ)) {
+        circuit_log_path(LOG_WARN, LD_BUG, TO_ORIGIN_CIRCUIT(circ));
+      }
+      log_backtrace(LOG_WARN,LD_BUG,"");
+      return 0; /* just drop it */
+    }
+    if (!CIRCUIT_IS_ORIGIN(circ)) {
+      log_warn(LD_BUG,"outgoing relay cell sent from %s:%d on non-origin "
+               "circ. Dropping.", filename, lineno);
+      log_backtrace(LOG_WARN,LD_BUG,"");
+      return 0; /* just drop it */
+    }
+
+    relay_encrypt_cell_outbound(cell, TO_ORIGIN_CIRCUIT(circ), layer_hint);
+
+    /* Update circ written totals for control port */
+    origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ);
+    ocirc->n_written_circ_bw = tor_add_u32_nowrap(ocirc->n_written_circ_bw,
+                                                  CELL_PAYLOAD_SIZE);
+
+  } else { /* incoming cell */
+    if (CIRCUIT_IS_ORIGIN(circ)) {
+      /* We should never package an _incoming_ cell from the circuit
+       * origin; that means we messed up somewhere. */
+      log_warn(LD_BUG,"incoming relay cell at origin circuit. Dropping.");
+      assert_circuit_ok(circ);
+      return 0; /* just drop it */
+    }
+    or_circuit_t *or_circ = TO_OR_CIRCUIT(circ);
+    relay_encrypt_cell_inbound(cell, or_circ);
+    chan = or_circ->p_chan;
+  }
+  /* Counted for every cell that reaches the queue, in either direction. */
+  ++stats_n_relay_cells_relayed;
+
+  append_cell_to_circuit_queue(circ, chan, cell, cell_direction, on_stream);
+  return 0;
+}
+
+/** If cell's stream_id matches the stream_id of any conn that's
+ * attached to circ, return that conn, else return NULL.
+ *
+ * The stream ID is read from the relay header unpacked from cell->payload;
+ * an ID of 0 never matches. Streams marked for close are skipped. On
+ * origin circuits a stream matches only if its cpath_layer equals
+ * <b>layer_hint</b>. On other circuits, n_streams entries are returned
+ * only for CELL_DIRECTION_OUT cells or rendezvous streams, while
+ * resolving_streams entries are returned regardless of direction.
+ */
+static edge_connection_t *
+relay_lookup_conn(circuit_t *circ, cell_t *cell,
+                  cell_direction_t cell_direction, crypt_path_t *layer_hint)
+{
+  edge_connection_t *tmpconn;
+  relay_header_t rh;
+
+  relay_header_unpack(&rh, cell->payload);
+
+  /* Stream ID 0 is never looked up in the stream lists. */
+  if (!rh.stream_id)
+    return NULL;
+
+  /* IN or OUT cells could have come from either direction, now
+   * that we allow rendezvous *to* an OP.
+   */
+
+  if (CIRCUIT_IS_ORIGIN(circ)) {
+    /* Origin circuit: walk p_streams and also require the same hop. */
+    for (tmpconn = TO_ORIGIN_CIRCUIT(circ)->p_streams; tmpconn;
+         tmpconn=tmpconn->next_stream) {
+      if (rh.stream_id == tmpconn->stream_id &&
+          !tmpconn->base_.marked_for_close &&
+          tmpconn->cpath_layer == layer_hint) {
+        log_debug(LD_APP,"found conn for stream %d.", rh.stream_id);
+        return tmpconn;
+      }
+    }
+  } else {
+    /* Non-origin circuit: first the n_streams list... */
+    for (tmpconn = TO_OR_CIRCUIT(circ)->n_streams; tmpconn;
+         tmpconn=tmpconn->next_stream) {
+      if (rh.stream_id == tmpconn->stream_id &&
+          !tmpconn->base_.marked_for_close) {
+        log_debug(LD_EXIT,"found conn for stream %d.", rh.stream_id);
+        /* An ID match alone is not enough here: if the direction test
+         * fails the scan simply continues with the next stream. */
+        if (cell_direction == CELL_DIRECTION_OUT ||
+            connection_edge_is_rendezvous_stream(tmpconn))
+          return tmpconn;
+      }
+    }
+    /* ...then the resolving_streams list. */
+    for (tmpconn = TO_OR_CIRCUIT(circ)->resolving_streams; tmpconn;
+         tmpconn=tmpconn->next_stream) {
+      if (rh.stream_id == tmpconn->stream_id &&
+          !tmpconn->base_.marked_for_close) {
+        log_debug(LD_EXIT,"found conn for stream %d.", rh.stream_id);
+        return tmpconn;
+      }
+    }
+  }
+  return NULL; /* probably a begin relay cell */
+}
+
+/** Pack the relay_header_t host-order structure <b>src</b> into
+ * network-order in the buffer <b>dest</b>. See tor-spec.txt for details
+ * about the wire format.
+ *
+ * Fields are written at fixed byte offsets of <b>dest</b>: command at 0,
+ * recognized at 1, stream_id at 3, the 4-byte integrity field at 5, and
+ * length at 9. The 16-bit fields are converted with htons(). <b>dest</b>
+ * must therefore have room for 11 bytes (assuming set_uint16() stores
+ * exactly two bytes -- TODO confirm against its definition).
+ */
+void
+relay_header_pack(uint8_t *dest, const relay_header_t *src)
+{
+  set_uint8(dest, src->command);
+  set_uint16(dest+1, htons(src->recognized));
+  set_uint16(dest+3, htons(src->stream_id));
+  memcpy(dest+5, src->integrity, 4); /* copied byte-for-byte, no swap */
+  set_uint16(dest+9, htons(src->length));
+}
+
+/** Unpack the network-order buffer <b>src</b> into a host-order
+ * relay_header_t structure <b>dest</b>.
+ *
+ * This mirrors relay_header_pack(): fields are read from the same byte
+ * offsets of <b>src</b> (0, 1, 3, 5 and 9), and the 16-bit ones are
+ * converted with ntohs().
+ */
+void
+relay_header_unpack(relay_header_t *dest, const uint8_t *src)
+{
+  dest->command = get_uint8(src);
+  dest->recognized = ntohs(get_uint16(src+1));
+  dest->stream_id = ntohs(get_uint16(src+3));
+  memcpy(dest->integrity, src+5, 4); /* copied byte-for-byte, no swap */
+  dest->length = ntohs(get_uint16(src+9));
+}
+
+/** Convert the relay <b>command</b> into a human-readable string.
+ *
+ * Known commands map to string literals. Any other value is formatted into
+ * a function-local static buffer, so that result is overwritten by the next
+ * call that reaches the default case. Callers must not free the returned
+ * pointer in either case. */
+static const char *
+relay_command_to_string(uint8_t command)
+{
+  static char buf[64];
+  switch (command) {
+    case RELAY_COMMAND_BEGIN: return "BEGIN";
+    case RELAY_COMMAND_DATA: return "DATA";
+    case RELAY_COMMAND_END: return "END";
+    case RELAY_COMMAND_CONNECTED: return "CONNECTED";
+    case RELAY_COMMAND_SENDME: return "SENDME";
+    case RELAY_COMMAND_EXTEND: return "EXTEND";
+    case RELAY_COMMAND_EXTENDED: return "EXTENDED";
+    case RELAY_COMMAND_TRUNCATE: return "TRUNCATE";
+    case RELAY_COMMAND_TRUNCATED: return "TRUNCATED";
+    case RELAY_COMMAND_DROP: return "DROP";
+    case RELAY_COMMAND_RESOLVE: return "RESOLVE";
+    case RELAY_COMMAND_RESOLVED: return "RESOLVED";
+    case RELAY_COMMAND_BEGIN_DIR: return "BEGIN_DIR";
+    case RELAY_COMMAND_ESTABLISH_INTRO: return "ESTABLISH_INTRO";
+    case RELAY_COMMAND_ESTABLISH_RENDEZVOUS: return "ESTABLISH_RENDEZVOUS";
+    case RELAY_COMMAND_INTRODUCE1: return "INTRODUCE1";
+    case RELAY_COMMAND_INTRODUCE2: return "INTRODUCE2";
+    case RELAY_COMMAND_RENDEZVOUS1: return "RENDEZVOUS1";
+    case RELAY_COMMAND_RENDEZVOUS2: return "RENDEZVOUS2";
+    case RELAY_COMMAND_INTRO_ESTABLISHED: return "INTRO_ESTABLISHED";
+    case RELAY_COMMAND_RENDEZVOUS_ESTABLISHED:
+      return "RENDEZVOUS_ESTABLISHED";
+    case RELAY_COMMAND_INTRODUCE_ACK: return "INTRODUCE_ACK";
+    case RELAY_COMMAND_EXTEND2: return "EXTEND2";
+    case RELAY_COMMAND_EXTENDED2: return "EXTENDED2";
+    default:
+      /* Unknown command: report its numeric value via the shared buffer. */
+      tor_snprintf(buf, sizeof(buf), "Unrecognized relay command %u",
+                   (unsigned)command);
+      return buf;
+  }
+}
+
+/** Make a relay cell out of <b>relay_command</b> and <b>payload</b>, and send
+ * it onto the open circuit <b>circ</b>.
<b>stream_id</b> is the ID on + * <b>circ</b> for the stream that's sending the relay cell, or 0 if it's a + * control cell. <b>cpath_layer</b> is NULL for OR->OP cells, or the + * destination hop for OP->OR cells. + * + * If you can't send the cell, mark the circuit for close and return -1. Else + * return 0. + */ +MOCK_IMPL(int, +relay_send_command_from_edge_,(streamid_t stream_id, circuit_t *circ, + uint8_t relay_command, const char *payload, + size_t payload_len, crypt_path_t *cpath_layer, + const char *filename, int lineno)) +{ + cell_t cell; + relay_header_t rh; + cell_direction_t cell_direction; + /* XXXX NM Split this function into a separate versions per circuit type? */ + + tor_assert(circ); + tor_assert(payload_len <= RELAY_PAYLOAD_SIZE); + + memset(&cell, 0, sizeof(cell_t)); + cell.command = CELL_RELAY; + if (CIRCUIT_IS_ORIGIN(circ)) { + tor_assert(cpath_layer); + cell.circ_id = circ->n_circ_id; + cell_direction = CELL_DIRECTION_OUT; + } else { + tor_assert(! cpath_layer); + cell.circ_id = TO_OR_CIRCUIT(circ)->p_circ_id; + cell_direction = CELL_DIRECTION_IN; + } + + memset(&rh, 0, sizeof(rh)); + rh.command = relay_command; + rh.stream_id = stream_id; + rh.length = payload_len; + relay_header_pack(cell.payload, &rh); + if (payload_len) + memcpy(cell.payload+RELAY_HEADER_SIZE, payload, payload_len); + + log_debug(LD_OR,"delivering %d cell %s.", relay_command, + cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward"); + + if (relay_command == RELAY_COMMAND_DROP) + rep_hist_padding_count_write(PADDING_TYPE_DROP); + + /* If we are sending an END cell and this circuit is used for a tunneled + * directory request, advance its state. 
*/ + if (relay_command == RELAY_COMMAND_END && circ->dirreq_id) + geoip_change_dirreq_state(circ->dirreq_id, DIRREQ_TUNNELED, + DIRREQ_END_CELL_SENT); + + if (cell_direction == CELL_DIRECTION_OUT && circ->n_chan) { + /* if we're using relaybandwidthrate, this conn wants priority */ + channel_timestamp_client(circ->n_chan); + } + + if (cell_direction == CELL_DIRECTION_OUT) { + origin_circuit_t *origin_circ = TO_ORIGIN_CIRCUIT(circ); + if (origin_circ->remaining_relay_early_cells > 0 && + (relay_command == RELAY_COMMAND_EXTEND || + relay_command == RELAY_COMMAND_EXTEND2 || + cpath_layer != origin_circ->cpath)) { + /* If we've got any relay_early cells left and (we're sending + * an extend cell or we're not talking to the first hop), use + * one of them. Don't worry about the conn protocol version: + * append_cell_to_circuit_queue will fix it up. */ + cell.command = CELL_RELAY_EARLY; + --origin_circ->remaining_relay_early_cells; + log_debug(LD_OR, "Sending a RELAY_EARLY cell; %d remaining.", + (int)origin_circ->remaining_relay_early_cells); + /* Memorize the command that is sent as RELAY_EARLY cell; helps debug + * task 878. */ + origin_circ->relay_early_commands[ + origin_circ->relay_early_cells_sent++] = relay_command; + } else if (relay_command == RELAY_COMMAND_EXTEND || + relay_command == RELAY_COMMAND_EXTEND2) { + /* If no RELAY_EARLY cells can be sent over this circuit, log which + * commands have been sent as RELAY_EARLY cells before; helps debug + * task 878. */ + smartlist_t *commands_list = smartlist_new(); + int i = 0; + char *commands = NULL; + for (; i < origin_circ->relay_early_cells_sent; i++) + smartlist_add(commands_list, (char *) + relay_command_to_string(origin_circ->relay_early_commands[i])); + commands = smartlist_join_strings(commands_list, ",", 0, NULL); + log_warn(LD_BUG, "Uh-oh. We're sending a RELAY_COMMAND_EXTEND cell, " + "but we have run out of RELAY_EARLY cells on that circuit. 
" + "Commands sent before: %s", commands); + tor_free(commands); + smartlist_free(commands_list); + } + + /* Let's assume we're well-behaved: Anything that we decide to send is + * valid, delivered data. */ + circuit_sent_valid_data(origin_circ, rh.length); + } + + if (circuit_package_relay_cell(&cell, circ, cell_direction, cpath_layer, + stream_id, filename, lineno) < 0) { + log_warn(LD_BUG,"circuit_package_relay_cell failed. Closing."); + circuit_mark_for_close(circ, END_CIRC_REASON_INTERNAL); + return -1; + } + return 0; +} + +/** Make a relay cell out of <b>relay_command</b> and <b>payload</b>, and + * send it onto the open circuit <b>circ</b>. <b>fromconn</b> is the stream + * that's sending the relay cell, or NULL if it's a control cell. + * <b>cpath_layer</b> is NULL for OR->OP cells, or the destination hop + * for OP->OR cells. + * + * If you can't send the cell, mark the circuit for close and + * return -1. Else return 0. + */ +int +connection_edge_send_command(edge_connection_t *fromconn, + uint8_t relay_command, const char *payload, + size_t payload_len) +{ + /* XXXX NM Split this function into a separate versions per circuit type? */ + circuit_t *circ; + crypt_path_t *cpath_layer = fromconn->cpath_layer; + tor_assert(fromconn); + circ = fromconn->on_circuit; + + if (fromconn->base_.marked_for_close) { + log_warn(LD_BUG, + "called on conn that's already marked for close at %s:%d.", + fromconn->base_.marked_for_close_file, + fromconn->base_.marked_for_close); + return 0; + } + + if (!circ) { + if (fromconn->base_.type == CONN_TYPE_AP) { + log_info(LD_APP,"no circ. Closing conn."); + connection_mark_unattached_ap(EDGE_TO_ENTRY_CONN(fromconn), + END_STREAM_REASON_INTERNAL); + } else { + log_info(LD_EXIT,"no circ. 
Closing conn."); + fromconn->edge_has_sent_end = 1; /* no circ to send to */ + fromconn->end_reason = END_STREAM_REASON_INTERNAL; + connection_mark_for_close(TO_CONN(fromconn)); + } + return -1; + } + + if (circ->marked_for_close) { + /* The circuit has been marked, but not freed yet. When it's freed, it + * will mark this connection for close. */ + return -1; + } + +#ifdef MEASUREMENTS_21206 + /* Keep track of the number of RELAY_DATA cells sent for directory + * connections. */ + connection_t *linked_conn = TO_CONN(fromconn)->linked_conn; + + if (linked_conn && linked_conn->type == CONN_TYPE_DIR) { + ++(TO_DIR_CONN(linked_conn)->data_cells_sent); + } +#endif /* defined(MEASUREMENTS_21206) */ + + return relay_send_command_from_edge(fromconn->stream_id, circ, + relay_command, payload, + payload_len, cpath_layer); +} + +/** How many times will I retry a stream that fails due to DNS + * resolve failure or misc error? + */ +#define MAX_RESOLVE_FAILURES 3 + +/** Return 1 if reason is something that you should retry if you + * get the end cell before you've connected; else return 0. */ +static int +edge_reason_is_retriable(int reason) +{ + return reason == END_STREAM_REASON_HIBERNATING || + reason == END_STREAM_REASON_RESOURCELIMIT || + reason == END_STREAM_REASON_EXITPOLICY || + reason == END_STREAM_REASON_RESOLVEFAILED || + reason == END_STREAM_REASON_MISC || + reason == END_STREAM_REASON_NOROUTE; +} + +/** Called when we receive an END cell on a stream that isn't open yet, + * from the client side. + * Arguments are as for connection_edge_process_relay_cell(). 
+ * + * Return -END_CIRC_REASON_TORPROTOCOL if the cell has a non-empty payload + * whose reason is TORPROTOCOL or DESTROY; return 0 in every other case. + */ +static int +connection_ap_process_end_not_open( + relay_header_t *rh, cell_t *cell, origin_circuit_t *circ, + entry_connection_t *conn, crypt_path_t *layer_hint) +{ + node_t *exitrouter; + /* First payload byte; replaced by END_STREAM_REASON_MISC below when the + * payload is empty. */ + int reason = *(cell->payload+RELAY_HEADER_SIZE); + int control_reason; + edge_connection_t *edge_conn = ENTRY_TO_EDGE_CONN(conn); + (void) layer_hint; /* unused */ + + if (rh->length > 0) { + if (reason == END_STREAM_REASON_TORPROTOCOL || + reason == END_STREAM_REASON_DESTROY) { + /* Both of these reasons could mean a failed tag + * hit the exit and it complained. Do not probe. + * Fail the circuit. */ + circ->path_state = PATH_STATE_USE_FAILED; + return -END_CIRC_REASON_TORPROTOCOL; + } else if (reason == END_STREAM_REASON_INTERNAL) { + /* We can't infer success or failure, since older Tors report + * ENETUNREACH as END_STREAM_REASON_INTERNAL. */ + } else { + /* Path bias: If we get a valid reason code from the exit, + * it wasn't due to tagging. + * + * We rely on recognized+digest being strong enough to make + * tags unlikely to allow us to get tagged, yet 'recognized' + * reason codes here. */ + pathbias_mark_use_success(circ); + } + } + + /* This end cell is now valid. */ + circuit_read_valid_data(circ, rh->length); + + if (rh->length == 0) { + reason = END_STREAM_REASON_MISC; + } + + control_reason = reason | END_STREAM_REASON_FLAG_REMOTE; + + if (edge_reason_is_retriable(reason) && + /* avoid retry if rend */ + !connection_edge_is_rendezvous_stream(edge_conn)) { + const char *chosen_exit_digest = + circ->build_state->chosen_exit->identity_digest; + log_info(LD_APP,"Address '%s' refused due to '%s'. 
Considering retrying.", + safe_str(conn->socks_request->address), + stream_end_reason_to_string(reason)); + exitrouter = node_get_mutable_by_id(chosen_exit_digest); + switch (reason) { + case END_STREAM_REASON_EXITPOLICY: { + tor_addr_t addr; + tor_addr_make_unspec(&addr); + if (rh->length >= 5) { + int ttl = -1; + tor_addr_make_unspec(&addr); + /* Payload: 1 reason byte, then a 4-byte IPv4 address (length 5) or + * a 16-byte IPv6 address (length 17), optionally followed by a + * 4-byte TTL (lengths 9 and 21). Any other length leaves addr + * unspecified. */ + if (rh->length == 5 || rh->length == 9) { + tor_addr_from_ipv4n(&addr, + get_uint32(cell->payload+RELAY_HEADER_SIZE+1)); + if (rh->length == 9) + ttl = (int)ntohl(get_uint32(cell->payload+RELAY_HEADER_SIZE+5)); + } else if (rh->length == 17 || rh->length == 21) { + tor_addr_from_ipv6_bytes(&addr, + (char*)(cell->payload+RELAY_HEADER_SIZE+1)); + if (rh->length == 21) + ttl = (int)ntohl(get_uint32(cell->payload+RELAY_HEADER_SIZE+17)); + } + if (tor_addr_is_null(&addr)) { + log_info(LD_APP,"Address '%s' resolved to 0.0.0.0. Closing,", + safe_str(conn->socks_request->address)); + connection_mark_unattached_ap(conn, END_STREAM_REASON_TORPROTOCOL); + return 0; + } + + if ((tor_addr_family(&addr) == AF_INET && + !conn->entry_cfg.ipv4_traffic) || + (tor_addr_family(&addr) == AF_INET6 && + !conn->entry_cfg.ipv6_traffic)) { + log_fn(LOG_PROTOCOL_WARN, LD_APP, + "Got an EXITPOLICY failure on a connection with a " + "mismatched family. Closing."); + connection_mark_unattached_ap(conn, END_STREAM_REASON_TORPROTOCOL); + return 0; + } + if (get_options()->ClientDNSRejectInternalAddresses && + tor_addr_is_internal(&addr, 0)) { + log_info(LD_APP,"Address '%s' resolved to internal. 
Closing,", + safe_str(conn->socks_request->address)); + connection_mark_unattached_ap(conn, END_STREAM_REASON_TORPROTOCOL); + return 0; + } + + client_dns_set_addressmap(conn, + conn->socks_request->address, &addr, + conn->chosen_exit_name, ttl); + + { + char new_addr[TOR_ADDR_BUF_LEN]; + tor_addr_to_str(new_addr, &addr, sizeof(new_addr), 1); + if (strcmp(conn->socks_request->address, new_addr)) { + strlcpy(conn->socks_request->address, new_addr, + sizeof(conn->socks_request->address)); + control_event_stream_status(conn, STREAM_EVENT_REMAP, 0); + } + } + } + /* check if the exit *ought* to have allowed it */ + + adjust_exit_policy_from_exitpolicy_failure(circ, + conn, + exitrouter, + &addr); + + if (conn->chosen_exit_optional || + conn->chosen_exit_retries) { + /* stop wanting a specific exit */ + conn->chosen_exit_optional = 0; + /* A non-zero chosen_exit_retries can happen if we set a + * TrackHostExits for this address under a port that the exit + * relay allows, but then try the same address with a different + * port that it doesn't allow to exit. We shouldn't unregister + * the mapping, since it is probably still wanted on the + * original port. But now we give away to the exit relay that + * we probably have a TrackHostExits on it. So be it. */ + conn->chosen_exit_retries = 0; + tor_free(conn->chosen_exit_name); /* clears it */ + } + if (connection_ap_detach_retriable(conn, circ, control_reason) >= 0) + return 0; + /* else, conn will get closed below */ + break; + } + /* NOTE(review): edge_reason_is_retriable() does not accept + * CONNECTREFUSED or TIMEOUT, so those two labels look unreachable + * from inside this branch. */ + case END_STREAM_REASON_CONNECTREFUSED: + if (!conn->chosen_exit_optional) + break; /* break means it'll close, below */ + /* Else fall through: expire this circuit, clear the + * chosen_exit_name field, and try again. */ + /* Falls through. 
*/ + case END_STREAM_REASON_RESOLVEFAILED: + case END_STREAM_REASON_TIMEOUT: + case END_STREAM_REASON_MISC: + case END_STREAM_REASON_NOROUTE: + if (client_dns_incr_failures(conn->socks_request->address) + < MAX_RESOLVE_FAILURES) { + /* We haven't retried too many times; reattach the connection. */ + circuit_log_path(LOG_INFO,LD_APP,circ); + /* Mark this circuit "unusable for new streams". */ + mark_circuit_unusable_for_new_conns(circ); + + if (conn->chosen_exit_optional) { + /* stop wanting a specific exit */ + conn->chosen_exit_optional = 0; + tor_free(conn->chosen_exit_name); /* clears it */ + } + if (connection_ap_detach_retriable(conn, circ, control_reason) >= 0) + return 0; + /* else, conn will get closed below */ + } else { + log_notice(LD_APP, + "Have tried resolving or connecting to address '%s' " + "at %d different places. Giving up.", + safe_str(conn->socks_request->address), + MAX_RESOLVE_FAILURES); + /* clear the failures, so it will have a full try next time */ + client_dns_clear_failures(conn->socks_request->address); + } + break; + case END_STREAM_REASON_HIBERNATING: + case END_STREAM_REASON_RESOURCELIMIT: + if (exitrouter) { + policies_set_node_exitpolicy_to_reject_all(exitrouter); + } + if (conn->chosen_exit_optional) { + /* stop wanting a specific exit */ + conn->chosen_exit_optional = 0; + tor_free(conn->chosen_exit_name); /* clears it */ + } + if (connection_ap_detach_retriable(conn, circ, control_reason) >= 0) + return 0; + /* else, will close below */ + break; + } /* end switch */ + log_info(LD_APP,"Giving up on retrying; conn can't be handled."); + } + + log_info(LD_APP, + "Edge got end (%s) before we're connected. Marking for close.", + stream_end_reason_to_string(rh->length > 0 ? 
reason : -1)); + circuit_log_path(LOG_INFO,LD_APP,circ); + /* need to test because of detach_retriable */ + if (!ENTRY_TO_CONN(conn)->marked_for_close) + connection_mark_unattached_ap(conn, control_reason); + return 0; +} + +/** Called when we have gotten an END_REASON_EXITPOLICY failure on <b>circ</b> + * for <b>conn</b>, while attempting to connect via <b>node</b>. If the node + * told us which address it rejected, then <b>addr</b> is that address; + * otherwise it is AF_UNSPEC. + * + * If we are sure the node should have allowed this address, mark the node as + * having a reject *:* exit policy. Otherwise, mark the circuit as unusable + * for this particular address. + **/ +static void +adjust_exit_policy_from_exitpolicy_failure(origin_circuit_t *circ, + entry_connection_t *conn, + node_t *node, + const tor_addr_t *addr) +{ + int make_reject_all = 0; + const sa_family_t family = tor_addr_family(addr); + + if (node) { + tor_addr_t tmp; + int asked_for_family = tor_addr_parse(&tmp, conn->socks_request->address); + if (family == AF_UNSPEC) { + make_reject_all = 1; + } else if (node_exit_policy_is_exact(node, family) && + asked_for_family != -1 && !conn->chosen_exit_name) { + make_reject_all = 1; + } + + if (make_reject_all) { + log_info(LD_APP, + "Exitrouter %s seems to be more restrictive than its exit " + "policy. Not using this router as exit for now.", + node_describe(node)); + policies_set_node_exitpolicy_to_reject_all(node); + } + } + + if (family != AF_UNSPEC) + addr_policy_append_reject_addr(&circ->prepend_policy, addr); +} + +/** Helper: change the socks_request->address field on conn to the + * dotted-quad representation of <b>new_addr</b>, + * and send an appropriate REMAP event. 
*/ +static void +remap_event_helper(entry_connection_t *conn, const tor_addr_t *new_addr) +{ + tor_addr_to_str(conn->socks_request->address, new_addr, + sizeof(conn->socks_request->address), + 1); + control_event_stream_status(conn, STREAM_EVENT_REMAP, + REMAP_STREAM_SOURCE_EXIT); +} + +/** Extract the contents of a connected cell in <b>cell</b>, whose relay + * header has already been parsed into <b>rh</b>. On success, set + * <b>addr_out</b> to the address we're connected to, and <b>ttl_out</b> to + * the ttl of that address, in seconds, and return 0. On failure, return + * -1. + * + * Note that the resulting address can be UNSPEC if the connected cell had no + * address (as for a stream to an union service or a tunneled directory + * connection), and that the ttl can be absent (in which case <b>ttl_out</b> + * is set to -1). */ +STATIC int +connected_cell_parse(const relay_header_t *rh, const cell_t *cell, + tor_addr_t *addr_out, int *ttl_out) +{ + uint32_t bytes; + const uint8_t *payload = cell->payload + RELAY_HEADER_SIZE; + + tor_addr_make_unspec(addr_out); + *ttl_out = -1; + if (rh->length == 0) + return 0; + if (rh->length < 4) + return -1; + bytes = ntohl(get_uint32(payload)); + + /* If bytes is 0, this is maybe a v6 address. Otherwise it's a v4 address */ + if (bytes != 0) { + /* v4 address */ + tor_addr_from_ipv4h(addr_out, bytes); + if (rh->length >= 8) { + bytes = ntohl(get_uint32(payload + 4)); + if (bytes <= INT32_MAX) + *ttl_out = bytes; + } + } else { + if (rh->length < 25) /* 4 bytes of 0s, 1 addr, 16 ipv4, 4 ttl. */ + return -1; + if (get_uint8(payload + 4) != 6) + return -1; + tor_addr_from_ipv6_bytes(addr_out, (char*)(payload + 5)); + bytes = ntohl(get_uint32(payload + 21)); + if (bytes <= INT32_MAX) + *ttl_out = (int) bytes; + } + return 0; +} + +/** Drop all storage held by <b>addr</b>. 
*/ +STATIC void +address_ttl_free_(address_ttl_t *addr) +{ + if (!addr) + return; + tor_free(addr->hostname); + tor_free(addr); +} + +/** Parse a resolved cell in <b>cell</b>, with parsed header in <b>rh</b>. + * Return -1 on parse error. On success, add one or more newly allocated + * address_ttl_t to <b>addresses_out</b>; set *<b>errcode_out</b> to + * one of 0, RESOLVED_TYPE_ERROR, or RESOLVED_TYPE_ERROR_TRANSIENT, and + * return 0. */ +STATIC int +resolved_cell_parse(const cell_t *cell, const relay_header_t *rh, + smartlist_t *addresses_out, int *errcode_out) +{ + const uint8_t *cp; + uint8_t answer_type; + size_t answer_len; + address_ttl_t *addr; + size_t remaining; + int errcode = 0; + smartlist_t *addrs; + + tor_assert(cell); + tor_assert(rh); + tor_assert(addresses_out); + tor_assert(errcode_out); + + *errcode_out = 0; + + if (rh->length > RELAY_PAYLOAD_SIZE) + return -1; + + addrs = smartlist_new(); + + cp = cell->payload + RELAY_HEADER_SIZE; + + remaining = rh->length; + while (remaining) { + const uint8_t *cp_orig = cp; + if (remaining < 2) + goto err; + /* Each answer is 1 type byte, 1 length byte, answer_len value bytes, + * then a 4-byte TTL. 
*/ + answer_type = *cp++; + answer_len = *cp++; + if (remaining < 2 + answer_len + 4) { + goto err; + } + if (answer_type == RESOLVED_TYPE_IPV4) { + if (answer_len != 4) { + goto err; + } + addr = tor_malloc_zero(sizeof(*addr)); + tor_addr_from_ipv4n(&addr->addr, get_uint32(cp)); + cp += 4; + addr->ttl = ntohl(get_uint32(cp)); + cp += 4; + smartlist_add(addrs, addr); + } else if (answer_type == RESOLVED_TYPE_IPV6) { + if (answer_len != 16) + goto err; + addr = tor_malloc_zero(sizeof(*addr)); + tor_addr_from_ipv6_bytes(&addr->addr, (const char*) cp); + cp += 16; + addr->ttl = ntohl(get_uint32(cp)); + cp += 4; + smartlist_add(addrs, addr); + } else if (answer_type == RESOLVED_TYPE_HOSTNAME) { + if (answer_len == 0) { + goto err; + } + addr = tor_malloc_zero(sizeof(*addr)); + addr->hostname = tor_memdup_nulterm(cp, answer_len); + cp += answer_len; + addr->ttl = ntohl(get_uint32(cp)); + cp += 4; + 
smartlist_add(addrs, addr); + } else if (answer_type == RESOLVED_TYPE_ERROR_TRANSIENT || + answer_type == RESOLVED_TYPE_ERROR) { + errcode = answer_type; + /* Ignore the error contents */ + cp += answer_len + 4; + } else { + /* Unrecognized answer type: skip over it. */ + cp += answer_len + 4; + } + tor_assert(((ssize_t)remaining) >= (cp - cp_orig)); + remaining -= (cp - cp_orig); + } + + if (errcode && smartlist_len(addrs) == 0) { + /* Report an error only if there were no results. */ + *errcode_out = errcode; + } + + smartlist_add_all(addresses_out, addrs); + smartlist_free(addrs); + + return 0; + + err: + /* On parse error, don't report any results */ + SMARTLIST_FOREACH(addrs, address_ttl_t *, a, address_ttl_free(a)); + smartlist_free(addrs); + return -1; +} + +/** Helper for connection_edge_process_resolved_cell: given an error code, + * an entry_connection, and a list of address_ttl_t *, report the best answer + * to the entry_connection. */ +static void +connection_ap_handshake_socks_got_resolved_cell(entry_connection_t *conn, + int error_code, + smartlist_t *results) +{ + address_ttl_t *addr_ipv4 = NULL; + address_ttl_t *addr_ipv6 = NULL; + address_ttl_t *addr_hostname = NULL; + address_ttl_t *addr_best = NULL; + + /* If it's an error code, that's easy. */ + if (error_code) { + tor_assert(error_code == RESOLVED_TYPE_ERROR || + error_code == RESOLVED_TYPE_ERROR_TRANSIENT); + connection_ap_handshake_socks_resolved(conn, + error_code,0,NULL,-1,-1); + return; + } + + /* Get the first answer of each type. */ + SMARTLIST_FOREACH_BEGIN(results, address_ttl_t *, addr) { + if (addr->hostname) { + if (!addr_hostname) { + addr_hostname = addr; + } + } else if (tor_addr_family(&addr->addr) == AF_INET) { + if (!addr_ipv4 && conn->entry_cfg.ipv4_traffic) { + addr_ipv4 = addr; + } + } else if (tor_addr_family(&addr->addr) == AF_INET6) { + if (!addr_ipv6 && conn->entry_cfg.ipv6_traffic) { + addr_ipv6 = addr; + } + } + } SMARTLIST_FOREACH_END(addr); + + /* Now figure out which type we wanted to deliver. 
*/ + if (conn->socks_request->command == SOCKS_COMMAND_RESOLVE_PTR) { + if (addr_hostname) { + connection_ap_handshake_socks_resolved(conn, + RESOLVED_TYPE_HOSTNAME, + strlen(addr_hostname->hostname), + (uint8_t*)addr_hostname->hostname, + addr_hostname->ttl,-1); + } else { + connection_ap_handshake_socks_resolved(conn, + RESOLVED_TYPE_ERROR,0,NULL,-1,-1); + } + return; + } + + if (conn->entry_cfg.prefer_ipv6) { + addr_best = addr_ipv6 ? addr_ipv6 : addr_ipv4; + } else { + addr_best = addr_ipv4 ? addr_ipv4 : addr_ipv6; + } + + /* Now convert it to the ugly old interface */ + if (! addr_best) { + connection_ap_handshake_socks_resolved(conn, + RESOLVED_TYPE_ERROR,0,NULL,-1,-1); + return; + } + + connection_ap_handshake_socks_resolved_addr(conn, + &addr_best->addr, + addr_best->ttl, + -1); + + remap_event_helper(conn, &addr_best->addr); +} + +/** Handle a RELAY_COMMAND_RESOLVED cell that we received on a non-open AP + * stream. + * + * Always returns 0: a cell that arrives in the wrong state is dropped, and a + * malformed cell is answered by calling connection_mark_unattached_ap() on + * the stream; neither case is reported to the caller as an error. */ +STATIC int +connection_edge_process_resolved_cell(edge_connection_t *conn, + const cell_t *cell, + const relay_header_t *rh) +{ + entry_connection_t *entry_conn = EDGE_TO_ENTRY_CONN(conn); + smartlist_t *resolved_addresses = NULL; + int errcode = 0; + + if (conn->base_.state != AP_CONN_STATE_RESOLVE_WAIT) { + log_fn(LOG_PROTOCOL_WARN, LD_APP, "Got a 'resolved' cell while " + "not in state resolve_wait. 
Dropping."); + return 0; + } + tor_assert(SOCKS_COMMAND_IS_RESOLVE(entry_conn->socks_request->command)); + + resolved_addresses = smartlist_new(); + if (resolved_cell_parse(cell, rh, resolved_addresses, &errcode)) { + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, + "Dropping malformed 'resolved' cell"); + connection_mark_unattached_ap(entry_conn, END_STREAM_REASON_TORPROTOCOL); + goto done; + } + + if (get_options()->ClientDNSRejectInternalAddresses) { + int orig_len = smartlist_len(resolved_addresses); + SMARTLIST_FOREACH_BEGIN(resolved_addresses, address_ttl_t *, addr) { + if (addr->hostname == NULL && tor_addr_is_internal(&addr->addr, 0)) { + log_info(LD_APP, "Got a resolved cell with answer %s; dropping that " + "answer.", + safe_str_client(fmt_addr(&addr->addr))); + address_ttl_free(addr); + SMARTLIST_DEL_CURRENT(resolved_addresses, addr); + } + } SMARTLIST_FOREACH_END(addr); + if (orig_len && smartlist_len(resolved_addresses) == 0) { + log_info(LD_APP, "Got a resolved cell with only private addresses; " + "dropping it."); + connection_ap_handshake_socks_resolved(entry_conn, + RESOLVED_TYPE_ERROR_TRANSIENT, + 0, NULL, 0, TIME_MAX); + connection_mark_unattached_ap(entry_conn, + END_STREAM_REASON_TORPROTOCOL); + goto done; + } + } + + /* This is valid data at this point. Count it */ + if (conn->on_circuit && CIRCUIT_IS_ORIGIN(conn->on_circuit)) { + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(conn->on_circuit), + rh->length); + } + + connection_ap_handshake_socks_got_resolved_cell(entry_conn, + errcode, + resolved_addresses); + + connection_mark_unattached_ap(entry_conn, + END_STREAM_REASON_DONE | + END_STREAM_REASON_FLAG_ALREADY_SOCKS_REPLIED); + + done: + SMARTLIST_FOREACH(resolved_addresses, address_ttl_t *, addr, + address_ttl_free(addr)); + smartlist_free(resolved_addresses); + return 0; +} + +/** An incoming relay cell has arrived from circuit <b>circ</b> to + * stream <b>conn</b>. 
+ * + * The arguments here are the same as in + * connection_edge_process_relay_cell() below; this function is called + * from there when <b>conn</b> is defined and not in an open state. + * + * END cells are handled for any stream type; CONNECTED and RESOLVED cells + * are handled for AP streams only. Every other command is logged and + * dropped with a return value of 0. + */ +static int +connection_edge_process_relay_cell_not_open( + relay_header_t *rh, cell_t *cell, circuit_t *circ, + edge_connection_t *conn, crypt_path_t *layer_hint) +{ + if (rh->command == RELAY_COMMAND_END) { + if (CIRCUIT_IS_ORIGIN(circ) && conn->base_.type == CONN_TYPE_AP) { + return connection_ap_process_end_not_open(rh, cell, + TO_ORIGIN_CIRCUIT(circ), + EDGE_TO_ENTRY_CONN(conn), + layer_hint); + } else { + /* we just got an 'end', don't need to send one */ + conn->edge_has_sent_end = 1; + conn->end_reason = *(cell->payload+RELAY_HEADER_SIZE) | + END_STREAM_REASON_FLAG_REMOTE; + connection_mark_for_close(TO_CONN(conn)); + return 0; + } + } + + if (conn->base_.type == CONN_TYPE_AP && + rh->command == RELAY_COMMAND_CONNECTED) { + tor_addr_t addr; + int ttl; + entry_connection_t *entry_conn = EDGE_TO_ENTRY_CONN(conn); + tor_assert(CIRCUIT_IS_ORIGIN(circ)); + if (conn->base_.state != AP_CONN_STATE_CONNECT_WAIT) { + log_fn(LOG_PROTOCOL_WARN, LD_APP, + "Got 'connected' while not in state connect_wait. 
Dropping."); + return 0; + } + CONNECTION_AP_EXPECT_NONPENDING(entry_conn); + conn->base_.state = AP_CONN_STATE_OPEN; + log_info(LD_APP,"'connected' received for circid %u streamid %d " + "after %d seconds.", + (unsigned)circ->n_circ_id, + rh->stream_id, + (int)(time(NULL) - conn->base_.timestamp_last_read_allowed)); + if (connected_cell_parse(rh, cell, &addr, &ttl) < 0) { + log_fn(LOG_PROTOCOL_WARN, LD_APP, + "Got a badly formatted connected cell. Closing."); + connection_edge_end(conn, END_STREAM_REASON_TORPROTOCOL); + connection_mark_unattached_ap(entry_conn, END_STREAM_REASON_TORPROTOCOL); + return 0; + } + if (tor_addr_family(&addr) != AF_UNSPEC) { + /* The family is not UNSPEC: so we were given an address in the + * connected cell. 
(This is normal, except for BEGINDIR and onion + * service streams.) */ + const sa_family_t family = tor_addr_family(&addr); + if (tor_addr_is_null(&addr) || + (get_options()->ClientDNSRejectInternalAddresses && + tor_addr_is_internal(&addr, 0))) { + log_info(LD_APP, "...but it claims the IP address was %s. Closing.", + fmt_addr(&addr)); + connection_edge_end(conn, END_STREAM_REASON_TORPROTOCOL); + connection_mark_unattached_ap(entry_conn, + END_STREAM_REASON_TORPROTOCOL); + return 0; + } + + if ((family == AF_INET && ! entry_conn->entry_cfg.ipv4_traffic) || + (family == AF_INET6 && ! entry_conn->entry_cfg.ipv6_traffic)) { + log_fn(LOG_PROTOCOL_WARN, LD_APP, + "Got a connected cell to %s with unsupported address family." + " Closing.", fmt_addr(&addr)); + connection_edge_end(conn, END_STREAM_REASON_TORPROTOCOL); + connection_mark_unattached_ap(entry_conn, + END_STREAM_REASON_TORPROTOCOL); + return 0; + } + + client_dns_set_addressmap(entry_conn, + entry_conn->socks_request->address, &addr, + entry_conn->chosen_exit_name, ttl); + + remap_event_helper(entry_conn, &addr); + } + circuit_log_path(LOG_INFO,LD_APP,TO_ORIGIN_CIRCUIT(circ)); + /* don't send a socks reply to transparent conns */ + tor_assert(entry_conn->socks_request != NULL); + if (!entry_conn->socks_request->has_finished) { + connection_ap_handshake_socks_reply(entry_conn, NULL, 0, 0); + } + + /* Was it a linked dir conn? If so, a dir request just started to + * fetch something; this could be a bootstrap status milestone. */ + log_debug(LD_APP, "considering"); + if (TO_CONN(conn)->linked_conn && + TO_CONN(conn)->linked_conn->type == CONN_TYPE_DIR) { + connection_t *dirconn = TO_CONN(conn)->linked_conn; + log_debug(LD_APP, "it is! 
%d", dirconn->purpose); + switch (dirconn->purpose) { + case DIR_PURPOSE_FETCH_CERTIFICATE: + if (consensus_is_waiting_for_certs()) + control_event_bootstrap(BOOTSTRAP_STATUS_LOADING_KEYS, 0); + break; + case DIR_PURPOSE_FETCH_CONSENSUS: + control_event_bootstrap(BOOTSTRAP_STATUS_LOADING_STATUS, 0); + break; + case DIR_PURPOSE_FETCH_SERVERDESC: + case DIR_PURPOSE_FETCH_MICRODESC: + if (TO_DIR_CONN(dirconn)->router_purpose == ROUTER_PURPOSE_GENERAL) + control_event_boot_dir(BOOTSTRAP_STATUS_LOADING_DESCRIPTORS, + count_loading_descriptors_progress()); + break; + } + } + /* This is definitely a success, so forget about any pending data we + * had sent. */ + if (entry_conn->pending_optimistic_data) { + buf_free(entry_conn->pending_optimistic_data); + entry_conn->pending_optimistic_data = NULL; + } + + /* This is valid data at this point. Count it */ + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), rh->length); + + /* handle anything that might have queued */ + if (connection_edge_package_raw_inbuf(conn, 1, NULL) < 0) { + /* (We already sent an end cell if possible) */ + connection_mark_for_close(TO_CONN(conn)); + return 0; + } + return 0; + } + if (conn->base_.type == CONN_TYPE_AP && + rh->command == RELAY_COMMAND_RESOLVED) { + return connection_edge_process_resolved_cell(conn, cell, rh); + } + + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, + "Got an unexpected relay command %d, in state %d (%s). Dropping.", + rh->command, conn->base_.state, + conn_state_to_string(conn->base_.type, conn->base_.state)); + return 0; /* for forward compatibility, don't kill the circuit */ +// connection_edge_end(conn, END_STREAM_REASON_TORPROTOCOL); +// connection_mark_for_close(conn); +// return -1; +} + +/** An incoming relay cell has arrived on circuit <b>circ</b>. If + * <b>conn</b> is NULL this is a control cell, else <b>cell</b> is + * destined for <b>conn</b>. 
+ * + * If <b>layer_hint</b> is defined, then we're the origin of the + * circuit, and it specifies the hop that packaged <b>cell</b>. + * + * Return -reason if you want to warn and tear down the circuit, else 0. + */ +STATIC int +connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ, + edge_connection_t *conn, + crypt_path_t *layer_hint) +{ + static int num_seen=0; + relay_header_t rh; + unsigned domain = layer_hint?LD_APP:LD_EXIT; + int reason; + int optimistic_data = 0; /* Set to 1 if we receive data on a stream + * that's in the EXIT_CONN_STATE_RESOLVING + * or EXIT_CONN_STATE_CONNECTING states. */ + + tor_assert(cell); + tor_assert(circ); + + relay_header_unpack(&rh, cell->payload); +// log_fn(LOG_DEBUG,"command %d stream %d", rh.command, rh.stream_id); + num_seen++; + log_debug(domain, "Now seen %d relay cells here (command %d, stream %d).", + num_seen, rh.command, rh.stream_id); + + if (rh.length > RELAY_PAYLOAD_SIZE) { + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, + "Relay cell length field too long. Closing circuit."); + return - END_CIRC_REASON_TORPROTOCOL; + } + + if (rh.stream_id == 0) { + switch (rh.command) { + case RELAY_COMMAND_BEGIN: + case RELAY_COMMAND_CONNECTED: + case RELAY_COMMAND_END: + case RELAY_COMMAND_RESOLVE: + case RELAY_COMMAND_RESOLVED: + case RELAY_COMMAND_BEGIN_DIR: + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, "Relay command %d with zero " + "stream_id. Dropping.", (int)rh.command); + return 0; + default: + ; + } + } + + /* either conn is NULL, in which case we've got a control cell, or else + * conn points to the recognized stream. */ + + if (conn && !connection_state_is_open(TO_CONN(conn))) { + if (conn->base_.type == CONN_TYPE_EXIT && + (conn->base_.state == EXIT_CONN_STATE_CONNECTING || + conn->base_.state == EXIT_CONN_STATE_RESOLVING) && + rh.command == RELAY_COMMAND_DATA) { + /* Allow DATA cells to be delivered to an exit node in state + * EXIT_CONN_STATE_CONNECTING or EXIT_CONN_STATE_RESOLVING. 
+ * This speeds up HTTP, for example. */ + optimistic_data = 1; + } else if (rh.stream_id == 0 && rh.command == RELAY_COMMAND_DATA) { + log_warn(LD_BUG, "Somehow I had a connection that matched a " + "data cell with stream ID 0."); + } else { + return connection_edge_process_relay_cell_not_open( + &rh, cell, circ, conn, layer_hint); + } + } + + switch (rh.command) { + case RELAY_COMMAND_DROP: + rep_hist_padding_count_read(PADDING_TYPE_DROP); +// log_info(domain,"Got a relay-level padding cell. Dropping."); + return 0; + case RELAY_COMMAND_BEGIN: + case RELAY_COMMAND_BEGIN_DIR: + if (layer_hint && + circ->purpose != CIRCUIT_PURPOSE_S_REND_JOINED) { + log_fn(LOG_PROTOCOL_WARN, LD_APP, + "Relay begin request unsupported at AP. Dropping."); + return 0; + } + if (circ->purpose == CIRCUIT_PURPOSE_S_REND_JOINED && + layer_hint != TO_ORIGIN_CIRCUIT(circ)->cpath->prev) { + log_fn(LOG_PROTOCOL_WARN, LD_APP, + "Relay begin request to Hidden Service " + "from intermediary node. Dropping."); + return 0; + } + if (conn) { + log_fn(LOG_PROTOCOL_WARN, domain, + "Begin cell for known stream. Dropping."); + return 0; + } + if (rh.command == RELAY_COMMAND_BEGIN_DIR && + circ->purpose != CIRCUIT_PURPOSE_S_REND_JOINED) { + /* Assign this circuit and its app-ward OR connection a unique ID, + * so that we can measure download times. The local edge and dir + * connection will be assigned the same ID when they are created + * and linked. */ + static uint64_t next_id = 0; + circ->dirreq_id = ++next_id; + TO_OR_CIRCUIT(circ)->p_chan->dirreq_id = circ->dirreq_id; + } + return connection_exit_begin_conn(cell, circ); + case RELAY_COMMAND_DATA: + ++stats_n_data_cells_received; + if (( layer_hint && --layer_hint->deliver_window < 0) || + (!layer_hint && --circ->deliver_window < 0)) { + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, + "(relay data) circ deliver_window below 0. Killing."); + if (conn) { + /* XXXX Do we actually need to do this? 
Will killing the circuit + * not send an END and mark the stream for close as appropriate? */ + connection_edge_end(conn, END_STREAM_REASON_TORPROTOCOL); + connection_mark_for_close(TO_CONN(conn)); + } + return -END_CIRC_REASON_TORPROTOCOL; + } + log_debug(domain,"circ deliver_window now %d.", layer_hint ? + layer_hint->deliver_window : circ->deliver_window); + + circuit_consider_sending_sendme(circ, layer_hint); + + if (rh.stream_id == 0) { + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, "Relay data cell with zero " + "stream_id. Dropping."); + return 0; + } else if (!conn) { + if (CIRCUIT_IS_ORIGIN(circ)) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + if (connection_half_edge_is_valid_data(ocirc->half_streams, + rh.stream_id)) { + circuit_read_valid_data(ocirc, rh.length); + log_info(domain, + "data cell on circ %u valid on half-closed " + "stream id %d", ocirc->global_identifier, rh.stream_id); + } + } + + log_info(domain,"data cell dropped, unknown stream (streamid %d).", + rh.stream_id); + return 0; + } + + if (--conn->deliver_window < 0) { /* is it below 0 after decrement? */ + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, + "(relay data) conn deliver_window below 0. Killing."); + return -END_CIRC_REASON_TORPROTOCOL; + } + /* Total all valid application bytes delivered */ + if (CIRCUIT_IS_ORIGIN(circ) && rh.length > 0) { + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), rh.length); + } + + stats_n_data_bytes_received += rh.length; + connection_buf_add((char*)(cell->payload + RELAY_HEADER_SIZE), + rh.length, TO_CONN(conn)); + +#ifdef MEASUREMENTS_21206 + /* Count number of RELAY_DATA cells received on a linked directory + * connection. 
*/ + connection_t *linked_conn = TO_CONN(conn)->linked_conn; + + if (linked_conn && linked_conn->type == CONN_TYPE_DIR) { + ++(TO_DIR_CONN(linked_conn)->data_cells_received); + } +#endif /* defined(MEASUREMENTS_21206) */ + + if (!optimistic_data) { + /* Only send a SENDME if we're not getting optimistic data; otherwise + * a SENDME could arrive before the CONNECTED. + */ + connection_edge_consider_sending_sendme(conn); + } + + return 0; + case RELAY_COMMAND_END: + reason = rh.length > 0 ? + get_uint8(cell->payload+RELAY_HEADER_SIZE) : END_STREAM_REASON_MISC; + if (!conn) { + if (CIRCUIT_IS_ORIGIN(circ)) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + if (connection_half_edge_is_valid_end(ocirc->half_streams, + rh.stream_id)) { + + circuit_read_valid_data(ocirc, rh.length); + log_info(domain, + "end cell (%s) on circ %u valid on half-closed " + "stream id %d", + stream_end_reason_to_string(reason), + ocirc->global_identifier, rh.stream_id); + return 0; + } + } + log_info(domain,"end cell (%s) dropped, unknown stream.", + stream_end_reason_to_string(reason)); + return 0; + } +/* XXX add to this log_fn the exit node's nickname? */ + log_info(domain,TOR_SOCKET_T_FORMAT": end cell (%s) for stream %d. " + "Removing stream.", + conn->base_.s, + stream_end_reason_to_string(reason), + conn->stream_id); + if (conn->base_.type == CONN_TYPE_AP) { + entry_connection_t *entry_conn = EDGE_TO_ENTRY_CONN(conn); + if (entry_conn->socks_request && + !entry_conn->socks_request->has_finished) + log_warn(LD_BUG, + "open stream hasn't sent socks answer yet? Closing."); + } + /* We just *got* an end; no reason to send one. */ + conn->edge_has_sent_end = 1; + if (!conn->end_reason) + conn->end_reason = reason | END_STREAM_REASON_FLAG_REMOTE; + if (!conn->base_.marked_for_close) { + /* only mark it if not already marked. it's possible to + * get the 'end' right around when the client hangs up on us. 
*/ + connection_mark_and_flush(TO_CONN(conn)); + + /* Total all valid application bytes delivered */ + if (CIRCUIT_IS_ORIGIN(circ)) { + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), rh.length); + } + } + return 0; + case RELAY_COMMAND_EXTEND: + case RELAY_COMMAND_EXTEND2: { + static uint64_t total_n_extend=0, total_nonearly=0; + total_n_extend++; + if (rh.stream_id) { + log_fn(LOG_PROTOCOL_WARN, domain, + "'extend' cell received for non-zero stream. Dropping."); + return 0; + } + if (cell->command != CELL_RELAY_EARLY && + !networkstatus_get_param(NULL,"AllowNonearlyExtend",0,0,1)) { +#define EARLY_WARNING_INTERVAL 3600 + static ratelim_t early_warning_limit = + RATELIM_INIT(EARLY_WARNING_INTERVAL); + char *m; + if (cell->command == CELL_RELAY) { + ++total_nonearly; + if ((m = rate_limit_log(&early_warning_limit, approx_time()))) { + double percentage = ((double)total_nonearly)/total_n_extend; + percentage *= 100; + log_fn(LOG_PROTOCOL_WARN, domain, "EXTEND cell received, " + "but not via RELAY_EARLY. Dropping.%s", m); + log_fn(LOG_PROTOCOL_WARN, domain, " (We have dropped %.02f%% of " + "all EXTEND cells for this reason)", percentage); + tor_free(m); + } + } else { + log_fn(LOG_WARN, domain, + "EXTEND cell received, in a cell with type %d! Dropping.", + cell->command); + } + return 0; + } + return circuit_extend(cell, circ); + } + case RELAY_COMMAND_EXTENDED: + case RELAY_COMMAND_EXTENDED2: + if (!layer_hint) { + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, + "'extended' unsupported at non-origin. Dropping."); + return 0; + } + log_debug(domain,"Got an extended cell! 
Yay."); + { + extended_cell_t extended_cell; + if (extended_cell_parse(&extended_cell, rh.command, + (const uint8_t*)cell->payload+RELAY_HEADER_SIZE, + rh.length)<0) { + log_warn(LD_PROTOCOL, + "Can't parse EXTENDED cell; killing circuit."); + return -END_CIRC_REASON_TORPROTOCOL; + } + if ((reason = circuit_finish_handshake(TO_ORIGIN_CIRCUIT(circ), + &extended_cell.created_cell)) < 0) { + circuit_mark_for_close(circ, -reason); + return 0; /* We don't want to cause a warning, so we mark the circuit + * here. */ + } + } + if ((reason=circuit_send_next_onion_skin(TO_ORIGIN_CIRCUIT(circ)))<0) { + log_info(domain,"circuit_send_next_onion_skin() failed."); + return reason; + } + /* Total all valid bytes delivered. */ + if (CIRCUIT_IS_ORIGIN(circ)) { + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), rh.length); + } + return 0; + case RELAY_COMMAND_TRUNCATE: + if (layer_hint) { + log_fn(LOG_PROTOCOL_WARN, LD_APP, + "'truncate' unsupported at origin. Dropping."); + return 0; + } + if (circ->n_hop) { + if (circ->n_chan) + log_warn(LD_BUG, "n_chan and n_hop set on the same circuit!"); + extend_info_free(circ->n_hop); + circ->n_hop = NULL; + tor_free(circ->n_chan_create_cell); + circuit_set_state(circ, CIRCUIT_STATE_OPEN); + } + if (circ->n_chan) { + uint8_t trunc_reason = get_uint8(cell->payload + RELAY_HEADER_SIZE); ++ circuit_synchronize_written_or_bandwidth(circ, CIRCUIT_N_CHAN); + circuit_clear_cell_queue(circ, circ->n_chan); + channel_send_destroy(circ->n_circ_id, circ->n_chan, + trunc_reason); + circuit_set_n_circid_chan(circ, 0, NULL); + } + log_debug(LD_EXIT, "Processed 'truncate', replying."); + { + char payload[1]; + payload[0] = (char)END_CIRC_REASON_REQUESTED; + relay_send_command_from_edge(0, circ, RELAY_COMMAND_TRUNCATED, + payload, sizeof(payload), NULL); + } + return 0; + case RELAY_COMMAND_TRUNCATED: + if (!layer_hint) { + log_fn(LOG_PROTOCOL_WARN, LD_EXIT, + "'truncated' unsupported at non-origin. 
Dropping."); + return 0; + } + + /* Count the truncated as valid, for completeness. The + * circuit is being torn down anyway, though. */ + if (CIRCUIT_IS_ORIGIN(circ)) { + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), + rh.length); + } + circuit_truncated(TO_ORIGIN_CIRCUIT(circ), + get_uint8(cell->payload + RELAY_HEADER_SIZE)); + return 0; + case RELAY_COMMAND_CONNECTED: + if (conn) { + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, + "'connected' unsupported while open. Closing circ."); + return -END_CIRC_REASON_TORPROTOCOL; + } + + if (CIRCUIT_IS_ORIGIN(circ)) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + if (connection_half_edge_is_valid_connected(ocirc->half_streams, + rh.stream_id)) { + circuit_read_valid_data(ocirc, rh.length); + log_info(domain, + "connected cell on circ %u valid on half-closed " + "stream id %d", ocirc->global_identifier, rh.stream_id); + return 0; + } + } + + log_info(domain, + "'connected' received on circid %u for streamid %d, " + "no conn attached anymore. Ignoring.", + (unsigned)circ->n_circ_id, rh.stream_id); + return 0; + case RELAY_COMMAND_SENDME: + if (!rh.stream_id) { + if (layer_hint) { + if (layer_hint->package_window + CIRCWINDOW_INCREMENT > + CIRCWINDOW_START_MAX) { + static struct ratelim_t exit_warn_ratelim = RATELIM_INIT(600); + log_fn_ratelim(&exit_warn_ratelim, LOG_WARN, LD_PROTOCOL, + "Unexpected sendme cell from exit relay. " + "Closing circ."); + return -END_CIRC_REASON_TORPROTOCOL; + } + layer_hint->package_window += CIRCWINDOW_INCREMENT; + log_debug(LD_APP,"circ-level sendme at origin, packagewindow %d.", + layer_hint->package_window); + circuit_resume_edge_reading(circ, layer_hint); + + /* We count circuit-level sendme's as valid delivered data because + * they are rate limited. 
+ */ + if (CIRCUIT_IS_ORIGIN(circ)) { + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), + rh.length); + } + + } else { + if (circ->package_window + CIRCWINDOW_INCREMENT > + CIRCWINDOW_START_MAX) { + static struct ratelim_t client_warn_ratelim = RATELIM_INIT(600); + log_fn_ratelim(&client_warn_ratelim,LOG_PROTOCOL_WARN, LD_PROTOCOL, + "Unexpected sendme cell from client. " + "Closing circ (window %d).", + circ->package_window); + return -END_CIRC_REASON_TORPROTOCOL; + } + circ->package_window += CIRCWINDOW_INCREMENT; + log_debug(LD_APP, + "circ-level sendme at non-origin, packagewindow %d.", + circ->package_window); + circuit_resume_edge_reading(circ, layer_hint); + } + return 0; + } + if (!conn) { + if (CIRCUIT_IS_ORIGIN(circ)) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + if (connection_half_edge_is_valid_sendme(ocirc->half_streams, + rh.stream_id)) { + circuit_read_valid_data(ocirc, rh.length); + log_info(domain, + "sendme cell on circ %u valid on half-closed " + "stream id %d", ocirc->global_identifier, rh.stream_id); + } + } + + log_info(domain,"sendme cell dropped, unknown stream (streamid %d).", + rh.stream_id); + return 0; + } + + /* Don't allow the other endpoint to request more than our maximum + * (i.e. initial) stream SENDME window worth of data. Well-behaved + * stock clients will not request more than this max (as per the check + * in the while loop of connection_edge_consider_sending_sendme()). + */ + if (conn->package_window + STREAMWINDOW_INCREMENT > + STREAMWINDOW_START_MAX) { + static struct ratelim_t stream_warn_ratelim = RATELIM_INIT(600); + log_fn_ratelim(&stream_warn_ratelim, LOG_PROTOCOL_WARN, LD_PROTOCOL, + "Unexpected stream sendme cell. 
Closing circ (window %d).", + conn->package_window); + return -END_CIRC_REASON_TORPROTOCOL; + } + + /* At this point, the stream sendme is valid */ + if (CIRCUIT_IS_ORIGIN(circ)) { + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), + rh.length); + } + + conn->package_window += STREAMWINDOW_INCREMENT; + log_debug(domain,"stream-level sendme, packagewindow now %d.", + conn->package_window); + if (circuit_queue_streams_are_blocked(circ)) { + /* Still waiting for queue to flush; don't touch conn */ + return 0; + } + connection_start_reading(TO_CONN(conn)); + /* handle whatever might still be on the inbuf */ + if (connection_edge_package_raw_inbuf(conn, 1, NULL) < 0) { + /* (We already sent an end cell if possible) */ + connection_mark_for_close(TO_CONN(conn)); + return 0; + } + return 0; + case RELAY_COMMAND_RESOLVE: + if (layer_hint) { + log_fn(LOG_PROTOCOL_WARN, LD_APP, + "resolve request unsupported at AP; dropping."); + return 0; + } else if (conn) { + log_fn(LOG_PROTOCOL_WARN, domain, + "resolve request for known stream; dropping."); + return 0; + } else if (circ->purpose != CIRCUIT_PURPOSE_OR) { + log_fn(LOG_PROTOCOL_WARN, domain, + "resolve request on circ with purpose %d; dropping", + circ->purpose); + return 0; + } + connection_exit_begin_resolve(cell, TO_OR_CIRCUIT(circ)); + return 0; + case RELAY_COMMAND_RESOLVED: + if (conn) { + log_fn(LOG_PROTOCOL_WARN, domain, + "'resolved' unsupported while open. Closing circ."); + return -END_CIRC_REASON_TORPROTOCOL; + } + + if (CIRCUIT_IS_ORIGIN(circ)) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + if (connection_half_edge_is_valid_resolved(ocirc->half_streams, + rh.stream_id)) { + circuit_read_valid_data(ocirc, rh.length); + log_info(domain, + "resolved cell on circ %u valid on half-closed " + "stream id %d", ocirc->global_identifier, rh.stream_id); + return 0; + } + } + + log_info(domain, + "'resolved' received, no conn attached anymore. 
Ignoring."); + return 0; + case RELAY_COMMAND_ESTABLISH_INTRO: + case RELAY_COMMAND_ESTABLISH_RENDEZVOUS: + case RELAY_COMMAND_INTRODUCE1: + case RELAY_COMMAND_INTRODUCE2: + case RELAY_COMMAND_INTRODUCE_ACK: + case RELAY_COMMAND_RENDEZVOUS1: + case RELAY_COMMAND_RENDEZVOUS2: + case RELAY_COMMAND_INTRO_ESTABLISHED: + case RELAY_COMMAND_RENDEZVOUS_ESTABLISHED: + rend_process_relay_cell(circ, layer_hint, + rh.command, rh.length, + cell->payload+RELAY_HEADER_SIZE); + return 0; + } + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, + "Received unknown relay command %d. Perhaps the other side is using " + "a newer version of Tor? Dropping.", + rh.command); + return 0; /* for forward compatibility, don't kill the circuit */ +} + +/** How many relay_data cells have we built, ever? */ +uint64_t stats_n_data_cells_packaged = 0; +/** How many bytes of data have we put in relay_data cells have we built, + * ever? This would be RELAY_PAYLOAD_SIZE*stats_n_data_cells_packaged if + * every relay cell we ever sent were completely full of data. */ +uint64_t stats_n_data_bytes_packaged = 0; +/** How many relay_data cells have we received, ever? */ +uint64_t stats_n_data_cells_received = 0; +/** How many bytes of data have we received relay_data cells, ever? This would + * be RELAY_PAYLOAD_SIZE*stats_n_data_cells_packaged if every relay cell we + * ever received were completely full of data. */ +uint64_t stats_n_data_bytes_received = 0; + +/** If <b>conn</b> has an entire relay payload of bytes on its inbuf (or + * <b>package_partial</b> is true), and the appropriate package windows aren't + * empty, grab a cell and send it down the circuit. + * + * If *<b>max_cells</b> is given, package no more than max_cells. Decrement + * *<b>max_cells</b> by the number of cells packaged. + * + * Return -1 (and send a RELAY_COMMAND_END cell if necessary) if conn should + * be marked for close, else return 0. 
+ */ +int +connection_edge_package_raw_inbuf(edge_connection_t *conn, int package_partial, + int *max_cells) +{ + size_t bytes_to_process, length; + char payload[CELL_PAYLOAD_SIZE]; + circuit_t *circ; + const unsigned domain = conn->base_.type == CONN_TYPE_AP ? LD_APP : LD_EXIT; + int sending_from_optimistic = 0; + entry_connection_t *entry_conn = + conn->base_.type == CONN_TYPE_AP ? EDGE_TO_ENTRY_CONN(conn) : NULL; + const int sending_optimistically = + entry_conn && + conn->base_.type == CONN_TYPE_AP && + conn->base_.state != AP_CONN_STATE_OPEN; + crypt_path_t *cpath_layer = conn->cpath_layer; + + tor_assert(conn); + + if (conn->base_.marked_for_close) { + log_warn(LD_BUG, + "called on conn that's already marked for close at %s:%d.", + conn->base_.marked_for_close_file, conn->base_.marked_for_close); + return 0; + } + + if (max_cells && *max_cells <= 0) + return 0; + + repeat_connection_edge_package_raw_inbuf: + + circ = circuit_get_by_edge_conn(conn); + if (!circ) { + log_info(domain,"conn has no circuit! Closing."); + conn->end_reason = END_STREAM_REASON_CANT_ATTACH; + return -1; + } + + if (circuit_consider_stop_edge_reading(circ, cpath_layer)) + return 0; + + if (conn->package_window <= 0) { + log_info(domain,"called with package_window %d. 
Skipping.", + conn->package_window); + connection_stop_reading(TO_CONN(conn)); + return 0; + } + + sending_from_optimistic = entry_conn && + entry_conn->sending_optimistic_data != NULL; + + if (PREDICT_UNLIKELY(sending_from_optimistic)) { + bytes_to_process = buf_datalen(entry_conn->sending_optimistic_data); + if (PREDICT_UNLIKELY(!bytes_to_process)) { + log_warn(LD_BUG, "sending_optimistic_data was non-NULL but empty"); + bytes_to_process = connection_get_inbuf_len(TO_CONN(conn)); + sending_from_optimistic = 0; + } + } else { + bytes_to_process = connection_get_inbuf_len(TO_CONN(conn)); + } + + if (!bytes_to_process) + return 0; + + if (!package_partial && bytes_to_process < RELAY_PAYLOAD_SIZE) + return 0; + + if (bytes_to_process > RELAY_PAYLOAD_SIZE) { + length = RELAY_PAYLOAD_SIZE; + } else { + length = bytes_to_process; + } + stats_n_data_bytes_packaged += length; + stats_n_data_cells_packaged += 1; + + if (PREDICT_UNLIKELY(sending_from_optimistic)) { + /* XXXX We could be more efficient here by sometimes packing + * previously-sent optimistic data in the same cell with data + * from the inbuf. 
*/ + buf_get_bytes(entry_conn->sending_optimistic_data, payload, length); + if (!buf_datalen(entry_conn->sending_optimistic_data)) { + buf_free(entry_conn->sending_optimistic_data); + entry_conn->sending_optimistic_data = NULL; + } + } else { + connection_buf_get_bytes(payload, length, TO_CONN(conn)); + } + + log_debug(domain,TOR_SOCKET_T_FORMAT": Packaging %d bytes (%d waiting).", + conn->base_.s, + (int)length, (int)connection_get_inbuf_len(TO_CONN(conn))); + + if (sending_optimistically && !sending_from_optimistic) { + /* This is new optimistic data; remember it in case we need to detach and + retry */ + if (!entry_conn->pending_optimistic_data) + entry_conn->pending_optimistic_data = buf_new(); + buf_add(entry_conn->pending_optimistic_data, payload, length); + } + + if (connection_edge_send_command(conn, RELAY_COMMAND_DATA, + payload, length) < 0 ) { + /* circuit got marked for close, don't continue, don't need to mark conn */ + return 0; + } + + if (!cpath_layer) { /* non-rendezvous exit */ + tor_assert(circ->package_window > 0); + circ->package_window--; + } else { /* we're an AP, or an exit on a rendezvous circ */ + tor_assert(cpath_layer->package_window > 0); + cpath_layer->package_window--; + } + + if (--conn->package_window <= 0) { /* is it 0 after decrement? */ + connection_stop_reading(TO_CONN(conn)); + log_debug(domain,"conn->package_window reached 0."); + circuit_consider_stop_edge_reading(circ, cpath_layer); + return 0; /* don't process the inbuf any more */ + } + log_debug(domain,"conn->package_window is now %d",conn->package_window); + + if (max_cells) { + *max_cells -= 1; + if (*max_cells <= 0) + return 0; + } + + /* handle more if there's more, or return 0 if there isn't */ + goto repeat_connection_edge_package_raw_inbuf; +} + +/** Called when we've just received a relay data cell, when + * we've just finished flushing all bytes to stream <b>conn</b>, + * or when we've flushed *some* bytes to the stream <b>conn</b>. 
+ * + * If conn->outbuf is not too full, and our deliver window is + * low, send back a suitable number of stream-level sendme cells. + */ +void +connection_edge_consider_sending_sendme(edge_connection_t *conn) +{ + circuit_t *circ; + + if (connection_outbuf_too_full(TO_CONN(conn))) + return; + + circ = circuit_get_by_edge_conn(conn); + if (!circ) { + /* this can legitimately happen if the destroy has already + * arrived and torn down the circuit */ + log_info(LD_APP,"No circuit associated with conn. Skipping."); + return; + } + + while (conn->deliver_window <= STREAMWINDOW_START - STREAMWINDOW_INCREMENT) { + log_debug(conn->base_.type == CONN_TYPE_AP ?LD_APP:LD_EXIT, + "Outbuf %d, Queuing stream sendme.", + (int)conn->base_.outbuf_flushlen); + conn->deliver_window += STREAMWINDOW_INCREMENT; + if (connection_edge_send_command(conn, RELAY_COMMAND_SENDME, + NULL, 0) < 0) { + log_warn(LD_APP,"connection_edge_send_command failed. Skipping."); + return; /* the circuit's closed, don't continue */ + } + } +} + +/** The circuit <b>circ</b> has received a circuit-level sendme + * (on hop <b>layer_hint</b>, if we're the OP). Go through all the + * attached streams and let them resume reading and packaging, if + * their stream windows allow it. + */ +static void +circuit_resume_edge_reading(circuit_t *circ, crypt_path_t *layer_hint) +{ + if (circuit_queue_streams_are_blocked(circ)) { + log_debug(layer_hint?LD_APP:LD_EXIT,"Too big queue, no resuming"); + return; + } + log_debug(layer_hint?LD_APP:LD_EXIT,"resuming"); + + if (CIRCUIT_IS_ORIGIN(circ)) + circuit_resume_edge_reading_helper(TO_ORIGIN_CIRCUIT(circ)->p_streams, + circ, layer_hint); + else + circuit_resume_edge_reading_helper(TO_OR_CIRCUIT(circ)->n_streams, + circ, layer_hint); +} + +void +stream_choice_seed_weak_rng(void) +{ + crypto_seed_weak_rng(&stream_choice_rng); +} + +/** A helper function for circuit_resume_edge_reading() above. 
+ * The arguments are the same, except that <b>conn</b> is the head + * of a linked list of edge streams that should each be considered. + */ +static int +circuit_resume_edge_reading_helper(edge_connection_t *first_conn, + circuit_t *circ, + crypt_path_t *layer_hint) +{ + edge_connection_t *conn; + int n_packaging_streams, n_streams_left; + int packaged_this_round; + int cells_on_queue; + int cells_per_conn; + edge_connection_t *chosen_stream = NULL; + int max_to_package; + + if (first_conn == NULL) { + /* Don't bother to try to do the rest of this if there are no connections + * to resume. */ + return 0; + } + + /* How many cells do we have space for? It will be the minimum of + * the number needed to exhaust the package window, and the minimum + * needed to fill the cell queue. */ + max_to_package = circ->package_window; + if (CIRCUIT_IS_ORIGIN(circ)) { + cells_on_queue = circ->n_chan_cells.n; + } else { + or_circuit_t *or_circ = TO_OR_CIRCUIT(circ); + cells_on_queue = or_circ->p_chan_cells.n; + } + if (CELL_QUEUE_HIGHWATER_SIZE - cells_on_queue < max_to_package) + max_to_package = CELL_QUEUE_HIGHWATER_SIZE - cells_on_queue; + + /* Once we used to start listening on the streams in the order they + * appeared in the linked list. That leads to starvation on the + * streams that appeared later on the list, since the first streams + * would always get to read first. Instead, we just pick a random + * stream on the list, and enable reading for streams starting at that + * point (and wrapping around as if the list were circular). It would + * probably be better to actually remember which streams we've + * serviced in the past, but this is simple and effective. */ + + /* Select a stream uniformly at random from the linked list. We + * don't need cryptographic randomness here. 
*/ + { + int num_streams = 0; + for (conn = first_conn; conn; conn = conn->next_stream) { + num_streams++; + if (tor_weak_random_one_in_n(&stream_choice_rng, num_streams)) { + chosen_stream = conn; + } + /* Invariant: chosen_stream has been chosen uniformly at random from + * among the first num_streams streams on first_conn. + * + * (Note that we iterate over every stream on the circuit, so that after + * we've considered the first stream, we've chosen it with P=1; and + * after we consider the second stream, we've switched to it with P=1/2 + * and stayed with the first stream with P=1/2; and after we've + * considered the third stream, we've switched to it with P=1/3 and + * remained with one of the first two streams with P=(2/3), giving each + * one P=(1/2)(2/3) )=(1/3).) */ + } + } + + /* Count how many non-marked streams there are that have anything on + * their inbuf, and enable reading on all of the connections. */ + n_packaging_streams = 0; + /* Activate reading starting from the chosen stream */ + for (conn=chosen_stream; conn; conn = conn->next_stream) { + /* Start reading for the streams starting from here */ + if (conn->base_.marked_for_close || conn->package_window <= 0) + continue; + if (!layer_hint || conn->cpath_layer == layer_hint) { + connection_start_reading(TO_CONN(conn)); + + if (connection_get_inbuf_len(TO_CONN(conn)) > 0) + ++n_packaging_streams; + } + } + /* Go back and do the ones we skipped, circular-style */ + for (conn = first_conn; conn != chosen_stream; conn = conn->next_stream) { + if (conn->base_.marked_for_close || conn->package_window <= 0) + continue; + if (!layer_hint || conn->cpath_layer == layer_hint) { + connection_start_reading(TO_CONN(conn)); + + if (connection_get_inbuf_len(TO_CONN(conn)) > 0) + ++n_packaging_streams; + } + } + + if (n_packaging_streams == 0) /* avoid divide-by-zero */ + return 0; + + again: + + cells_per_conn = CEIL_DIV(max_to_package, n_packaging_streams); + + packaged_this_round = 0; + n_streams_left = 
0; + + /* Iterate over all connections. Package up to cells_per_conn cells on + * each. Update packaged_this_round with the total number of cells + * packaged, and n_streams_left with the number that still have data to + * package. + */ + for (conn=first_conn; conn; conn=conn->next_stream) { + if (conn->base_.marked_for_close || conn->package_window <= 0) + continue; + if (!layer_hint || conn->cpath_layer == layer_hint) { + int n = cells_per_conn, r; + /* handle whatever might still be on the inbuf */ + r = connection_edge_package_raw_inbuf(conn, 1, &n); + + /* Note how many we packaged */ + packaged_this_round += (cells_per_conn-n); + + if (r<0) { + /* Problem while packaging. (We already sent an end cell if + * possible) */ + connection_mark_for_close(TO_CONN(conn)); + continue; + } + + /* If there's still data to read, we'll be coming back to this stream. */ + if (connection_get_inbuf_len(TO_CONN(conn))) + ++n_streams_left; + + /* If the circuit won't accept any more data, return without looking + * at any more of the streams. Any connections that should be stopped + * have already been stopped by connection_edge_package_raw_inbuf. */ + if (circuit_consider_stop_edge_reading(circ, layer_hint)) + return -1; + /* XXXX should we also stop immediately if we fill up the cell queue? + * Probably. */ + } + } + + /* If we made progress, and we are willing to package more, and there are + * any streams left that want to package stuff... try again! + */ + if (packaged_this_round && packaged_this_round < max_to_package && + n_streams_left) { + max_to_package -= packaged_this_round; + n_packaging_streams = n_streams_left; + goto again; + } + + return 0; +} + +/** Check if the package window for <b>circ</b> is empty (at + * hop <b>layer_hint</b> if it's defined). + * + * If yes, tell edge streams to stop reading and return 1. + * Else return 0. 
+ */ +static int +circuit_consider_stop_edge_reading(circuit_t *circ, crypt_path_t *layer_hint) +{ + edge_connection_t *conn = NULL; + unsigned domain = layer_hint ? LD_APP : LD_EXIT; + + if (!layer_hint) { + or_circuit_t *or_circ = TO_OR_CIRCUIT(circ); + log_debug(domain,"considering circ->package_window %d", + circ->package_window); + if (circ->package_window <= 0) { + log_debug(domain,"yes, not-at-origin. stopped."); + for (conn = or_circ->n_streams; conn; conn=conn->next_stream) + connection_stop_reading(TO_CONN(conn)); + return 1; + } + return 0; + } + /* else, layer hint is defined, use it */ + log_debug(domain,"considering layer_hint->package_window %d", + layer_hint->package_window); + if (layer_hint->package_window <= 0) { + log_debug(domain,"yes, at-origin. stopped."); + for (conn = TO_ORIGIN_CIRCUIT(circ)->p_streams; conn; + conn=conn->next_stream) { + if (conn->cpath_layer == layer_hint) + connection_stop_reading(TO_CONN(conn)); + } + return 1; + } + return 0; +} + +/** Check if the deliver_window for circuit <b>circ</b> (at hop + * <b>layer_hint</b> if it's defined) is low enough that we should + * send a circuit-level sendme back down the circuit. If so, send + * enough sendmes that the window would be overfull if we sent any + * more. + */ +static void +circuit_consider_sending_sendme(circuit_t *circ, crypt_path_t *layer_hint) +{ +// log_fn(LOG_INFO,"Considering: layer_hint is %s", +// layer_hint ? "defined" : "null"); + while ((layer_hint ? layer_hint->deliver_window : circ->deliver_window) <= + CIRCWINDOW_START - CIRCWINDOW_INCREMENT) { + log_debug(LD_CIRC,"Queuing circuit sendme."); + if (layer_hint) + layer_hint->deliver_window += CIRCWINDOW_INCREMENT; + else + circ->deliver_window += CIRCWINDOW_INCREMENT; + if (relay_send_command_from_edge(0, circ, RELAY_COMMAND_SENDME, + NULL, 0, layer_hint) < 0) { + log_warn(LD_CIRC, + "relay_send_command_from_edge failed. 
Circuit's closed."); + return; /* the circuit's closed, don't continue */ + } + } +} + +/** The total number of cells we have allocated. */ +static size_t total_cells_allocated = 0; + +/** Release storage held by <b>cell</b>. */ +static inline void +packed_cell_free_unchecked(packed_cell_t *cell) +{ + --total_cells_allocated; + tor_free(cell); +} + +/** Allocate and return a new packed_cell_t. */ +STATIC packed_cell_t * +packed_cell_new(void) +{ + ++total_cells_allocated; + return tor_malloc_zero(sizeof(packed_cell_t)); +} + +/** Return a packed cell used outside by channel_t lower layer */ +void +packed_cell_free_(packed_cell_t *cell) +{ + if (!cell) + return; + packed_cell_free_unchecked(cell); +} + +/** Log current statistics for cell pool allocation at log level + * <b>severity</b>. */ +void +dump_cell_pool_usage(int severity) +{ + int n_circs = 0; + int n_cells = 0; + SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, c) { + n_cells += c->n_chan_cells.n; + if (!CIRCUIT_IS_ORIGIN(c)) + n_cells += TO_OR_CIRCUIT(c)->p_chan_cells.n; + ++n_circs; + } + SMARTLIST_FOREACH_END(c); + tor_log(severity, LD_MM, + "%d cells allocated on %d circuits. %d cells leaked.", + n_cells, n_circs, (int)total_cells_allocated - n_cells); +} + +/** Allocate a new copy of packed <b>cell</b>. */ +static inline packed_cell_t * +packed_cell_copy(const cell_t *cell, int wide_circ_ids) +{ + packed_cell_t *c = packed_cell_new(); + cell_pack(c, cell, wide_circ_ids); + return c; +} + +/** Append <b>cell</b> to the end of <b>queue</b>. */ +void +cell_queue_append(cell_queue_t *queue, packed_cell_t *cell) +{ + TOR_SIMPLEQ_INSERT_TAIL(&queue->head, cell, next); + ++queue->n; +} + +/** Append a newly allocated copy of <b>cell</b> to the end of the + * <b>exitward</b> (or app-ward) <b>queue</b> of <b>circ</b>. If + * <b>use_stats</b> is true, record statistics about the cell. 
+ */ +void +cell_queue_append_packed_copy(circuit_t *circ, cell_queue_t *queue, + int exitward, const cell_t *cell, + int wide_circ_ids, int use_stats) +{ + packed_cell_t *copy = packed_cell_copy(cell, wide_circ_ids); + (void)circ; + (void)exitward; + (void)use_stats; + + copy->inserted_timestamp = monotime_coarse_get_stamp(); + + cell_queue_append(queue, copy); +} + +/** Initialize <b>queue</b> as an empty cell queue. */ +void +cell_queue_init(cell_queue_t *queue) +{ + memset(queue, 0, sizeof(cell_queue_t)); + TOR_SIMPLEQ_INIT(&queue->head); +} + +/** Remove and free every cell in <b>queue</b>. */ +void +cell_queue_clear(cell_queue_t *queue) +{ + packed_cell_t *cell; + while ((cell = TOR_SIMPLEQ_FIRST(&queue->head))) { + TOR_SIMPLEQ_REMOVE_HEAD(&queue->head, next); + packed_cell_free_unchecked(cell); + } + TOR_SIMPLEQ_INIT(&queue->head); + queue->n = 0; +} + +/** Extract and return the cell at the head of <b>queue</b>; return NULL if + * <b>queue</b> is empty. */ +STATIC packed_cell_t * +cell_queue_pop(cell_queue_t *queue) +{ + packed_cell_t *cell = TOR_SIMPLEQ_FIRST(&queue->head); + if (!cell) + return NULL; + TOR_SIMPLEQ_REMOVE_HEAD(&queue->head, next); + --queue->n; + return cell; +} + +/** Initialize <b>queue</b> as an empty cell queue. */ +void +destroy_cell_queue_init(destroy_cell_queue_t *queue) +{ + memset(queue, 0, sizeof(destroy_cell_queue_t)); + TOR_SIMPLEQ_INIT(&queue->head); +} + +/** Remove and free every cell in <b>queue</b>. */ +void +destroy_cell_queue_clear(destroy_cell_queue_t *queue) +{ + destroy_cell_t *cell; + while ((cell = TOR_SIMPLEQ_FIRST(&queue->head))) { + TOR_SIMPLEQ_REMOVE_HEAD(&queue->head, next); + tor_free(cell); + } + TOR_SIMPLEQ_INIT(&queue->head); + queue->n = 0; +} + +/** Extract and return the cell at the head of <b>queue</b>; return NULL if + * <b>queue</b> is empty. 
*/ +STATIC destroy_cell_t * +destroy_cell_queue_pop(destroy_cell_queue_t *queue) +{ + destroy_cell_t *cell = TOR_SIMPLEQ_FIRST(&queue->head); + if (!cell) + return NULL; + TOR_SIMPLEQ_REMOVE_HEAD(&queue->head, next); + --queue->n; + return cell; +} + +/** Append a destroy cell for <b>circid</b> to <b>queue</b>. */ +void +destroy_cell_queue_append(destroy_cell_queue_t *queue, + circid_t circid, + uint8_t reason) +{ + destroy_cell_t *cell = tor_malloc_zero(sizeof(destroy_cell_t)); + cell->circid = circid; + cell->reason = reason; + /* Not yet used, but will be required for OOM handling. */ + cell->inserted_timestamp = monotime_coarse_get_stamp(); + + TOR_SIMPLEQ_INSERT_TAIL(&queue->head, cell, next); + ++queue->n; +} + +/** Convert a destroy_cell_t to a newly allocated cell_t. Frees its input. */ +static packed_cell_t * +destroy_cell_to_packed_cell(destroy_cell_t *inp, int wide_circ_ids) +{ + packed_cell_t *packed = packed_cell_new(); + cell_t cell; + memset(&cell, 0, sizeof(cell)); + cell.circ_id = inp->circid; + cell.command = CELL_DESTROY; + cell.payload[0] = inp->reason; + cell_pack(packed, &cell, wide_circ_ids); + + tor_free(inp); + return packed; +} + +/** Return the total number of bytes used for each packed_cell in a queue. + * Approximate. */ +size_t +packed_cell_mem_cost(void) +{ + return sizeof(packed_cell_t); +} + +/* DOCDOC */ +size_t +cell_queues_get_total_allocation(void) +{ + return total_cells_allocated * packed_cell_mem_cost(); +} + +/** How long after we've been low on memory should we try to conserve it? */ +#define MEMORY_PRESSURE_INTERVAL (30*60) + +/** The time at which we were last low on memory. */ +static time_t last_time_under_memory_pressure = 0; + +/** Check whether we've got too much space used for cells. If so, + * call the OOM handler and return 1. Otherwise, return 0. 
*/
+STATIC int
+cell_queues_check_size(void)
+{
+  time_t now = time(NULL);
+  /* Add up everything that counts against the limit: packed cells, buffers,
+   * compression state, and the rend, geoip-client and DNS caches. */
+  size_t alloc = cell_queues_get_total_allocation();
+  alloc += buf_get_total_allocation();
+  alloc += tor_compress_get_total_allocation();
+  const size_t rend_cache_total = rend_cache_get_total_allocation();
+  alloc += rend_cache_total;
+  const size_t geoip_client_cache_total =
+    geoip_client_cache_total_allocation();
+  alloc += geoip_client_cache_total;
+  const size_t dns_cache_total = dns_cache_total_allocation();
+  alloc += dns_cache_total;
+  if (alloc >= get_options()->MaxMemInQueues_low_threshold) {
+    /* Crossing the low threshold only records the time; the OOM handlers
+     * run only once the full MaxMemInQueues limit is reached. */
+    last_time_under_memory_pressure = approx_time();
+    if (alloc >= get_options()->MaxMemInQueues) {
+      /* If we're spending over 20% of the memory limit on hidden service
+       * descriptors, free them until we're down to 10%. Do the same for the
+       * geoip client cache and the DNS cache. */
+      if (rend_cache_total > get_options()->MaxMemInQueues / 5) {
+        const size_t bytes_to_remove =
+          rend_cache_total - (size_t)(get_options()->MaxMemInQueues / 10);
+        alloc -= hs_cache_handle_oom(now, bytes_to_remove);
+      }
+      if (geoip_client_cache_total > get_options()->MaxMemInQueues / 5) {
+        const size_t bytes_to_remove =
+          geoip_client_cache_total -
+          (size_t)(get_options()->MaxMemInQueues / 10);
+        alloc -= geoip_client_cache_handle_oom(now, bytes_to_remove);
+      }
+      if (dns_cache_total > get_options()->MaxMemInQueues / 5) {
+        const size_t bytes_to_remove =
+          dns_cache_total - (size_t)(get_options()->MaxMemInQueues / 10);
+        alloc -= dns_cache_handle_oom(now, bytes_to_remove);
+      }
+      /* Hand whatever is still allocated to the circuit OOM handler. */
+      circuits_handle_oom(alloc);
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/** Return true if we've been under memory pressure in the last
+ * MEMORY_PRESSURE_INTERVAL seconds.
+ *
+ * "Under memory pressure" means cell_queues_check_size() saw the total
+ * allocation reach the MaxMemInQueues_low_threshold option.  If that has
+ * never happened, last_time_under_memory_pressure is still 0 and this
+ * returns false for any current time at or past MEMORY_PRESSURE_INTERVAL. */
+int
+have_been_under_memory_pressure(void)
+{
+  /* True while "now" is still inside the window that opened at the last
+   * pressure event.  The comparison used to be the other way round, which
+   * reported pressure only after the window had expired. */
+  return approx_time() <
+         last_time_under_memory_pressure + MEMORY_PRESSURE_INTERVAL;
+}
+
+/**
+ * Update the number of cells available on the circuit's n_chan or p_chan's
+ * circuit mux. 
+ */
+void
+update_circuit_on_cmux_(circuit_t *circ, cell_direction_t direction,
+                        const char *file, int lineno)
+{
+  channel_t *chan = NULL;
+  or_circuit_t *or_circ = NULL;
+  circuitmux_t *cmux = NULL;
+
+  tor_assert(circ);
+
+  /* Okay, get the channel */
+  if (direction == CELL_DIRECTION_OUT) {
+    chan = circ->n_chan;
+  } else {
+    /* or_circ is set only on this path; it is read again below only when
+     * direction is not CELL_DIRECTION_OUT. */
+    or_circ = TO_OR_CIRCUIT(circ);
+    chan = or_circ->p_chan;
+  }
+
+  tor_assert(chan);
+  tor_assert(chan->cmux);
+
+  /* Now get the cmux */
+  cmux = chan->cmux;
+
+  /* Cmux sanity check.  file and lineno identify the caller and are used
+   * only in this warning. */
+  if (! circuitmux_is_circuit_attached(cmux, circ)) {
+    log_warn(LD_BUG, "called on non-attached circuit from %s:%d",
+             file, lineno);
+    return;
+  }
+  tor_assert(circuitmux_attached_circuit_direction(cmux, circ) == direction);
+
+  /* Update the number of cells we have for the circuit mux */
+  if (direction == CELL_DIRECTION_OUT) {
+    circuitmux_set_num_cells(cmux, circ, circ->n_chan_cells.n);
+  } else {
+    circuitmux_set_num_cells(cmux, circ, or_circ->p_chan_cells.n);
+  }
+}
+
+/** Remove all circuits from the cmux on <b>chan</b>.
+ *
+ * If <b>circuits_out</b> is non-NULL, add all detached circuits to
+ * <b>circuits_out</b>.
+ *
+ * The channel's num_n_circuits and num_p_circuits counters are reset to 0.
+ **/
+void
+channel_unlink_all_circuits(channel_t *chan, smartlist_t *circuits_out)
+{
+  tor_assert(chan);
+  tor_assert(chan->cmux);
+
+  circuitmux_detach_all_circuits(chan->cmux, circuits_out);
+  chan->num_n_circuits = 0;
+  chan->num_p_circuits = 0;
+}
+
+/** Block (if <b>block</b> is true) or unblock (if <b>block</b> is false)
+ * every edge connection that is using <b>circ</b> to write to <b>chan</b>,
+ * and start or stop reading as appropriate.
+ *
+ * If <b>stream_id</b> is nonzero, block only the edge connection whose
+ * stream_id matches it.
+ *
+ * Returns the number of streams whose status we changed. 
+ */
+static int
+set_streams_blocked_on_circ(circuit_t *circ, channel_t *chan,
+                            int block, streamid_t stream_id)
+{
+  edge_connection_t *edge = NULL;
+  int n = 0;
+  /* Note that the circuit-level flag is updated here even when stream_id
+   * restricts the per-stream changes below to a single stream. */
+  if (circ->n_chan == chan) {
+    circ->streams_blocked_on_n_chan = block;
+    /* If circ is not an origin circuit, edge stays NULL and the loop below
+     * does nothing. */
+    if (CIRCUIT_IS_ORIGIN(circ))
+      edge = TO_ORIGIN_CIRCUIT(circ)->p_streams;
+  } else {
+    circ->streams_blocked_on_p_chan = block;
+    tor_assert(!CIRCUIT_IS_ORIGIN(circ));
+    edge = TO_OR_CIRCUIT(circ)->n_streams;
+  }
+
+  for (; edge; edge = edge->next_stream) {
+    connection_t *conn = TO_CONN(edge);
+    if (stream_id && edge->stream_id != stream_id)
+      continue;
+
+    /* Count only the streams whose blocked flag actually changes. */
+    if (edge->edge_blocked_on_circ != block) {
+      ++n;
+      edge->edge_blocked_on_circ = block;
+    }
+
+    if (!conn->read_event) {
+      /* This connection is a placeholder for something; probably a DNS
+       * request.  It can't actually stop or start reading.*/
+      continue;
+    }
+
+    if (block) {
+      if (connection_is_reading(conn))
+        connection_stop_reading(conn);
+    } else {
+      /* Is this right? */
+      if (!connection_is_reading(conn))
+        connection_start_reading(conn);
+    }
+  }
+
+  return n;
+}
+
+/** Extract the command from a packed cell.
+ *
+ * The command byte is read from offset 4 of the cell body when
+ * <b>wide_circ_ids</b> is set, and from offset 2 otherwise. */
+static uint8_t
+packed_cell_get_command(const packed_cell_t *cell, int wide_circ_ids)
+{
+  if (wide_circ_ids) {
+    return get_uint8(cell->body+4);
+  } else {
+    return get_uint8(cell->body+2);
+  }
+}
+
+/** Extract the circuit ID from a packed cell.
+ *
+ * The ID is read from the start of the cell body and converted from network
+ * to host byte order: 32 bits when <b>wide_circ_ids</b> is set, 16 bits
+ * otherwise. */
+circid_t
+packed_cell_get_circid(const packed_cell_t *cell, int wide_circ_ids)
+{
+  if (wide_circ_ids) {
+    return ntohl(get_uint32(cell->body));
+  } else {
+    return ntohs(get_uint16(cell->body));
+  }
+}
+
+/** Pull as many cells as possible (but no more than <b>max</b>) from the
+ * queue of the first active circuit on <b>chan</b>, and write them to
+ * <b>chan</b>->outbuf.  Return the number of cells written.  Advance
+ * the active circuit pointer to the next active circuit in the ring. 
*/
+MOCK_IMPL(int,
+channel_flush_from_first_active_circuit, (channel_t *chan, int max))
+{
+  circuitmux_t *cmux = NULL;
+  int n_flushed = 0;
+  cell_queue_t *queue;
+  destroy_cell_queue_t *destroy_queue=NULL;
+  circuit_t *circ;
+  or_circuit_t *or_circ;
+  int streams_blocked;
+  packed_cell_t *cell;
+
+  /* Get the cmux */
+  tor_assert(chan);
+  tor_assert(chan->cmux);
+  cmux = chan->cmux;
+
+  /* Main loop: pick a circuit, send a cell, update the cmux */
+  while (n_flushed < max) {
+    circ = circuitmux_get_first_active_circuit(cmux, &destroy_queue);
+    /* A non-NULL destroy_queue is handled first: one queued DESTROY cell is
+     * sent per iteration and counted in n_flushed. */
+    if (destroy_queue) {
+      destroy_cell_t *dcell;
+      /* this code is duplicated from some of the logic below. Ugly! XXXX */
+      /* If we are given a destroy_queue here, then it is required to be
+       * nonempty... */
+      tor_assert(destroy_queue->n > 0);
+      dcell = destroy_cell_queue_pop(destroy_queue);
+      /* ...and pop() will always yield a cell from a nonempty queue. */
+      tor_assert(dcell);
+      /* frees dcell */
+      cell = destroy_cell_to_packed_cell(dcell, chan->wide_circ_ids);
+      /* Send the DESTROY cell. It is very unlikely that this fails but just
+       * in case, get rid of the channel. */
+      if (channel_write_packed_cell(chan, cell) < 0) {
+        /* The cell has been freed. */
+        channel_mark_for_close(chan);
+        continue;
+      }
+      /* Update the cmux destroy counter */
+      circuitmux_notify_xmit_destroy(cmux);
+      cell = NULL;
+      ++n_flushed;
+      continue;
+    }
+    /* If it returns NULL, no cells left to send */
+    if (!circ) break;
+
+    /* Select the queue and blocked flag for the side of circ that faces
+     * chan. */
+    if (circ->n_chan == chan) {
+      queue = &circ->n_chan_cells;
+      streams_blocked = circ->streams_blocked_on_n_chan;
+    } else {
+      or_circ = TO_OR_CIRCUIT(circ);
+      tor_assert(or_circ->p_chan == chan);
+      queue = &TO_OR_CIRCUIT(circ)->p_chan_cells;
+      streams_blocked = circ->streams_blocked_on_p_chan;
+    }
+
+    /* Circuitmux told us this was active, so it should have cells */
+    if (/*BUG(*/ queue->n == 0 /*)*/) {
+      log_warn(LD_BUG, "Found a supposedly active circuit with no cells "
+               "to send. Trying to recover.");
+      circuitmux_set_num_cells(cmux, circ, 0);
+      if (! circ->marked_for_close)
+        circuit_mark_for_close(circ, END_CIRC_REASON_INTERNAL);
+      continue;
+    }
+
+    tor_assert(queue->n > 0);
+
+    /*
+     * Get just one cell here; once we've sent it, that can change the circuit
+     * selection, so we have to loop around for another even if this circuit
+     * has more than one.
+     */
+    cell = cell_queue_pop(queue);
+
+    /* Calculate the exact time that this cell has spent in the queue. */
+    if (get_options()->CellStatistics ||
+        get_options()->TestingEnableCellStatsEvent) {
+      uint32_t timestamp_now = monotime_coarse_get_stamp();
+      uint32_t msec_waiting =
+        (uint32_t) monotime_coarse_stamp_units_to_approx_msec(
+                         timestamp_now - cell->inserted_timestamp);
+
+      /* Per-circuit totals are kept only for non-origin circuits. */
+      if (get_options()->CellStatistics && !CIRCUIT_IS_ORIGIN(circ)) {
+        or_circ = TO_OR_CIRCUIT(circ);
+        or_circ->total_cell_waiting_time += msec_waiting;
+        or_circ->processed_cells++;
+      }
+
+      if (get_options()->TestingEnableCellStatsEvent) {
+        uint8_t command = packed_cell_get_command(cell, chan->wide_circ_ids);
+
+        testing_cell_stats_entry_t *ent =
+          tor_malloc_zero(sizeof(testing_cell_stats_entry_t));
+        ent->command = command;
+        /* Stored in units of 10 msec. */
+        ent->waiting_time = msec_waiting / 10;
+        ent->removed = 1;
+        if (circ->n_chan == chan)
+          ent->exitward = 1;
+        if (!circ->testing_cell_stats)
+          circ->testing_cell_stats = smartlist_new();
+        smartlist_add(circ->testing_cell_stats, ent);
+      }
+    }
+
+    /* If we just flushed our queue and this circuit is used for a
+     * tunneled directory request, possibly advance its state. */
+    if (queue->n == 0 && chan->dirreq_id)
+      geoip_change_dirreq_state(chan->dirreq_id,
+                                DIRREQ_TUNNELED,
+                                DIRREQ_CIRC_QUEUE_FLUSHED);
+
+    /* Now send the cell. It is very unlikely that this fails but just in
+     * case, get rid of the channel. */
+    if (channel_write_packed_cell(chan, cell) < 0) {
+      /* The cell has been freed at this point. */
+      channel_mark_for_close(chan);
+      continue;
+    }
+    cell = NULL;
+
+    /*
+     * Don't packed_cell_free_unchecked(cell) here because the channel will
+     * do so when it gets out of the channel queue (probably already did, in
+     * which case that was an immediate double-free bug).
+     */
+
+    /* Update the counter */
+    ++n_flushed;
+
+    /*
+     * Now update the cmux; tell it we've just sent a cell, and how many
+     * we have left.
+     */
+    circuitmux_notify_xmit_cells(cmux, circ, 1);
+    circuitmux_set_num_cells(cmux, circ, queue->n);
+    if (queue->n == 0)
+      log_debug(LD_GENERAL, "Made a circuit inactive.");
+
+    /* Is the cell queue low enough to unblock all the streams that are waiting
+     * to write to this circuit? */
+    if (streams_blocked && queue->n <= CELL_QUEUE_LOWWATER_SIZE)
+      set_streams_blocked_on_circ(circ, chan, 0, 0); /* unblock streams */
+
+    /* If n_flushed < max still, loop around and pick another circuit */
+  }
+
+  /* Okay, we're done sending now */
+  return n_flushed;
+}
+
+/* Minimum value is the maximum circuit window size.
+ *
+ * SENDME cells make it possible to control how many cells can be in flight
+ * on a circuit from end to end. This logic ensures that any circuit cell
+ * queue holds at most a maximum number of cells.
+ *
+ * Because the Tor protocol allows a client to exit at any hop in a circuit,
+ * and a circuit can have a maximum of 8 hops, in theory the normal worst
+ * case is the circuit window start value times the maximum number of hops
+ * (8). Having more cells than that means something is wrong.
+ *
+ * However, because padding cells aren't counted in the package window, we set
+ * the maximum size to a reasonably large size which we expect never to
+ * reach in theory. And if we ever do because of future changes, we'll
+ * be able to control it with a consensus parameter. 
+ *
+ * XXX: Unfortunately, END cells aren't accounted for in the circuit window
+ * which means that for instance if a client opens 8001 streams, the 8001
+ * following END cells will queue up in the circuit which will get closed if
+ * the max limit is 8000. Which is sad because it is allowed by the Tor
+ * protocol. But, we need an upper bound on circuit queue in order to avoid
+ * DoS memory pressure so the default size is a middle ground between not
+ * having any limit and having a very restricted one. This is why we can also
+ * control it through a consensus parameter. */
+#define RELAY_CIRC_CELL_QUEUE_SIZE_MIN CIRCWINDOW_START_MAX
+/* We can't have a consensus parameter above this value. */
+#define RELAY_CIRC_CELL_QUEUE_SIZE_MAX INT32_MAX
+/* Default value is set to a large value so we can handle padding cells
+ * properly which aren't accounted for in the SENDME window. Default is 50000
+ * allowed cells in the queue resulting in ~25MB. */
+#define RELAY_CIRC_CELL_QUEUE_SIZE_DEFAULT \
+  (50 * RELAY_CIRC_CELL_QUEUE_SIZE_MIN)
+
+/* The maximum number of cells a circuit queue can contain. This is updated
+ * at every new consensus and controlled by a parameter. */
+static int32_t max_circuit_cell_queue_size =
+  RELAY_CIRC_CELL_QUEUE_SIZE_DEFAULT;
+
+/* Called when the consensus has changed. At this stage, the global consensus
+ * object has NOT been updated. It is called from
+ * notify_before_networkstatus_changes().
+ *
+ * Reads the "circ_max_cell_queue_size" parameter from <b>ns</b>, passing
+ * RELAY_CIRC_CELL_QUEUE_SIZE_DEFAULT, _MIN and _MAX as the default and the
+ * bounds, and stores the result in max_circuit_cell_queue_size. */
+void
+relay_consensus_has_changed(const networkstatus_t *ns)
+{
+  tor_assert(ns);
+
+  /* Update the circuit max cell queue size from the consensus. */
+  max_circuit_cell_queue_size =
+    networkstatus_get_param(ns, "circ_max_cell_queue_size",
+                            RELAY_CIRC_CELL_QUEUE_SIZE_DEFAULT,
+                            RELAY_CIRC_CELL_QUEUE_SIZE_MIN,
+                            RELAY_CIRC_CELL_QUEUE_SIZE_MAX);
+}
+
+/** Add <b>cell</b> to the queue of <b>circ</b> writing to <b>chan</b>
+ * transmitting in <b>direction</b>. 
+ *
+ * The given <b>cell</b> is copied onto the circuit queue so the caller must
+ * cleanup the memory.
+ *
+ * Nothing is queued if <b>circ</b> is already marked for close, or if the
+ * target queue already holds max_circuit_cell_queue_size cells; in the
+ * latter case the circuit is marked for close.
+ *
+ * This function is part of the fast path. */
+void
+append_cell_to_circuit_queue(circuit_t *circ, channel_t *chan,
+                             cell_t *cell, cell_direction_t direction,
+                             streamid_t fromstream)
+{
+  or_circuit_t *orcirc = NULL;
+  cell_queue_t *queue;
+  int streams_blocked;
+  int exitward;
+  if (circ->marked_for_close)
+    return;
+
+  /* Select the queue and blocked flag that match the direction. */
+  exitward = (direction == CELL_DIRECTION_OUT);
+  if (exitward) {
+    queue = &circ->n_chan_cells;
+    streams_blocked = circ->streams_blocked_on_n_chan;
+  } else {
+    orcirc = TO_OR_CIRCUIT(circ);
+    queue = &orcirc->p_chan_cells;
+    streams_blocked = circ->streams_blocked_on_p_chan;
+  }
+
+  if (PREDICT_UNLIKELY(queue->n >= max_circuit_cell_queue_size)) {
+    log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL,
+           "%s circuit has %d cells in its queue, maximum allowed is %d. "
+           "Closing circuit for safety reasons.",
+           (exitward) ? "Outbound" : "Inbound", queue->n,
+           max_circuit_cell_queue_size);
+    circuit_mark_for_close(circ, END_CIRC_REASON_RESOURCELIMIT);
+    stats_n_circ_max_cell_reached++;
+    return;
+  }
+
+  /* Very important that we copy to the circuit queue because all calls to
+   * this function use the stack for the cell memory. */
+  cell_queue_append_packed_copy(circ, queue, exitward, cell,
+                                chan->wide_circ_ids, 1);
+
+  /* Check and run the OOM if needed. */
+  if (PREDICT_UNLIKELY(cell_queues_check_size())) {
+    /* We ran the OOM handler which might have closed this circuit. */
+    if (circ->marked_for_close)
+      return;
+  }
+
+  /* If we have too many cells on the circuit, we should stop reading from
+   * the edge streams for a while. */
+  if (!streams_blocked && queue->n >= CELL_QUEUE_HIGHWATER_SIZE)
+    set_streams_blocked_on_circ(circ, chan, 1, 0); /* block streams */
+
+  /* streams_blocked still holds the value read before the cell was queued. */
+  if (streams_blocked && fromstream) {
+    /* This edge connection is apparently not blocked; block it. */
+    set_streams_blocked_on_circ(circ, chan, 1, fromstream);
+  }
+
+  update_circuit_on_cmux(circ, direction);
+  if (queue->n == 1) {
+    /* This was the first cell added to the queue.  We just made this
+     * circuit active. */
+    log_debug(LD_GENERAL, "Made a circuit active.");
+  }
+
+  /* New way: mark this as having waiting cells for the scheduler */
+  scheduler_channel_has_waiting_cells(chan);
+}
+
+/** Append an encoded value of <b>addr</b> to <b>payload_out</b>, which must
+ * have at least 18 bytes of free space.  The encoding is, as specified in
+ * tor-spec.txt:
+ *   RESOLVED_TYPE_IPV4 or RESOLVED_TYPE_IPV6  [1 byte]
+ *   LENGTH                                    [1 byte]
+ *   ADDRESS                                   [length bytes]
+ * Return the number of bytes added (6 for an IPv4 address, 18 for an IPv6
+ * address), or -1 on error (any other address family). */
+int
+append_address_to_payload(uint8_t *payload_out, const tor_addr_t *addr)
+{
+  uint32_t a;
+  switch (tor_addr_family(addr)) {
+  case AF_INET:
+    payload_out[0] = RESOLVED_TYPE_IPV4;
+    payload_out[1] = 4;
+    a = tor_addr_to_ipv4n(addr);
+    memcpy(payload_out+2, &a, 4);
+    return 6;
+  case AF_INET6:
+    payload_out[0] = RESOLVED_TYPE_IPV6;
+    payload_out[1] = 16;
+    memcpy(payload_out+2, tor_addr_to_in6_addr8(addr), 16);
+    return 18;
+  case AF_UNSPEC:
+  default:
+    return -1;
+  }
+}
+
+/** Given <b>payload_len</b> bytes at <b>payload</b>, starting with an address
+ * encoded as by append_address_to_payload(), try to decode the address into
+ * *<b>addr_out</b>.  Return the next byte in the payload after the address on
+ * success, or NULL on failure. 
*/
+const uint8_t *
+decode_address_from_payload(tor_addr_t *addr_out, const uint8_t *payload,
+                            int payload_len)
+{
+  /* Need the type and length bytes, then the LENGTH bytes they announce. */
+  if (payload_len < 2)
+    return NULL;
+  if (payload_len < 2+payload[1])
+    return NULL;
+
+  switch (payload[0]) {
+  case RESOLVED_TYPE_IPV4:
+    if (payload[1] != 4)
+      return NULL;
+    tor_addr_from_ipv4n(addr_out, get_uint32(payload+2));
+    break;
+  case RESOLVED_TYPE_IPV6:
+    if (payload[1] != 16)
+      return NULL;
+    tor_addr_from_ipv6_bytes(addr_out, (char*)(payload+2));
+    break;
+  default:
+    /* Any other type is not a failure: the address is set to unspec and
+     * the announced LENGTH bytes are still skipped below. */
+    tor_addr_make_unspec(addr_out);
+    break;
+  }
+  return payload + 2 + payload[1];
+}
+
+/** Remove all the cells queued on <b>circ</b> for <b>chan</b>.
+ *
+ * If <b>chan</b> has a cmux and <b>circ</b> is attached to it, the cmux's
+ * cell count for the circuit is refreshed afterwards. */
+void
+circuit_clear_cell_queue(circuit_t *circ, channel_t *chan)
+{
+  cell_queue_t *queue;
+  cell_direction_t direction;
+
+  if (circ->n_chan == chan) {
+    queue = &circ->n_chan_cells;
+    direction = CELL_DIRECTION_OUT;
+  } else {
+    or_circuit_t *orcirc = TO_OR_CIRCUIT(circ);
+    tor_assert(orcirc->p_chan == chan);
+    queue = &orcirc->p_chan_cells;
+    direction = CELL_DIRECTION_IN;
+  }
+
+  /* Clear the queue */
+  cell_queue_clear(queue);
+
+  /* Update the cell counter in the cmux */
+  if (chan->cmux && circuitmux_is_circuit_attached(chan->cmux, circ))
+    update_circuit_on_cmux(circ, direction);
+}
+
+/** Return 1 if we shouldn't restart reading on this circuit, even if
+ * we get a SENDME.  Else return 0.
+ *
+ * Origin circuits report their streams_blocked_on_n_chan flag; all other
+ * circuits report streams_blocked_on_p_chan.
+*/
+static int
+circuit_queue_streams_are_blocked(circuit_t *circ)
+{
+  if (CIRCUIT_IS_ORIGIN(circ)) {
+    return circ->streams_blocked_on_n_chan;
+  } else {
+    return circ->streams_blocked_on_p_chan;
+  }
+}
diff --cc src/core/or/scheduler_kist.c
index 449478df7,000000000..41c346ac7
mode 100644,000000..100644
--- a/src/core/or/scheduler_kist.c
+++ b/src/core/or/scheduler_kist.c
@@@ -1,842 -1,0 +1,840 @@@
+/* Copyright (c) 2017-2018, The Tor Project, Inc. 
*/
+/* See LICENSE for licensing information */
+
+#define SCHEDULER_KIST_PRIVATE
+
+#include "core/or/or.h"
+#include "lib/container/buffers.h"
+#include "app/config/config.h"
+#include "core/mainloop/connection.h"
+#include "feature/nodelist/networkstatus.h"
+#define TOR_CHANNEL_INTERNAL_
+#include "core/or/channel.h"
+#include "core/or/channeltls.h"
+#define SCHEDULER_PRIVATE_
+#include "core/or/scheduler.h"
+#include "lib/math/fp.h"
+
+#include "core/or/or_connection_st.h"
+
- #define TLS_PER_CELL_OVERHEAD 29
-
+#ifdef HAVE_SYS_IOCTL_H
+#include <sys/ioctl.h>
+#endif
+
+#ifdef HAVE_KIST_SUPPORT
+/* Kernel interface needed for KIST. */
+#include <netinet/tcp.h>
+#include <linux/sockios.h>
+#endif /* HAVE_KIST_SUPPORT */
+
+/*****************************************************************************
+ * Data structures and supporting functions
+ *****************************************************************************/
+
+/* Socket_table hash table stuff. The socket_table keeps track of per-socket
+ * limit information imposed by kist and used by kist. */
+
+/* Hash a socket table entry: its channel's global identifier, truncated to
+ * 32 bits. */
+static uint32_t
+socket_table_ent_hash(const socket_table_ent_t *ent)
+{
+  return (uint32_t)ent->chan->global_identifier;
+}
+
+/* Two socket table entries are equal iff they point at the same channel
+ * object. */
+static unsigned
+socket_table_ent_eq(const socket_table_ent_t *a, const socket_table_ent_t *b)
+{
+  return a->chan == b->chan;
+}
+
+typedef HT_HEAD(socket_table_s, socket_table_ent_s) socket_table_t;
+
+static socket_table_t socket_table = HT_INITIALIZER();
+
+HT_PROTOTYPE(socket_table_s, socket_table_ent_s, node, socket_table_ent_hash,
+             socket_table_ent_eq)
+HT_GENERATE2(socket_table_s, socket_table_ent_s, node, socket_table_ent_hash,
+             socket_table_ent_eq, 0.6, tor_reallocarray, tor_free_)
+
+/* outbuf_table hash table stuff. The outbuf_table keeps track of which
+ * channels have data sitting in their outbuf so the kist scheduler can force
+ * a write from outbuf to kernel periodically during a run and at the end of a
+ * run. 
*/
+
+/* One outbuf_table entry: the hash-table linkage plus the channel it
+ * refers to. */
+typedef struct outbuf_table_ent_s {
+  HT_ENTRY(outbuf_table_ent_s) node;
+  channel_t *chan;
+} outbuf_table_ent_t;
+
+/* Hash an outbuf table entry: its channel's global identifier, truncated to
+ * 32 bits. */
+static uint32_t
+outbuf_table_ent_hash(const outbuf_table_ent_t *ent)
+{
+  return (uint32_t)ent->chan->global_identifier;
+}
+
+/* Two outbuf table entries are equal iff their channels have the same global
+ * identifier (the socket table compares channel pointers instead). */
+static unsigned
+outbuf_table_ent_eq(const outbuf_table_ent_t *a, const outbuf_table_ent_t *b)
+{
+  return a->chan->global_identifier == b->chan->global_identifier;
+}
+
+HT_PROTOTYPE(outbuf_table_s, outbuf_table_ent_s, node, outbuf_table_ent_hash,
+             outbuf_table_ent_eq)
+HT_GENERATE2(outbuf_table_s, outbuf_table_ent_s, node, outbuf_table_ent_hash,
+             outbuf_table_ent_eq, 0.6, tor_reallocarray, tor_free_)
+
+/*****************************************************************************
+ * Other internal data
+ *****************************************************************************/
+
+/* Store the last time the scheduler was run so we can decide when to next run
+ * the scheduler based on it. */
+static monotime_t scheduler_last_run;
+/* This is a factor for the extra_space calculation in kist per-socket limits.
+ * It is the number of extra congestion windows we want to write to the kernel.
+ */
+static double sock_buf_size_factor = 1.0;
+/* How often the scheduler runs. */
+STATIC int sched_run_interval = KIST_SCHED_RUN_INTERVAL_DEFAULT;
+
+#ifdef HAVE_KIST_SUPPORT
+/* Indicate if KIST lite mode is on or off. We can disable it at runtime.
+ * Important to have because of the KISTLite -> KIST possible transition. */
+static unsigned int kist_lite_mode = 0;
+/* Indicate if we don't have the kernel support. This can happen if the kernel
+ * changed and it doesn't recognize the values passed to the syscalls needed
+ * by KIST. In that case, fallback to the naive approach. 
*/
+static unsigned int kist_no_kernel_support = 0;
+#else /* !(defined(HAVE_KIST_SUPPORT)) */
+/* Built without KIST support: lite mode is always on. */
+static unsigned int kist_lite_mode = 1;
+#endif /* defined(HAVE_KIST_SUPPORT) */
+
+/*****************************************************************************
+ * Internally called function implementations
+ *****************************************************************************/
+
+/* Little helper function to get the length of a channel's output buffer.
+ * Returns 0 (after flagging a bug) if the channel has no connection. */
+static inline size_t
+channel_outbuf_length(channel_t *chan)
+{
+  /* In theory, this can not happen because we can not schedule a channel
+   * without a connection that has its outbuf initialized. Just in case, bug
+   * on this so we can understand a bit more why it happened. */
+  if (SCHED_BUG(BASE_CHAN_TO_TLS(chan)->conn == NULL, chan)) {
+    return 0;
+  }
+  return buf_datalen(TO_CONN(BASE_CHAN_TO_TLS(chan)->conn)->outbuf);
+}
+
+/* Little helper function for HT_FOREACH_FN: write the entry's channel outbuf
+ * to the kernel and keep the entry in the table. */
+static int
+each_channel_write_to_kernel(outbuf_table_ent_t *ent, void *data)
+{
+  (void) data; /* Make compiler happy. */
+  channel_write_to_kernel(ent->chan);
+  return 0; /* Returning non-zero removes the element from the table. */
+}
+
+/* Free the given outbuf table entry ent. Always returns 1 so that it can
+ * also be used as an HT_FOREACH_FN callback that removes the entry. */
+static int
+free_outbuf_info_by_ent(outbuf_table_ent_t *ent, void *data)
+{
+  (void) data; /* Make compiler happy. */
+  log_debug(LD_SCHED, "Freeing outbuf table entry from chan=%" PRIu64,
+            ent->chan->global_identifier);
+  tor_free(ent);
+  return 1; /* So HT_FOREACH_FN will remove the element */
+}
+
+/* Free the given socket table entry ent. Always returns 1 so that it can
+ * also be used as an HT_FOREACH_FN callback that removes the entry. */
+static int
+free_socket_info_by_ent(socket_table_ent_t *ent, void *data)
+{
+  (void) data; /* Make compiler happy. */
+  log_debug(LD_SCHED, "Freeing socket table entry from chan=%" PRIu64,
+            ent->chan->global_identifier);
+  tor_free(ent);
+  return 1; /* So HT_FOREACH_FN will remove the element */
+}
+
+/* Clean up socket_table. 
Probably because the KIST sched impl is going away */
+static void
+free_all_socket_info(void)
+{
+  /* Free every entry (the callback asks for its removal), then clear the
+   * table itself. */
+  HT_FOREACH_FN(socket_table_s, &socket_table, free_socket_info_by_ent, NULL);
+  HT_CLEAR(socket_table_s, &socket_table);
+}
+
+/* Return the entry for chan in table, or whatever HT_FIND yields when there
+ * is none (callers test the result against NULL). Only the chan field of the
+ * on-stack search key is filled in. */
+static socket_table_ent_t *
+socket_table_search(socket_table_t *table, const channel_t *chan)
+{
+  socket_table_ent_t search, *ent = NULL;
+  search.chan = chan;
+  ent = HT_FIND(socket_table_s, table, &search);
+  return ent;
+}
+
+/* Free a socket entry in table for the given chan. Does nothing if chan has
+ * no entry. */
+static void
+free_socket_info_by_chan(socket_table_t *table, const channel_t *chan)
+{
+  socket_table_ent_t *ent = NULL;
+  ent = socket_table_search(table, chan);
+  if (!ent)
+    return;
+  log_debug(LD_SCHED, "scheduler free socket info for chan=%" PRIu64,
+            chan->global_identifier);
+  HT_REMOVE(socket_table_s, table, ent);
+  free_socket_info_by_ent(ent, NULL);
+}
+
+/* Perform system calls for the given socket in order to calculate kist's
+ * per-socket limit as documented in the function body. */
+MOCK_IMPL(void,
+update_socket_info_impl, (socket_table_ent_t *ent))
+{
+#ifdef HAVE_KIST_SUPPORT
+  int64_t tcp_space, extra_space;
+  const tor_socket_t sock =
+    TO_CONN(BASE_CHAN_TO_TLS((channel_t *) ent->chan)->conn)->s;
+  struct tcp_info tcp;
+  socklen_t tcp_info_len = sizeof(tcp);
+
+  if (kist_no_kernel_support || kist_lite_mode) {
+    goto fallback;
+  }
+
+  /* Gather information */
+  if (getsockopt(sock, SOL_TCP, TCP_INFO, (void *)&(tcp), &tcp_info_len) < 0) {
+    if (errno == EINVAL) {
+      /* Oops, this option is not provided by the kernel, we'll have to
+       * disable KIST entirely. This can happen if tor was built on a machine
+       * with the support previously or if the kernel was updated and lost the
+       * support. */
+      log_notice(LD_SCHED, "Looks like our kernel doesn't have the support "
+                           "for KIST anymore. We will fallback to the naive "
+                           "approach. Remove KIST from the Schedulers list "
+                           "to disable.");
+      kist_no_kernel_support = 1;
+    }
+    /* Any failure, not just EINVAL, uses the fallback for this call. */
+    goto fallback;
+  }
+  if (ioctl(sock, SIOCOUTQNSD, &(ent->notsent)) < 0) {
+    if (errno == EINVAL) {
+      log_notice(LD_SCHED, "Looks like our kernel doesn't have the support "
+                           "for KIST anymore. We will fallback to the naive "
+                           "approach. Remove KIST from the Schedulers list "
+                           "to disable.");
+      /* Same reason as the above. */
+      kist_no_kernel_support = 1;
+    }
+    goto fallback;
+  }
+  ent->cwnd = tcp.tcpi_snd_cwnd;
+  ent->unacked = tcp.tcpi_unacked;
+  ent->mss = tcp.tcpi_snd_mss;
+
+  /* In order to reduce outbound kernel queuing delays and thus improve Tor's
+   * ability to prioritize circuits, KIST wants to set a socket write limit
+   * that is near the amount that the socket would be able to immediately send
+   * into the Internet.
+   *
+   * We first calculate how much the socket could send immediately (assuming
+   * completely full packets) according to the congestion window and the number
+   * of unacked packets.
+   *
+   * Then we add a little extra space in a controlled way. We do this so that
+   * when the kernel gets ACKs back for data currently sitting in the "TCP
+   * space", it will already have some more data to send immediately. It will
+   * not have to wait for the scheduler to run again. The amount of extra space
+   * is a factor of the current congestion window. With the suggested
+   * sock_buf_size_factor value of 1.0, we allow at most 2*cwnd bytes to sit in
+   * the kernel: 1 cwnd on the wire waiting for ACKs and 1 cwnd ready and
+   * waiting to be sent when those ACKs finally come.
+   *
+   * In the below diagram, we see some bytes in the TCP-space (denoted by '*')
+   * that have been sent onto the wire and are waiting for ACKs. We have a
+   * little more room in "TCP space" that we can fill with data that will be
+   * immediately sent. We also see the "extra space" KIST calculates. The sum
+   * of the empty "TCP space" and the "extra space" is the kist-imposed write
+   * limit for this socket.
+   *
+   * <----------------kernel-outbound-socket-queue----------------|
+   * <*********---------------------------------------------------|
+   * |----TCP-space-----|----extra-space-----|
+   * |------------------|
+   *   ^ ((cwnd - unacked) * mss) bytes
+   *                    |--------------------|
+   *                      ^ ((cwnd * mss) * factor) bytes
+   */
+
+  /* These values from the kernel are uint32_t, they will always fit into a
+   * int64_t tcp_space variable but if the congestion window cwnd is smaller
+   * than the unacked packets, the remaining TCP space is set to 0. */
+  if (ent->cwnd >= ent->unacked) {
+    tcp_space = (ent->cwnd - ent->unacked) * (int64_t)(ent->mss);
+  } else {
+    tcp_space = 0;
+  }
+
+  /* The clamp_double_to_int64 makes sure the first part fits into an int64_t.
+   * In fact, if sock_buf_size_factor is still forced to be >= 0 in config.c,
+   * then it will be positive for sure. Then we subtract a uint32_t. Getting a
+   * negative value is OK, see after how it is being handled. */
+  extra_space =
+    clamp_double_to_int64(
+                 (ent->cwnd * (int64_t)ent->mss) * sock_buf_size_factor) -
+    ent->notsent;
+  if ((tcp_space + extra_space) < 0) {
+    /* This means that the "notsent" queue is just too big so we shouldn't put
+     * more in the kernel for now. */
+    ent->limit = 0;
+  } else {
+    /* The positive sum of two int64_t will always fit into an uint64_t.
+     * And we know this will always be positive, since we checked above. */
+    ent->limit = (uint64_t)tcp_space + (uint64_t)extra_space;
+  }
+  return;
+
+#else /* !(defined(HAVE_KIST_SUPPORT)) */
+  goto fallback;
+#endif /* defined(HAVE_KIST_SUPPORT) */
+
+ fallback:
+  /* If all of a sudden we don't have kist support, we just zero out all the
+   * variables for this socket since we don't know what they should be. We
+   * also allow the socket to write as much as it can from the estimated
+   * number of cells the lower layer can accept, effectively returning it to
+   * Vanilla scheduler behavior. */
+  ent->cwnd = ent->unacked = ent->mss = ent->notsent = 0;
+  /* This function calls the specialized channel object (currently channeltls)
+   * and ask how many cells it can write on the outbuf which we then multiply
+   * by the size of the cells for this channel. The cast is because this
+   * function requires a non-const channel object, meh. */
+  ent->limit = channel_num_cells_writeable((channel_t *) ent->chan) *
+               (get_cell_network_size(ent->chan->wide_circ_ids) +
+                TLS_PER_CELL_OVERHEAD);
+}
+
+/* Given a socket that isn't in the table, add it.
+ * Given a socket that is in the table, re-init values that need init-ing
+ * every scheduling run (currently only the written counter).
+ */
+static void
+init_socket_info(socket_table_t *table, const channel_t *chan)
+{
+  socket_table_ent_t *ent = NULL;
+  ent = socket_table_search(table, chan);
+  if (!ent) {
+    log_debug(LD_SCHED, "scheduler init socket info for chan=%" PRIu64,
+              chan->global_identifier);
+    ent = tor_malloc_zero(sizeof(*ent));
+    ent->chan = chan;
+    HT_INSERT(socket_table_s, table, ent);
+  }
+  ent->written = 0;
+}
+
+/* Add chan to the outbuf table if it isn't already in it. 
If it is, then don't
+ * do anything */
+static void
+outbuf_table_add(outbuf_table_t *table, channel_t *chan)
+{
+  outbuf_table_ent_t search, *ent;
+  search.chan = chan;
+  ent = HT_FIND(outbuf_table_s, table, &search);
+  if (!ent) {
+    log_debug(LD_SCHED, "scheduler init outbuf info for chan=%" PRIu64,
+              chan->global_identifier);
+    ent = tor_malloc_zero(sizeof(*ent));
+    ent->chan = chan;
+    HT_INSERT(outbuf_table_s, table, ent);
+  }
+}
+
+/* Remove chan's entry from the outbuf table and free it. If chan has no
+ * entry, do nothing. */
+static void
+outbuf_table_remove(outbuf_table_t *table, channel_t *chan)
+{
+  outbuf_table_ent_t search, *ent;
+  search.chan = chan;
+  ent = HT_FIND(outbuf_table_s, table, &search);
+  if (ent) {
+    HT_REMOVE(outbuf_table_s, table, ent);
+    free_outbuf_info_by_ent(ent, NULL);
+  }
+}
+
+/* Set the scheduler running interval from kist_scheduler_run_interval(),
+ * logging at info level when the value changes. */
+static void
+set_scheduler_run_interval(void)
+{
+  int old_sched_run_interval = sched_run_interval;
+  sched_run_interval = kist_scheduler_run_interval();
+  if (old_sched_run_interval != sched_run_interval) {
+    log_info(LD_SCHED, "Scheduler KIST changing its running interval "
+                       "from %" PRId32 " to %" PRId32,
+             old_sched_run_interval, sched_run_interval);
+  }
+}
+
+/* Return true iff the channel hasn't hit its kist-imposed write limit yet */
+static int
+socket_can_write(socket_table_t *table, const channel_t *chan)
+{
+  socket_table_ent_t *ent = NULL;
+  ent = socket_table_search(table, chan);
+  if (SCHED_BUG(!ent, chan)) {
+    return 1; // Just return true, saying that kist wouldn't limit the socket
+  }
+
+  /* We previously calculated a write limit for this socket. In the below
+   * calculation, first determine how much room is left in bytes. Then divide
+   * that by the amount of space a cell takes. If there's room for at least 1
+   * cell, then KIST will allow the socket to write.
+   *
+   * The cast applies to the difference, before the division.
+   * NOTE(review): the types of limit and written are declared elsewhere;
+   * confirm how this behaves once written exceeds limit. */
+  int64_t kist_limit_space =
+    (int64_t) (ent->limit - ent->written) /
+    (CELL_MAX_NETWORK_SIZE + TLS_PER_CELL_OVERHEAD);
+  return kist_limit_space > 0;
+}
+
+/* Update the channel's socket kernel information. 
*/ +static void +update_socket_info(socket_table_t *table, const channel_t *chan) +{ + socket_table_ent_t *ent = NULL; + ent = socket_table_search(table, chan); + if (SCHED_BUG(!ent, chan)) { + return; // Whelp. Entry didn't exist for some reason so nothing to do. + } + update_socket_info_impl(ent); + log_debug(LD_SCHED, "chan=%" PRIu64 " updated socket info, limit: %" PRIu64 + ", cwnd: %" PRIu32 ", unacked: %" PRIu32 + ", notsent: %" PRIu32 ", mss: %" PRIu32, + ent->chan->global_identifier, ent->limit, ent->cwnd, ent->unacked, + ent->notsent, ent->mss); +} + +/* Increment the channel's socket written value by the number of bytes. */ +static void +update_socket_written(socket_table_t *table, channel_t *chan, size_t bytes) +{ + socket_table_ent_t *ent = NULL; + ent = socket_table_search(table, chan); + if (SCHED_BUG(!ent, chan)) { + return; // Whelp. Entry didn't exist so nothing to do. + } + + log_debug(LD_SCHED, "chan=%" PRIu64 " wrote %lu bytes, old was %" PRIi64, + chan->global_identifier, (unsigned long) bytes, ent->written); + + ent->written += bytes; +} + +/* + * A naive KIST impl would write every single cell all the way to the kernel. + * That would take a lot of system calls. A less bad KIST impl would write a + * channel's outbuf to the kernel only when we are switching to a different + * channel. But if we have two channels with equal priority, we end up writing + * one cell for each and bouncing back and forth. This KIST impl avoids that + * by only writing a channel's outbuf to the kernel if it has 8 cells or more + * in it. 
+ */ +MOCK_IMPL(int, channel_should_write_to_kernel, + (outbuf_table_t *table, channel_t *chan)) +{ + outbuf_table_add(table, chan); + /* Compare against CELL_MAX_NETWORK_SIZE * 8: we only want to write the + * outbuf to the kernel once it holds more than 8 maximum-size cells' worth + * of bytes (note the strict '>'). */ + return channel_outbuf_length(chan) > (CELL_MAX_NETWORK_SIZE * 8); +} + +/* Little helper function to write a channel's outbuf all the way to the + * kernel */ +MOCK_IMPL(void, channel_write_to_kernel, (channel_t *chan)) +{ + log_debug(LD_SCHED, "Writing %lu bytes to kernel for chan %" PRIu64, + (unsigned long)channel_outbuf_length(chan), + chan->global_identifier); + connection_handle_write(TO_CONN(BASE_CHAN_TO_TLS(chan)->conn), 0); +} + +/* Return true iff the scheduler has work to perform. */ +static int +have_work(void) +{ + smartlist_t *cp = get_channels_pending(); + IF_BUG_ONCE(!cp) { + return 0; // channels_pending doesn't exist so... no work? + } + return smartlist_len(cp) > 0; +} + +/* Function of the scheduler interface: free_all() */ +static void +kist_free_all(void) +{ + free_all_socket_info(); +} + +/* Function of the scheduler interface: on_channel_free() */ +static void +kist_on_channel_free_fn(const channel_t *chan) +{ + free_socket_info_by_chan(&socket_table, chan); +} + +/* Function of the scheduler interface: on_new_consensus() */ +static void +kist_scheduler_on_new_consensus(void) +{ + set_scheduler_run_interval(); +} + +/* Function of the scheduler interface: on_new_options() */ +static void +kist_scheduler_on_new_options(void) +{ + sock_buf_size_factor = get_options()->KISTSockBufSizeFactor; + + /* Calls kist_scheduler_run_interval which calls get_options(). */ + set_scheduler_run_interval(); +} + +/* Function of the scheduler interface: init() */ +static void +kist_scheduler_init(void) +{ + /* When initializing the scheduler, the last run could be 0 because it is + * declared static or a value in the past that was set when it was last + * used.
In both cases, we want to initialize it to now so we don't risk + * using the value 0 which doesn't play well with our monotonic time + * interface. + * + * One side effect is that the first scheduler run will be at the next tick + * that is in now + 10 msec (KIST_SCHED_RUN_INTERVAL_DEFAULT) by default. */ + monotime_get(&scheduler_last_run); + + kist_scheduler_on_new_options(); + IF_BUG_ONCE(sched_run_interval == 0) { + log_warn(LD_SCHED, "We are initing the KIST scheduler and noticed the " + "KISTSchedRunInterval is telling us to not use KIST. That's " + "weird! We'll continue using KIST, but at %" PRId32 "ms.", + KIST_SCHED_RUN_INTERVAL_DEFAULT); + sched_run_interval = KIST_SCHED_RUN_INTERVAL_DEFAULT; + } +} + +/* Function of the scheduler interface: schedule() */ +static void +kist_scheduler_schedule(void) +{ + struct monotime_t now; + struct timeval next_run; + int64_t diff; + + if (!have_work()) { + return; + } + monotime_get(&now); + + /* If time is really monotonic, we can never have now being smaller than the + * last scheduler run. The scheduler_last_run at first is set to 0. + * Unfortunately, not all platforms guarantee monotonic time so we log at + * info level but don't make it more noisy. */ + diff = monotime_diff_msec(&scheduler_last_run, &now); + if (diff < 0) { + log_info(LD_SCHED, "Monotonic time between now and last run of scheduler " + "is negative: %" PRId64 ". Setting diff to 0.", diff); + diff = 0; + } + if (diff < sched_run_interval) { + next_run.tv_sec = 0; + /* Takes 1000 ms -> us. This will always be valid because diff can NOT be + * negative and can NOT be bigger than sched_run_interval so values can + * only go from 1000 usec (diff set to interval - 1) to 100000 usec (diff + * set to 0) for the maximum allowed run interval (100ms). */ + next_run.tv_usec = (int) ((sched_run_interval - diff) * 1000); + /* Re-adding an event reschedules it. It does not duplicate it.
*/ + scheduler_ev_add(&next_run); + } else { + scheduler_ev_active(); + } +} + +/* Function of the scheduler interface: run() + * + * Repeatedly pops the best channel from channels_pending, flushes at most one + * cell for it if its KIST write limit allows, and then picks the channel's + * next scheduler state. Channels that still have cells but hit their limit + * are parked in to_readd and put back into channels_pending once the loop + * ends. Finally records the time of this run in scheduler_last_run. */ +static void +kist_scheduler_run(void) +{ + /* Define variables */ + channel_t *chan = NULL; // current working channel + /* The last distinct chan served in a sched loop. */ + channel_t *prev_chan = NULL; + int flush_result; // temporarily store results from flush calls + /* Channels to be re-added to pending at the end */ + smartlist_t *to_readd = NULL; + smartlist_t *cp = get_channels_pending(); + + outbuf_table_t outbuf_table = HT_INITIALIZER(); + + /* For each pending channel, collect new kernel information */ + SMARTLIST_FOREACH_BEGIN(cp, const channel_t *, pchan) { + init_socket_info(&socket_table, pchan); + update_socket_info(&socket_table, pchan); + } SMARTLIST_FOREACH_END(pchan); + + log_debug(LD_SCHED, "Running the scheduler. %d channels pending", + smartlist_len(cp)); + + /* The main scheduling loop. Loop until there are no more pending channels */ + while (smartlist_len(cp) > 0) { + /* get best channel */ + chan = smartlist_pqueue_pop(cp, scheduler_compare_channels, + offsetof(channel_t, sched_heap_idx)); + if (SCHED_BUG(!chan, NULL)) { + /* Some-freaking-how a NULL got into the channels_pending. That should + * never happen, but it should be harmless to ignore it and keep looping. + */ + continue; + } + outbuf_table_add(&outbuf_table, chan); + + /* if we have switched to a new channel, consider writing the previous + * channel's outbuf to the kernel.
*/ + if (!prev_chan) { + prev_chan = chan; + } + if (prev_chan != chan) { + if (channel_should_write_to_kernel(&outbuf_table, prev_chan)) { + channel_write_to_kernel(prev_chan); + outbuf_table_remove(&outbuf_table, prev_chan); + } + prev_chan = chan; + } + + /* Only flush and write if the per-socket limit hasn't been hit */ + if (socket_can_write(&socket_table, chan)) { + /* flush to channel queue/outbuf */ + flush_result = (int)channel_flush_some_cells(chan, 1); // 1 for num cells + /* XXX: While flushing cells, it is possible that the connection write + * fails leading to the channel to be closed which triggers a release + * and free its entry in the socket table. And because of an engineering + * design issue, the error is not propagated back so we don't get an + * error at this point. So before we continue, make sure the channel is + * open and if not just ignore it. See #23751. */ + if (!CHANNEL_IS_OPEN(chan)) { + /* Channel isn't open so we put it back in IDLE mode. It is either + * renegotiating its TLS session or about to be released. */ + scheduler_set_channel_state(chan, SCHED_CHAN_IDLE); + continue; + } + /* flush_result has the # cells flushed */ + if (flush_result > 0) { + update_socket_written(&socket_table, chan, flush_result * + (CELL_MAX_NETWORK_SIZE + TLS_PER_CELL_OVERHEAD)); + } else { + /* XXX: This can happen because tor sometimes does flush in an + * opportunistic way cells from the circuit to the outbuf so the + * channel can end up here without having anything to flush nor needed + * to write to the kernel. Hopefully we'll fix that soon but for now + * we have to handle this case which happens kind of often. */ + log_debug(LD_SCHED, + "We didn't flush anything on a chan that we think " + "can write and wants to write. The channel's state is '%s' " + "and in scheduler state '%s'.
We're going to mark it as " + "waiting_for_cells (as that's most likely the issue) and " + "stop scheduling it this round.", + channel_state_to_string(chan->state), + get_scheduler_state_string(chan->scheduler_state)); + scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_FOR_CELLS); + continue; + } + } + + /* Decide what to do with the channel now */ + + if (!channel_more_to_flush(chan) && + !socket_can_write(&socket_table, chan)) { + + /* Case 1: no more cells to send, and cannot write */ + + /* + * You might think we should put the channel in SCHED_CHAN_IDLE. And + * you're probably correct. While implementing KIST, we found that the + * scheduling system would sometimes lose track of channels when we did + * that. We suspect it has to do with the difference between "can't + * write because socket/outbuf is full" and KIST's "can't write because + * we've arbitrarily decided that that's enough for now." Sometimes + * channels run out of cells at the same time they hit their + * kist-imposed write limit and maybe the rest of Tor doesn't put the + * channel back in pending when it is supposed to. + * + * This should be investigated again. It is as simple as changing + * SCHED_CHAN_WAITING_FOR_CELLS to SCHED_CHAN_IDLE and seeing if Tor + * starts having serious throughput issues. Best done in shadow/chutney. + */ + scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_FOR_CELLS); + } else if (!channel_more_to_flush(chan)) { + + /* Case 2: no more cells to send, but still open for writes */ + + scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_FOR_CELLS); + } else if (!socket_can_write(&socket_table, chan)) { + + /* Case 3: cells to send, but cannot write */ + + /* + * We want to write, but can't. If we left the channel in + * channels_pending, we would never exit the scheduling loop. We need to + * add it to a temporary list of channels to be added to channels_pending + * after the scheduling loop is over.
They can hopefully be taken care of + * in the next scheduling round. + */ + if (!to_readd) { + to_readd = smartlist_new(); + } + smartlist_add(to_readd, chan); + } else { + + /* Case 4: cells to send, and still open for writes */ + + scheduler_set_channel_state(chan, SCHED_CHAN_PENDING); + if (!SCHED_BUG(chan->sched_heap_idx != -1, chan)) { + smartlist_pqueue_add(cp, scheduler_compare_channels, + offsetof(channel_t, sched_heap_idx), chan); + } + } + } /* End of main scheduling loop */ + + /* Write the outbuf of any channels that still have data */ + HT_FOREACH_FN(outbuf_table_s, &outbuf_table, each_channel_write_to_kernel, + NULL); + /* We are done with it. */ + HT_FOREACH_FN(outbuf_table_s, &outbuf_table, free_outbuf_info_by_ent, NULL); + HT_CLEAR(outbuf_table_s, &outbuf_table); + + log_debug(LD_SCHED, "len pending=%d, len to_readd=%d", + smartlist_len(cp), + (to_readd ? smartlist_len(to_readd) : -1)); + + /* Re-add any channels we need to */ + if (to_readd) { + SMARTLIST_FOREACH_BEGIN(to_readd, channel_t *, readd_chan) { + scheduler_set_channel_state(readd_chan, SCHED_CHAN_PENDING); + if (!smartlist_contains(cp, readd_chan)) { + /* Validate the channel being re-added. The main loop's `chan` is + * whatever was popped last, which is not necessarily this channel + * and can even be NULL. */ + if (!SCHED_BUG(readd_chan->sched_heap_idx != -1, readd_chan)) { + /* XXXX Note that the check above is in theory redundant with + * the smartlist_contains check. But let's make sure we're + * not messing anything up, and leave them both for now. */ + smartlist_pqueue_add(cp, scheduler_compare_channels, + offsetof(channel_t, sched_heap_idx), readd_chan); + } + } + } SMARTLIST_FOREACH_END(readd_chan); + smartlist_free(to_readd); + } + + monotime_get(&scheduler_last_run); +} + +/***************************************************************************** + * Externally called function implementations not called through scheduler_t + *****************************************************************************/ + +/* Stores the kist scheduler function pointers.
*/ +static scheduler_t kist_scheduler = { + .type = SCHEDULER_KIST, + .free_all = kist_free_all, + .on_channel_free = kist_on_channel_free_fn, + .init = kist_scheduler_init, + .on_new_consensus = kist_scheduler_on_new_consensus, + .schedule = kist_scheduler_schedule, + .run = kist_scheduler_run, + .on_new_options = kist_scheduler_on_new_options, +}; + +/* Return the KIST scheduler object. This is always the address of the + * file-static kist_scheduler table: nothing is allocated here and init() is + * not called. */ +scheduler_t * +get_kist_scheduler(void) +{ + return &kist_scheduler; +} + +/* Check the torrc (and maybe consensus) for the configured KIST scheduler run + * interval. + * - If the torrc value is non-zero, return it unchanged (the code tests + * != 0, not > 0, so no sign check is done here). + * - If torrc == 0, then look in the consensus for what the value should be. + * - If == 0, then return 0 (don't use KIST) + * - If > 0, then return the positive consensus value + * - If consensus doesn't say anything, return the default, + * KIST_SCHED_RUN_INTERVAL_DEFAULT. + * The consensus lookup is bounded by KIST_SCHED_RUN_INTERVAL_MIN and + * KIST_SCHED_RUN_INTERVAL_MAX. + */ +int +kist_scheduler_run_interval(void) +{ + int run_interval = get_options()->KISTSchedRunInterval; + + if (run_interval != 0) { + log_debug(LD_SCHED, "Found KISTSchedRunInterval=%" PRId32 " in torrc. " + "Using that.", run_interval); + return run_interval; + } + + log_debug(LD_SCHED, "KISTSchedRunInterval=0, turning to the consensus."); + + /* Will either be the consensus value or the default. Note that 0 can be + * returned which means the consensus wants us to NOT use KIST. */ + return networkstatus_get_param(NULL, "KISTSchedRunInterval", + KIST_SCHED_RUN_INTERVAL_DEFAULT, + KIST_SCHED_RUN_INTERVAL_MIN, + KIST_SCHED_RUN_INTERVAL_MAX); +} + +/* Set KISTLite mode that is KIST without kernel support. Sets the + * kist_lite_mode flag and switches the scheduler type to SCHEDULER_KIST_LITE. */ +void +scheduler_kist_set_lite_mode(void) +{ + kist_lite_mode = 1; + kist_scheduler.type = SCHEDULER_KIST_LITE; + log_info(LD_SCHED, + "Setting KIST scheduler without kernel support (KISTLite mode)"); +} + +/* Set KIST mode that is KIST with kernel support.
*/ +void +scheduler_kist_set_full_mode(void) +{ + kist_lite_mode = 0; + kist_scheduler.type = SCHEDULER_KIST; + log_info(LD_SCHED, + "Setting KIST scheduler with kernel support (KIST mode)"); +} + +#ifdef HAVE_KIST_SUPPORT + +/* Return true iff the scheduler subsystem should use KIST. */ +int +scheduler_can_use_kist(void) +{ + if (kist_no_kernel_support) { + /* We have no kernel support so we can't use KIST. */ + return 0; + } + + /* We do have the support, time to check if we can get the interval that the + * consensus can be disabling. */ + int run_interval = kist_scheduler_run_interval(); + log_debug(LD_SCHED, "Determined KIST sched_run_interval should be " + "%" PRId32 ". Can%s use KIST.", + run_interval, (run_interval > 0 ? "" : " not")); + return run_interval > 0; +} + +#else /* !(defined(HAVE_KIST_SUPPORT)) */ + +int +scheduler_can_use_kist(void) +{ + return 0; +} + +#endif /* defined(HAVE_KIST_SUPPORT) */ diff --cc src/feature/stats/rephist.c index 405efc26e,000000000..edb079ef2 mode 100644,000000..100644 --- a/src/feature/stats/rephist.c +++ b/src/feature/stats/rephist.c @@@ -1,3222 -1,0 +1,3227 @@@ +/* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file rephist.c + * \brief Basic history and performance-tracking functionality. + * + * Basic history and performance-tracking functionality to remember + * which servers have worked in the past, how much bandwidth we've + * been using, which ports we tend to want, and so on; further, + * exit port statistics, cell statistics, and connection statistics. + * + * The history and information tracked in this module could sensibly be + * divided into several categories: + * + * <ul><li>Statistics used by authorities to remember the uptime and + * stability information about various relays, including "uptime", + * "weighted fractional uptime" and "mean time between failures". 
+ * + * <li>Bandwidth usage history, used by relays to self-report how much + * bandwidth they've used for different purposes over the last day or so, + * in order to generate the {dirreq-,}{read,write}-history lines + * that they publish. + * + * <li>Predicted ports, used by clients to remember how long it's been + * since they opened an exit connection to each given target + * port. Clients use this information in order to try to keep circuits + * open to exit nodes that can connect to the ports that they care + * about. (The predicted ports mechanism also handles predicted circuit + * usage that _isn't_ port-specific, such as resolves, internal circuits, + * and so on.) + * + * <li>Public key operation counters, for tracking how many times we've + * done each public key operation. (This is unmaintained and we should + * remove it.) + * + * <li>Exit statistics by port, used by exits to keep track of the + * number of streams and bytes they've served at each exit port, so they + * can generate their exit-kibibytes-{read,written} and + * exit-streams-opened statistics. + * + * <li>Circuit stats, used by relay instances to track circuit + * queue fullness and delay over time, and generate cell-processed-cells, + * cell-queued-cells, cell-time-in-queue, and cell-circuits-per-decile + * statistics. + * + * <li>Descriptor serving statistics, used by directory caches to track + * how many descriptors they've served. + * + * <li>Connection statistics, used by relays to track one-way and + * bidirectional connections. + * + * <li>Onion handshake statistics, used by relays to count how many + * TAP and ntor handshakes they've handled. + * + * <li>Hidden service statistics, used by relays to count rendezvous + * traffic and HSDir-stored descriptors. + * + * <li>Link protocol statistics, used by relays to count how many times + * each link protocol has been used. + * + * </ul> + * + * The entry points for this module are scattered throughout the + * codebase.
Sending data, receiving data, connecting to a relay, + * losing a connection to a relay, and so on can all trigger a change in + * our current stats. Relays also invoke this module in order to + * extract their statistics when building routerinfo and extrainfo + * objects in router.c. + * + * TODO: This module should be broken up. + * + * (The "rephist" name originally stood for "reputation and history". ) + **/ + +#include "core/or/or.h" +#include "core/or/circuitlist.h" +#include "core/or/circuituse.h" +#include "app/config/config.h" +#include "lib/crypt_ops/crypto_rand.h" +#include "feature/nodelist/networkstatus.h" +#include "feature/nodelist/nodelist.h" +#include "feature/stats/rephist.h" +#include "feature/relay/router.h" +#include "feature/nodelist/routerlist.h" +#include "ht.h" +#include "core/or/channelpadding.h" +#include "core/or/connection_or.h" +#include "app/config/statefile.h" + +#include "feature/nodelist/networkstatus_st.h" +#include "core/or/or_circuit_st.h" +#include "app/config/or_state_st.h" + +#include "lib/container/bloomfilt.h" +#include "lib/container/order.h" +#include "lib/math/fp.h" +#include "lib/math/laplace.h" +#include "lib/time/tvdiff.h" + +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif + +static void bw_arrays_init(void); +static void predicted_ports_alloc(void); + ++typedef struct bw_array_t bw_array_t; ++STATIC uint64_t find_largest_max(bw_array_t *b); ++STATIC void commit_max(bw_array_t *b); ++STATIC void advance_obs(bw_array_t *b); ++ +/** Total number of bytes currently allocated in fields used by rephist.c. */ +uint64_t rephist_total_alloc=0; +/** Number of or_history_t objects currently allocated. */ +uint32_t rephist_total_num=0; + +/** If the total weighted run count of all runs for a router ever falls + * below this amount, the router can be treated as having 0 MTBF. */ +#define STABILITY_EPSILON 0.0001 +/** Value by which to discount all old intervals for MTBF purposes. This + * is compounded every STABILITY_INTERVAL. 
*/ +#define STABILITY_ALPHA 0.95 +/** Interval at which to discount all old intervals for MTBF purposes. */ +#define STABILITY_INTERVAL (12*60*60) +/* (This combination of ALPHA, INTERVAL, and EPSILON makes it so that an + * interval that just ended counts twice as much as one that ended a week ago, + * 20X as much as one that ended a month ago, and routers that have had no + * uptime data for about half a year will get forgotten.) */ + +/** History of an OR. */ +typedef struct or_history_t { + /** When did we start tracking this OR? */ + time_t since; + /** When did we most recently note a change to this OR? */ + time_t changed; + + /** The address at which we most recently connected to this OR + * successfully. */ + tor_addr_t last_reached_addr; + + /** The port at which we most recently connected to this OR successfully */ + uint16_t last_reached_port; + + /* === For MTBF tracking: */ + /** Weighted sum total of all times that this router has been online. + */ + unsigned long weighted_run_length; + /** If the router is now online (according to stability-checking rules), + * when did it come online? */ + time_t start_of_run; + /** Sum of weights for runs in weighted_run_length. */ + double total_run_weights; + /* === For fractional uptime tracking: */ + time_t start_of_downtime; + unsigned long weighted_uptime; + unsigned long total_weighted_time; +} or_history_t; + +/** + * This structure holds accounting needed to calculate the padding overhead. 
+ */ +typedef struct padding_counts_t { + /** Total number of cells we have received, including padding */ + uint64_t read_cell_count; + /** Total number of cells we have sent, including padding */ + uint64_t write_cell_count; + /** Total number of CELL_PADDING cells we have received */ + uint64_t read_pad_cell_count; + /** Total number of CELL_PADDING cells we have sent */ + uint64_t write_pad_cell_count; + /** Total number of read cells on padding-enabled conns */ + uint64_t enabled_read_cell_count; + /** Total number of sent cells on padding-enabled conns */ + uint64_t enabled_write_cell_count; + /** Total number of read CELL_PADDING cells on padding-enabled conns */ + uint64_t enabled_read_pad_cell_count; + /** Total number of sent CELL_PADDING cells on padding-enabled conns */ + uint64_t enabled_write_pad_cell_count; + /** Total number of RELAY_DROP cells we have received */ + uint64_t read_drop_cell_count; + /** Total number of RELAY_DROP cells we have sent */ + uint64_t write_drop_cell_count; + /** The maximum number of padding timers we've seen in 24 hours */ + uint64_t maximum_chanpad_timers; + /** When did we first copy padding_current into padding_published? + * The buffer is sized for an ISO time string plus its terminator. */ + char first_published_at[ISO_TIME_LEN+1]; +} padding_counts_t; + +/** Holds the current values of our padding statistics. + * It is not published until it is transferred to padding_published. */ +static padding_counts_t padding_current; + +/** Remains fixed for a 24 hour period, and then is replaced + * by a redacted copy of padding_current */ +static padding_counts_t padding_published; + +/** When did we last multiply all routers' weighted_run_length and + * total_run_weights by STABILITY_ALPHA? */ +static time_t stability_last_downrated = 0; + +/** Time at which we started tracking router stability, or 0 if we have not + * started yet. rep_hist_note_router_reachable() and + * rep_hist_note_router_unreachable() set it to time(NULL) while it is still + * 0. */ +static time_t started_tracking_stability = 0; + +/** Map from hex OR identity digest to or_history_t.
*/ +static digestmap_t *history_map = NULL; + +/** Return the or_history_t for the OR with identity digest <b>id</b>, + * creating it if necessary. */ +static or_history_t * +get_or_history(const char* id) +{ + or_history_t *hist; + + if (tor_digest_is_zero(id)) + return NULL; + + hist = digestmap_get(history_map, id); + if (!hist) { + hist = tor_malloc_zero(sizeof(or_history_t)); + rephist_total_alloc += sizeof(or_history_t); + rephist_total_num++; + hist->since = hist->changed = time(NULL); + tor_addr_make_unspec(&hist->last_reached_addr); + digestmap_set(history_map, id, hist); + } + return hist; +} + +/** Helper: free storage held by a single OR history entry. */ +static void +free_or_history(void *_hist) +{ + or_history_t *hist = _hist; + rephist_total_alloc -= sizeof(or_history_t); + rephist_total_num--; + tor_free(hist); +} + +/** Initialize the static data structures for tracking history. */ +void +rep_hist_init(void) +{ + history_map = digestmap_new(); + bw_arrays_init(); + predicted_ports_alloc(); +} + +/** We have just decided that this router with identity digest <b>id</b> is + * reachable, meaning we will give it a "Running" flag for the next while. 
*/ +void +rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, + const uint16_t at_port, time_t when) +{ + or_history_t *hist = get_or_history(id); + int was_in_run = 1; + char tbuf[ISO_TIME_LEN+1]; + int addr_changed, port_changed; + + tor_assert(hist); + tor_assert((!at_addr && !at_port) || (at_addr && at_port)); + + addr_changed = at_addr && !tor_addr_is_null(&hist->last_reached_addr) && + tor_addr_compare(at_addr, &hist->last_reached_addr, CMP_EXACT) != 0; + port_changed = at_port && hist->last_reached_port && + at_port != hist->last_reached_port; + + if (!started_tracking_stability) + started_tracking_stability = time(NULL); + if (!hist->start_of_run) { + hist->start_of_run = when; + was_in_run = 0; + } + if (hist->start_of_downtime) { + long down_length; + + format_local_iso_time(tbuf, hist->start_of_downtime); + log_info(LD_HIST, "Router %s is now Running; it had been down since %s.", + hex_str(id, DIGEST_LEN), tbuf); + if (was_in_run) + log_info(LD_HIST, " (Paradoxically, it was already Running too.)"); + + down_length = when - hist->start_of_downtime; + hist->total_weighted_time += down_length; + hist->start_of_downtime = 0; + } else if (addr_changed || port_changed) { + /* If we're reachable, but the address changed, treat this as some + * downtime. */ + int penalty = get_options()->TestingTorNetwork ? 240 : 3600; + networkstatus_t *ns; + + if ((ns = networkstatus_get_latest_consensus())) { + int fresh_interval = (int)(ns->fresh_until - ns->valid_after); + int live_interval = (int)(ns->valid_until - ns->valid_after); + /* on average, a descriptor addr change takes .5 intervals to make it + * into a consensus, and half a liveness period to make it to + * clients. */ + penalty = (int)(fresh_interval + live_interval) / 2; + } + format_local_iso_time(tbuf, hist->start_of_run); + log_info(LD_HIST,"Router %s still seems Running, but its address appears " + "to have changed since the last time it was reachable. 
I'm " + "going to treat it as having been down for %d seconds", + hex_str(id, DIGEST_LEN), penalty); + rep_hist_note_router_unreachable(id, when-penalty); + rep_hist_note_router_reachable(id, NULL, 0, when); + } else { + format_local_iso_time(tbuf, hist->start_of_run); + if (was_in_run) + log_debug(LD_HIST, "Router %s is still Running; it has been Running " + "since %s", hex_str(id, DIGEST_LEN), tbuf); + else + log_info(LD_HIST,"Router %s is now Running; it was previously untracked", + hex_str(id, DIGEST_LEN)); + } + if (at_addr) + tor_addr_copy(&hist->last_reached_addr, at_addr); + if (at_port) + hist->last_reached_port = at_port; +} + +/** We have just decided that this router is unreachable, meaning + * we are taking away its "Running" flag. */ +void +rep_hist_note_router_unreachable(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + char tbuf[ISO_TIME_LEN+1]; + int was_running = 0; + if (!started_tracking_stability) + started_tracking_stability = time(NULL); + + tor_assert(hist); + if (hist->start_of_run) { + /*XXXX We could treat failed connections differently from failed + * connect attempts. */ + long run_length = when - hist->start_of_run; + format_local_iso_time(tbuf, hist->start_of_run); + + hist->total_run_weights += 1.0; + hist->start_of_run = 0; + if (run_length < 0) { + unsigned long penalty = -run_length; +#define SUBTRACT_CLAMPED(var, penalty) \ + do { (var) = (var) < (penalty) ? 0 : (var) - (penalty); } while (0) + + SUBTRACT_CLAMPED(hist->weighted_run_length, penalty); + SUBTRACT_CLAMPED(hist->weighted_uptime, penalty); + } else { + hist->weighted_run_length += run_length; + hist->weighted_uptime += run_length; + hist->total_weighted_time += run_length; + } + was_running = 1; + log_info(LD_HIST, "Router %s is now non-Running: it had previously been " + "Running since %s. 
Its total weighted uptime is %lu/%lu.", + hex_str(id, DIGEST_LEN), tbuf, hist->weighted_uptime, + hist->total_weighted_time); + } + if (!hist->start_of_downtime) { + hist->start_of_downtime = when; + + if (!was_running) + log_info(LD_HIST, "Router %s is now non-Running; it was previously " + "untracked.", hex_str(id, DIGEST_LEN)); + } else { + if (!was_running) { + format_local_iso_time(tbuf, hist->start_of_downtime); + + log_info(LD_HIST, "Router %s is still non-Running; it has been " + "non-Running since %s.", hex_str(id, DIGEST_LEN), tbuf); + } + } +} + +/** Mark a router with ID <b>id</b> as non-Running, and retroactively declare + * that it has never been running: give it no stability and no WFU. */ +void +rep_hist_make_router_pessimal(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + tor_assert(hist); + + rep_hist_note_router_unreachable(id, when); + + hist->weighted_run_length = 0; + hist->weighted_uptime = 0; +} + +/** Helper: Discount all old MTBF data, if it is time to do so. Return + * the time at which we should next discount MTBF data. */ +time_t +rep_hist_downrate_old_runs(time_t now) +{ + digestmap_iter_t *orhist_it; + const char *digest1; + or_history_t *hist; + void *hist_p; + double alpha = 1.0; + + if (!history_map) + history_map = digestmap_new(); + if (!stability_last_downrated) + stability_last_downrated = now; + if (stability_last_downrated + STABILITY_INTERVAL > now) + return stability_last_downrated + STABILITY_INTERVAL; + + /* Okay, we should downrate the data. By how much? */ + while (stability_last_downrated + STABILITY_INTERVAL < now) { + stability_last_downrated += STABILITY_INTERVAL; + alpha *= STABILITY_ALPHA; + } + + log_info(LD_HIST, "Discounting all old stability info by a factor of %f", + alpha); + + /* Multiply every w_r_l, t_r_w pair by alpha. 
*/ + for (orhist_it = digestmap_iter_init(history_map); + !digestmap_iter_done(orhist_it); + orhist_it = digestmap_iter_next(history_map,orhist_it)) { + digestmap_iter_get(orhist_it, &digest1, &hist_p); + hist = hist_p; + + hist->weighted_run_length = + (unsigned long)(hist->weighted_run_length * alpha); + hist->total_run_weights *= alpha; + + hist->weighted_uptime = (unsigned long)(hist->weighted_uptime * alpha); + hist->total_weighted_time = (unsigned long) + (hist->total_weighted_time * alpha); + } + + return stability_last_downrated + STABILITY_INTERVAL; +} + +/** Helper: Return the weighted MTBF of the router with history <b>hist</b>. */ +static double +get_stability(or_history_t *hist, time_t when) +{ + long total = hist->weighted_run_length; + double total_weights = hist->total_run_weights; + + if (hist->start_of_run) { + /* We're currently in a run. Let total and total_weights hold the values + * they would hold if the current run were to end now. */ + total += (when-hist->start_of_run); + total_weights += 1.0; + } + if (total_weights < STABILITY_EPSILON) { + /* Round down to zero, and avoid divide-by-zero. */ + return 0.0; + } + + return total / total_weights; +} + +/** Return the total amount of time we've been observing, with each run of + * time downrated by the appropriate factor. */ +static long +get_total_weighted_time(or_history_t *hist, time_t when) +{ + long total = hist->total_weighted_time; + if (hist->start_of_run) { + total += (when - hist->start_of_run); + } else if (hist->start_of_downtime) { + total += (when - hist->start_of_downtime); + } + return total; +} + +/** Helper: Return the weighted percent-of-time-online of the router with + * history <b>hist</b>. 
*/ +static double +get_weighted_fractional_uptime(or_history_t *hist, time_t when) +{ + long total = hist->total_weighted_time; + long up = hist->weighted_uptime; + + if (hist->start_of_run) { + long run_length = (when - hist->start_of_run); + up += run_length; + total += run_length; + } else if (hist->start_of_downtime) { + total += (when - hist->start_of_downtime); + } + + if (!total) { + /* Avoid calling anybody's uptime infinity (which should be impossible if + * the code is working), or NaN (which can happen for any router we haven't + * observed up or down yet). */ + return 0.0; + } + + return ((double) up) / total; +} + +/** Return how long the router whose identity digest is <b>id</b> has + * been reachable. Return 0 if the router is unknown or currently deemed + * unreachable. */ +long +rep_hist_get_uptime(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + if (!hist) + return 0; + if (!hist->start_of_run || when < hist->start_of_run) + return 0; + return when - hist->start_of_run; +} + +/** Return an estimated MTBF for the router whose identity digest is + * <b>id</b>. Return 0 if the router is unknown. */ +double +rep_hist_get_stability(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + if (!hist) + return 0.0; + + return get_stability(hist, when); +} + +/** Return an estimated percent-of-time-online for the router whose identity + * digest is <b>id</b>. Return 0 if the router is unknown. */ +double +rep_hist_get_weighted_fractional_uptime(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + if (!hist) + return 0.0; + + return get_weighted_fractional_uptime(hist, when); +} + +/** Return a number representing how long we've known about the router whose + * digest is <b>id</b>. Return 0 if the router is unknown. + * + * Be careful: this measure increases monotonically as we know the router for + * longer and longer, but it doesn't increase linearly. 
+ */ +long +rep_hist_get_weighted_time_known(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + if (!hist) + return 0; + + return get_total_weighted_time(hist, when); +} + +/** Return true if we've been measuring MTBFs for long enough to + * pronounce on Stability. */ +int +rep_hist_have_measured_enough_stability(void) +{ + /* XXXX++ This doesn't do so well when we change our opinion + * as to whether we're tracking router stability. */ + return started_tracking_stability < time(NULL) - 4*60*60; +} + +/** Log all the reliability data we have remembered, with the chosen + * severity. + */ +void +rep_hist_dump_stats(time_t now, int severity) +{ + digestmap_iter_t *orhist_it; + const char *name1, *digest1; + char hexdigest1[HEX_DIGEST_LEN+1]; + or_history_t *or_history; + void *or_history_p; + const node_t *node; + + rep_history_clean(now - get_options()->RephistTrackTime); + + tor_log(severity, LD_HIST, "--------------- Dumping history information:"); + + for (orhist_it = digestmap_iter_init(history_map); + !digestmap_iter_done(orhist_it); + orhist_it = digestmap_iter_next(history_map,orhist_it)) { + double s; + long stability; + digestmap_iter_get(orhist_it, &digest1, &or_history_p); + or_history = (or_history_t*) or_history_p; + + if ((node = node_get_by_id(digest1)) && node_get_nickname(node)) + name1 = node_get_nickname(node); + else + name1 = "(unknown)"; + base16_encode(hexdigest1, sizeof(hexdigest1), digest1, DIGEST_LEN); + s = get_stability(or_history, now); + stability = (long)s; + tor_log(severity, LD_HIST, + "OR %s [%s]: wmtbf %lu:%02lu:%02lu", + name1, hexdigest1, + stability/3600, (stability/60)%60, stability%60); + } +} + +/** Remove history info for routers/links that haven't changed since + * <b>before</b>. 
+ */ +void +rep_history_clean(time_t before) +{ + int authority = authdir_mode(get_options()); + or_history_t *or_history; + void *or_history_p; + digestmap_iter_t *orhist_it; + const char *d1; + + orhist_it = digestmap_iter_init(history_map); + while (!digestmap_iter_done(orhist_it)) { + int should_remove; + digestmap_iter_get(orhist_it, &d1, &or_history_p); + or_history = or_history_p; + + should_remove = authority ? + (or_history->total_run_weights < STABILITY_EPSILON && + !or_history->start_of_run) + : (or_history->changed < before); + if (should_remove) { + orhist_it = digestmap_iter_next_rmv(history_map, orhist_it); + free_or_history(or_history); + continue; + } + orhist_it = digestmap_iter_next(history_map, orhist_it); + } +} + +/** Write MTBF data to disk. Return 0 on success, negative on failure. + * + * If <b>missing_means_down</b>, then if we're about to write an entry + * that is still considered up but isn't in our routerlist, consider it + * to be down. */ +int +rep_hist_record_mtbf_data(time_t now, int missing_means_down) +{ + char time_buf[ISO_TIME_LEN+1]; + + digestmap_iter_t *orhist_it; + const char *digest; + void *or_history_p; + or_history_t *hist; + open_file_t *open_file = NULL; + FILE *f; + + { + char *filename = get_datadir_fname("router-stability"); + f = start_writing_to_stdio_file(filename, OPEN_FLAGS_REPLACE|O_TEXT, 0600, + &open_file); + tor_free(filename); + if (!f) + return -1; + } + + /* File format is: + * FormatLine *KeywordLine Data + * + * FormatLine = "format 1" NL + * KeywordLine = Keyword SP Arguments NL + * Data = "data" NL *RouterMTBFLine "." 
NL + * RouterMTBFLine = Fingerprint SP WeightedRunLen SP + * TotalRunWeights [SP S=StartRunTime] NL + */ +#define PUT(s) STMT_BEGIN if (fputs((s),f)<0) goto err; STMT_END +#define PRINTF(args) STMT_BEGIN if (fprintf args <0) goto err; STMT_END + + PUT("format 2\n"); + + format_iso_time(time_buf, time(NULL)); + PRINTF((f, "stored-at %s\n", time_buf)); + + if (started_tracking_stability) { + format_iso_time(time_buf, started_tracking_stability); + PRINTF((f, "tracked-since %s\n", time_buf)); + } + if (stability_last_downrated) { + format_iso_time(time_buf, stability_last_downrated); + PRINTF((f, "last-downrated %s\n", time_buf)); + } + + PUT("data\n"); + + /* XXX Nick: now bridge auths record this for all routers too. + * Should we make them record it only for bridge routers? -RD + * Not for 0.2.0. -NM */ + for (orhist_it = digestmap_iter_init(history_map); + !digestmap_iter_done(orhist_it); + orhist_it = digestmap_iter_next(history_map,orhist_it)) { + char dbuf[HEX_DIGEST_LEN+1]; + const char *t = NULL; + digestmap_iter_get(orhist_it, &digest, &or_history_p); + hist = (or_history_t*) or_history_p; + + base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN); + + if (missing_means_down && hist->start_of_run && + !connection_or_digest_is_known_relay(digest)) { + /* We think this relay is running, but it's not listed in our + * consensus. Somehow it fell out without telling us it went + * down. Complain and also correct it. */ + log_info(LD_HIST, + "Relay '%s' is listed as up in rephist, but it's not in " + "our routerlist. Correcting.", dbuf); + rep_hist_note_router_unreachable(digest, now); + } + + PRINTF((f, "R %s\n", dbuf)); + if (hist->start_of_run > 0) { + format_iso_time(time_buf, hist->start_of_run); + t = time_buf; + } + PRINTF((f, "+MTBF %lu %.5f%s%s\n", + hist->weighted_run_length, hist->total_run_weights, + t ? " S=" : "", t ? 
t : "")); + t = NULL; + if (hist->start_of_downtime > 0) { + format_iso_time(time_buf, hist->start_of_downtime); + t = time_buf; + } + PRINTF((f, "+WFU %lu %lu%s%s\n", + hist->weighted_uptime, hist->total_weighted_time, + t ? " S=" : "", t ? t : "")); + } + + PUT(".\n"); + +#undef PUT +#undef PRINTF + + return finish_writing_to_file(open_file); + err: + abort_writing_to_file(open_file); + return -1; +} + +/** Helper: return the first j >= i such that !strcmpstart(sl[j], prefix) and + * such that no line sl[k] with i <= k < j starts with "R ". Return -1 if no + * such line exists. */ +static int +find_next_with(smartlist_t *sl, int i, const char *prefix) +{ + for ( ; i < smartlist_len(sl); ++i) { + const char *line = smartlist_get(sl, i); + if (!strcmpstart(line, prefix)) + return i; + if (!strcmpstart(line, "R ")) + return -1; + } + return -1; +} + +/** How many bad times has parse_possibly_bad_iso_time() parsed? */ +static int n_bogus_times = 0; +/** Parse the ISO-formatted time in <b>s</b> into *<b>time_out</b>, but + * round any pre-1970 date to Jan 1, 1970. */ +static int +parse_possibly_bad_iso_time(const char *s, time_t *time_out) +{ + int year; + char b[5]; + strlcpy(b, s, sizeof(b)); + b[4] = '\0'; + year = (int)tor_parse_long(b, 10, 0, INT_MAX, NULL, NULL); + if (year < 1970) { + *time_out = 0; + ++n_bogus_times; + return 0; + } else + return parse_iso_time(s, time_out); +} + +/** We've read a time <b>t</b> from a file stored at <b>stored_at</b>, which + * says we started measuring at <b>started_measuring</b>. Return a new number + * that's about as much before <b>now</b> as <b>t</b> was before + * <b>stored_at</b>. 
+ */ +static inline time_t +correct_time(time_t t, time_t now, time_t stored_at, time_t started_measuring) +{ + if (t < started_measuring - 24*60*60*365) + return 0; + else if (t < started_measuring) + return started_measuring; + else if (t > stored_at) + return 0; + else { + long run_length = stored_at - t; + t = (time_t)(now - run_length); + if (t < started_measuring) + t = started_measuring; + return t; + } +} + +/** Load MTBF data from disk. Returns 0 on success or recoverable error, -1 + * on failure. */ +int +rep_hist_load_mtbf_data(time_t now) +{ + /* XXXX won't handle being called while history is already populated. */ + smartlist_t *lines; + const char *line = NULL; + int r=0, i; + time_t last_downrated = 0, stored_at = 0, tracked_since = 0; + time_t latest_possible_start = now; + long format = -1; + + { + char *filename = get_datadir_fname("router-stability"); + char *d = read_file_to_str(filename, RFTS_IGNORE_MISSING, NULL); + tor_free(filename); + if (!d) + return -1; + lines = smartlist_new(); + smartlist_split_string(lines, d, "\n", SPLIT_SKIP_SPACE, 0); + tor_free(d); + } + + { + const char *firstline; + if (smartlist_len(lines)>4) { + firstline = smartlist_get(lines, 0); + if (!strcmpstart(firstline, "format ")) + format = tor_parse_long(firstline+strlen("format "), + 10, -1, LONG_MAX, NULL, NULL); + } + } + if (format != 1 && format != 2) { + log_warn(LD_HIST, + "Unrecognized format in mtbf history file. 
Skipping."); + goto err; + } + for (i = 1; i < smartlist_len(lines); ++i) { + line = smartlist_get(lines, i); + if (!strcmp(line, "data")) + break; + if (!strcmpstart(line, "last-downrated ")) { + if (parse_iso_time(line+strlen("last-downrated "), &last_downrated)<0) + log_warn(LD_HIST,"Couldn't parse downrate time in mtbf " + "history file."); + } + if (!strcmpstart(line, "stored-at ")) { + if (parse_iso_time(line+strlen("stored-at "), &stored_at)<0) + log_warn(LD_HIST,"Couldn't parse stored time in mtbf " + "history file."); + } + if (!strcmpstart(line, "tracked-since ")) { + if (parse_iso_time(line+strlen("tracked-since "), &tracked_since)<0) + log_warn(LD_HIST,"Couldn't parse started-tracking time in mtbf " + "history file."); + } + } + if (last_downrated > now) + last_downrated = now; + if (tracked_since > now) + tracked_since = now; + + if (!stored_at) { + log_warn(LD_HIST, "No stored time recorded."); + goto err; + } + + if (line && !strcmp(line, "data")) + ++i; + + n_bogus_times = 0; + + for (; i < smartlist_len(lines); ++i) { + char digest[DIGEST_LEN]; + char hexbuf[HEX_DIGEST_LEN+1]; + char mtbf_timebuf[ISO_TIME_LEN+1]; + char wfu_timebuf[ISO_TIME_LEN+1]; + time_t start_of_run = 0; + time_t start_of_downtime = 0; + int have_mtbf = 0, have_wfu = 0; + long wrl = 0; + double trw = 0; + long wt_uptime = 0, total_wt_time = 0; + int n; + or_history_t *hist; + line = smartlist_get(lines, i); + if (!strcmp(line, ".")) + break; + + mtbf_timebuf[0] = '\0'; + wfu_timebuf[0] = '\0'; + + if (format == 1) { + n = tor_sscanf(line, "%40s %ld %lf S=%10s %8s", + hexbuf, &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11); + if (n != 3 && n != 5) { + log_warn(LD_HIST, "Couldn't scan line %s", escaped(line)); + continue; + } + have_mtbf = 1; + } else { + // format == 2. 
+ int mtbf_idx, wfu_idx; + if (strcmpstart(line, "R ") || strlen(line) < 2+HEX_DIGEST_LEN) + continue; + strlcpy(hexbuf, line+2, sizeof(hexbuf)); + mtbf_idx = find_next_with(lines, i+1, "+MTBF "); + wfu_idx = find_next_with(lines, i+1, "+WFU "); + if (mtbf_idx >= 0) { + const char *mtbfline = smartlist_get(lines, mtbf_idx); + n = tor_sscanf(mtbfline, "+MTBF %lu %lf S=%10s %8s", + &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11); + if (n == 2 || n == 4) { + have_mtbf = 1; + } else { + log_warn(LD_HIST, "Couldn't scan +MTBF line %s", + escaped(mtbfline)); + } + } + if (wfu_idx >= 0) { + const char *wfuline = smartlist_get(lines, wfu_idx); + n = tor_sscanf(wfuline, "+WFU %lu %lu S=%10s %8s", + &wt_uptime, &total_wt_time, + wfu_timebuf, wfu_timebuf+11); + if (n == 2 || n == 4) { + have_wfu = 1; + } else { + log_warn(LD_HIST, "Couldn't scan +WFU line %s", escaped(wfuline)); + } + } + if (wfu_idx > i) + i = wfu_idx; + if (mtbf_idx > i) + i = mtbf_idx; + } + if (base16_decode(digest, DIGEST_LEN, + hexbuf, HEX_DIGEST_LEN) != DIGEST_LEN) { + log_warn(LD_HIST, "Couldn't hex string %s", escaped(hexbuf)); + continue; + } + hist = get_or_history(digest); + if (!hist) + continue; + + if (have_mtbf) { + if (mtbf_timebuf[0]) { + mtbf_timebuf[10] = ' '; + if (parse_possibly_bad_iso_time(mtbf_timebuf, &start_of_run)<0) + log_warn(LD_HIST, "Couldn't parse time %s", + escaped(mtbf_timebuf)); + } + hist->start_of_run = correct_time(start_of_run, now, stored_at, + tracked_since); + if (hist->start_of_run < latest_possible_start + wrl) + latest_possible_start = (time_t)(hist->start_of_run - wrl); + + hist->weighted_run_length = wrl; + hist->total_run_weights = trw; + } + if (have_wfu) { + if (wfu_timebuf[0]) { + wfu_timebuf[10] = ' '; + if (parse_possibly_bad_iso_time(wfu_timebuf, &start_of_downtime)<0) + log_warn(LD_HIST, "Couldn't parse time %s", escaped(wfu_timebuf)); + } + } + hist->start_of_downtime = correct_time(start_of_downtime, now, stored_at, + tracked_since); + 
hist->weighted_uptime = wt_uptime; + hist->total_weighted_time = total_wt_time; + } + if (strcmp(line, ".")) + log_warn(LD_HIST, "Truncated MTBF file."); + + if (tracked_since < 86400*365) /* Recover from insanely early value. */ + tracked_since = latest_possible_start; + + stability_last_downrated = last_downrated; + started_tracking_stability = tracked_since; + + goto done; + err: + r = -1; + done: + SMARTLIST_FOREACH(lines, char *, cp, tor_free(cp)); + smartlist_free(lines); + return r; +} + +/** For how many seconds do we keep track of individual per-second bandwidth + * totals? */ +#define NUM_SECS_ROLLING_MEASURE 10 +/** How large are the intervals for which we track and report bandwidth use? */ +#define NUM_SECS_BW_SUM_INTERVAL (24*60*60) +/** How far in the past do we remember and publish bandwidth use? */ +#define NUM_SECS_BW_SUM_IS_VALID (5*24*60*60) +/** How many bandwidth usage intervals do we remember? (derived) */ +#define NUM_TOTALS (NUM_SECS_BW_SUM_IS_VALID/NUM_SECS_BW_SUM_INTERVAL) + +/** Structure to track bandwidth use, and remember the maxima for a given + * time period. + */ +typedef struct bw_array_t { + /** Observation array: Total number of bytes transferred in each of the last + * NUM_SECS_ROLLING_MEASURE seconds. This is used as a circular array. */ + uint64_t obs[NUM_SECS_ROLLING_MEASURE]; + int cur_obs_idx; /**< Current position in obs. */ + time_t cur_obs_time; /**< Time represented in obs[cur_obs_idx] */ + uint64_t total_obs; /**< Total for all members of obs except + * obs[cur_obs_idx] */ + uint64_t max_total; /**< Largest value that total_obs has taken on in the + * current period. */ + uint64_t total_in_period; /**< Total bytes transferred in the current + * period. */ + + /** When does the next period begin? */ + time_t next_period; + /** Where in 'maxima' should the maximum bandwidth usage for the current + * period be stored? */ + int next_max_idx; + /** How many values in maxima/totals have been set ever? 
*/ + int num_maxes_set; + /** Circular array of the maximum + * bandwidth-per-NUM_SECS_ROLLING_MEASURE usage for the last + * NUM_TOTALS periods */ + uint64_t maxima[NUM_TOTALS]; + /** Circular array of the total bandwidth usage for the last NUM_TOTALS + * periods */ + uint64_t totals[NUM_TOTALS]; +} bw_array_t; + +/** Shift the current period of b forward by one. */ - static void ++STATIC void +commit_max(bw_array_t *b) +{ + /* Store total from current period. */ + b->totals[b->next_max_idx] = b->total_in_period; + /* Store maximum from current period. */ + b->maxima[b->next_max_idx++] = b->max_total; + /* Advance next_period and next_max_idx */ + b->next_period += NUM_SECS_BW_SUM_INTERVAL; + if (b->next_max_idx == NUM_TOTALS) + b->next_max_idx = 0; + if (b->num_maxes_set < NUM_TOTALS) + ++b->num_maxes_set; + /* Reset max_total. */ + b->max_total = 0; + /* Reset total_in_period. */ + b->total_in_period = 0; +} + +/** Shift the current observation time of <b>b</b> forward by one second. */ - static inline void ++STATIC void +advance_obs(bw_array_t *b) +{ + int nextidx; + uint64_t total; + + /* Calculate the total bandwidth for the last NUM_SECS_ROLLING_MEASURE + * seconds; adjust max_total as needed.*/ + total = b->total_obs + b->obs[b->cur_obs_idx]; + if (total > b->max_total) + b->max_total = total; + + nextidx = b->cur_obs_idx+1; + if (nextidx == NUM_SECS_ROLLING_MEASURE) + nextidx = 0; + + b->total_obs = total - b->obs[nextidx]; + b->obs[nextidx]=0; + b->cur_obs_idx = nextidx; + + if (++b->cur_obs_time >= b->next_period) + commit_max(b); +} + +/** Add <b>n</b> bytes to the number of bytes in <b>b</b> for second + * <b>when</b>. */ +static inline void +add_obs(bw_array_t *b, time_t when, uint64_t n) +{ + if (when < b->cur_obs_time) + return; /* Don't record data in the past. 
*/ + + /* If we're currently adding observations for an earlier second than + * 'when', advance b->cur_obs_time and b->cur_obs_idx by an + * appropriate number of seconds, and do all the other housekeeping. */ + while (when > b->cur_obs_time) { + /* Doing this one second at a time is potentially inefficient, if we start + with a state file that is very old. Fortunately, it doesn't seem to + show up in profiles, so we can just ignore it for now. */ + advance_obs(b); + } + + b->obs[b->cur_obs_idx] += n; + b->total_in_period += n; +} + +/** Allocate, initialize, and return a new bw_array. */ +static bw_array_t * +bw_array_new(void) +{ + bw_array_t *b; + time_t start; + b = tor_malloc_zero(sizeof(bw_array_t)); + rephist_total_alloc += sizeof(bw_array_t); + start = time(NULL); + b->cur_obs_time = start; + b->next_period = start + NUM_SECS_BW_SUM_INTERVAL; + return b; +} + +#define bw_array_free(val) \ + FREE_AND_NULL(bw_array_t, bw_array_free_, (val)) + +/** Free storage held by bandwidth array <b>b</b>. */ +static void +bw_array_free_(bw_array_t *b) +{ + if (!b) { + return; + } + + rephist_total_alloc -= sizeof(bw_array_t); + tor_free(b); +} + +/** Recent history of bandwidth observations for read operations. */ +static bw_array_t *read_array = NULL; +/** Recent history of bandwidth observations for write operations. */ - static bw_array_t *write_array = NULL; ++STATIC bw_array_t *write_array = NULL; +/** Recent history of bandwidth observations for read operations for the + directory protocol. */ +static bw_array_t *dir_read_array = NULL; +/** Recent history of bandwidth observations for write operations for the + directory protocol. */ +static bw_array_t *dir_write_array = NULL; + +/** Set up [dir-]read_array and [dir-]write_array, freeing them if they + * already exist. 
*/ +static void +bw_arrays_init(void) +{ + bw_array_free(read_array); + bw_array_free(write_array); + bw_array_free(dir_read_array); + bw_array_free(dir_write_array); + + read_array = bw_array_new(); + write_array = bw_array_new(); + dir_read_array = bw_array_new(); + dir_write_array = bw_array_new(); +} + +/** Remember that we read <b>num_bytes</b> bytes in second <b>when</b>. + * + * Add num_bytes to the current running total for <b>when</b>. + * + * <b>when</b> can go back to time, but it's safe to ignore calls + * earlier than the latest <b>when</b> you've heard of. + */ +void - rep_hist_note_bytes_written(size_t num_bytes, time_t when) ++rep_hist_note_bytes_written(uint64_t num_bytes, time_t when) +{ +/* Maybe a circular array for recent seconds, and step to a new point + * every time a new second shows up. Or simpler is to just to have + * a normal array and push down each item every second; it's short. + */ +/* When a new second has rolled over, compute the sum of the bytes we've + * seen over when-1 to when-1-NUM_SECS_ROLLING_MEASURE, and stick it + * somewhere. See rep_hist_bandwidth_assess() below. + */ + add_obs(write_array, when, num_bytes); +} + +/** Remember that we wrote <b>num_bytes</b> bytes in second <b>when</b>. + * (like rep_hist_note_bytes_written() above) + */ +void - rep_hist_note_bytes_read(size_t num_bytes, time_t when) ++rep_hist_note_bytes_read(uint64_t num_bytes, time_t when) +{ +/* if we're smart, we can make this func and the one above share code */ + add_obs(read_array, when, num_bytes); +} + +/** Remember that we wrote <b>num_bytes</b> directory bytes in second + * <b>when</b>. (like rep_hist_note_bytes_written() above) + */ +void - rep_hist_note_dir_bytes_written(size_t num_bytes, time_t when) ++rep_hist_note_dir_bytes_written(uint64_t num_bytes, time_t when) +{ + add_obs(dir_write_array, when, num_bytes); +} + +/** Remember that we read <b>num_bytes</b> directory bytes in second + * <b>when</b>. 
(like rep_hist_note_bytes_written() above) + */ +void - rep_hist_note_dir_bytes_read(size_t num_bytes, time_t when) ++rep_hist_note_dir_bytes_read(uint64_t num_bytes, time_t when) +{ + add_obs(dir_read_array, when, num_bytes); +} + +/** Helper: Return the largest value in b->maxima. (This is equal to the + * most bandwidth used in any NUM_SECS_ROLLING_MEASURE period for the last + * NUM_SECS_BW_SUM_IS_VALID seconds.) + */ - static uint64_t ++STATIC uint64_t +find_largest_max(bw_array_t *b) +{ + int i; + uint64_t max; + max=0; + for (i=0; i<NUM_TOTALS; ++i) { + if (b->maxima[i]>max) + max = b->maxima[i]; + } + return max; +} + +/** Find the largest sums in the past NUM_SECS_BW_SUM_IS_VALID (roughly) + * seconds. Find one sum for reading and one for writing. They don't have + * to be at the same time. + * + * Return the smaller of these sums, divided by NUM_SECS_ROLLING_MEASURE. + */ +MOCK_IMPL(int, +rep_hist_bandwidth_assess,(void)) +{ + uint64_t w,r; + r = find_largest_max(read_array); + w = find_largest_max(write_array); + if (r>w) + return (int)(((double)w)/NUM_SECS_ROLLING_MEASURE); + else + return (int)(((double)r)/NUM_SECS_ROLLING_MEASURE); +} + +/** Print the bandwidth history of b (either [dir-]read_array or + * [dir-]write_array) into the buffer pointed to by buf. The format is + * simply comma separated numbers, from oldest to newest. + * + * It returns the number of bytes written. + */ +static size_t +rep_hist_fill_bandwidth_history(char *buf, size_t len, const bw_array_t *b) +{ + char *cp = buf; + int i, n; + const or_options_t *options = get_options(); + uint64_t cutoff; + + if (b->num_maxes_set <= b->next_max_idx) { + /* We haven't been through the circular array yet; time starts at i=0.*/ + i = 0; + } else { + /* We've been around the array at least once. The next i to be + overwritten is the oldest. 
*/ + i = b->next_max_idx; + } + + if (options->RelayBandwidthRate) { + /* We don't want to report that we used more bandwidth than the max we're + * willing to relay; otherwise everybody will know how much traffic + * we used ourself. */ + cutoff = options->RelayBandwidthRate * NUM_SECS_BW_SUM_INTERVAL; + } else { + cutoff = UINT64_MAX; + } + + for (n=0; n<b->num_maxes_set; ++n,++i) { + uint64_t total; + if (i >= NUM_TOTALS) + i -= NUM_TOTALS; + tor_assert(i < NUM_TOTALS); + /* Round the bandwidth used down to the nearest 1k. */ + total = b->totals[i] & ~0x3ff; + if (total > cutoff) + total = cutoff; + + if (n==(b->num_maxes_set-1)) + tor_snprintf(cp, len-(cp-buf), "%"PRIu64, (total)); + else + tor_snprintf(cp, len-(cp-buf), "%"PRIu64",", (total)); + cp += strlen(cp); + } + return cp-buf; +} + +/** Allocate and return lines for representing this server's bandwidth + * history in its descriptor. We publish these lines in our extra-info + * descriptor. + */ +char * +rep_hist_get_bandwidth_lines(void) +{ + char *buf, *cp; + char t[ISO_TIME_LEN+1]; + int r; + bw_array_t *b = NULL; + const char *desc = NULL; + size_t len; + + /* [dirreq-](read|write)-history yyyy-mm-dd HH:MM:SS (n s) n,n,n... */ +/* The n,n,n part above. Largest representation of a uint64_t is 20 chars + * long, plus the comma. */ +#define MAX_HIST_VALUE_LEN (21*NUM_TOTALS) + len = (67+MAX_HIST_VALUE_LEN)*4; + buf = tor_malloc_zero(len); + cp = buf; + for (r=0;r<4;++r) { + char tmp[MAX_HIST_VALUE_LEN]; + size_t slen; + switch (r) { + case 0: + b = write_array; + desc = "write-history"; + break; + case 1: + b = read_array; + desc = "read-history"; + break; + case 2: + b = dir_write_array; + desc = "dirreq-write-history"; + break; + case 3: + b = dir_read_array; + desc = "dirreq-read-history"; + break; + } + tor_assert(b); + slen = rep_hist_fill_bandwidth_history(tmp, MAX_HIST_VALUE_LEN, b); + /* If we don't have anything to write, skip to the next entry. 
*/ + if (slen == 0) + continue; + format_iso_time(t, b->next_period-NUM_SECS_BW_SUM_INTERVAL); + tor_snprintf(cp, len-(cp-buf), "%s %s (%d s) ", + desc, t, NUM_SECS_BW_SUM_INTERVAL); + cp += strlen(cp); + strlcat(cp, tmp, len-(cp-buf)); + cp += slen; + strlcat(cp, "\n", len-(cp-buf)); + ++cp; + } + return buf; +} + +/** Write a single bw_array_t into the Values, Ends, Interval, and Maximum + * entries of an or_state_t. Done before writing out a new state file. */ +static void +rep_hist_update_bwhist_state_section(or_state_t *state, + const bw_array_t *b, + smartlist_t **s_values, + smartlist_t **s_maxima, + time_t *s_begins, + int *s_interval) +{ + int i,j; + uint64_t maxval; + + if (*s_values) { + SMARTLIST_FOREACH(*s_values, char *, val, tor_free(val)); + smartlist_free(*s_values); + } + if (*s_maxima) { + SMARTLIST_FOREACH(*s_maxima, char *, val, tor_free(val)); + smartlist_free(*s_maxima); + } + if (! server_mode(get_options())) { + /* Clients don't need to store bandwidth history persistently; + * force these values to the defaults. */ + /* FFFF we should pull the default out of config.c's state table, + * so we don't have two defaults. */ + if (*s_begins != 0 || *s_interval != 900) { + time_t now = time(NULL); + time_t save_at = get_options()->AvoidDiskWrites ? now+3600 : now+600; + or_state_mark_dirty(state, save_at); + } + *s_begins = 0; + *s_interval = 900; + *s_values = smartlist_new(); + *s_maxima = smartlist_new(); + return; + } + *s_begins = b->next_period; + *s_interval = NUM_SECS_BW_SUM_INTERVAL; + + *s_values = smartlist_new(); + *s_maxima = smartlist_new(); + /* Set i to first position in circular array */ + i = (b->num_maxes_set <= b->next_max_idx) ? 
0 : b->next_max_idx; + for (j=0; j < b->num_maxes_set; ++j,++i) { + if (i >= NUM_TOTALS) + i = 0; + smartlist_add_asprintf(*s_values, "%"PRIu64, + (b->totals[i] & ~0x3ff)); + maxval = b->maxima[i] / NUM_SECS_ROLLING_MEASURE; + smartlist_add_asprintf(*s_maxima, "%"PRIu64, + (maxval & ~0x3ff)); + } + smartlist_add_asprintf(*s_values, "%"PRIu64, + (b->total_in_period & ~0x3ff)); + maxval = b->max_total / NUM_SECS_ROLLING_MEASURE; + smartlist_add_asprintf(*s_maxima, "%"PRIu64, + (maxval & ~0x3ff)); +} + +/** Update <b>state</b> with the newest bandwidth history. Done before + * writing out a new state file. */ +void +rep_hist_update_state(or_state_t *state) +{ +#define UPDATE(arrname,st) \ + rep_hist_update_bwhist_state_section(state,\ + (arrname),\ + &state->BWHistory ## st ## Values, \ + &state->BWHistory ## st ## Maxima, \ + &state->BWHistory ## st ## Ends, \ + &state->BWHistory ## st ## Interval) + + UPDATE(write_array, Write); + UPDATE(read_array, Read); + UPDATE(dir_write_array, DirWrite); + UPDATE(dir_read_array, DirRead); + + if (server_mode(get_options())) { + or_state_mark_dirty(state, time(NULL)+(2*3600)); + } +#undef UPDATE +} + +/** Load a single bw_array_t from its Values, Ends, Maxima, and Interval + * entries in an or_state_t. Done while reading the state file. 
*/ +static int +rep_hist_load_bwhist_state_section(bw_array_t *b, + const smartlist_t *s_values, + const smartlist_t *s_maxima, + const time_t s_begins, + const int s_interval) +{ + time_t now = time(NULL); + int retval = 0; + time_t start; + + uint64_t v, mv; + int i,ok,ok_m = 0; + int have_maxima = s_maxima && s_values && + (smartlist_len(s_values) == smartlist_len(s_maxima)); + + if (s_values && s_begins >= now - NUM_SECS_BW_SUM_INTERVAL*NUM_TOTALS) { + start = s_begins - s_interval*(smartlist_len(s_values)); + if (start > now) + return 0; + b->cur_obs_time = start; + b->next_period = start + NUM_SECS_BW_SUM_INTERVAL; + SMARTLIST_FOREACH_BEGIN(s_values, const char *, cp) { + const char *maxstr = NULL; + v = tor_parse_uint64(cp, 10, 0, UINT64_MAX, &ok, NULL); + if (have_maxima) { + maxstr = smartlist_get(s_maxima, cp_sl_idx); + mv = tor_parse_uint64(maxstr, 10, 0, UINT64_MAX, &ok_m, NULL); + mv *= NUM_SECS_ROLLING_MEASURE; + } else { + /* No maxima known; guess average rate to be conservative. */ + mv = (v / s_interval) * NUM_SECS_ROLLING_MEASURE; + } + if (!ok) { + retval = -1; + log_notice(LD_HIST, "Could not parse value '%s' into a number.'",cp); + } + if (maxstr && !ok_m) { + retval = -1; + log_notice(LD_HIST, "Could not parse maximum '%s' into a number.'", + maxstr); + } + + if (start < now) { + time_t cur_start = start; + time_t actual_interval_len = s_interval; + uint64_t cur_val = 0; + /* Calculate the average per second. This is the best we can do + * because our state file doesn't have per-second resolution. */ + if (start + s_interval > now) + actual_interval_len = now - start; + cur_val = v / actual_interval_len; + /* This is potentially inefficient, but since we don't do it very + * often it should be ok. */ + while (cur_start < start + actual_interval_len) { + add_obs(b, cur_start, cur_val); + ++cur_start; + } + b->max_total = mv; + /* This will result in some fairly choppy history if s_interval + * is not the same as NUM_SECS_BW_SUM_INTERVAL. 
XXXX */ + start += actual_interval_len; + } + } SMARTLIST_FOREACH_END(cp); + } + + /* Clean up maxima and observed */ + for (i=0; i<NUM_SECS_ROLLING_MEASURE; ++i) { + b->obs[i] = 0; + } + b->total_obs = 0; + + return retval; +} + +/** Set bandwidth history from the state file we just loaded. */ +int +rep_hist_load_state(or_state_t *state, char **err) +{ + int all_ok = 1; + + /* Assert they already have been malloced */ + tor_assert(read_array && write_array); + tor_assert(dir_read_array && dir_write_array); + +#define LOAD(arrname,st) \ + if (rep_hist_load_bwhist_state_section( \ + (arrname), \ + state->BWHistory ## st ## Values, \ + state->BWHistory ## st ## Maxima, \ + state->BWHistory ## st ## Ends, \ + state->BWHistory ## st ## Interval)<0) \ + all_ok = 0 + + LOAD(write_array, Write); + LOAD(read_array, Read); + LOAD(dir_write_array, DirWrite); + LOAD(dir_read_array, DirRead); + +#undef LOAD + if (!all_ok) { + *err = tor_strdup("Parsing of bandwidth history values failed"); + /* and create fresh arrays */ + bw_arrays_init(); + return -1; + } + return 0; +} + +/*********************************************************************/ + +/** A single predicted port: used to remember which ports we've made + * connections to, so that we can try to keep making circuits that can handle + * those ports. */ +typedef struct predicted_port_t { + /** The port we connected to */ + uint16_t port; + /** The time at which we last used it */ + time_t time; +} predicted_port_t; + +/** A list of port numbers that have been used recently. */ +static smartlist_t *predicted_ports_list=NULL; +/** How long do we keep predicting circuits? */ +static time_t prediction_timeout=0; +/** When was the last time we added a prediction entry (HS or port) */ +static time_t last_prediction_add_time=0; + +/** + * How much time left until we stop predicting circuits? 
+ */ +int +predicted_ports_prediction_time_remaining(time_t now) +{ + time_t seconds_waited; + time_t seconds_left; + + /* Protect against overflow of return value. This can happen if the clock + * jumps backwards in time. Update the last prediction time (aka last + * active time) to prevent it. This update is preferable to using monotonic + * time because it prevents clock jumps into the past from simply causing + * very long idle timeouts while the monotonic time stands still. */ + seconds_waited = time_diff(last_prediction_add_time, now); + if (seconds_waited == TIME_MAX) { + last_prediction_add_time = now; + seconds_waited = 0; + } + + /* Protect against underflow of the return value. This can happen for very + * large periods of inactivity/system sleep. */ + if (seconds_waited > prediction_timeout) + return 0; + + seconds_left = time_diff(seconds_waited, prediction_timeout); + if (BUG(seconds_left == TIME_MAX)) + return INT_MAX; + + return (int)(seconds_left); +} + +/** We just got an application request for a connection with + * port <b>port</b>. Remember it for the future, so we can keep + * some circuits open that will exit to this port. + */ +static void +add_predicted_port(time_t now, uint16_t port) +{ + predicted_port_t *pp = tor_malloc(sizeof(predicted_port_t)); + + // If the list is empty, re-randomize predicted ports lifetime + if (!any_predicted_circuits(now)) { + prediction_timeout = + (time_t)channelpadding_get_circuits_available_timeout(); + } + + last_prediction_add_time = now; + + log_info(LD_CIRC, + "New port prediction added. Will continue predictive circ building " + "for %d more seconds.", + predicted_ports_prediction_time_remaining(now)); + + pp->port = port; + pp->time = now; + rephist_total_alloc += sizeof(*pp); + smartlist_add(predicted_ports_list, pp); +} + +/** + * Allocate whatever memory and structs are needed for predicting + * which ports will be used. Also seed it with port 80, so we'll build + * circuits on start-up. 
+ */ +static void +predicted_ports_alloc(void) +{ + predicted_ports_list = smartlist_new(); +} + +void +predicted_ports_init(void) +{ + add_predicted_port(time(NULL), 443); // Add a port to get us started +} + +/** Free whatever memory is needed for predicting which ports will + * be used. + */ +static void +predicted_ports_free_all(void) +{ + rephist_total_alloc -= + smartlist_len(predicted_ports_list)*sizeof(predicted_port_t); + SMARTLIST_FOREACH(predicted_ports_list, predicted_port_t *, + pp, tor_free(pp)); + smartlist_free(predicted_ports_list); +} + +/** Remember that <b>port</b> has been asked for as of time <b>now</b>. + * This is used for predicting what sorts of streams we'll make in the + * future and making exit circuits to anticipate that. + */ +void +rep_hist_note_used_port(time_t now, uint16_t port) +{ + tor_assert(predicted_ports_list); + + if (!port) /* record nothing */ + return; + + SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) { + if (pp->port == port) { + pp->time = now; + + last_prediction_add_time = now; + log_info(LD_CIRC, + "New port prediction added. Will continue predictive circ " + "building for %d more seconds.", + predicted_ports_prediction_time_remaining(now)); + return; + } + } SMARTLIST_FOREACH_END(pp); + /* it's not there yet; we need to add it */ + add_predicted_port(now, port); +} + +/** Return a newly allocated pointer to a list of uint16_t * for ports that + * are likely to be asked for in the near future. 
+ */ +smartlist_t * +rep_hist_get_predicted_ports(time_t now) +{ + int predicted_circs_relevance_time; + smartlist_t *out = smartlist_new(); + tor_assert(predicted_ports_list); + + predicted_circs_relevance_time = (int)prediction_timeout; + + /* clean out obsolete entries */ + SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) { + if (pp->time + predicted_circs_relevance_time < now) { + log_debug(LD_CIRC, "Expiring predicted port %d", pp->port); + + rephist_total_alloc -= sizeof(predicted_port_t); + tor_free(pp); + SMARTLIST_DEL_CURRENT(predicted_ports_list, pp); + } else { + smartlist_add(out, tor_memdup(&pp->port, sizeof(uint16_t))); + } + } SMARTLIST_FOREACH_END(pp); + return out; +} + +/** + * Take a list of uint16_t *, and remove every port in the list from the + * current list of predicted ports. + */ +void +rep_hist_remove_predicted_ports(const smartlist_t *rmv_ports) +{ + /* Let's do this on O(N), not O(N^2). */ + bitarray_t *remove_ports = bitarray_init_zero(UINT16_MAX); + SMARTLIST_FOREACH(rmv_ports, const uint16_t *, p, + bitarray_set(remove_ports, *p)); + SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) { + if (bitarray_is_set(remove_ports, pp->port)) { + tor_free(pp); + rephist_total_alloc -= sizeof(*pp); + SMARTLIST_DEL_CURRENT(predicted_ports_list, pp); + } + } SMARTLIST_FOREACH_END(pp); + bitarray_free(remove_ports); +} + +/** The user asked us to do a resolve. Rather than keeping track of + * timings and such of resolves, we fake it for now by treating + * it the same way as a connection to port 80. This way we will continue + * to have circuits lying around if the user only uses Tor for resolves. + */ +void +rep_hist_note_used_resolve(time_t now) +{ + rep_hist_note_used_port(now, 80); +} + +/** The last time at which we needed an internal circ. */ +static time_t predicted_internal_time = 0; +/** The last time we needed an internal circ with good uptime. 
*/ +static time_t predicted_internal_uptime_time = 0; +/** The last time we needed an internal circ with good capacity. */ +static time_t predicted_internal_capacity_time = 0; + +/** Remember that we used an internal circ at time <b>now</b>. */ +void +rep_hist_note_used_internal(time_t now, int need_uptime, int need_capacity) +{ + // If the list is empty, re-randomize predicted ports lifetime + if (!any_predicted_circuits(now)) { + prediction_timeout = channelpadding_get_circuits_available_timeout(); + } + + last_prediction_add_time = now; + + log_info(LD_CIRC, + "New port prediction added. Will continue predictive circ building " + "for %d more seconds.", + predicted_ports_prediction_time_remaining(now)); + + predicted_internal_time = now; + if (need_uptime) + predicted_internal_uptime_time = now; + if (need_capacity) + predicted_internal_capacity_time = now; +} + +/** Return 1 if we've used an internal circ recently; else return 0. */ +int +rep_hist_get_predicted_internal(time_t now, int *need_uptime, + int *need_capacity) +{ + int predicted_circs_relevance_time; + + predicted_circs_relevance_time = (int)prediction_timeout; + + if (!predicted_internal_time) { /* initialize it */ + predicted_internal_time = now; + predicted_internal_uptime_time = now; + predicted_internal_capacity_time = now; + } + if (predicted_internal_time + predicted_circs_relevance_time < now) + return 0; /* too long ago */ + if (predicted_internal_uptime_time + predicted_circs_relevance_time >= now) + *need_uptime = 1; + // Always predict that we need capacity. + *need_capacity = 1; + return 1; +} + +/** Any ports used lately? These are pre-seeded if we just started + * up or if we're running a hidden service. 
*/ +int +any_predicted_circuits(time_t now) +{ + int predicted_circs_relevance_time; + predicted_circs_relevance_time = (int)prediction_timeout; + + return smartlist_len(predicted_ports_list) || + predicted_internal_time + predicted_circs_relevance_time >= now; +} + +/** Return 1 if we have no need for circuits currently, else return 0. */ +int +rep_hist_circbuilding_dormant(time_t now) +{ + const or_options_t *options = get_options(); + + if (any_predicted_circuits(now)) + return 0; + + /* see if we'll still need to build testing circuits */ + if (server_mode(options) && + (!check_whether_orport_reachable(options) || + !circuit_enough_testing_circs())) + return 0; + if (!check_whether_dirport_reachable(options)) + return 0; + + return 1; +} + +/*** Exit port statistics ***/ + +/* Some constants */ +/** To what multiple should byte numbers be rounded up? */ +#define EXIT_STATS_ROUND_UP_BYTES 1024 +/** To what multiple should stream counts be rounded up? */ +#define EXIT_STATS_ROUND_UP_STREAMS 4 +/** Number of TCP ports */ +#define EXIT_STATS_NUM_PORTS 65536 +/** Top n ports that will be included in exit stats. */ +#define EXIT_STATS_TOP_N_PORTS 10 + +/* The following data structures are arrays and no fancy smartlists or maps, + * so that all write operations can be done in constant time. This comes at + * the price of some memory (1.25 MB) and linear complexity when writing + * stats for measuring relays. */ +/** Number of bytes read in current period by exit port */ +static uint64_t *exit_bytes_read = NULL; +/** Number of bytes written in current period by exit port */ +static uint64_t *exit_bytes_written = NULL; +/** Number of streams opened in current period by exit port */ +static uint32_t *exit_streams = NULL; + +/** Start time of exit stats or 0 if we're not collecting exit stats. */ +static time_t start_of_exit_stats_interval; + +/** Initialize exit port stats. 
*/ +void +rep_hist_exit_stats_init(time_t now) +{ + start_of_exit_stats_interval = now; + exit_bytes_read = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint64_t)); + exit_bytes_written = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint64_t)); + exit_streams = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint32_t)); +} + +/** Reset counters for exit port statistics. */ +void +rep_hist_reset_exit_stats(time_t now) +{ + start_of_exit_stats_interval = now; + memset(exit_bytes_read, 0, EXIT_STATS_NUM_PORTS * sizeof(uint64_t)); + memset(exit_bytes_written, 0, EXIT_STATS_NUM_PORTS * sizeof(uint64_t)); + memset(exit_streams, 0, EXIT_STATS_NUM_PORTS * sizeof(uint32_t)); +} + +/** Stop collecting exit port stats in a way that we can re-start doing + * so in rep_hist_exit_stats_init(). */ +void +rep_hist_exit_stats_term(void) +{ + start_of_exit_stats_interval = 0; + tor_free(exit_bytes_read); + tor_free(exit_bytes_written); + tor_free(exit_streams); +} + +/** Helper for qsort: compare two ints. Does not handle overflow properly, + * but works fine for sorting an array of port numbers, which is what we use + * it for. */ +static int +compare_int_(const void *x, const void *y) +{ + return (*(int*)x - *(int*)y); +} + +/** Return a newly allocated string containing the exit port statistics + * until <b>now</b>, or NULL if we're not collecting exit stats. Caller + * must ensure start_of_exit_stats_interval is in the past. */ +char * +rep_hist_format_exit_stats(time_t now) +{ + int i, j, top_elements = 0, cur_min_idx = 0, cur_port; + uint64_t top_bytes[EXIT_STATS_TOP_N_PORTS]; + int top_ports[EXIT_STATS_TOP_N_PORTS]; + uint64_t cur_bytes = 0, other_read = 0, other_written = 0, + total_read = 0, total_written = 0; + uint32_t total_streams = 0, other_streams = 0; + smartlist_t *written_strings, *read_strings, *streams_strings; + char *written_string, *read_string, *streams_string; + char t[ISO_TIME_LEN+1]; + char *result; + + if (!start_of_exit_stats_interval) + return NULL; /* Not initialized. 
*/ + + tor_assert(now >= start_of_exit_stats_interval); + + /* Go through all ports to find the n ports that saw most written and + * read bytes. + * + * Invariant: at the end of the loop for iteration i, + * total_read is the sum of all exit_bytes_read[0..i] + * total_written is the sum of all exit_bytes_written[0..i] + * total_stream is the sum of all exit_streams[0..i] + * + * top_elements = MAX(EXIT_STATS_TOP_N_PORTS, + * #{j | 0 <= j <= i && volume(i) > 0}) + * + * For all 0 <= j < top_elements, + * top_bytes[j] > 0 + * 0 <= top_ports[j] <= 65535 + * top_bytes[j] = volume(top_ports[j]) + * + * There is no j in 0..i and k in 0..top_elements such that: + * volume(j) > top_bytes[k] AND j is not in top_ports[0..top_elements] + * + * There is no j!=cur_min_idx in 0..top_elements such that: + * top_bytes[j] < top_bytes[cur_min_idx] + * + * where volume(x) == exit_bytes_read[x]+exit_bytes_written[x] + * + * Worst case: O(EXIT_STATS_NUM_PORTS * EXIT_STATS_TOP_N_PORTS) + */ + for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) { + total_read += exit_bytes_read[i]; + total_written += exit_bytes_written[i]; + total_streams += exit_streams[i]; + cur_bytes = exit_bytes_read[i] + exit_bytes_written[i]; + if (cur_bytes == 0) { + continue; + } + if (top_elements < EXIT_STATS_TOP_N_PORTS) { + top_bytes[top_elements] = cur_bytes; + top_ports[top_elements++] = i; + } else if (cur_bytes > top_bytes[cur_min_idx]) { + top_bytes[cur_min_idx] = cur_bytes; + top_ports[cur_min_idx] = i; + } else { + continue; + } + cur_min_idx = 0; + for (j = 1; j < top_elements; j++) { + if (top_bytes[j] < top_bytes[cur_min_idx]) { + cur_min_idx = j; + } + } + } + + /* Add observations of top ports to smartlists. 
*/ + written_strings = smartlist_new(); + read_strings = smartlist_new(); + streams_strings = smartlist_new(); + other_read = total_read; + other_written = total_written; + other_streams = total_streams; + /* Sort the ports; this puts them out of sync with top_bytes, but we + * won't be using top_bytes again anyway */ + qsort(top_ports, top_elements, sizeof(int), compare_int_); + for (j = 0; j < top_elements; j++) { + cur_port = top_ports[j]; + if (exit_bytes_written[cur_port] > 0) { + uint64_t num = round_uint64_to_next_multiple_of( + exit_bytes_written[cur_port], + EXIT_STATS_ROUND_UP_BYTES); + num /= 1024; + smartlist_add_asprintf(written_strings, "%d=%"PRIu64, + cur_port, (num)); + other_written -= exit_bytes_written[cur_port]; + } + if (exit_bytes_read[cur_port] > 0) { + uint64_t num = round_uint64_to_next_multiple_of( + exit_bytes_read[cur_port], + EXIT_STATS_ROUND_UP_BYTES); + num /= 1024; + smartlist_add_asprintf(read_strings, "%d=%"PRIu64, + cur_port, (num)); + other_read -= exit_bytes_read[cur_port]; + } + if (exit_streams[cur_port] > 0) { + uint32_t num = round_uint32_to_next_multiple_of( + exit_streams[cur_port], + EXIT_STATS_ROUND_UP_STREAMS); + smartlist_add_asprintf(streams_strings, "%d=%u", cur_port, num); + other_streams -= exit_streams[cur_port]; + } + } + + /* Add observations of other ports in a single element. */ + other_written = round_uint64_to_next_multiple_of(other_written, + EXIT_STATS_ROUND_UP_BYTES); + other_written /= 1024; + smartlist_add_asprintf(written_strings, "other=%"PRIu64, + (other_written)); + other_read = round_uint64_to_next_multiple_of(other_read, + EXIT_STATS_ROUND_UP_BYTES); + other_read /= 1024; + smartlist_add_asprintf(read_strings, "other=%"PRIu64, + (other_read)); + other_streams = round_uint32_to_next_multiple_of(other_streams, + EXIT_STATS_ROUND_UP_STREAMS); + smartlist_add_asprintf(streams_strings, "other=%u", other_streams); + + /* Join all observations in single strings. 
*/ + written_string = smartlist_join_strings(written_strings, ",", 0, NULL); + read_string = smartlist_join_strings(read_strings, ",", 0, NULL); + streams_string = smartlist_join_strings(streams_strings, ",", 0, NULL); + SMARTLIST_FOREACH(written_strings, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(read_strings, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(streams_strings, char *, cp, tor_free(cp)); + smartlist_free(written_strings); + smartlist_free(read_strings); + smartlist_free(streams_strings); + + /* Put everything together. */ + format_iso_time(t, now); + tor_asprintf(&result, "exit-stats-end %s (%d s)\n" + "exit-kibibytes-written %s\n" + "exit-kibibytes-read %s\n" + "exit-streams-opened %s\n", + t, (unsigned) (now - start_of_exit_stats_interval), + written_string, + read_string, + streams_string); + tor_free(written_string); + tor_free(read_string); + tor_free(streams_string); + return result; +} + +/** If 24 hours have passed since the beginning of the current exit port + * stats period, write exit stats to $DATADIR/stats/exit-stats (possibly + * overwriting an existing file) and reset counters. Return when we would + * next want to write exit stats or 0 if we never want to write. */ +time_t +rep_hist_exit_stats_write(time_t now) +{ + char *str = NULL; + + if (!start_of_exit_stats_interval) + return 0; /* Not initialized. */ + if (start_of_exit_stats_interval + WRITE_STATS_INTERVAL > now) + goto done; /* Not ready to write. */ + + log_info(LD_HIST, "Writing exit port statistics to disk."); + + /* Generate history string. */ + str = rep_hist_format_exit_stats(now); + + /* Reset counters. */ + rep_hist_reset_exit_stats(now); + + /* Try to write to disk. 
*/ + if (!check_or_create_data_subdir("stats")) { + write_to_data_subdir("stats", "exit-stats", str, "exit port statistics"); + } + + done: + tor_free(str); + return start_of_exit_stats_interval + WRITE_STATS_INTERVAL; +} + +/** Note that we wrote <b>num_written</b> bytes and read <b>num_read</b> + * bytes to/from an exit connection to <b>port</b>. */ +void +rep_hist_note_exit_bytes(uint16_t port, size_t num_written, + size_t num_read) +{ + if (!start_of_exit_stats_interval) + return; /* Not initialized. */ + exit_bytes_written[port] += num_written; + exit_bytes_read[port] += num_read; + log_debug(LD_HIST, "Written %lu bytes and read %lu bytes to/from an " + "exit connection to port %d.", + (unsigned long)num_written, (unsigned long)num_read, port); +} + +/** Note that we opened an exit stream to <b>port</b>. */ +void +rep_hist_note_exit_stream_opened(uint16_t port) +{ + if (!start_of_exit_stats_interval) + return; /* Not initialized. */ + exit_streams[port]++; + log_debug(LD_HIST, "Opened exit stream to port %d", port); +} + +/*** cell statistics ***/ + +/** Start of the current buffer stats interval or 0 if we're not + * collecting buffer statistics. */ +static time_t start_of_buffer_stats_interval; + +/** Initialize buffer stats. */ +void +rep_hist_buffer_stats_init(time_t now) +{ + start_of_buffer_stats_interval = now; +} + +/** Statistics from a single circuit. Collected when the circuit closes, or + * when we flush statistics to disk. */ +typedef struct circ_buffer_stats_t { + /** Average number of cells in the circuit's queue */ + double mean_num_cells_in_queue; + /** Average time a cell waits in the queue. */ + double mean_time_cells_in_queue; + /** Total number of cells sent over this circuit */ + uint32_t processed_cells; +} circ_buffer_stats_t; + +/** List of circ_buffer_stats_t. 
*/ +static smartlist_t *circuits_for_buffer_stats = NULL; + +/** Remember cell statistics <b>mean_num_cells_in_queue</b>, + * <b>mean_time_cells_in_queue</b>, and <b>processed_cells</b> of a + * circuit. */ +void +rep_hist_add_buffer_stats(double mean_num_cells_in_queue, + double mean_time_cells_in_queue, uint32_t processed_cells) +{ + circ_buffer_stats_t *stats; + if (!start_of_buffer_stats_interval) + return; /* Not initialized. */ + stats = tor_malloc_zero(sizeof(circ_buffer_stats_t)); + stats->mean_num_cells_in_queue = mean_num_cells_in_queue; + stats->mean_time_cells_in_queue = mean_time_cells_in_queue; + stats->processed_cells = processed_cells; + if (!circuits_for_buffer_stats) + circuits_for_buffer_stats = smartlist_new(); + smartlist_add(circuits_for_buffer_stats, stats); +} + +/** Remember cell statistics for circuit <b>circ</b> at time + * <b>end_of_interval</b> and reset cell counters in case the circuit + * remains open in the next measurement interval. */ +void +rep_hist_buffer_stats_add_circ(circuit_t *circ, time_t end_of_interval) +{ + time_t start_of_interval; + int interval_length; + or_circuit_t *orcirc; + double mean_num_cells_in_queue, mean_time_cells_in_queue; + uint32_t processed_cells; + if (CIRCUIT_IS_ORIGIN(circ)) + return; + orcirc = TO_OR_CIRCUIT(circ); + if (!orcirc->processed_cells) + return; + start_of_interval = (circ->timestamp_created.tv_sec > + start_of_buffer_stats_interval) ? 
+ (time_t)circ->timestamp_created.tv_sec : + start_of_buffer_stats_interval; + interval_length = (int) (end_of_interval - start_of_interval); + if (interval_length <= 0) + return; + processed_cells = orcirc->processed_cells; + /* 1000.0 for s -> ms; 2.0 because of app-ward and exit-ward queues */ + mean_num_cells_in_queue = (double) orcirc->total_cell_waiting_time / + (double) interval_length / 1000.0 / 2.0; + mean_time_cells_in_queue = + (double) orcirc->total_cell_waiting_time / + (double) orcirc->processed_cells; + orcirc->total_cell_waiting_time = 0; + orcirc->processed_cells = 0; + rep_hist_add_buffer_stats(mean_num_cells_in_queue, + mean_time_cells_in_queue, + processed_cells); +} + +/** Sorting helper: return -1, 1, or 0 based on comparison of two + * circ_buffer_stats_t */ +static int +buffer_stats_compare_entries_(const void **_a, const void **_b) +{ + const circ_buffer_stats_t *a = *_a, *b = *_b; + if (a->processed_cells < b->processed_cells) + return 1; + else if (a->processed_cells > b->processed_cells) + return -1; + else + return 0; +} + +/** Stop collecting cell stats in a way that we can re-start doing so in + * rep_hist_buffer_stats_init(). */ +void +rep_hist_buffer_stats_term(void) +{ + rep_hist_reset_buffer_stats(0); +} + +/** Clear history of circuit statistics and set the measurement interval + * start to <b>now</b>. */ +void +rep_hist_reset_buffer_stats(time_t now) +{ + if (!circuits_for_buffer_stats) + circuits_for_buffer_stats = smartlist_new(); + SMARTLIST_FOREACH(circuits_for_buffer_stats, circ_buffer_stats_t *, + stats, tor_free(stats)); + smartlist_clear(circuits_for_buffer_stats); + start_of_buffer_stats_interval = now; +} + +/** Return a newly allocated string containing the buffer statistics until + * <b>now</b>, or NULL if we're not collecting buffer stats. Caller must + * ensure start_of_buffer_stats_interval is in the past. 
*/ +char * +rep_hist_format_buffer_stats(time_t now) +{ +#define SHARES 10 + uint64_t processed_cells[SHARES]; + uint32_t circs_in_share[SHARES]; + int number_of_circuits, i; + double queued_cells[SHARES], time_in_queue[SHARES]; + smartlist_t *processed_cells_strings, *queued_cells_strings, + *time_in_queue_strings; + char *processed_cells_string, *queued_cells_string, + *time_in_queue_string; + char t[ISO_TIME_LEN+1]; + char *result; + + if (!start_of_buffer_stats_interval) + return NULL; /* Not initialized. */ + + tor_assert(now >= start_of_buffer_stats_interval); + + /* Calculate deciles if we saw at least one circuit. */ + memset(processed_cells, 0, SHARES * sizeof(uint64_t)); + memset(circs_in_share, 0, SHARES * sizeof(uint32_t)); + memset(queued_cells, 0, SHARES * sizeof(double)); + memset(time_in_queue, 0, SHARES * sizeof(double)); + if (!circuits_for_buffer_stats) + circuits_for_buffer_stats = smartlist_new(); + number_of_circuits = smartlist_len(circuits_for_buffer_stats); + if (number_of_circuits > 0) { + smartlist_sort(circuits_for_buffer_stats, + buffer_stats_compare_entries_); + i = 0; + SMARTLIST_FOREACH_BEGIN(circuits_for_buffer_stats, + circ_buffer_stats_t *, stats) + { + int share = i++ * SHARES / number_of_circuits; + processed_cells[share] += stats->processed_cells; + queued_cells[share] += stats->mean_num_cells_in_queue; + time_in_queue[share] += stats->mean_time_cells_in_queue; + circs_in_share[share]++; + } + SMARTLIST_FOREACH_END(stats); + } + + /* Write deciles to strings. */ + processed_cells_strings = smartlist_new(); + queued_cells_strings = smartlist_new(); + time_in_queue_strings = smartlist_new(); + for (i = 0; i < SHARES; i++) { + smartlist_add_asprintf(processed_cells_strings, + "%"PRIu64, !circs_in_share[i] ? 0 : + (processed_cells[i] / + circs_in_share[i])); + } + for (i = 0; i < SHARES; i++) { + smartlist_add_asprintf(queued_cells_strings, "%.2f", + circs_in_share[i] == 0 ? 
0.0 : + queued_cells[i] / (double) circs_in_share[i]); + } + for (i = 0; i < SHARES; i++) { + smartlist_add_asprintf(time_in_queue_strings, "%.0f", + circs_in_share[i] == 0 ? 0.0 : + time_in_queue[i] / (double) circs_in_share[i]); + } + + /* Join all observations in single strings. */ + processed_cells_string = smartlist_join_strings(processed_cells_strings, + ",", 0, NULL); + queued_cells_string = smartlist_join_strings(queued_cells_strings, + ",", 0, NULL); + time_in_queue_string = smartlist_join_strings(time_in_queue_strings, + ",", 0, NULL); + SMARTLIST_FOREACH(processed_cells_strings, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(queued_cells_strings, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(time_in_queue_strings, char *, cp, tor_free(cp)); + smartlist_free(processed_cells_strings); + smartlist_free(queued_cells_strings); + smartlist_free(time_in_queue_strings); + + /* Put everything together. */ + format_iso_time(t, now); + tor_asprintf(&result, "cell-stats-end %s (%d s)\n" + "cell-processed-cells %s\n" + "cell-queued-cells %s\n" + "cell-time-in-queue %s\n" + "cell-circuits-per-decile %d\n", + t, (unsigned) (now - start_of_buffer_stats_interval), + processed_cells_string, + queued_cells_string, + time_in_queue_string, + CEIL_DIV(number_of_circuits, SHARES)); + tor_free(processed_cells_string); + tor_free(queued_cells_string); + tor_free(time_in_queue_string); + return result; +#undef SHARES +} + +/** If 24 hours have passed since the beginning of the current buffer + * stats period, write buffer stats to $DATADIR/stats/buffer-stats + * (possibly overwriting an existing file) and reset counters. Return + * when we would next want to write buffer stats or 0 if we never want to + * write. */ +time_t +rep_hist_buffer_stats_write(time_t now) +{ + char *str = NULL; + + if (!start_of_buffer_stats_interval) + return 0; /* Not initialized. 
*/ + if (start_of_buffer_stats_interval + WRITE_STATS_INTERVAL > now) + goto done; /* Not ready to write */ + + /* Add open circuits to the history. */ + SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, circ) { + rep_hist_buffer_stats_add_circ(circ, now); + } + SMARTLIST_FOREACH_END(circ); + + /* Generate history string. */ + str = rep_hist_format_buffer_stats(now); + + /* Reset both buffer history and counters of open circuits. */ + rep_hist_reset_buffer_stats(now); + + /* Try to write to disk. */ + if (!check_or_create_data_subdir("stats")) { + write_to_data_subdir("stats", "buffer-stats", str, "buffer statistics"); + } + + done: + tor_free(str); + return start_of_buffer_stats_interval + WRITE_STATS_INTERVAL; +} + +/*** Descriptor serving statistics ***/ + +/** Digestmap to track which descriptors were downloaded this stats + * collection interval. It maps descriptor digest to pointers to 1, + * effectively turning this into a list. */ +static digestmap_t *served_descs = NULL; + +/** Number of how many descriptors were downloaded in total during this + * interval. */ +static unsigned long total_descriptor_downloads; + +/** Start time of served descs stats or 0 if we're not collecting those. */ +static time_t start_of_served_descs_stats_interval; + +/** Initialize descriptor stats. */ +void +rep_hist_desc_stats_init(time_t now) +{ + if (served_descs) { + log_warn(LD_BUG, "Called rep_hist_desc_stats_init() when desc stats were " + "already initialized. This is probably harmless."); + return; // Already initialized + } + served_descs = digestmap_new(); + total_descriptor_downloads = 0; + start_of_served_descs_stats_interval = now; +} + +/** Reset served descs stats to empty, starting a new interval <b>now</b>. */ +static void +rep_hist_reset_desc_stats(time_t now) +{ + rep_hist_desc_stats_term(); + rep_hist_desc_stats_init(now); +} + +/** Stop collecting served descs stats, so that rep_hist_desc_stats_init() is + * safe to be called again. 
*/ +void +rep_hist_desc_stats_term(void) +{ + digestmap_free(served_descs, NULL); + served_descs = NULL; + start_of_served_descs_stats_interval = 0; + total_descriptor_downloads = 0; +} + +/** Helper for rep_hist_desc_stats_write(). Return a newly allocated string + * containing the served desc statistics until now, or NULL if we're not + * collecting served desc stats. Caller must ensure that now is not before + * start_of_served_descs_stats_interval. */ +static char * +rep_hist_format_desc_stats(time_t now) +{ + char t[ISO_TIME_LEN+1]; + char *result; + + digestmap_iter_t *iter; + const char *key; + void *val; + unsigned size; + int *vals, max = 0, q3 = 0, md = 0, q1 = 0, min = 0; + int n = 0; + + if (!start_of_served_descs_stats_interval) + return NULL; + + size = digestmap_size(served_descs); + if (size > 0) { + vals = tor_calloc(size, sizeof(int)); + for (iter = digestmap_iter_init(served_descs); + !digestmap_iter_done(iter); + iter = digestmap_iter_next(served_descs, iter)) { + uintptr_t count; + digestmap_iter_get(iter, &key, &val); + count = (uintptr_t)val; + vals[n++] = (int)count; + (void)key; + } + max = find_nth_int(vals, size, size-1); + q3 = find_nth_int(vals, size, (3*size-1)/4); + md = find_nth_int(vals, size, (size-1)/2); + q1 = find_nth_int(vals, size, (size-1)/4); + min = find_nth_int(vals, size, 0); + tor_free(vals); + } + + format_iso_time(t, now); + + tor_asprintf(&result, + "served-descs-stats-end %s (%d s) total=%lu unique=%u " + "max=%d q3=%d md=%d q1=%d min=%d\n", + t, + (unsigned) (now - start_of_served_descs_stats_interval), + total_descriptor_downloads, + size, max, q3, md, q1, min); + + return result; +} + +/** If WRITE_STATS_INTERVAL seconds have passed since the beginning of + * the current served desc stats interval, write the stats to + * $DATADIR/stats/served-desc-stats (possibly appending to an existing file) + * and reset the state for the next interval. 
Return when we would next want + * to write served desc stats or 0 if we won't want to write. */ +time_t +rep_hist_desc_stats_write(time_t now) +{ + char *filename = NULL, *str = NULL; + + if (!start_of_served_descs_stats_interval) + return 0; /* We're not collecting stats. */ + if (start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL > now) + return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL; + + str = rep_hist_format_desc_stats(now); + tor_assert(str != NULL); + + if (check_or_create_data_subdir("stats") < 0) { + goto done; + } + filename = get_datadir_fname2("stats", "served-desc-stats"); + if (append_bytes_to_file(filename, str, strlen(str), 0) < 0) + log_warn(LD_HIST, "Unable to write served descs statistics to disk!"); + + rep_hist_reset_desc_stats(now); + + done: + tor_free(filename); + tor_free(str); + return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL; +} + +/** Called to note that we've served a given descriptor (by + * digest). Increments the count of descriptors served, and the number + * of times we've served this descriptor. */ +void +rep_hist_note_desc_served(const char * desc) +{ + void *val; + uintptr_t count; + if (!served_descs) + return; // We're not collecting stats + val = digestmap_get(served_descs, desc); + count = (uintptr_t)val; + if (count != INT_MAX) + ++count; + digestmap_set(served_descs, desc, (void*)count); + total_descriptor_downloads++; +} + +/*** Connection statistics ***/ + +/** Start of the current connection stats interval or 0 if we're not + * collecting connection statistics. */ +static time_t start_of_conn_stats_interval; + +/** Initialize connection stats. */ +void +rep_hist_conn_stats_init(time_t now) +{ + start_of_conn_stats_interval = now; +} + +/* Count connections that we read and wrote less than these many bytes + * from/to as below threshold. 
*/ +#define BIDI_THRESHOLD 20480 + +/* Count connections that we read or wrote at least this factor as many + * bytes from/to than we wrote or read to/from as mostly reading or + * writing. */ +#define BIDI_FACTOR 10 + +/* Interval length in seconds for considering read and written bytes for + * connection stats. */ +#define BIDI_INTERVAL 10 + +/** Start of next BIDI_INTERVAL second interval. */ +static time_t bidi_next_interval = 0; + +/** Number of connections that we read and wrote less than BIDI_THRESHOLD + * bytes from/to in BIDI_INTERVAL seconds. */ +static uint32_t below_threshold = 0; + +/** Number of connections that we read at least BIDI_FACTOR times more + * bytes from than we wrote to in BIDI_INTERVAL seconds. */ +static uint32_t mostly_read = 0; + +/** Number of connections that we wrote at least BIDI_FACTOR times more + * bytes to than we read from in BIDI_INTERVAL seconds. */ +static uint32_t mostly_written = 0; + +/** Number of connections that we read and wrote at least BIDI_THRESHOLD + * bytes from/to, but not BIDI_FACTOR times more in either direction in + * BIDI_INTERVAL seconds. */ +static uint32_t both_read_and_written = 0; + +/** Entry in a map from connection ID to the number of read and written + * bytes on this connection in a BIDI_INTERVAL second interval. */ +typedef struct bidi_map_entry_t { + HT_ENTRY(bidi_map_entry_t) node; + uint64_t conn_id; /**< Connection ID */ + size_t read; /**< Number of read bytes */ + size_t written; /**< Number of written bytes */ +} bidi_map_entry_t; + +/** Map of OR connections together with the number of read and written + * bytes in the current BIDI_INTERVAL second interval. 
*/ +static HT_HEAD(bidimap, bidi_map_entry_t) bidi_map = + HT_INITIALIZER(); + +/** Hash-table equality helper: return nonzero iff <b>a</b> and <b>b</b> + * carry the same conn_id. */ +static int +bidi_map_ent_eq(const bidi_map_entry_t *a, const bidi_map_entry_t *b) +{ + return a->conn_id == b->conn_id; +} + +/** Hash-table hash helper: return the conn_id of <b>entry</b> cast to + * unsigned. */ +static unsigned +bidi_map_ent_hash(const bidi_map_entry_t *entry) +{ + return (unsigned) entry->conn_id; +} + +HT_PROTOTYPE(bidimap, bidi_map_entry_t, node, bidi_map_ent_hash, + bidi_map_ent_eq) +HT_GENERATE2(bidimap, bidi_map_entry_t, node, bidi_map_ent_hash, + bidi_map_ent_eq, 0.6, tor_reallocarray_, tor_free_) + +/** Remove every entry from bidi_map, releasing each one with tor_free(), + * and then call HT_CLEAR() on the map. */ +static void +bidi_map_free_all(void) +{ + bidi_map_entry_t **ptr, **next, *ent; + for (ptr = HT_START(bidimap, &bidi_map); ptr; ptr = next) { + ent = *ptr; + next = HT_NEXT_RMV(bidimap, &bidi_map, ptr); + tor_free(ent); + } + HT_CLEAR(bidimap, &bidi_map); +} + +/** Reset counters for conn statistics. */ +void +rep_hist_reset_conn_stats(time_t now) +{ + start_of_conn_stats_interval = now; + below_threshold = 0; + mostly_read = 0; + mostly_written = 0; + both_read_and_written = 0; + bidi_map_free_all(); +} + +/** Stop collecting connection stats in a way that we can re-start doing + * so in rep_hist_conn_stats_init(). */ +void +rep_hist_conn_stats_term(void) +{ + rep_hist_reset_conn_stats(0); +} + +/** We read <b>num_read</b> bytes and wrote <b>num_written</b> from/to OR + * connection <b>conn_id</b> in second <b>when</b>. If this is the first + * observation in a new interval, sum up the last observations. Add bytes + * for this connection. 
*/ +void +rep_hist_note_or_conn_bytes(uint64_t conn_id, size_t num_read, + size_t num_written, time_t when) +{ + if (!start_of_conn_stats_interval) + return; + /* Initialize */ + if (bidi_next_interval == 0) + bidi_next_interval = when + BIDI_INTERVAL; + /* Sum up last period's statistics */ + if (when >= bidi_next_interval) { + bidi_map_entry_t **ptr, **next, *ent; + for (ptr = HT_START(bidimap, &bidi_map); ptr; ptr = next) { + ent = *ptr; + if (ent->read + ent->written < BIDI_THRESHOLD) + below_threshold++; + else if (ent->read >= ent->written * BIDI_FACTOR) + mostly_read++; + else if (ent->written >= ent->read * BIDI_FACTOR) + mostly_written++; + else + both_read_and_written++; + next = HT_NEXT_RMV(bidimap, &bidi_map, ptr); + tor_free(ent); + } + while (when >= bidi_next_interval) + bidi_next_interval += BIDI_INTERVAL; + log_info(LD_GENERAL, "%d below threshold, %d mostly read, " + "%d mostly written, %d both read and written.", + below_threshold, mostly_read, mostly_written, + both_read_and_written); + } + /* Add this connection's bytes. */ + if (num_read > 0 || num_written > 0) { + bidi_map_entry_t *entry, lookup; + lookup.conn_id = conn_id; + entry = HT_FIND(bidimap, &bidi_map, &lookup); + if (entry) { + entry->written += num_written; + entry->read += num_read; + } else { + entry = tor_malloc_zero(sizeof(bidi_map_entry_t)); + entry->conn_id = conn_id; + entry->written = num_written; + entry->read = num_read; + HT_INSERT(bidimap, &bidi_map, entry); + } + } +} + +/** Return a newly allocated string containing the connection statistics + * until <b>now</b>, or NULL if we're not collecting conn stats. Caller must + * ensure start_of_conn_stats_interval is in the past. */ +char * +rep_hist_format_conn_stats(time_t now) +{ + char *result, written[ISO_TIME_LEN+1]; + + if (!start_of_conn_stats_interval) + return NULL; /* Not initialized. 
*/ + + tor_assert(now >= start_of_conn_stats_interval); + + format_iso_time(written, now); + tor_asprintf(&result, "conn-bi-direct %s (%d s) %d,%d,%d,%d\n", + written, + (unsigned) (now - start_of_conn_stats_interval), + below_threshold, + mostly_read, + mostly_written, + both_read_and_written); + return result; +} + +/** If 24 hours have passed since the beginning of the current conn stats + * period, write conn stats to $DATADIR/stats/conn-stats (possibly + * overwriting an existing file) and reset counters. Return when we would + * next want to write conn stats or 0 if we never want to write. */ +time_t +rep_hist_conn_stats_write(time_t now) +{ + char *str = NULL; + + if (!start_of_conn_stats_interval) + return 0; /* Not initialized. */ + if (start_of_conn_stats_interval + WRITE_STATS_INTERVAL > now) + goto done; /* Not ready to write */ + + /* Generate history string. */ + str = rep_hist_format_conn_stats(now); + + /* Reset counters. */ + rep_hist_reset_conn_stats(now); + + /* Try to write to disk. */ + if (!check_or_create_data_subdir("stats")) { + write_to_data_subdir("stats", "conn-stats", str, "connection statistics"); + } + + done: + tor_free(str); + return start_of_conn_stats_interval + WRITE_STATS_INTERVAL; +} + +/** Internal statistics to track how many requests of each type of + * handshake we've received, and how many we've assigned to cpuworkers. + * Useful for seeing trends in cpu load. + * @{ */ +STATIC int onion_handshakes_requested[MAX_ONION_HANDSHAKE_TYPE+1] = {0}; +STATIC int onion_handshakes_assigned[MAX_ONION_HANDSHAKE_TYPE+1] = {0}; +/**@}*/ + +/** A new onionskin (using the <b>type</b> handshake) has arrived. */ +void +rep_hist_note_circuit_handshake_requested(uint16_t type) +{ + if (type <= MAX_ONION_HANDSHAKE_TYPE) + onion_handshakes_requested[type]++; +} + +/** We've sent an onionskin (using the <b>type</b> handshake) to a + * cpuworker. 
*/ +void +rep_hist_note_circuit_handshake_assigned(uint16_t type) +{ + if (type <= MAX_ONION_HANDSHAKE_TYPE) + onion_handshakes_assigned[type]++; +} + +/** Log our onionskin statistics since the last time we were called. */ +void +rep_hist_log_circuit_handshake_stats(time_t now) +{ + (void)now; + log_notice(LD_HEARTBEAT, "Circuit handshake stats since last time: " + "%d/%d TAP, %d/%d NTor.", + onion_handshakes_assigned[ONION_HANDSHAKE_TYPE_TAP], + onion_handshakes_requested[ONION_HANDSHAKE_TYPE_TAP], + onion_handshakes_assigned[ONION_HANDSHAKE_TYPE_NTOR], + onion_handshakes_requested[ONION_HANDSHAKE_TYPE_NTOR]); + memset(onion_handshakes_assigned, 0, sizeof(onion_handshakes_assigned)); + memset(onion_handshakes_requested, 0, sizeof(onion_handshakes_requested)); +} + +/* Hidden service statistics section */ + +/** Start of the current hidden service stats interval or 0 if we're + * not collecting hidden service statistics. */ +static time_t start_of_hs_stats_interval; + +/** Carries the various hidden service statistics, and any other + * information needed. */ +typedef struct hs_stats_t { + /** How many relay cells have we seen as rendezvous points? */ + uint64_t rp_relay_cells_seen; + + /** Set of unique public key digests we've seen this stat period + * (could also be implemented as sorted smartlist). */ + digestmap_t *onions_seen_this_period; +} hs_stats_t; + +/** Our statistics structure singleton. */ +static hs_stats_t *hs_stats = NULL; + +/** Allocate, initialize and return an hs_stats_t structure. */ +static hs_stats_t * +hs_stats_new(void) +{ + hs_stats_t *new_hs_stats = tor_malloc_zero(sizeof(hs_stats_t)); + new_hs_stats->onions_seen_this_period = digestmap_new(); + + return new_hs_stats; +} + +#define hs_stats_free(val) \ + FREE_AND_NULL(hs_stats_t, hs_stats_free_, (val)) + +/** Free an hs_stats_t structure. 
*/ +static void +hs_stats_free_(hs_stats_t *victim_hs_stats) +{ + if (!victim_hs_stats) { + return; + } + + digestmap_free(victim_hs_stats->onions_seen_this_period, NULL); + tor_free(victim_hs_stats); +} + +/** Initialize hidden service statistics. */ +void +rep_hist_hs_stats_init(time_t now) +{ + if (!hs_stats) { + hs_stats = hs_stats_new(); + } + + start_of_hs_stats_interval = now; +} + +/** Clear history of hidden service statistics and set the measurement + * interval start to <b>now</b>. */ +static void +rep_hist_reset_hs_stats(time_t now) +{ + if (!hs_stats) { + hs_stats = hs_stats_new(); + } + + hs_stats->rp_relay_cells_seen = 0; + + digestmap_free(hs_stats->onions_seen_this_period, NULL); + hs_stats->onions_seen_this_period = digestmap_new(); + + start_of_hs_stats_interval = now; +} + +/** Stop collecting hidden service stats in a way that we can re-start + * doing so in rep_hist_hs_stats_init(). */ +void +rep_hist_hs_stats_term(void) +{ + rep_hist_reset_hs_stats(0); +} + +/** We saw a new HS relay cell: count it! */ +void +rep_hist_seen_new_rp_cell(void) +{ + if (!hs_stats) { + return; // We're not collecting stats + } + + hs_stats->rp_relay_cells_seen++; +} + +/** As HSDirs, we saw another hidden service with public key + * <b>pubkey</b>. Check whether we have counted it before; if not, + * count it now! */ +void +rep_hist_stored_maybe_new_hs(const crypto_pk_t *pubkey) +{ + char pubkey_hash[DIGEST_LEN]; + + if (!hs_stats) { + return; // We're not collecting stats + } + + /* Get the digest of the pubkey which will be used to detect whether + we've seen this hidden service before or not. */ + if (crypto_pk_get_digest(pubkey, pubkey_hash) < 0) { + /* This failure should not happen; key has been validated by + descriptor parsing code first. */ + return; + } + + /* Check if this is the first time we've seen this hidden + service. If it is, count it as new. 
*/ + if (!digestmap_get(hs_stats->onions_seen_this_period, + pubkey_hash)) { + digestmap_set(hs_stats->onions_seen_this_period, + pubkey_hash, (void*)(uintptr_t)1); + } +} + +/* The number of cells that are supposed to be hidden from the adversary + * by adding noise from the Laplace distribution. This value, divided by + * REND_CELLS_EPSILON, is Laplace parameter b. It must be greater than 0. */ +#define REND_CELLS_DELTA_F 2048 +/* Security parameter for obfuscating number of cells with a value between + * ]0.0, 1.0]. Smaller values obfuscate observations more, but at the same + * time make statistics less usable. */ +#define REND_CELLS_EPSILON 0.3 +/* The number of cells that are supposed to be hidden from the adversary + * by rounding up to the next multiple of this number. */ +#define REND_CELLS_BIN_SIZE 1024 +/* The number of service identities that are supposed to be hidden from the + * adversary by adding noise from the Laplace distribution. This value, + * divided by ONIONS_SEEN_EPSILON, is Laplace parameter b. It must be + * greater than 0. */ +#define ONIONS_SEEN_DELTA_F 8 +/* Security parameter for obfuscating number of service identities with a + * value between ]0.0, 1.0]. Smaller values obfuscate observations more, but + * at the same time make statistics less usable. */ +#define ONIONS_SEEN_EPSILON 0.3 +/* The number of service identities that are supposed to be hidden from + * the adversary by rounding up to the next multiple of this number. */ +#define ONIONS_SEEN_BIN_SIZE 8 + +/** Allocate and return a string containing hidden service stats that + * are meant to be placed in the extra-info descriptor. 
*/ +static char * +rep_hist_format_hs_stats(time_t now) +{ + char t[ISO_TIME_LEN+1]; + char *hs_stats_string; + int64_t obfuscated_cells_seen; + int64_t obfuscated_onions_seen; + + /* Round the relay cell count up to a multiple of REND_CELLS_BIN_SIZE, + * clamp it to INT64_MAX before the cast to int64_t, then add Laplace + * noise. NOTE(review): hs_stats is dereferenced without a NULL check; + * presumably callers only get here after rep_hist_hs_stats_init() -- + * confirm. */ + uint64_t rounded_cells_seen + = round_uint64_to_next_multiple_of(hs_stats->rp_relay_cells_seen, + REND_CELLS_BIN_SIZE); + rounded_cells_seen = MIN(rounded_cells_seen, INT64_MAX); + obfuscated_cells_seen = add_laplace_noise((int64_t)rounded_cells_seen, + crypto_rand_double(), + REND_CELLS_DELTA_F, REND_CELLS_EPSILON); + + /* Same round/clamp/noise sequence for the number of entries in + * onions_seen_this_period, using the ONIONS_SEEN_* parameters. */ + uint64_t rounded_onions_seen = + round_uint64_to_next_multiple_of((size_t)digestmap_size( + hs_stats->onions_seen_this_period), + ONIONS_SEEN_BIN_SIZE); + rounded_onions_seen = MIN(rounded_onions_seen, INT64_MAX); + obfuscated_onions_seen = add_laplace_noise((int64_t)rounded_onions_seen, + crypto_rand_double(), ONIONS_SEEN_DELTA_F, + ONIONS_SEEN_EPSILON); + + format_iso_time(t, now); + tor_asprintf(&hs_stats_string, "hidserv-stats-end %s (%d s)\n" + "hidserv-rend-relayed-cells %"PRId64" delta_f=%d " + "epsilon=%.2f bin_size=%d\n" + "hidserv-dir-onions-seen %"PRId64" delta_f=%d " + "epsilon=%.2f bin_size=%d\n", + t, (unsigned) (now - start_of_hs_stats_interval), + (obfuscated_cells_seen), REND_CELLS_DELTA_F, + REND_CELLS_EPSILON, REND_CELLS_BIN_SIZE, + (obfuscated_onions_seen), + ONIONS_SEEN_DELTA_F, + ONIONS_SEEN_EPSILON, ONIONS_SEEN_BIN_SIZE); + + return hs_stats_string; +} + +/** If 24 hours have passed since the beginning of the current HS + * stats period, write hidden service stats to + * $DATADIR/stats/hidserv-stats (possibly overwriting an existing file) + * and reset counters. Return when we would next want to write hidden + * service stats or 0 if we never want to write. */ +time_t +rep_hist_hs_stats_write(time_t now) +{ + char *str = NULL; + + if (!start_of_hs_stats_interval) { + return 0; /* Not initialized. */ + } + + if (start_of_hs_stats_interval + WRITE_STATS_INTERVAL > now) { + goto done; /* Not ready to write */ + } + + /* Generate history string. 
*/ + str = rep_hist_format_hs_stats(now); + + /* Reset HS history. */ + rep_hist_reset_hs_stats(now); + + /* Try to write to disk. */ + if (!check_or_create_data_subdir("stats")) { + write_to_data_subdir("stats", "hidserv-stats", str, + "hidden service stats"); + } + + done: + tor_free(str); + return start_of_hs_stats_interval + WRITE_STATS_INTERVAL; +} + +static uint64_t link_proto_count[MAX_LINK_PROTO+1][2]; + +/** Note that we negotiated link protocol version <b>link_proto</b>, on + * a connection that started here iff <b>started_here</b> is true. + */ +void +rep_hist_note_negotiated_link_proto(unsigned link_proto, int started_here) +{ + started_here = !!started_here; /* force to 0 or 1 */ + if (link_proto > MAX_LINK_PROTO) { + log_warn(LD_BUG, "Can't log link protocol %u", link_proto); + return; + } + + link_proto_count[link_proto][started_here]++; +} + +/** + * Update the maximum count of total pending channel padding timers + * in this period. + */ +void +rep_hist_padding_count_timers(uint64_t num_timers) +{ + if (num_timers > padding_current.maximum_chanpad_timers) { + padding_current.maximum_chanpad_timers = num_timers; + } +} + +/** + * Count a cell that we sent for padding overhead statistics. + * + * RELAY_COMMAND_DROP and CELL_PADDING are accounted separately. Both should be + * counted for PADDING_TYPE_TOTAL. + */ +void +rep_hist_padding_count_write(padding_type_t type) +{ + switch (type) { + case PADDING_TYPE_DROP: + padding_current.write_drop_cell_count++; + break; + case PADDING_TYPE_CELL: + padding_current.write_pad_cell_count++; + break; + case PADDING_TYPE_TOTAL: + padding_current.write_cell_count++; + break; + case PADDING_TYPE_ENABLED_TOTAL: + padding_current.enabled_write_cell_count++; + break; + case PADDING_TYPE_ENABLED_CELL: + padding_current.enabled_write_pad_cell_count++; + break; + } +} + +/** + * Count a cell that we've received for padding overhead statistics. + * + * RELAY_COMMAND_DROP and CELL_PADDING are accounted separately. 
Both should be + * counted for PADDING_TYPE_TOTAL. + */ +void +rep_hist_padding_count_read(padding_type_t type) +{ + switch (type) { + case PADDING_TYPE_DROP: + padding_current.read_drop_cell_count++; + break; + case PADDING_TYPE_CELL: + padding_current.read_pad_cell_count++; + break; + case PADDING_TYPE_TOTAL: + padding_current.read_cell_count++; + break; + case PADDING_TYPE_ENABLED_TOTAL: + padding_current.enabled_read_cell_count++; + break; + case PADDING_TYPE_ENABLED_CELL: + padding_current.enabled_read_pad_cell_count++; + break; + } +} + +/** + * Reset our current padding statistics. Called once every 24 hours. + */ +void +rep_hist_reset_padding_counts(void) +{ + memset(&padding_current, 0, sizeof(padding_current)); +} + +/** + * Copy our current cell counts into a structure for listing in our + * extra-info descriptor. Also perform appropriate rounding and redaction. + * + * This function is called once every 24 hours. + */ +#define MIN_CELL_COUNTS_TO_PUBLISH 1 +#define ROUND_CELL_COUNTS_TO 10000 +void +rep_hist_prep_published_padding_counts(time_t now) +{ + memcpy(&padding_published, &padding_current, sizeof(padding_published)); + + if (padding_published.read_cell_count < MIN_CELL_COUNTS_TO_PUBLISH || + padding_published.write_cell_count < MIN_CELL_COUNTS_TO_PUBLISH) { + memset(&padding_published, 0, sizeof(padding_published)); + return; + } + + format_iso_time(padding_published.first_published_at, now); +#define ROUND_AND_SET_COUNT(x) (x) = round_uint64_to_next_multiple_of((x), \ + ROUND_CELL_COUNTS_TO) + ROUND_AND_SET_COUNT(padding_published.read_pad_cell_count); + ROUND_AND_SET_COUNT(padding_published.write_pad_cell_count); + ROUND_AND_SET_COUNT(padding_published.read_drop_cell_count); + ROUND_AND_SET_COUNT(padding_published.write_drop_cell_count); + ROUND_AND_SET_COUNT(padding_published.write_cell_count); + ROUND_AND_SET_COUNT(padding_published.read_cell_count); + ROUND_AND_SET_COUNT(padding_published.enabled_read_cell_count); + 
ROUND_AND_SET_COUNT(padding_published.enabled_read_pad_cell_count); + ROUND_AND_SET_COUNT(padding_published.enabled_write_cell_count); + ROUND_AND_SET_COUNT(padding_published.enabled_write_pad_cell_count); +#undef ROUND_AND_SET_COUNT +} + +/** + * Returns an allocated string for extra-info documents for publishing + * padding statistics from the last 24 hour interval. + */ +char * +rep_hist_get_padding_count_lines(void) +{ + char *result = NULL; + + if (!padding_published.read_cell_count || + !padding_published.write_cell_count) { + return NULL; + } + + tor_asprintf(&result, "padding-counts %s (%d s)" + " bin-size=%"PRIu64 + " write-drop=%"PRIu64 + " write-pad=%"PRIu64 + " write-total=%"PRIu64 + " read-drop=%"PRIu64 + " read-pad=%"PRIu64 + " read-total=%"PRIu64 + " enabled-read-pad=%"PRIu64 + " enabled-read-total=%"PRIu64 + " enabled-write-pad=%"PRIu64 + " enabled-write-total=%"PRIu64 + " max-chanpad-timers=%"PRIu64 + "\n", + padding_published.first_published_at, + REPHIST_CELL_PADDING_COUNTS_INTERVAL, + (uint64_t)ROUND_CELL_COUNTS_TO, + (padding_published.write_drop_cell_count), + (padding_published.write_pad_cell_count), + (padding_published.write_cell_count), + (padding_published.read_drop_cell_count), + (padding_published.read_pad_cell_count), + (padding_published.read_cell_count), + (padding_published.enabled_read_pad_cell_count), + (padding_published.enabled_read_cell_count), + (padding_published.enabled_write_pad_cell_count), + (padding_published.enabled_write_cell_count), + (padding_published.maximum_chanpad_timers) + ); + + return result; +} + +/** Log a heartbeat message explaining how many connections of each link + * protocol version we have used. 
+ */ +void +rep_hist_log_link_protocol_counts(void) +{ + log_notice(LD_HEARTBEAT, + "Since startup, we have initiated " + "%"PRIu64" v1 connections, " + "%"PRIu64" v2 connections, " + "%"PRIu64" v3 connections, and " + "%"PRIu64" v4 connections; and received " + "%"PRIu64" v1 connections, " + "%"PRIu64" v2 connections, " + "%"PRIu64" v3 connections, and " + "%"PRIu64" v4 connections.", + (link_proto_count[1][1]), + (link_proto_count[2][1]), + (link_proto_count[3][1]), + (link_proto_count[4][1]), + (link_proto_count[1][0]), + (link_proto_count[2][0]), + (link_proto_count[3][0]), + (link_proto_count[4][0])); +} + +/** Free all storage held by the OR/link history caches, by the + * bandwidth history arrays, by the port history, or by statistics . */ +void +rep_hist_free_all(void) +{ + hs_stats_free(hs_stats); + digestmap_free(history_map, free_or_history); + + bw_array_free(read_array); + read_array = NULL; + + bw_array_free(write_array); + write_array = NULL; + + bw_array_free(dir_read_array); + dir_read_array = NULL; + + bw_array_free(dir_write_array); + dir_write_array = NULL; + + tor_free(exit_bytes_read); + tor_free(exit_bytes_written); + tor_free(exit_streams); + predicted_ports_free_all(); + bidi_map_free_all(); + + if (circuits_for_buffer_stats) { + SMARTLIST_FOREACH(circuits_for_buffer_stats, circ_buffer_stats_t *, s, + tor_free(s)); + smartlist_free(circuits_for_buffer_stats); + circuits_for_buffer_stats = NULL; + } + rep_hist_desc_stats_term(); + total_descriptor_downloads = 0; + + tor_assert_nonfatal(rephist_total_alloc == 0); + tor_assert_nonfatal_once(rephist_total_num == 0); +} diff --cc src/feature/stats/rephist.h index 67a015a4c,000000000..e17a72248 mode 100644,000000..100644 --- a/src/feature/stats/rephist.h +++ b/src/feature/stats/rephist.h @@@ -1,140 -1,0 +1,142 @@@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. 
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file rephist.h + * \brief Header file for rephist.c. + **/ + +#ifndef TOR_REPHIST_H +#define TOR_REPHIST_H + +void rep_hist_init(void); +void rep_hist_dump_stats(time_t now, int severity); - void rep_hist_note_bytes_read(size_t num_bytes, time_t when); - void rep_hist_note_bytes_written(size_t num_bytes, time_t when); ++void rep_hist_note_bytes_read(uint64_t num_bytes, time_t when); ++void rep_hist_note_bytes_written(uint64_t num_bytes, time_t when); + +void rep_hist_make_router_pessimal(const char *id, time_t when); + - void rep_hist_note_dir_bytes_read(size_t num_bytes, time_t when); - void rep_hist_note_dir_bytes_written(size_t num_bytes, time_t when); ++void rep_hist_note_dir_bytes_read(uint64_t num_bytes, time_t when); ++void rep_hist_note_dir_bytes_written(uint64_t num_bytes, time_t when); + +MOCK_DECL(int, rep_hist_bandwidth_assess, (void)); +char *rep_hist_get_bandwidth_lines(void); +void rep_hist_update_state(or_state_t *state); +int rep_hist_load_state(or_state_t *state, char **err); +void rep_history_clean(time_t before); + +void rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, + const uint16_t at_port, time_t when); +void rep_hist_note_router_unreachable(const char *id, time_t when); +int rep_hist_record_mtbf_data(time_t now, int missing_means_down); +int rep_hist_load_mtbf_data(time_t now); + +time_t rep_hist_downrate_old_runs(time_t now); +long rep_hist_get_uptime(const char *id, time_t when); +double rep_hist_get_stability(const char *id, time_t when); +double rep_hist_get_weighted_fractional_uptime(const char *id, time_t when); +long rep_hist_get_weighted_time_known(const char *id, time_t when); +int rep_hist_have_measured_enough_stability(void); + +void predicted_ports_init(void); +void rep_hist_note_used_port(time_t now, uint16_t port); +smartlist_t *rep_hist_get_predicted_ports(time_t now); +void 
rep_hist_remove_predicted_ports(const smartlist_t *rmv_ports); +void rep_hist_note_used_resolve(time_t now); +void rep_hist_note_used_internal(time_t now, int need_uptime, + int need_capacity); +int rep_hist_get_predicted_internal(time_t now, int *need_uptime, + int *need_capacity); + +int any_predicted_circuits(time_t now); +int rep_hist_circbuilding_dormant(time_t now); +int predicted_ports_prediction_time_remaining(time_t now); + +void rep_hist_exit_stats_init(time_t now); +void rep_hist_reset_exit_stats(time_t now); +void rep_hist_exit_stats_term(void); +char *rep_hist_format_exit_stats(time_t now); +time_t rep_hist_exit_stats_write(time_t now); +void rep_hist_note_exit_bytes(uint16_t port, size_t num_written, + size_t num_read); +void rep_hist_note_exit_stream_opened(uint16_t port); + +void rep_hist_buffer_stats_init(time_t now); +void rep_hist_buffer_stats_add_circ(circuit_t *circ, + time_t end_of_interval); +time_t rep_hist_buffer_stats_write(time_t now); +void rep_hist_buffer_stats_term(void); +void rep_hist_add_buffer_stats(double mean_num_cells_in_queue, + double mean_time_cells_in_queue, uint32_t processed_cells); +char *rep_hist_format_buffer_stats(time_t now); +void rep_hist_reset_buffer_stats(time_t now); + +void rep_hist_desc_stats_init(time_t now); +void rep_hist_note_desc_served(const char * desc); +void rep_hist_desc_stats_term(void); +time_t rep_hist_desc_stats_write(time_t now); + +void rep_hist_conn_stats_init(time_t now); +void rep_hist_note_or_conn_bytes(uint64_t conn_id, size_t num_read, + size_t num_written, time_t when); +void rep_hist_reset_conn_stats(time_t now); +char *rep_hist_format_conn_stats(time_t now); +time_t rep_hist_conn_stats_write(time_t now); +void rep_hist_conn_stats_term(void); + +void rep_hist_note_circuit_handshake_requested(uint16_t type); +void rep_hist_note_circuit_handshake_assigned(uint16_t type); +void rep_hist_log_circuit_handshake_stats(time_t now); + +void rep_hist_hs_stats_init(time_t now); +void 
rep_hist_hs_stats_term(void); +time_t rep_hist_hs_stats_write(time_t now); +char *rep_hist_get_hs_stats_string(void); +void rep_hist_seen_new_rp_cell(void); +void rep_hist_stored_maybe_new_hs(const crypto_pk_t *pubkey); + +void rep_hist_free_all(void); + +void rep_hist_note_negotiated_link_proto(unsigned link_proto, + int started_here); +void rep_hist_log_link_protocol_counts(void); + +extern uint64_t rephist_total_alloc; +extern uint32_t rephist_total_num; +#ifdef TOR_UNIT_TESTS +extern int onion_handshakes_requested[MAX_ONION_HANDSHAKE_TYPE+1]; +extern int onion_handshakes_assigned[MAX_ONION_HANDSHAKE_TYPE+1]; ++typedef struct bw_array_t bw_array_t; ++extern bw_array_t *write_array; +#endif + +/** + * Represents the type of a cell for padding accounting + */ +typedef enum padding_type_t { + /** A RELAY_DROP cell */ + PADDING_TYPE_DROP, + /** A CELL_PADDING cell */ + PADDING_TYPE_CELL, + /** Total counts of padding and non-padding together */ + PADDING_TYPE_TOTAL, + /** Total cell counts for all padding-enabled channels */ + PADDING_TYPE_ENABLED_TOTAL, + /** CELL_PADDING counts for all padding-enabled channels */ + PADDING_TYPE_ENABLED_CELL +} padding_type_t; + +/** The amount of time over which the padding cell counts were counted */ +#define REPHIST_CELL_PADDING_COUNTS_INTERVAL (24*60*60) +void rep_hist_padding_count_read(padding_type_t type); +void rep_hist_padding_count_write(padding_type_t type); +char *rep_hist_get_padding_count_lines(void); +void rep_hist_reset_padding_counts(void); +void rep_hist_prep_published_padding_counts(time_t now); +void rep_hist_padding_count_timers(uint64_t num_timers); + +#endif /* !defined(TOR_REPHIST_H) */ + diff --cc src/test/test_relay.c index 25084fab3,ffb013442..65b9a2f94 --- a/src/test/test_relay.c +++ b/src/test/test_relay.c @@@ -1,21 -1,20 +1,23 @@@ -/* Copyright (c) 2014-2017, The Tor Project, Inc. */ +/* Copyright (c) 2014-2018, The Tor Project, Inc. */ /* See LICENSE for licensing information */
-#include "or.h" +#include "core/or/or.h" #define CIRCUITBUILD_PRIVATE -#include "circuitbuild.h" -#include "circuitlist.h" -#include "rephist.h" -#include "channeltls.h" +#include "core/or/circuitbuild.h" +#include "core/or/circuitlist.h" ++#include "core/or/channeltls.h" ++#include "feature/stats/rephist.h" #define RELAY_PRIVATE -#include "relay.h" +#include "core/or/relay.h" /* For init/free stuff */ -#include "scheduler.h" +#include "core/or/scheduler.h" + +#include "core/or/cell_st.h" +#include "core/or/or_circuit_st.h"
/* Test suite stuff */ -#include "test.h" -#include "fakechans.h" +#include "test/test.h" +#include "test/fakechans.h"
static or_circuit_t * new_fake_orcirc(channel_t *nchan, channel_t *pchan);
@@@ -129,5 -225,8 +228,7 @@@ test_relay_append_cell_to_circuit_queue struct testcase_t relay_tests[] = { { "append_cell_to_circuit_queue", test_relay_append_cell_to_circuit_queue, TT_FORK, NULL, NULL }, + { "close_circ_rephist", test_relay_close_circuit, + TT_FORK, NULL, NULL }, END_OF_TESTCASES }; -
tor-commits@lists.torproject.org