commit a5f07a6c2a2b937f58548680f9b87b1e75b3c8f8 Author: Nick Mathewson nickm@torproject.org Date: Wed Oct 26 11:02:57 2016 -0400
More module-level documentation. --- src/or/directory.c | 35 ++++++++++++++++++++++++++++++++--- src/or/networkstatus.c | 30 ++++++++++++++++++++++++++++-- src/or/nodelist.c | 26 ++++++++++++++++++++++++++ src/or/relay.c | 35 +++++++++++++++++++++++++++++++++++ src/or/torcert.c | 17 +++++++++++++++++ 5 files changed, 138 insertions(+), 5 deletions(-)
diff --git a/src/or/directory.c b/src/or/directory.c index a73680a..fdfb339 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -40,9 +40,38 @@
/** * \file directory.c - * \brief Code to send and fetch directories and router - * descriptors via HTTP. Directories use dirserv.c to generate the - * results; clients use routers.c to parse them. + * \brief Code to send and fetch information from directory authorities and + * caches via HTTP. + * + * Directory caches and authorities use dirserv.c to generate the results of a + * query and stream them to the connection; clients use routerparse.c to parse + * them. + * + * Every directory request has a dir_connection_t on the client side and on + * the server side. In most cases, the dir_connection_t object is a linked + * connection, tunneled through an edge_connection_t so that it can be a + * stream on the Tor network. The only non-tunneled connections are those + * that are used to upload material (descriptors and votes) to authorities. + * Among tunneled connections, some use one-hop circuits, and others use + * multi-hop circuits for anonymity. + * + * Directory requests are launched by calling + * directory_initiate_command_rend() or one of its numerous variants. This + * launch the connection, will construct an HTTP request with + * directory_send_command(), send the and wait for a response. The client + * later handles the response with connection_dir_client_reached_eof(), + * which passes the information received to another part of Tor. + * + * On the server side, requests are read in directory_handle_command(), + * which dispatches first on the request type (GET or POST), and then on + * the URL requested. GET requests are processed with a table-based + * dispatcher in url_table[]. The process of handling larger GET requests + * is complicated because we need to avoid allocating a copy of all the + * data to be sent to the client in one huge buffer. Instead, we spool the + * data into the buffer using logic in connection_dirserv_flushed_some() in + * dirserv.c. (TODO: If we extended buf.c to have a zero-copy + * reference-based buffer type, we could remove most of that code, at the + * cost of a bit more reference counting.) **/
/* In-points to directory.c: diff --git a/src/or/networkstatus.c b/src/or/networkstatus.c index f30fe16..85a73c8 100644 --- a/src/or/networkstatus.c +++ b/src/or/networkstatus.c @@ -6,8 +6,34 @@
/** * \file networkstatus.c - * \brief Functions and structures for handling network status documents as a - * client or cache. + * \brief Functions and structures for handling networkstatus documents as a + * client or as a directory cache. + * + * A consensus networkstatus object is created by the directory + * authorities. It authenticates a set of network parameters--most + * importantly, the list of all the relays in the network. This list + * of relays is represented as an array of routerstatus_t objects. + * + * There are currently two flavors of consensus. With the older "NS" + * flavor, each relay is associated with a digest of its router + * descriptor. Tor instances that use this consensus keep the list of + * router descriptors as routerinfo_t objects stored and managed in + * routerlist.c. With the newer "microdesc" flavor, each relay is + * associated with a digest of the microdescriptor that the authorities + * made for it. These are stored and managed in microdesc.c. Information + * about the router is divided between the the networkstatus and the + * microdescriptor according to the general rule that microdescriptors + * should hold information that changes much less frequently than the + * information in the networkstatus. + * + * Modern clients use microdescriptor networkstatuses. Directory caches + * need to keep both kinds of networkstatus document, so they can serve them. + * + * This module manages fetching, holding, storing, updating, and + * validating networkstatus objects. The download-and-validate process + * is slightly complicated by the fact that the keys you need to + * validate a consensus are stored in the authority certificates, which + * you might not have yet when you download the consensus. */
#define NETWORKSTATUS_PRIVATE diff --git a/src/or/nodelist.c b/src/or/nodelist.c index 070e2e9..2802d5b 100644 --- a/src/or/nodelist.c +++ b/src/or/nodelist.c @@ -10,6 +10,32 @@ * \brief Structures and functions for tracking what we know about the routers * on the Tor network, and correlating information from networkstatus, * routerinfo, and microdescs. + * + * The key structure here is node_t: that's the canonical way to refer + * to a Tor relay that we might want to build a circuit through. Every + * node_t has either a routerinfo_t, or a routerstatus_t from the current + * networkstatus consensus. If it has a routerstatus_t, it will also + * need to have a microdesc_t before you can use it for circuits. + * + * The nodelist_t is a global singleton that maps identities to node_t + * objects. Access them with the node_get_*() functions. The nodelist_t + * is maintained by calls throughout the codebase + * + * Generally, other code should not have to reach inside a node_t to + * see what information it has. Instead, you should call one of the + * many accessor functions that works on a generic node_t. If there + * isn't one that does what you need, it's better to make such a function, + * and then use it. + * + * For historical reasons, some of the functions that select a node_t + * from the list of all usable node_t objects are in the routerlist.c + * module, since they originally selected a routerinfo_t. (TODO: They + * should move!) + * + * (TODO: Perhaps someday we should abstract the remaining ways of + * talking about a relay to also be node_t instances. Those would be + * routerstatus_t as used for directory requests, and dir_server_t as + * used for authorities and fallback directories.) */
#include "or.h" diff --git a/src/or/relay.c b/src/or/relay.c index 1794215..823e074 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -8,6 +8,41 @@ * \file relay.c * \brief Handle relay cell encryption/decryption, plus packaging and * receiving from circuits, plus queuing on circuits. + * + * This is a core modules that makes Tor work. It's responsible for + * dealing with RELAY cells (the ones that travel more than one hop along a + * circuit), by: + * <ul> + * <li>constructing relays cells, + * <li>encrypting relay cells, + * <li>decrypting relay cells, + * <li>demultiplexing relay cells as they arrive on a connection, + * <li>queueing relay cells for retransmission, + * <li>or handling relay cells that are for us to receive (as an exit or a + * client). + * </ul> + * + * RELAY cells are generated throughout the code at the client or relay side, + * using relay_send_command_from_edge() or one of the functions like + * connection_edge_send_command() that calls it. Of particular interest is + * connection_edge_package_raw_inbuf(), which takes information that has + * arrived on an edge connection socket, and packages it as a RELAY_DATA cell + * -- this is how information is actually sent across the Tor network. The + * cryptography for these functions is handled deep in + * circuit_package_relay_cell(), which either adds a single layer of + * encryption (if we're an exit), or multiple layers (if we're the origin of + * the circuit). After construction and encryption, the RELAY cells are + * passed to append_cell_to_circuit_queue(), which queues them for + * transmission and tells the circuitmux (see circuitmux.c) that the circuit + * is waiting to send something. + * + * Incoming RELAY cells arrive at circuit_receive_relay_cell(), called from + * command.c. There they are decrypted and, if they are for us, are passed to + * connection_edge_process_relay_cell(). If they're not for us, they're + * re-queued for retransmission again with append_cell_to_circuit_queue(). + * + * The connection_edge_process_relay_cell() function handles all the different + * types of relay cells, launching requests or transmitting data as needed. **/
#define RELAY_PRIVATE diff --git a/src/or/torcert.c b/src/or/torcert.c index a6a33c6..37a201e 100644 --- a/src/or/torcert.c +++ b/src/or/torcert.c @@ -6,6 +6,23 @@ * * \brief Implementation for ed25519-signed certificates as used in the Tor * protocol. + * + * This certificate format is designed to be simple and compact; it's + * documented in tor-spec.txt in the torspec.git repository. All of the + * certificates in this format are signed with an Ed25519 key; the + * contents themselves may be another Ed25519 key, a digest of a + * RSA key, or some other material. + * + * In this module there is also support for a crooss-certification of + * Ed25519 identities using (older) RSA1024 identities. + * + * Tor uses other types of certificate too, beyond those described in this + * module. Notably, our use of TLS requires us to touch X.509 certificates, + * even though sensible people would stay away from those. Our X.509 + * certificates are represented with tor_x509_cert_t, and implemented in + * tortls.c. We also have a separate certificate type that authorities + * use to authenticate their RSA signing keys with their RSA identity keys: + * that one is authority_cert_t, and it's mostly handled in routerlist.c. */
#include "crypto.h"