diff --git a/src/or/directory.c b/src/or/directory.c index a73680a9fd..fdfb3391a8 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -40,9 +40,38 @@ /** * \file directory.c - * \brief Code to send and fetch directories and router - * descriptors via HTTP. Directories use dirserv.c to generate the - * results; clients use routers.c to parse them. + * \brief Code to send and fetch information from directory authorities and + * caches via HTTP. + * + * Directory caches and authorities use dirserv.c to generate the results of a + * query and stream them to the connection; clients use routerparse.c to parse + * them. + * + * Every directory request has a dir_connection_t on the client side and on + * the server side. In most cases, the dir_connection_t object is a linked + * connection, tunneled through an edge_connection_t so that it can be a + * stream on the Tor network. The only non-tunneled connections are those + * that are used to upload material (descriptors and votes) to authorities. + * Among tunneled connections, some use one-hop circuits, and others use + * multi-hop circuits for anonymity. + * + * Directory requests are launched by calling + * directory_initiate_command_rend() or one of its numerous variants. This + * launch the connection, will construct an HTTP request with + * directory_send_command(), send the and wait for a response. The client + * later handles the response with connection_dir_client_reached_eof(), + * which passes the information received to another part of Tor. + * + * On the server side, requests are read in directory_handle_command(), + * which dispatches first on the request type (GET or POST), and then on + * the URL requested. GET requests are processed with a table-based + * dispatcher in url_table[]. The process of handling larger GET requests + * is complicated because we need to avoid allocating a copy of all the + * data to be sent to the client in one huge buffer. Instead, we spool the + * data into the buffer using logic in connection_dirserv_flushed_some() in + * dirserv.c. (TODO: If we extended buf.c to have a zero-copy + * reference-based buffer type, we could remove most of that code, at the + * cost of a bit more reference counting.) **/ /* In-points to directory.c: diff --git a/src/or/networkstatus.c b/src/or/networkstatus.c index f30fe1607c..85a73c8e63 100644 --- a/src/or/networkstatus.c +++ b/src/or/networkstatus.c @@ -6,8 +6,34 @@ /** * \file networkstatus.c - * \brief Functions and structures for handling network status documents as a - * client or cache. + * \brief Functions and structures for handling networkstatus documents as a + * client or as a directory cache. + * + * A consensus networkstatus object is created by the directory + * authorities. It authenticates a set of network parameters--most + * importantly, the list of all the relays in the network. This list + * of relays is represented as an array of routerstatus_t objects. + * + * There are currently two flavors of consensus. With the older "NS" + * flavor, each relay is associated with a digest of its router + * descriptor. Tor instances that use this consensus keep the list of + * router descriptors as routerinfo_t objects stored and managed in + * routerlist.c. With the newer "microdesc" flavor, each relay is + * associated with a digest of the microdescriptor that the authorities + * made for it. These are stored and managed in microdesc.c. Information + * about the router is divided between the the networkstatus and the + * microdescriptor according to the general rule that microdescriptors + * should hold information that changes much less frequently than the + * information in the networkstatus. + * + * Modern clients use microdescriptor networkstatuses. Directory caches + * need to keep both kinds of networkstatus document, so they can serve them. + * + * This module manages fetching, holding, storing, updating, and + * validating networkstatus objects. The download-and-validate process + * is slightly complicated by the fact that the keys you need to + * validate a consensus are stored in the authority certificates, which + * you might not have yet when you download the consensus. */ #define NETWORKSTATUS_PRIVATE diff --git a/src/or/nodelist.c b/src/or/nodelist.c index 070e2e9e0d..2802d5b9e0 100644 --- a/src/or/nodelist.c +++ b/src/or/nodelist.c @@ -10,6 +10,32 @@ * \brief Structures and functions for tracking what we know about the routers * on the Tor network, and correlating information from networkstatus, * routerinfo, and microdescs. + * + * The key structure here is node_t: that's the canonical way to refer + * to a Tor relay that we might want to build a circuit through. Every + * node_t has either a routerinfo_t, or a routerstatus_t from the current + * networkstatus consensus. If it has a routerstatus_t, it will also + * need to have a microdesc_t before you can use it for circuits. + * + * The nodelist_t is a global singleton that maps identities to node_t + * objects. Access them with the node_get_*() functions. The nodelist_t + * is maintained by calls throughout the codebase + * + * Generally, other code should not have to reach inside a node_t to + * see what information it has. Instead, you should call one of the + * many accessor functions that works on a generic node_t. If there + * isn't one that does what you need, it's better to make such a function, + * and then use it. + * + * For historical reasons, some of the functions that select a node_t + * from the list of all usable node_t objects are in the routerlist.c + * module, since they originally selected a routerinfo_t. (TODO: They + * should move!) + * + * (TODO: Perhaps someday we should abstract the remaining ways of + * talking about a relay to also be node_t instances. Those would be + * routerstatus_t as used for directory requests, and dir_server_t as + * used for authorities and fallback directories.) */ #include "or.h" diff --git a/src/or/relay.c b/src/or/relay.c index 1794215378..823e0743bd 100644 --- a/src/or/relay.c +++ b/src/or/relay.c @@ -8,6 +8,41 @@ * \file relay.c * \brief Handle relay cell encryption/decryption, plus packaging and * receiving from circuits, plus queuing on circuits. + * + * This is a core modules that makes Tor work. It's responsible for + * dealing with RELAY cells (the ones that travel more than one hop along a + * circuit), by: + *