commit 7ca86b9cd65ba4c7ed3c36f0c56c97ad4b075585 Author: Nick Mathewson nickm@torproject.org Date: Sat Apr 15 13:23:10 2017 -0400
Add a hashtable to consdiffmgr to keep track of diff status
In several places in the old code, we had problems that only an in-memory index of diff status could solve, including: * Remembering which diffs were in-progress, so that we didn't re-launch them. * Remembering which diffs had failed, so that we didn't try to recompute them over and over. * Having a fast way to look up the diff from a given consensus to the latest consensus of a given flavor.
This patch adds a hashtable mapping from (flavor, source diff), to solve the problem. It maps to a cache entry handle, rather than to a cache entry directly, so that it doesn't affect the reference counts of the cache entries, and so that we don't otherwise need to worry about lifetime management. --- src/or/consdiffmgr.c | 317 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 307 insertions(+), 10 deletions(-)
diff --git a/src/or/consdiffmgr.c b/src/or/consdiffmgr.c index 16778b7..240fe6e 100644 --- a/src/or/consdiffmgr.c +++ b/src/or/consdiffmgr.c @@ -59,6 +59,44 @@ static consensus_cache_t *cons_diff_cache = NULL; */ static int cdm_cache_dirty = 0; /** + * If true, we have scanned the cache to update our hashtable of diffs. + */ +static int cdm_cache_loaded = 0; + +/** + * Possible status values for cdm_diff_t.cdm_diff_status + **/ +typedef enum cdm_diff_status_t { + CDM_DIFF_PRESENT=1, + CDM_DIFF_IN_PROGRESS=2, + CDM_DIFF_ERROR=3, +} cdm_diff_status_t; + +/** Hashtable node used to remember the current status of the diff + * from a given sha3 digest to the current consensus. */ +typedef struct cdm_diff_t { + HT_ENTRY(cdm_diff_t) node; + + /** Consensus flavor for this diff (part of ht key) */ + consensus_flavor_t flavor; + /** SHA3-256 digest of the consensus that this diff is _from_. (part of the + * ht key) */ + uint8_t from_sha3[DIGEST256_LEN]; + + /** One of the CDM_DIFF_* values, depending on whether this diff + * is available, in progress, or impossible to compute. */ + cdm_diff_status_t cdm_diff_status; + /** SHA3-256 digest of the consensus that this diff is _to. */ + uint8_t target_sha3[DIGEST256_LEN]; + /** Handle to the cache entry for this diff, if any. We use a handle here + * to avoid thinking too hard about cache entry lifetime issues. */ + consensus_cache_entry_handle_t *entry; +} cdm_diff_t; + +/** Hashtable mapping flavor and source consensus digest to status. */ +static HT_HEAD(cdm_diff_ht, cdm_diff_t) cdm_diff_ht = HT_INITIALIZER(); + +/** * Configuration for this module */ static consdiff_cfg_t consdiff_cfg = { @@ -69,6 +107,160 @@ static consdiff_cfg_t consdiff_cfg = { static int consensus_diff_queue_diff_work(consensus_cache_entry_t *diff_from, consensus_cache_entry_t *diff_to); static void consdiffmgr_set_cache_flags(void); + +/* ===== + * Hashtable setup + * ===== */ + +/** Helper: hash the key of a cdm_diff_t. */ +static unsigned +cdm_diff_hash(const cdm_diff_t *diff) +{ + uint8_t tmp[DIGEST256_LEN + 1]; + memcpy(tmp, diff->from_sha3, DIGEST256_LEN); + tmp[DIGEST256_LEN] = (uint8_t) diff->flavor; + return (unsigned) siphash24g(tmp, sizeof(tmp)); +} +/** Helper: compare two cdm_diff_t objects for key equality */ +static int +cdm_diff_eq(const cdm_diff_t *diff1, const cdm_diff_t *diff2) +{ + return fast_memeq(diff1->from_sha3, diff2->from_sha3, DIGEST256_LEN) && + diff1->flavor == diff2->flavor; +} + +HT_PROTOTYPE(cdm_diff_ht, cdm_diff_t, node, cdm_diff_hash, cdm_diff_eq) +HT_GENERATE2(cdm_diff_ht, cdm_diff_t, node, cdm_diff_hash, cdm_diff_eq, + 0.6, tor_reallocarray, tor_free_) + +/** Release all storage held in <b>diff</b>. */ +static void +cdm_diff_free(cdm_diff_t *diff) +{ + if (!diff) + return; + consensus_cache_entry_handle_free(diff->entry); + tor_free(diff); +} + +/** Create and return a new cdm_diff_t with the given values. Does not + * add it to the hashtable. */ +static cdm_diff_t * +cdm_diff_new(consensus_flavor_t flav, + const uint8_t *from_sha3, + const uint8_t *target_sha3) +{ + cdm_diff_t *ent; + ent = tor_malloc_zero(sizeof(cdm_diff_t)); + ent->flavor = flav; + memcpy(ent->from_sha3, from_sha3, DIGEST256_LEN); + memcpy(ent->target_sha3, target_sha3, DIGEST256_LEN); + return ent; +} + +/** + * Examine the diff hashtable to see whether we know anything about computing + * a diff of type <b>flav</b> between consensuses with the two provided + * SHA3-256 digests. If a computation is in progress, or if the computation + * has already been tried and failed, return 1. Otherwise, note the + * computation as "in progress" so that we don't reattempt it later, and + * return 0. + */ +static int +cdm_diff_ht_check_and_note_pending(consensus_flavor_t flav, + const uint8_t *from_sha3, + const uint8_t *target_sha3) +{ + struct cdm_diff_t search, *ent; + memset(&search, 0, sizeof(cdm_diff_t)); + search.flavor = flav; + memcpy(search.from_sha3, from_sha3, DIGEST256_LEN); + ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); + if (ent) { + tor_assert_nonfatal(ent->cdm_diff_status != CDM_DIFF_PRESENT); + return 1; + } + ent = cdm_diff_new(flav, from_sha3, target_sha3); + ent->cdm_diff_status = CDM_DIFF_IN_PROGRESS; + HT_INSERT(cdm_diff_ht, &cdm_diff_ht, ent); + return 0; +} + +/** + * Update the status of the diff of type <b>flav</b> between consensuses with + * the two provided SHA3-256 digests, so that its status becomes + * <b>status</b>, and its value becomes the <b>handle</b>. If <b>handle</b> + * is NULL, then the old handle (if any) is freed, and replaced with NULL. + */ +static void +cdm_diff_ht_set_status(consensus_flavor_t flav, + const uint8_t *from_sha3, + const uint8_t *to_sha3, + int status, + consensus_cache_entry_handle_t *handle) +{ + struct cdm_diff_t search, *ent; + memset(&search, 0, sizeof(cdm_diff_t)); + search.flavor = flav; + memcpy(search.from_sha3, from_sha3, DIGEST256_LEN); + ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); + if (!ent) { + ent = cdm_diff_new(flav, from_sha3, to_sha3); + ent->cdm_diff_status = CDM_DIFF_IN_PROGRESS; + HT_INSERT(cdm_diff_ht, &cdm_diff_ht, ent); + } else if (fast_memneq(ent->target_sha3, to_sha3, DIGEST256_LEN)) { + // This can happen under certain really pathological conditions + // if we decide we don't care about a diff before it is actually + // done computing. + return; + } + + tor_assert_nonfatal(ent->cdm_diff_status == CDM_DIFF_IN_PROGRESS); + + ent->cdm_diff_status = status; + consensus_cache_entry_handle_free(ent->entry); + ent->entry = handle; +} + +/** + * Helper: Remove from the hash table every present (actually computed) diff + * of type <b>flav</b> whose target digest does not match + * <b>unless_target_sha3_matches</b>. + * + * This function is used for the hash table to throw away references to diffs + * that do not lead to the most given consensus of a given flavor. + */ +static void +cdm_diff_ht_purge(consensus_flavor_t flav, + const uint8_t *unless_target_sha3_matches) +{ + cdm_diff_t **diff, **next; + for (diff = HT_START(cdm_diff_ht, &cdm_diff_ht); diff; diff = next) { + cdm_diff_t *this = *diff; + + if ((*diff)->cdm_diff_status == CDM_DIFF_PRESENT && + flav == (*diff)->flavor) { + + if (consensus_cache_entry_handle_get((*diff)->entry) == NULL) { + /* the underlying entry has gone away; drop this. */ + next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff); + cdm_diff_free(this); + continue; + } + + if (unless_target_sha3_matches && + fast_memneq(unless_target_sha3_matches, (*diff)->target_sha3, + DIGEST256_LEN)) { + /* target hash doesn't match; drop this. */ + next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff); + cdm_diff_free(this); + continue; + } + } + next = HT_NEXT(cdm_diff_ht, &cdm_diff_ht, diff); + } +} + /** * Helper: initialize <b>cons_diff_cache</b>. */ @@ -88,6 +280,7 @@ cdm_cache_init(void) consdiffmgr_set_cache_flags(); } cdm_cache_dirty = 1; + cdm_cache_loaded = 0; }
/** @@ -160,7 +353,8 @@ cdm_cache_lookup_consensus(consensus_flavor_t flavor, time_t valid_after)
/* We'll filter by valid-after time first, since that should * match the fewest documents. */ - // XXXX This is stupid and it should be a hash table. + /* We could add an extra hashtable here, but since we only do this scan + * when adding a new consensus, it probably doesn't matter much. */ smartlist_t *matches = smartlist_new(); consensus_cache_find_all(matches, cdm_cache_get(), LABEL_VALID_AFTER, formatted_time); @@ -289,12 +483,33 @@ consdiffmgr_find_diff_from(consensus_cache_entry_t **entry_out, const uint8_t *digest, size_t digestlen) { - // XXXX actually return IN_PROGRESS some times? if (BUG(digest_type != DIGEST_SHA3_256) || BUG(digestlen != DIGEST256_LEN)) { return CONSDIFF_NOT_FOUND; // LCOV_EXCL_LINE }
+ // Try to look up the entry in the hashtable. + cdm_diff_t search, *ent; + memset(&search, 0, sizeof(search)); + search.flavor = flavor; + memcpy(search.from_sha3, digest, DIGEST256_LEN); + ent = HT_FIND(cdm_diff_ht, &cdm_diff_ht, &search); + + if (ent == NULL || + ent->cdm_diff_status == CDM_DIFF_ERROR) { + return CONSDIFF_NOT_FOUND; + } else if (ent->cdm_diff_status == CDM_DIFF_IN_PROGRESS) { + return CONSDIFF_IN_PROGRESS; + } else if (BUG(ent->cdm_diff_status != CDM_DIFF_PRESENT)) { + return CONSDIFF_IN_PROGRESS; + } + + *entry_out = consensus_cache_entry_handle_get(ent->entry); + return (*entry_out) ? CONSDIFF_AVAILABLE : CONSDIFF_NOT_FOUND; + +#if 0 + // XXXX Remove this. I'm keeping it around for now in case we need to + // XXXX debug issues in the hashtable. char hex[HEX_DIGEST256_LEN+1]; base16_encode(hex, sizeof(hex), (const char *)digest, digestlen); const char *flavname = networkstatus_get_flavor_name(flavor); @@ -311,6 +526,7 @@ consdiffmgr_find_diff_from(consensus_cache_entry_t **entry_out, smartlist_free(matches);
return result; +#endif }
/** @@ -495,6 +711,10 @@ consdiffmgr_rescan_flavor_(consensus_flavor_t flavor) consensus_cache_entry_get_value(most_recent, LABEL_VALID_AFTER); if (BUG(most_recent_valid_after == NULL)) goto done; //LCOV_EXCL_LINE + uint8_t most_recent_sha3[DIGEST256_LEN]; + if (BUG(cdm_entry_get_sha3_value(most_recent_sha3, most_recent, + LABEL_SHA3_DIGEST) < 0)) + goto done; //LCOV_EXCL_LINE
// 2. Find all the relevant diffs _to_ this consensus. These are ones // that we don't need to compute. @@ -535,13 +755,23 @@ consdiffmgr_rescan_flavor_(consensus_flavor_t flavor) flavname, smartlist_len(compute_diffs_from));
- // 4. Actually launch the requests. + // 4. Update the hashtable; remove entries in this flavor to other + // target consensuses. + cdm_diff_ht_purge(flavor, most_recent_sha3); + + // 5. Actually launch the requests. SMARTLIST_FOREACH_BEGIN(compute_diffs_from, consensus_cache_entry_t *, c) { if (BUG(c == most_recent)) continue; // LCOV_EXCL_LINE
- // XXXX how do we know that we are not already computing this????? - // XXXX DO NOT MERGE UNTIL THAT ISSUE IS SOLVED. + uint8_t this_sha3[DIGEST256_LEN]; + if (BUG(cdm_entry_get_sha3_value(this_sha3, c, LABEL_SHA3_DIGEST)<0)) + continue; // LCOV_EXCL_LINE + if (cdm_diff_ht_check_and_note_pending(flavor, + this_sha3, most_recent_sha3)) { + // This is already pending, or we encountered an error. + continue; + } consensus_diff_queue_diff_work(c, most_recent); } SMARTLIST_FOREACH_END(c);
@@ -553,6 +783,37 @@ consdiffmgr_rescan_flavor_(consensus_flavor_t flavor) }
/** + * Scan the cache for diffs, and add them to the hashtable. + */ +static void +consdiffmgr_diffs_load(void) +{ + smartlist_t *diffs = smartlist_new(); + consensus_cache_find_all(diffs, cdm_cache_get(), + LABEL_DOCTYPE, DOCTYPE_CONSENSUS_DIFF); + SMARTLIST_FOREACH_BEGIN(diffs, consensus_cache_entry_t *, diff) { + const char *lv_flavor = + consensus_cache_entry_get_value(diff, LABEL_FLAVOR); + if (!lv_flavor) + continue; + int flavor = networkstatus_parse_flavor_name(lv_flavor); + if (flavor < 0) + continue; + uint8_t from_sha3[DIGEST256_LEN]; + uint8_t to_sha3[DIGEST256_LEN]; + if (cdm_entry_get_sha3_value(from_sha3, diff, LABEL_FROM_SHA3_DIGEST)<0) + continue; + if (cdm_entry_get_sha3_value(to_sha3, diff, LABEL_TARGET_SHA3_DIGEST)<0) + continue; + + cdm_diff_ht_set_status(flavor, from_sha3, to_sha3, + CDM_DIFF_PRESENT, + consensus_cache_entry_handle_new(diff)); + } SMARTLIST_FOREACH_END(diff); + smartlist_free(diffs); +} + +/** * Build new diffs as needed. */ void @@ -565,6 +826,11 @@ consdiffmgr_rescan(void) // consensuses do not have any entries. consdiffmgr_cleanup();
+ if (cdm_cache_loaded == 0) { + consdiffmgr_diffs_load(); + cdm_cache_loaded = 1; + } + for (int flav = 0; flav < N_CONSENSUS_FLAVORS; ++flav) { consdiffmgr_rescan_flavor_((consensus_flavor_t) flav); } @@ -595,6 +861,12 @@ consdiffmgr_set_cache_flags(void) void consdiffmgr_free_all(void) { + cdm_diff_t **diff, **next; + for (diff = HT_START(cdm_diff_ht, &cdm_diff_ht); diff; diff = next) { + cdm_diff_t *this = *diff; + next = HT_NEXT_RMV(cdm_diff_ht, &cdm_diff_ht, diff); + cdm_diff_free(this); + } consensus_cache_free(cons_diff_cache); cons_diff_cache = NULL; } @@ -751,27 +1023,52 @@ consensus_diff_worker_replyfn(void *work_) consensus_cache_entry_get_value(job->diff_from, LABEL_SHA3_DIGEST); const char *lv_to_digest = consensus_cache_entry_get_value(job->diff_to, LABEL_SHA3_DIGEST); + const char *lv_flavor = + consensus_cache_entry_get_value(job->diff_to, LABEL_FLAVOR); if (BUG(lv_from_digest == NULL)) lv_from_digest = "???"; // LCOV_EXCL_LINE if (BUG(lv_to_digest == NULL)) lv_to_digest = "???"; // LCOV_EXCL_LINE
+ uint8_t from_sha3[DIGEST256_LEN]; + uint8_t to_sha3[DIGEST256_LEN]; + int flav = -1; + int cache = 1; + if (BUG(cdm_entry_get_sha3_value(from_sha3, job->diff_from, + LABEL_SHA3_DIGEST) < 0)) + cache = 0; + if (BUG(cdm_entry_get_sha3_value(to_sha3, job->diff_to, + LABEL_SHA3_DIGEST) < 0)) + cache = 0; + if (BUG(lv_flavor == NULL)) { + cache = 0; + } else if ((flav = networkstatus_parse_flavor_name(lv_flavor)) < 0) { + cache = 0; + } + + int status; + consensus_cache_entry_handle_t *handle = NULL; if (job->body_out && job->bodylen_out && job->labels_out) { /* Success! Store the results */ log_info(LD_DIRSERV, "Adding consensus diff from %s to %s", lv_from_digest, lv_to_digest); - consensus_cache_add(cdm_cache_get(), job->labels_out, - job->body_out, - job->bodylen_out); + consensus_cache_entry_t *ent = + consensus_cache_add(cdm_cache_get(), job->labels_out, + job->body_out, + job->bodylen_out); + status = CDM_DIFF_PRESENT; + handle = consensus_cache_entry_handle_new(ent); } else { /* Failure! Nothing to do but complain */ log_warn(LD_DIRSERV, "Worker was unable to compute consensus diff " "from %s to %s", lv_from_digest, lv_to_digest); - /* XXXX Actually, we should cache this failure and not repeat the - * attempt over and over */ + /* Cache this error so we don't try to compute this one again. */ + status = CDM_DIFF_ERROR; }
+ if (cache) + cdm_diff_ht_set_status(flav, from_sha3, to_sha3, status, handle); consensus_diff_worker_job_free(job); }