commit 0389d4aa561bec06ad2aab70ea5a989f1f2d02c6 Author: Nick Mathewson nickm@torproject.org Date: Mon Mar 17 14:15:12 2014 -0400
More logs to try to diagnose bug 7164
This time, check in microdesc_cache_clean() to see what could be going wrong with an attempt to clean a microdesc that's held by a node. --- changes/bug7164_diagnose_harder | 6 +++++ src/or/microdesc.c | 52 ++++++++++++++++++++++++++++++++++++++- src/or/nodelist.c | 19 ++++++++++++++ src/or/nodelist.h | 1 + 4 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/changes/bug7164_diagnose_harder b/changes/bug7164_diagnose_harder new file mode 100644 index 0000000..28c36b6 --- /dev/null +++ b/changes/bug7164_diagnose_harder @@ -0,0 +1,6 @@ + o Minor features: + - Try harder to diagnose a possible cause of bug 7164, which causes + intermittent "microdesc_free() called but md was still referenced" + warnings. We now log more information about the likely error case, + to try to figure out why we might be cleaning a microdescriptor + as old if it's still referenced by a live node. diff --git a/src/or/microdesc.c b/src/or/microdesc.c index 90ac0ac..abcf5fc 100644 --- a/src/or/microdesc.c +++ b/src/or/microdesc.c @@ -368,7 +368,9 @@ microdesc_cache_clean(microdesc_cache_t *cache, time_t cutoff, int force) cutoff = now - TOLERATE_MICRODESC_AGE;
for (mdp = HT_START(microdesc_map, &cache->map); mdp != NULL; ) { - if ((*mdp)->last_listed < cutoff) { + const int is_old = (*mdp)->last_listed < cutoff; + const unsigned held_by_nodes = (*mdp)->held_by_nodes; + if (is_old && !held_by_nodes) { ++dropped; victim = *mdp; mdp = HT_NEXT_RMV(microdesc_map, &cache->map, mdp); @@ -376,6 +378,54 @@ microdesc_cache_clean(microdesc_cache_t *cache, time_t cutoff, int force) bytes_dropped += victim->bodylen; microdesc_free(victim); } else { + if (is_old) { + /* It's old, but it has held_by_nodes set. That's not okay. */ + /* Let's try to diagnose and fix #7164 . */ + smartlist_t *nodes = nodelist_find_nodes_with_microdesc(*mdp); + const networkstatus_t *ns = networkstatus_get_latest_consensus(); + int networkstatus_age = -1; + if (ns) { + networkstatus_age = now - ns->valid_after; + } + log_warn(LD_BUG, "Microdescriptor seemed very old " + "(last listed %d hours ago vs %d hour cutoff), but is still " + "marked as being held by %d node(s). I found %d node(s) " + "holding it. Current networkstatus is %d hours old.", + (int)((now - (*mdp)->last_listed) / 3600), + (int)((now - cutoff) / 3600), + held_by_nodes, + smartlist_len(nodes), + (int)(networkstatus_age / 3600)); + + SMARTLIST_FOREACH_BEGIN(nodes, const node_t *, node) { + const char *rs_match = "No RS"; + const char *rs_present = ""; + if (node->rs) { + if (tor_memeq(node->rs->descriptor_digest, + (*mdp)->digest, DIGEST256_LEN)) { + rs_match = "Microdesc digest in RS matches"; + } else { + rs_match = "Microdesc digest in RS does match"; + } + if (ns) { + /* This should be impossible, but let's see! */ + rs_present = " RS not present in networkstatus."; + SMARTLIST_FOREACH(ns->routerstatus_list, routerstatus_t *,rs, { + if (rs == node->rs) { + rs_present = " RS okay in networkstatus."; + } + }); + } + } + log_warn(LD_BUG, " [%d]: ID=%s. md=%p, rs=%p, ri=%p. %s.%s", + node_sl_idx, + hex_str(node->identity, DIGEST_LEN), + node->md, node->rs, node->ri, rs_match, rs_present); + } SMARTLIST_FOREACH_END(node); + smartlist_free(nodes); + (*mdp)->last_listed = now; + } + ++kept; mdp = HT_NEXT(microdesc_map, &cache->map, mdp); } diff --git a/src/or/nodelist.c b/src/or/nodelist.c index 178f084..d92ef17 100644 --- a/src/or/nodelist.c +++ b/src/or/nodelist.c @@ -337,6 +337,25 @@ nodelist_drop_node(node_t *node, int remove_from_ht) node->nodelist_idx = -1; }
+/** Return a newly allocated smartlist of the nodes that have <b>md</b> as + * their microdescriptor. */ +smartlist_t * +nodelist_find_nodes_with_microdesc(const microdesc_t *md) +{ + smartlist_t *result = smartlist_new(); + + if (the_nodelist == NULL) + return result; + + SMARTLIST_FOREACH_BEGIN(the_nodelist->nodes, node_t *, node) { + if (node->md == md) { + smartlist_add(result, node); + } + } SMARTLIST_FOREACH_END(node); + + return result; +} + /** Release storage held by <b>node</b> */ static void node_free(node_t *node) diff --git a/src/or/nodelist.h b/src/or/nodelist.h index 8a4665a..836b2af 100644 --- a/src/or/nodelist.h +++ b/src/or/nodelist.h @@ -26,6 +26,7 @@ void nodelist_set_consensus(networkstatus_t *ns); void nodelist_remove_microdesc(const char *identity_digest, microdesc_t *md); void nodelist_remove_routerinfo(routerinfo_t *ri); void nodelist_purge(void); +smartlist_t *nodelist_find_nodes_with_microdesc(const microdesc_t *md);
void nodelist_free_all(void); void nodelist_assert_ok(void);
tor-commits@lists.torproject.org