commit 0389d4aa561bec06ad2aab70ea5a989f1f2d02c6
Author: Nick Mathewson <nickm(a)torproject.org>
Date: Mon Mar 17 14:15:12 2014 -0400
More logs to try to diagnose bug 7164
This time, check in microdesc_cache_clean() to see what could be
going wrong with an attempt to clean a microdesc that's held by a node.
---
changes/bug7164_diagnose_harder | 6 +++++
src/or/microdesc.c | 52 ++++++++++++++++++++++++++++++++++++++-
src/or/nodelist.c | 19 ++++++++++++++
src/or/nodelist.h | 1 +
4 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/changes/bug7164_diagnose_harder b/changes/bug7164_diagnose_harder
new file mode 100644
index 0000000..28c36b6
--- /dev/null
+++ b/changes/bug7164_diagnose_harder
@@ -0,0 +1,6 @@
+ o Minor features:
+ - Try harder to diagnose a possible cause of bug 7164, which causes
+ intermittent "microdesc_free() called but md was still referenced"
+ warnings. We now log more information about the likely error case,
+ to try to figure out why we might be cleaning a microdescriptor
+ as old if it's still referenced by a live node.
diff --git a/src/or/microdesc.c b/src/or/microdesc.c
index 90ac0ac..abcf5fc 100644
--- a/src/or/microdesc.c
+++ b/src/or/microdesc.c
@@ -368,7 +368,9 @@ microdesc_cache_clean(microdesc_cache_t *cache, time_t cutoff, int force)
cutoff = now - TOLERATE_MICRODESC_AGE;
for (mdp = HT_START(microdesc_map, &cache->map); mdp != NULL; ) {
- if ((*mdp)->last_listed < cutoff) {
+ const int is_old = (*mdp)->last_listed < cutoff;
+ const unsigned held_by_nodes = (*mdp)->held_by_nodes;
+ if (is_old && !held_by_nodes) {
++dropped;
victim = *mdp;
mdp = HT_NEXT_RMV(microdesc_map, &cache->map, mdp);
@@ -376,6 +378,54 @@ microdesc_cache_clean(microdesc_cache_t *cache, time_t cutoff, int force)
bytes_dropped += victim->bodylen;
microdesc_free(victim);
} else {
+ if (is_old) {
+ /* It's old, but it has held_by_nodes set. That's not okay. */
+ /* Let's try to diagnose and fix #7164 . */
+ smartlist_t *nodes = nodelist_find_nodes_with_microdesc(*mdp);
+ const networkstatus_t *ns = networkstatus_get_latest_consensus();
+ int networkstatus_age = -1;
+ if (ns) {
+ networkstatus_age = now - ns->valid_after;
+ }
+ log_warn(LD_BUG, "Microdescriptor seemed very old "
+ "(last listed %d hours ago vs %d hour cutoff), but is still "
+ "marked as being held by %d node(s). I found %d node(s) "
+ "holding it. Current networkstatus is %d hours old.",
+ (int)((now - (*mdp)->last_listed) / 3600),
+ (int)((now - cutoff) / 3600),
+ held_by_nodes,
+ smartlist_len(nodes),
+ (int)(networkstatus_age / 3600));
+
+ SMARTLIST_FOREACH_BEGIN(nodes, const node_t *, node) {
+ const char *rs_match = "No RS";
+ const char *rs_present = "";
+ if (node->rs) {
+ if (tor_memeq(node->rs->descriptor_digest,
+ (*mdp)->digest, DIGEST256_LEN)) {
+ rs_match = "Microdesc digest in RS matches";
+ } else {
+ rs_match = "Microdesc digest in RS does match";
+ }
+ if (ns) {
+ /* This should be impossible, but let's see! */
+ rs_present = " RS not present in networkstatus.";
+ SMARTLIST_FOREACH(ns->routerstatus_list, routerstatus_t *,rs, {
+ if (rs == node->rs) {
+ rs_present = " RS okay in networkstatus.";
+ }
+ });
+ }
+ }
+ log_warn(LD_BUG, " [%d]: ID=%s. md=%p, rs=%p, ri=%p. %s.%s",
+ node_sl_idx,
+ hex_str(node->identity, DIGEST_LEN),
+ node->md, node->rs, node->ri, rs_match, rs_present);
+ } SMARTLIST_FOREACH_END(node);
+ smartlist_free(nodes);
+ (*mdp)->last_listed = now;
+ }
+
++kept;
mdp = HT_NEXT(microdesc_map, &cache->map, mdp);
}
diff --git a/src/or/nodelist.c b/src/or/nodelist.c
index 178f084..d92ef17 100644
--- a/src/or/nodelist.c
+++ b/src/or/nodelist.c
@@ -337,6 +337,25 @@ nodelist_drop_node(node_t *node, int remove_from_ht)
node->nodelist_idx = -1;
}
+/** Return a newly allocated smartlist of the nodes that have <b>md</b> as
+ * their microdescriptor. */
+smartlist_t *
+nodelist_find_nodes_with_microdesc(const microdesc_t *md)
+{
+ smartlist_t *result = smartlist_new();
+
+ if (the_nodelist == NULL)
+ return result;
+
+ SMARTLIST_FOREACH_BEGIN(the_nodelist->nodes, node_t *, node) {
+ if (node->md == md) {
+ smartlist_add(result, node);
+ }
+ } SMARTLIST_FOREACH_END(node);
+
+ return result;
+}
+
/** Release storage held by <b>node</b> */
static void
node_free(node_t *node)
diff --git a/src/or/nodelist.h b/src/or/nodelist.h
index 8a4665a..836b2af 100644
--- a/src/or/nodelist.h
+++ b/src/or/nodelist.h
@@ -26,6 +26,7 @@ void nodelist_set_consensus(networkstatus_t *ns);
void nodelist_remove_microdesc(const char *identity_digest, microdesc_t *md);
void nodelist_remove_routerinfo(routerinfo_t *ri);
void nodelist_purge(void);
+smartlist_t *nodelist_find_nodes_with_microdesc(const microdesc_t *md);
void nodelist_free_all(void);
void nodelist_assert_ok(void);