commit df3f2bd9aa55a8ccab6b41c19859b4665ee3e3d2
Author: David Goulet <dgoulet(a)torproject.org>
Date:   Thu Feb 20 11:41:53 2020 -0500
    hs-v3: Log reasons why service can't upload descriptor
    
    When a service can not upload its descriptor(s), we have no logs on why. This
    adds logging for each possible reason for each descriptors.
    
    That logging is emitted every second so it is rate limited for each reason and
    per descriptor.
    
    Closes #33400
    
    Signed-off-by: David Goulet <dgoulet(a)torproject.org>
---
 changes/ticket33400                      |   3 +
 scripts/maint/practracker/exceptions.txt |   2 +-
 src/feature/hs/hs_service.c              | 100 ++++++++++++++++++-
 src/feature/hs/hs_service.h              |   3 +
 src/test/test_hs_service.c               | 165 +++++++++++++++++++++++++++++++
 5 files changed, 268 insertions(+), 5 deletions(-)
diff --git a/changes/ticket33400 b/changes/ticket33400
new file mode 100644
index 000000000..760389076
--- /dev/null
+++ b/changes/ticket33400
@@ -0,0 +1,3 @@
+  o Minor feature (onion service v3):
+    - Log at INFO level why the service can not upload its descriptor(s). Closes
+      ticket 33400; bugfix on 0.3.2.1-alpha.
diff --git a/scripts/maint/practracker/exceptions.txt b/scripts/maint/practracker/exceptions.txt
index b4ef1e51a..53c6faff2 100644
--- a/scripts/maint/practracker/exceptions.txt
+++ b/scripts/maint/practracker/exceptions.txt
@@ -252,7 +252,7 @@ problem function-size /src/feature/hs/hs_descriptor.c:decrypt_desc_layer() 111
 problem function-size /src/feature/hs/hs_descriptor.c:decode_introduction_point() 122
 problem function-size /src/feature/hs/hs_descriptor.c:desc_decode_superencrypted_v3() 107
 problem function-size /src/feature/hs/hs_descriptor.c:desc_decode_encrypted_v3() 109
-problem file-size /src/feature/hs/hs_service.c 4131
+problem file-size /src/feature/hs/hs_service.c 4223
 problem function-size /src/feature/keymgt/loadkey.c:ed_key_init_from_file() 326
 problem function-size /src/feature/nodelist/authcert.c:trusted_dirs_load_certs_from_string() 123
 problem function-size /src/feature/nodelist/authcert.c:authority_certs_fetch_missing() 295
diff --git a/src/feature/hs/hs_service.c b/src/feature/hs/hs_service.c
index 8b51b770a..d6382ef80 100644
--- a/src/feature/hs/hs_service.c
+++ b/src/feature/hs/hs_service.c
@@ -707,8 +707,8 @@ get_extend_info_from_intro_point(const hs_service_intro_point_t *ip,
 
 /** Return the number of introduction points that are established for the
  * given descriptor. */
-static unsigned int
-count_desc_circuit_established(const hs_service_descriptor_t *desc)
+MOCK_IMPL(STATIC unsigned int,
+count_desc_circuit_established, (const hs_service_descriptor_t *desc))
 {
   unsigned int count = 0;
 
@@ -3055,13 +3055,85 @@ service_desc_hsdirs_changed(const hs_service_t *service,
   return should_reupload;
 }
 
+/** These are all the reasons why a descriptor upload can't occur. We use
+ * those to log the reason properly with the right rate limiting and for the
+ * right descriptor. */
+typedef enum {
+  LOG_DESC_UPLOAD_REASON_MISSING_IPS = 0,
+  LOG_DESC_UPLOAD_REASON_IP_NOT_ESTABLISHED = 1,
+  LOG_DESC_UPLOAD_REASON_NOT_TIME = 2,
+  LOG_DESC_UPLOAD_REASON_NO_LIVE_CONSENSUS = 3,
+  LOG_DESC_UPLOAD_REASON_NO_DIRINFO = 4,
+} log_desc_upload_reason_t;
+
+/** Maximum number of reasons. This is used to allocate the static array of
+ * all rate limiting objects. */
+#define LOG_DESC_UPLOAD_REASON_MAX LOG_DESC_UPLOAD_REASON_NO_DIRINFO
+
+/** Log the reason why we can't upload the given descriptor for the given
+ * service. This takes a message string (allocated by the caller) and a
+ * reason.
+ *
+ * Depending on the reason and descriptor, different rate limit applies. This
+ * is done because this function will basically be called every second. Each
+ * descriptor for each reason uses its own log rate limit object in order to
+ * avoid message suppression for different reasons and descriptors. */
+static void
+log_cant_upload_desc(const hs_service_t *service,
+                     const hs_service_descriptor_t *desc, const char *msg,
+                     const log_desc_upload_reason_t reason)
+{
+  /* Writing the log every minute shouldn't be too annoying for log rate limit
+   * since this can be emitted every second for each descriptor.
+   *
+   * However, for one specific case, we increase it to 10 minutes because it
+   * is hit constantly, as an expected behavior, which is the reason
+   * indicating that it is not the time to upload. */
+  static ratelim_t limits[2][LOG_DESC_UPLOAD_REASON_MAX + 1] =
+    { { RATELIM_INIT(60), RATELIM_INIT(60), RATELIM_INIT(60 * 10),
+        RATELIM_INIT(60), RATELIM_INIT(60) },
+      { RATELIM_INIT(60), RATELIM_INIT(60), RATELIM_INIT(60 * 10),
+        RATELIM_INIT(60), RATELIM_INIT(60) },
+    };
+  bool is_next_desc = false;
+  unsigned int rlim_pos = 0;
+  ratelim_t *rlim = NULL;
+
+  tor_assert(service);
+  tor_assert(desc);
+  tor_assert(msg);
+
+  /* Make sure the reason value is valid. It should never happen because we
+   * control that value in the code flow but will be apparent during
+   * development if a reason is added but LOG_DESC_UPLOAD_REASON_NUM_ is not
+   * updated. */
+  if (BUG(reason > LOG_DESC_UPLOAD_REASON_MAX || reason < 0)) {
+    return;
+  }
+
+  /* Ease our life. Flag that tells us if the descriptor is the next one. */
+  is_next_desc = (service->desc_next == desc);
+
+  /* Current descriptor is the first element in the ratelimit object array.
+   * The next descriptor is the second element. */
+  rlim_pos = (is_next_desc ? 1 : 0);
+  /* Get the ratelimit object for the reason _and_ right descriptor. */
+  rlim = &limits[rlim_pos][reason];
+
+  log_fn_ratelim(rlim, LOG_INFO, LD_REND,
+                 "Service %s can't upload its %s descriptor: %s",
+                 safe_str_client(service->onion_address),
+                 (is_next_desc) ? "next" : "current", msg);
+}
+
 /** Return 1 if the given descriptor from the given service can be uploaded
  * else return 0 if it can not. */
 static int
 should_service_upload_descriptor(const hs_service_t *service,
                               const hs_service_descriptor_t *desc, time_t now)
 {
-  unsigned int num_intro_points;
+  char *msg = NULL;
+  unsigned int num_intro_points, count_ip_established;
 
   tor_assert(service);
   tor_assert(desc);
@@ -3081,34 +3153,54 @@ should_service_upload_descriptor(const hs_service_t *service,
    * upload descriptor in this case. We need at least one for the service to
    * be reachable. */
   if (desc->missing_intro_points && num_intro_points == 0) {
+    msg = tor_strdup("Missing intro points");
+    log_cant_upload_desc(service, desc, msg,
+                         LOG_DESC_UPLOAD_REASON_MISSING_IPS);
     goto cannot;
   }
 
   /* Check if all our introduction circuit have been established for all the
    * intro points we have selected. */
-  if (count_desc_circuit_established(desc) != num_intro_points) {
+  count_ip_established = count_desc_circuit_established(desc);
+  if (count_ip_established != num_intro_points) {
+    tor_asprintf(&msg, "Intro circuits aren't yet all established (%d/%d).",
+                 count_ip_established, num_intro_points);
+    log_cant_upload_desc(service, desc, msg,
+                         LOG_DESC_UPLOAD_REASON_IP_NOT_ESTABLISHED);
     goto cannot;
   }
 
   /* Is it the right time to upload? */
   if (desc->next_upload_time > now) {
+    tor_asprintf(&msg, "Next upload time is %ld, it is now %ld.",
+                 (long int) desc->next_upload_time, (long int) now);
+    log_cant_upload_desc(service, desc, msg,
+                         LOG_DESC_UPLOAD_REASON_NOT_TIME);
     goto cannot;
   }
 
   /* Don't upload desc if we don't have a live consensus */
   if (!networkstatus_get_live_consensus(now)) {
+    msg = tor_strdup("No live consensus");
+    log_cant_upload_desc(service, desc, msg,
+                         LOG_DESC_UPLOAD_REASON_NO_LIVE_CONSENSUS);
     goto cannot;
   }
 
   /* Do we know enough router descriptors to have adequate vision of the HSDir
      hash ring? */
   if (!router_have_minimum_dir_info()) {
+    msg = tor_strdup("Not enough directory information");
+    log_cant_upload_desc(service, desc, msg,
+                         LOG_DESC_UPLOAD_REASON_NO_DIRINFO);
     goto cannot;
   }
 
   /* Can upload! */
   return 1;
+
  cannot:
+  tor_free(msg);
   return 0;
 }
 
diff --git a/src/feature/hs/hs_service.h b/src/feature/hs/hs_service.h
index 94a73b2fe..3fe14878e 100644
--- a/src/feature/hs/hs_service.h
+++ b/src/feature/hs/hs_service.h
@@ -384,6 +384,9 @@ STATIC hs_service_t *get_first_service(void);
 STATIC hs_service_intro_point_t *service_intro_point_find_by_ident(
                                          const hs_service_t *service,
                                          const hs_ident_circuit_t *ident);
+
+MOCK_DECL(STATIC unsigned int, count_desc_circuit_established,
+          (const hs_service_descriptor_t *desc));
 #endif /* defined(TOR_UNIT_TESTS) */
 
 /* Service accessors. */
diff --git a/src/test/test_hs_service.c b/src/test/test_hs_service.c
index 685f4c25a..9966bd108 100644
--- a/src/test/test_hs_service.c
+++ b/src/test/test_hs_service.c
@@ -89,6 +89,13 @@ mock_networkstatus_get_live_consensus(time_t now)
   return &mock_ns;
 }
 
+static networkstatus_t *
+mock_networkstatus_get_live_consensus_null(time_t now)
+{
+  (void) now;
+  return NULL;
+}
+
 static or_state_t *dummy_state = NULL;
 
 /* Mock function to get fake or state (used for rev counters) */
@@ -136,6 +143,20 @@ mock_relay_send_command_from_edge(streamid_t stream_id, circuit_t *circ,
   return 0;
 }
 
+static unsigned int num_intro_points = 0;
+static unsigned int
+mock_count_desc_circuit_established(const hs_service_descriptor_t *desc)
+{
+  (void) desc;
+  return num_intro_points;
+}
+
+static int
+mock_router_have_minimum_dir_info_false(void)
+{
+  return 0;
+}
+
 /* Helper: from a set of options in conf, configure a service which will add
  * it to the staging list of the HS subsytem. */
 static int
@@ -2520,6 +2541,148 @@ test_intro2_handling(void *arg)
   UNMOCK(launch_rendezvous_point_circuit);
 }
 
+static void
+test_cannot_upload_descriptors(void *arg)
+{
+  int ret;
+  time_t now;
+  hs_service_t *service;
+
+  (void) arg;
+
+  hs_init();
+  MOCK(get_or_state,
+       get_or_state_replacement);
+  MOCK(networkstatus_get_live_consensus,
+       mock_networkstatus_get_live_consensus);
+
+  dummy_state = or_state_new();
+
+  ret = parse_rfc1123_time("Sat, 26 Oct 1985 13:00:00 UTC",
+                           &mock_ns.valid_after);
+  tt_int_op(ret, OP_EQ, 0);
+  ret = parse_rfc1123_time("Sat, 26 Oct 1985 14:00:00 UTC",
+                           &mock_ns.fresh_until);
+  tt_int_op(ret, OP_EQ, 0);
+  dirauth_sched_recalculate_timing(get_options(), mock_ns.valid_after);
+
+  update_approx_time(mock_ns.valid_after + 1);
+  now = mock_ns.valid_after + 1;
+
+  /* Create a service with no descriptor. It's added to the global map. */
+  service = hs_service_new(get_options());
+  tt_assert(service);
+  service->config.version = HS_VERSION_THREE;
+  ed25519_secret_key_generate(&service->keys.identity_sk, 0);
+  ed25519_public_key_generate(&service->keys.identity_pk,
+                              &service->keys.identity_sk);
+  /* Register service to global map. */
+  ret = register_service(get_hs_service_map(), service);
+  tt_int_op(ret, OP_EQ, 0);
+  /* But first, build our descriptor. */
+  build_all_descriptors(now);
+
+  /* 1. Testing missing intro points reason. */
+  {
+    digest256map_t *cur = service->desc_current->intro_points.map;
+    digest256map_t *tmp = digest256map_new();
+    service->desc_current->intro_points.map = tmp;
+    service->desc_current->missing_intro_points = 1;
+    setup_full_capture_of_logs(LOG_INFO);
+    run_upload_descriptor_event(now);
+    digest256map_free(tmp, tor_free_);
+    service->desc_current->intro_points.map = cur;
+    expect_log_msg_containing(
+      "Service [scrubbed] can't upload its current descriptor: "
+      "Missing intro points");
+    teardown_capture_of_logs();
+    /* Reset. */
+    service->desc_current->missing_intro_points = 0;
+  }
+
+  /* 2. Testing non established intro points. */
+  {
+    setup_full_capture_of_logs(LOG_INFO);
+    run_upload_descriptor_event(now);
+    expect_log_msg_containing(
+      "Service [scrubbed] can't upload its current descriptor: "
+      "Intro circuits aren't yet all established (0/3).");
+    teardown_capture_of_logs();
+  }
+
+  /* We need to pass the established circuit tests and thus from now on, we
+   * MOCK this to return 3 intro points. */
+  MOCK(count_desc_circuit_established, mock_count_desc_circuit_established);
+  num_intro_points = 3;
+
+  /* 3. Testing non established intro points. */
+  {
+    service->desc_current->next_upload_time = now + 1000;
+    setup_full_capture_of_logs(LOG_INFO);
+    run_upload_descriptor_event(now);
+    expect_log_msg_containing(
+      "Service [scrubbed] can't upload its current descriptor: "
+      "Next upload time is");
+    teardown_capture_of_logs();
+    /* Reset. */
+    service->desc_current->next_upload_time = 0;
+  }
+
+  /* 4. Testing missing live consensus. */
+  {
+    MOCK(networkstatus_get_live_consensus,
+         mock_networkstatus_get_live_consensus_null);
+    setup_full_capture_of_logs(LOG_INFO);
+    run_upload_descriptor_event(now);
+    expect_log_msg_containing(
+      "Service [scrubbed] can't upload its current descriptor: "
+      "No live consensus");
+    teardown_capture_of_logs();
+    /* Reset. */
+    MOCK(networkstatus_get_live_consensus,
+         mock_networkstatus_get_live_consensus);
+  }
+
+  /* 5. Test missing minimum directory information. */
+  {
+    MOCK(router_have_minimum_dir_info,
+         mock_router_have_minimum_dir_info_false);
+    setup_full_capture_of_logs(LOG_INFO);
+    run_upload_descriptor_event(now);
+    expect_log_msg_containing(
+      "Service [scrubbed] can't upload its current descriptor: "
+      "Not enough directory information");
+    teardown_capture_of_logs();
+
+    /* Running it again shouldn't trigger anything due to rate limitation. */
+    setup_full_capture_of_logs(LOG_INFO);
+    run_upload_descriptor_event(now);
+    expect_no_log_entry();
+    teardown_capture_of_logs();
+    UNMOCK(router_have_minimum_dir_info);
+  }
+
+  /* Increase time and redo test (5) in order to test the rate limiting. */
+  update_approx_time(mock_ns.valid_after + 61);
+  {
+    MOCK(router_have_minimum_dir_info,
+         mock_router_have_minimum_dir_info_false);
+    setup_full_capture_of_logs(LOG_INFO);
+    run_upload_descriptor_event(now);
+    expect_log_msg_containing(
+      "Service [scrubbed] can't upload its current descriptor: "
+      "Not enough directory information");
+    teardown_capture_of_logs();
+    UNMOCK(router_have_minimum_dir_info);
+  }
+
+ done:
+  hs_free_all();
+  UNMOCK(count_desc_circuit_established);
+  UNMOCK(networkstatus_get_live_consensus);
+  UNMOCK(get_or_state);
+}
+
 struct testcase_t hs_service_tests[] = {
   { "e2e_rend_circuit_setup", test_e2e_rend_circuit_setup, TT_FORK,
     NULL, NULL },
@@ -2557,6 +2720,8 @@ struct testcase_t hs_service_tests[] = {
     NULL, NULL },
   { "upload_descriptors", test_upload_descriptors, TT_FORK,
     NULL, NULL },
+  { "cannot_upload_descriptors", test_cannot_upload_descriptors, TT_FORK,
+    NULL, NULL },
   { "rendezvous1_parsing", test_rendezvous1_parsing, TT_FORK,
     NULL, NULL },
   { "authorized_client_config_equal", test_authorized_client_config_equal,