commit c934fced31a2dd6d53b129b6270109bc48c1ebb0
Author: David Goulet <dgoulet(a)torproject.org>
Date: Tue Dec 15 11:41:42 2020 -0500
relay: Report the entire content of a stats file
It turns out that 9 years ago, we stopped appending data into stats file and
rather overwrite everytime we have new stats (see commit
a6a127c833eace1100aca7ab8ad118862bb8a8b9)
The load_stats_file() function was still thinking that we could have the same
line many times in the file which turns out to be false since 9 years ago.
However, that did not cause problem until IPv6 connection stats came along
which introduced a new line in conn-stats: "ipv6-conn-bi-direct ...".
Before, that file contained a single line starting with the tag
"conn-bi-direct". That very tag appears also in the IPv6 tag (see above) so
the load_stats_file() function would consider that the IPv6 line as the last
tag to be appeneded to the file and fail to report the line above (for IPv4).
It would actually truncate the IPv6 line and report it (removing the "ipv6-"
part).
In other words, "conn-bi-direct" was not reported and instead
"ipv6-conn-bi-direct" was used without the "ipv6-" part.
This commit refactors the entire function so that now it looks for a
"timestamp tag" to validate and then if everything is fine, returns the entire
content of the file. The refactor simplifies the function, adds logging in
case of failures and modernize it in terms of coding standard.
Unit tests are also added that makes sure the loaded content matches the
entire file if timestamp validation passes.
Fixes #40226
Signed-off-by: David Goulet <dgoulet(a)torproject.org>
---
changes/ticket40226 | 5 +++
src/feature/relay/router.c | 106 +++++++++++++++++++++++++++------------------
src/feature/relay/router.h | 2 +
src/test/test_stats.c | 65 +++++++++++++++++++++++++++
4 files changed, 135 insertions(+), 43 deletions(-)
diff --git a/changes/ticket40226 b/changes/ticket40226
new file mode 100644
index 0000000000..4775438f63
--- /dev/null
+++ b/changes/ticket40226
@@ -0,0 +1,5 @@
+ o Minor bugfixes (relay, statistics):
+ - The connection statistics were wrongly exported in the extrainfo document
+ due to a problem in the file loading function which would wrongly truncate
+ the file reporting the wrong information. It is now fixed. Fixes bug
+ 40226; bugfix on 0.4.5.1-alpha.
diff --git a/src/feature/relay/router.c b/src/feature/relay/router.c
index 3aa9561f4f..abe23f9aae 100644
--- a/src/feature/relay/router.c
+++ b/src/feature/relay/router.c
@@ -3139,57 +3139,77 @@ router_dump_exit_policy_to_string(const routerinfo_t *router,
include_ipv6);
}
-/** Load the contents of <b>filename</b>, find the last line starting with
- * <b>end_line</b>, ensure that its timestamp is not more than 25 hours in
- * the past or more than 1 hour in the future with respect to <b>now</b>,
- * and write the file contents starting with that line to *<b>out</b>.
- * Return 1 for success, 0 if the file does not exist or is empty, or -1
- * if the file does not contain a line matching these criteria or other
- * failure. */
-static int
-load_stats_file(const char *filename, const char *end_line, time_t now,
+/** Load the contents of <b>filename</b>, find a line starting with
+ * timestamp tag <b>ts_tag</b>, ensure that its timestamp is not more than 25
+ * hours in the past or more than 1 hour in the future with respect to
+ * <b>now</b>, and write the entire file contents into <b>out</b>.
+ *
+ * The timestamp expected should be an ISO-formatted UTC time value which is
+ * parsed using our parse_iso_time() function.
+ *
+ * In case more than one tag are found in the file, the very first one is
+ * used.
+ *
+ * Return 1 for success, 0 if the file does not exist or is empty, or -1 if
+ * the file does not contain a line with the timestamp tag. */
+STATIC int
+load_stats_file(const char *filename, const char *ts_tag, time_t now,
char **out)
{
int r = -1;
char *fname = get_datadir_fname(filename);
- char *contents, *start = NULL, *tmp, timestr[ISO_TIME_LEN+1];
+ char *contents = NULL, timestr[ISO_TIME_LEN+1];
time_t written;
+
switch (file_status(fname)) {
- case FN_FILE:
- /* X022 Find an alternative to reading the whole file to memory. */
- if ((contents = read_file_to_str(fname, 0, NULL))) {
- tmp = strstr(contents, end_line);
- /* Find last block starting with end_line */
- while (tmp) {
- start = tmp;
- tmp = strstr(tmp + 1, end_line);
- }
- if (!start)
- goto notfound;
- if (strlen(start) < strlen(end_line) + 1 + sizeof(timestr))
- goto notfound;
- strlcpy(timestr, start + 1 + strlen(end_line), sizeof(timestr));
- if (parse_iso_time(timestr, &written) < 0)
- goto notfound;
- if (written < now - (25*60*60) || written > now + (1*60*60))
- goto notfound;
- *out = tor_strdup(start);
- r = 1;
- }
- notfound:
- tor_free(contents);
- break;
- /* treat empty stats files as if the file doesn't exist */
- case FN_NOENT:
- case FN_EMPTY:
- r = 0;
- break;
- case FN_ERROR:
- case FN_DIR:
- default:
- break;
+ case FN_FILE:
+ contents = read_file_to_str(fname, 0, NULL);
+ if (contents == NULL) {
+ log_debug(LD_BUG, "Unable to read content of %s", filename);
+ goto end;
+ }
+ /* Find the timestamp tag to validate that the file is not too old or if
+ * exists. */
+ const char *ts_tok = find_str_at_start_of_line(contents, ts_tag);
+ if (!ts_tok) {
+ log_warn(LD_BUG, "Token %s not found in file %s", ts_tag, filename);
+ goto end;
+ }
+ /* Do we have enough for parsing a timestamp? */
+ if (strlen(contents) < strlen(ts_tag) + 1 + sizeof(timestr)) {
+ log_warn(LD_BUG, "Token %s malformed in file %s", ts_tag, filename);
+ goto end;
+ }
+ /* Parse timestamp in order to validate it is not too old. */
+ strlcpy(timestr, contents + strlen(ts_tag) + 1, sizeof(timestr));
+ if (parse_iso_time(timestr, &written) < 0) {
+ log_warn(LD_BUG, "Token %s has a malformed timestamp in file %s",
+ ts_tag, filename);
+ goto end;
+ }
+ if (written < now - (25*60*60) || written > now + (1*60*60)) {
+ /* This can happen normally so don't log. */
+ goto end;
+ }
+ /* Success. Put in the entire content. */
+ *out = contents;
+ contents = NULL; /* Must not free it. */
+ r = 1;
+ break;
+ /* treat empty stats files as if the file doesn't exist */
+ case FN_NOENT:
+ case FN_EMPTY:
+ r = 0;
+ break;
+ case FN_ERROR:
+ case FN_DIR:
+ default:
+ break;
}
+
+ end:
tor_free(fname);
+ tor_free(contents);
return r;
}
diff --git a/src/feature/relay/router.h b/src/feature/relay/router.h
index f71ada8eb7..2648bb5112 100644
--- a/src/feature/relay/router.h
+++ b/src/feature/relay/router.h
@@ -129,6 +129,8 @@ void router_free_all(void);
STATIC void get_platform_str(char *platform, size_t len);
STATIC int router_write_fingerprint(int hashed, int ed25519_identity);
STATIC smartlist_t *get_my_declared_family(const or_options_t *options);
+STATIC int load_stats_file(const char *filename, const char *ts_tag,
+ time_t now, char **out);
#ifdef TOR_UNIT_TESTS
extern time_t desc_clean_since;
diff --git a/src/test/test_stats.c b/src/test/test_stats.c
index b6849b0b6d..154acd4f9f 100644
--- a/src/test/test_stats.c
+++ b/src/test/test_stats.c
@@ -31,6 +31,7 @@
#define MAINLOOP_PRIVATE
#define STATEFILE_PRIVATE
#define BWHIST_PRIVATE
+#define ROUTER_PRIVATE
#include "core/or/or.h"
#include "lib/err/backtrace.h"
@@ -45,6 +46,7 @@
#include "app/config/statefile.h"
#include "feature/stats/bwhist.h"
#include "feature/stats/bw_array_st.h"
+#include "feature/relay/router.h"
/** Run unit tests for some stats code. */
static void
@@ -493,6 +495,68 @@ test_get_bandwidth_lines(void *arg)
bwhist_free_all();
}
+static void
+test_load_stats_file(void *arg)
+{
+ int ret;
+ char *content = NULL, *read_file_content = NULL;
+ const char *fname = NULL;
+
+ (void) arg;
+
+ /* Load conn-stats. */
+ fname = get_fname("conn-stats");
+ tt_assert(fname);
+ read_file_content = tor_strdup(
+ "conn-bi-direct 2020-12-13 15:48:53 (86400 s) 12,34,56,78\n"
+ "ipv6-conn-bi-direct 2020-12-14 15:48:53 (86400 s) 21,43,65,87\n");
+ write_str_to_file(fname, read_file_content, 0);
+ ret = load_stats_file("conn-stats", "conn-bi-direct", 1607874000, &content);
+ tt_int_op(ret, OP_EQ, 1);
+ tt_str_op(read_file_content, OP_EQ, content);
+
+ /* Load hidserv-stats. */
+ fname = get_fname("hidserv-stats");
+ tt_assert(fname);
+ tor_free(read_file_content);
+ read_file_content = tor_strdup(
+ "hidserv-stats-end 2020-12-13 15:48:53 (86400 s)\n"
+ "hidserv-rend-relayed-cells 48754891 delta_f=2048 epsilon=0.30 "
+ "bin_size=1024\n"
+ "hidserv-dir-onions-seen 53 delta_f=8 epsilon=0.30 bin_size=8\n");
+ write_str_to_file(fname, read_file_content, 0);
+ tor_free(content);
+ ret = load_stats_file("hidserv-stats", "hidserv-stats-end", 1607874000,
+ &content);
+ tt_int_op(ret, OP_EQ, 1);
+ tt_str_op(read_file_content, OP_EQ, content);
+
+ /* Load dirreq-stats. */
+ fname = get_fname("dirreq-stats");
+ tt_assert(fname);
+ tor_free(read_file_content);
+ read_file_content = tor_strdup(
+ "dirreq-stats-end 2020-12-13 15:48:53 (86400 s)\n"
+ "dirreq-v3-ips ru=1728,us=1144,de=696,ir=432,gb=328,fr=304,in=296,ua=232\n"
+ "dirreq-v3-reqs ru=3616,us=3576,de=1896,fr=800,gb=632,ir=616\n"
+ "dirreq-v3-resp ok=18472,not-enough-sigs=0,unavailable=0,not-found=0,"
+ "not-modified=3136,busy=0\n"
+ "dirreq-v3-direct-dl complete=0,timeout=0,running=0\n"
+ "dirreq-v3-tunneled-dl complete=18124,timeout=348,running=4,min=257,"
+ "d1=133653,d2=221050,q1=261242,d3=300622,d4=399758,md=539051,d6=721322,"
+ "d7=959866,q3=1103363,d8=1302035,d9=2046125,max=113404000\n");
+ write_str_to_file(fname, read_file_content, 0);
+ tor_free(content);
+ ret = load_stats_file("dirreq-stats", "dirreq-stats-end", 1607874000,
+ &content);
+ tt_int_op(ret, OP_EQ, 1);
+ tt_str_op(read_file_content, OP_EQ, content);
+
+ done:
+ tor_free(read_file_content);
+ tor_free(content);
+}
+
#define ENT(name) \
{ #name, test_ ## name , 0, NULL, NULL }
#define FORK(name) \
@@ -506,6 +570,7 @@ struct testcase_t stats_tests[] = {
FORK(add_obs),
FORK(fill_bandwidth_history),
FORK(get_bandwidth_lines),
+ FORK(load_stats_file),
END_OF_TESTCASES
};