commit 3f336966a264d7cd7c6dab08fb85d85273f06d68 Author: Yawning Angel <yawning@schwanenlied.me> Date: Wed Jun 24 13:52:29 2015 +0000
Work around nytimes.com's broken hostnames in our SOCKS checks.
RFC 952 is approximately 30 years old, and people are failing to comply, by serving A records with '_' as part of the hostname. Since relaxing the check is a QOL improvement for our userbase, relax the check to allow such abominations as destinations, especially since there are likely to be other similarly misconfigured domains out there. --- changes/bug16430 | 4 ++++ src/common/util.c | 7 +++++-- src/test/test_util.c | 9 +++++++-- 3 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/changes/bug16430 b/changes/bug16430 new file mode 100644 index 0000000..ca7b874 --- /dev/null +++ b/changes/bug16430 @@ -0,0 +1,4 @@ + o Minor features (client): + - Relax the validation done to hostnames in SOCKS5 requests, and allow + '_' to cope with domains observed in the wild that are serving non-RFC + compliant records. Resolves ticket 16430. diff --git a/src/common/util.c b/src/common/util.c index 942d0c2..4490150 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -1036,6 +1036,9 @@ string_is_valid_ipv6_address(const char *string)
/** Return true iff <b>string</b> matches a pattern of DNS names * that we allow Tor clients to connect to. + * + * Note: This allows certain technically invalid characters ('_') to cope + * with misconfigured zones that have been encountered in the wild. */ int string_is_valid_hostname(const char *string) @@ -1048,7 +1051,7 @@ string_is_valid_hostname(const char *string) smartlist_split_string(components,string,".",0,0);
SMARTLIST_FOREACH_BEGIN(components, char *, c) { - if (c[0] == '-') { + if ((c[0] == '-') || (*c == '_')) { result = 0; break; } @@ -1057,7 +1060,7 @@ string_is_valid_hostname(const char *string) if ((*c >= 'a' && *c <= 'z') || (*c >= 'A' && *c <= 'Z') || (*c >= '0' && *c <= '9') || - (*c == '-')) + (*c == '-') || (*c == '_')) c++; else result = 0; diff --git a/src/test/test_util.c b/src/test/test_util.c index b0366db..0f64c26 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -4268,18 +4268,23 @@ test_util_hostname_validation(void *arg) tt_assert(string_is_valid_hostname("stanford.edu")); tt_assert(string_is_valid_hostname("multiple-words-with-hypens.jp"));
- // Subdomain name cannot start with '-'. + // Subdomain name cannot start with '-' or '_'. tt_assert(!string_is_valid_hostname("-torproject.org")); tt_assert(!string_is_valid_hostname("subdomain.-domain.org")); tt_assert(!string_is_valid_hostname("-subdomain.domain.org")); + tt_assert(!string_is_valid_hostname("___abc.org"));
// Hostnames cannot contain non-alphanumeric characters. tt_assert(!string_is_valid_hostname("%%domain.\org.")); tt_assert(!string_is_valid_hostname("***x.net")); - tt_assert(!string_is_valid_hostname("___abc.org")); tt_assert(!string_is_valid_hostname("\xff\xffxyz.org")); tt_assert(!string_is_valid_hostname("word1 word2.net"));
+ // Test workaround for nytimes.com stupidity, technically invalid, + // but we allow it since they are big, even though they are failing to + // comply with a ~30 year old standard. + tt_assert(string_is_valid_hostname("core3_euw1.fabrik.nytimes.com")); + // XXX: do we allow single-label DNS names?
done: