[or-cvs] r18760: {tor} Add a simple locale-independent no-surprises sscanf replacem (in tor/trunk/src: common or)

nickm at seul.org nickm at seul.org
Tue Mar 3 18:02:31 UTC 2009


Author: nickm
Date: 2009-03-03 13:02:31 -0500 (Tue, 03 Mar 2009)
New Revision: 18760

Modified:
   tor/trunk/src/common/util.c
   tor/trunk/src/common/util.h
   tor/trunk/src/or/test.c
Log:
Add a simple locale-independent no-surprises sscanf replacement.

tor_sscanf() only handles %u, %s, %c, and %% for now, which will make it
adequate to replace sscanf() for date/time/IP parsing.  We want this
to prevent attackers from constructing weirdly formed descriptors,
cells, addresses, HTTP responses, etc, that validate under some
locales but not others.

Modified: tor/trunk/src/common/util.c
===================================================================
--- tor/trunk/src/common/util.c	2009-03-03 15:38:44 UTC (rev 18759)
+++ tor/trunk/src/common/util.c	2009-03-03 18:02:31 UTC (rev 18760)
@@ -2185,6 +2185,145 @@
   }
 }
 
+/** Largest field width that tor_vsscanf() accepts in a pattern; also used
+ * by scan_unsigned() as the digit limit when no width is given. */
+#define MAX_SCANF_WIDTH 9999
+
+/** Helper: return the integer value (0 through 9) denoted by the decimal
+ * digit character <b>d</b>.  Asserts that <b>d</b> is between '0' and '9'. */
+static int
+digit_to_num(char d)
+{
+  const int value = (int)d - (int)'0';
+  tor_assert(value >= 0 && value <= 9);
+  return value;
+}
+
+/** Helper: read an unsigned decimal integer from *<b>bufp</b>, consuming at
+ * most <b>width</b> digits (or at most MAX_SCANF_WIDTH digits if
+ * <b>width</b> is negative).  Only digit characters are consumed; leading
+ * zeros are accepted.
+ *
+ * On success, store the value in *<b>out</b>, leave *<b>bufp</b> pointing
+ * just past the digits consumed, and return 0.  Return -1 if <b>bufp</b> or
+ * *<b>bufp</b> is NULL, if there is no digit at the current position, or if
+ * the value would exceed UINT32_MAX.  On failure *<b>out</b> is not
+ * written, but *<b>bufp</b> may already have been advanced. */
+static int
+scan_unsigned(const char **bufp, unsigned *out, int width)
+{
+  unsigned result = 0;
+  int scanned_so_far = 0;
+  if (!bufp || !*bufp)
+    return -1;
+  if (width<0)
+    width=MAX_SCANF_WIDTH;
+
+  while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
+    unsigned digit = (unsigned) digit_to_num(*(*bufp)++);
+    /* Check for overflow _before_ doing the arithmetic.  result*10+digit
+     * can wrap around to a value that is still larger than result (for
+     * instance while parsing "9999999999"), so comparing the wrapped value
+     * with the old one does not catch every overflow. */
+    if (result > (UINT32_MAX - digit) / 10)
+      return -1; /* Value would not fit in 32 bits. */
+    result = result * 10 + digit;
+    ++scanned_so_far;
+  }
+
+  if (!scanned_so_far) /* No actual digits scanned */
+    return -1;
+
+  *out = result;
+  return 0;
+}
+
+/** Helper: copy characters from *<b>bufp</b> into <b>out</b>, stopping at
+ * the end of the input, at the first space character, or after
+ * <b>width</b> characters, whichever comes first; then NUL-terminate
+ * <b>out</b>.  <b>out</b> must therefore have room for <b>width</b>+1
+ * bytes.  *<b>bufp</b> is advanced past the characters copied.
+ *
+ * Return -1 if <b>bufp</b> is NULL or <b>width</b> is negative; otherwise
+ * return 0, even when no characters were copied. */
+static int
+scan_string(const char **bufp, char *out, int width)
+{
+  int n_copied;
+  if (width < 0 || !bufp)
+    return -1;
+  for (n_copied = 0; n_copied < width; ++n_copied) {
+    const char c = **bufp;
+    if (c == '\0' || TOR_ISSPACE(c))
+      break;
+    *out++ = c;
+    ++*bufp;
+  }
+  *out = '\0';
+  return 0;
+}
+
+/** Locale-independent, minimal, no-surprises scanf variant, accepting only a
+ * restricted pattern format.  Every character of <b>pattern</b> other than
+ * '%' must match the next character of <b>buf</b> exactly.  The supported
+ * conversions are:
+ *   - "%u" or "%Nu": an unsigned decimal number of at most N digits, stored
+ *     through an <b>unsigned *</b> argument;
+ *   - "%Ns": up to N non-space characters (possibly none), stored
+ *     NUL-terminated through a <b>char *</b> argument.  The width is
+ *     mandatory;
+ *   - "%c": a single character, stored through a <b>char *</b> argument.
+ *     No width is allowed;
+ *   - "%%": a literal '%' in the input.  Takes no argument.
+ * A width, where given, must be between 1 and MAX_SCANF_WIDTH.
+ *
+ * Return the number of conversions stored, counting up to the end of the
+ * pattern or to the first place where <b>buf</b> stops matching it.  Return
+ * -1 if the pattern is malformed or uses an unsupported conversion, or if
+ * "%%" meets an input character other than '%'. */
+int
+tor_vsscanf(const char *buf, const char *pattern, va_list ap)
+{
+  int n_matched = 0;
+
+  while (*pattern) {
+    int width = -1;
+
+    /* Literal pattern characters must match the input exactly. */
+    if (*pattern != '%') {
+      if (*buf != *pattern)
+        return n_matched;
+      ++buf;
+      ++pattern;
+      continue;
+    }
+
+    /* Conversion: step over the '%', then read the optional width. */
+    ++pattern;
+    if (TOR_ISDIGIT(*pattern)) {
+      width = digit_to_num(*pattern++);
+      while (TOR_ISDIGIT(*pattern)) {
+        width = width * 10 + digit_to_num(*pattern++);
+        if (width > MAX_SCANF_WIDTH)
+          return -1;
+      }
+      if (width == 0) /* No zero-width things. */
+        return -1;
+    }
+
+    switch (*pattern) {
+      case 'u': {
+        unsigned *u = va_arg(ap, unsigned *);
+        if (!*buf || scan_unsigned(&buf, u, width) < 0)
+          return n_matched;
+        ++n_matched;
+        break;
+      }
+      case 's': {
+        char *s = va_arg(ap, char *);
+        if (width < 0) /* %s is only allowed with an explicit width. */
+          return -1;
+        if (scan_string(&buf, s, width) < 0)
+          return n_matched;
+        ++n_matched;
+        break;
+      }
+      case 'c': {
+        char *ch = va_arg(ap, char *);
+        if (width != -1) /* %c never takes a width. */
+          return -1;
+        if (!*buf)
+          return n_matched;
+        *ch = *buf++;
+        ++n_matched;
+        break;
+      }
+      case '%':
+        if (*buf != '%')
+          return -1;
+        ++buf;
+        break;
+      default:
+        return -1; /* Unrecognized pattern component. */
+    }
+    ++pattern; /* Step over the conversion character. */
+  }
+
+  return n_matched;
+}
+
+/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>
+ * and store the results in the corresponding argument fields.  Differs from
+ * sscanf in that it: Only handles %u, %Ns, %c and %%.  Does not handle
+ * arbitrarily long widths.  %u does not consume any space.  Is
+ * locale-independent.  Returns the number of fields stored, or -1 on a
+ * malformed pattern.  This is a varargs wrapper around tor_vsscanf(). */
+int
+tor_sscanf(const char *buf, const char *pattern, ...)
+{
+  va_list args;
+  int n_matched;
+  va_start(args, pattern);
+  n_matched = tor_vsscanf(buf, pattern, args);
+  va_end(args);
+  return n_matched;
+}
+
 /** Return a new list containing the filenames in the directory <b>dirname</b>.
  * Return NULL on error or if <b>dirname</b> is not a directory.
  */

Modified: tor/trunk/src/common/util.h
===================================================================
--- tor/trunk/src/common/util.h	2009-03-03 15:38:44 UTC (rev 18759)
+++ tor/trunk/src/common/util.h	2009-03-03 18:02:31 UTC (rev 18760)
@@ -195,6 +195,12 @@
 struct smartlist_t;
 void wrap_string(struct smartlist_t *out, const char *string, size_t width,
                  const char *prefix0, const char *prefixRest);
+/* Locale-independent sscanf() replacements, implemented in util.c. */
+int tor_vsscanf(const char *buf, const char *pattern, va_list ap);
+int tor_sscanf(const char *buf, const char *pattern, ...)
+#ifdef __GNUC__
+  /* Have GCC check the variadic arguments against the pattern string. */
+  __attribute__((format(scanf, 2, 3)))
+#endif
+  ;
 
 int hex_decode_digit(char c);
 void base16_encode(char *dest, size_t destlen, const char *src, size_t srclen);

Modified: tor/trunk/src/or/test.c
===================================================================
--- tor/trunk/src/or/test.c	2009-03-03 15:38:44 UTC (rev 18759)
+++ tor/trunk/src/or/test.c	2009-03-03 18:02:31 UTC (rev 18760)
@@ -2747,6 +2747,89 @@
   tor_free(out);
 }
 
+/** Run unit tests for tor_sscanf(): literal matching, %u with and without
+ * widths, %Ns, %c, and the cases where matching stops early. */
+static void
+test_util_sscanf(void)
+{
+  unsigned u1, u2, u3;
+  char s1[10], s2[10], s3[10], ch;
+  int r;
+
+  r = tor_sscanf("hello world", "hello world"); /* String match: success */
+  test_eq(r, 0);
+  r = tor_sscanf("hello world 3", "hello worlb %u", &u1); /* String fail */
+  test_eq(r, 0);
+  r = tor_sscanf("12345", "%u", &u1); /* Simple number */
+  test_eq(r, 1);
+  test_eq(u1, 12345u);
+  r = tor_sscanf("", "%u", &u1); /* absent number */
+  test_eq(r, 0);
+  r = tor_sscanf("A", "%u", &u1); /* bogus number */
+  test_eq(r, 0);
+  r = tor_sscanf("4294967295", "%u", &u1); /* UINT32_MAX should work. */
+  test_eq(r, 1);
+  test_eq(u1, 4294967295u);
+  /* UINT32_MAX+1 is rejected: the %u fails, so no fields are matched. */
+  r = tor_sscanf("4294967296", "%u", &u1);
+  test_eq(r, 0);
+  r = tor_sscanf("123456", "%2u%u", &u1, &u2); /* Width */
+  test_eq(r, 2);
+  test_eq(u1, 12u);
+  test_eq(u2, 3456u);
+  r = tor_sscanf("!12:3:456", "!%2u:%2u:%3u", &u1, &u2, &u3); /* separators */
+  test_eq(r, 3);
+  test_eq(u1, 12u);
+  test_eq(u2, 3u);
+  test_eq(u3, 456u);
+  r = tor_sscanf("12:3:045", "%2u:%2u:%3u", &u1, &u2, &u3); /* 0s */
+  test_eq(r, 3);
+  test_eq(u1, 12u);
+  test_eq(u2, 3u);
+  test_eq(u3, 45u);
+  /* %u does not match space.*/
+  r = tor_sscanf("12:3: 45", "%2u:%2u:%3u", &u1, &u2, &u3);
+  test_eq(r, 2);
+  /* %u does not match negative numbers. */
+  r = tor_sscanf("12:3:-4", "%2u:%2u:%3u", &u1, &u2, &u3);
+  test_eq(r, 2);
+  /* Arbitrary amounts of 0-padding are okay */
+  r = tor_sscanf("12:03:000000000000000099", "%2u:%2u:%u", &u1, &u2, &u3);
+  test_eq(r, 3);
+  test_eq(u1, 12u);
+  test_eq(u2, 3u);
+  test_eq(u3, 99u);
+  r = tor_sscanf("hello", "%s", s1); /* %s needs an explicit width. */
+  test_eq(r, -1);
+
+  r = tor_sscanf("hello", "%3s%7s", s1, s2); /* %s matches characters. */
+  test_eq(r, 2);
+  test_streq(s1, "hel");
+  test_streq(s2, "lo");
+  r = tor_sscanf("WD40", "%2s%u", s3, &u1); /* %s%u */
+  test_eq(r, 2);
+  test_streq(s3, "WD");
+  test_eq(u1, 40);
+  r = tor_sscanf("76trombones", "%6u%9s", &u1, s1); /* %u%s */
+  test_eq(r, 2);
+  test_eq(u1, 76);
+  test_streq(s1, "trombones");
+  r = tor_sscanf("hello world", "%9s %9s", s1, s2); /* %s doesn't eat space. */
+  test_eq(r, 2);
+  test_streq(s1, "hello");
+  test_streq(s2, "world");
+  r = tor_sscanf("hi", "%9s%9s%3s", s1, s2, s3); /* %s can be empty. */
+  test_eq(r, 3);
+  test_streq(s1, "hi");
+  test_streq(s2, "");
+  test_streq(s3, "");
+
+  /* %c needs a character to consume: it fails at the end of the input. */
+  r = tor_sscanf("1.2.3", "%u.%u.%u%c", &u1, &u2, &u3, &ch);
+  test_eq(r, 3);
+  /* With more input after the numbers, %c matches the next character. */
+  r = tor_sscanf("1.2.3 foobar", "%u.%u.%u%c", &u1, &u2, &u3, &ch);
+  test_eq(r, 4);
+
+ done:
+  ;
+}
+
 /** Run unit tests for the onion handshake code. */
 static void
 test_onion_handshake(void)
@@ -4665,6 +4748,7 @@
   SUBENT(util, mmap),
   SUBENT(util, threads),
   SUBENT(util, order_functions),
+  SUBENT(util, sscanf),
   ENT(onion_handshake),
   ENT(dir_format),
   ENT(dirutil),



More information about the tor-commits mailing list