[tor-commits] [tor/master] Extend tor_sscanf so it can replace sscanf in rephist.c

nickm at torproject.org nickm at torproject.org
Thu Jun 28 14:04:27 UTC 2012


commit d4285f03df475dccf2a7132e2a8808715f383c24
Author: Nick Mathewson <nickm at torproject.org>
Date:   Thu Oct 6 13:25:04 2011 -0400

    Extend tor_sscanf so it can replace sscanf in rephist.c
    
    Fixes bug 4195 and Coverity CID 448
---
 changes/bug4195      |    6 ++
 src/common/util.c    |  151 +++++++++++++++++++++++++++++++++++++++++++++++---
 src/or/rephist.c     |    6 +-
 src/test/test_util.c |   75 +++++++++++++++++++++++++
 4 files changed, 226 insertions(+), 12 deletions(-)

diff --git a/changes/bug4195 b/changes/bug4195
new file mode 100644
index 0000000..2e7a724
--- /dev/null
+++ b/changes/bug4195
@@ -0,0 +1,6 @@
+  o Minor features:
+    - Enhance our internal sscanf replacement so that we can eliminate
+      the last remaining uses of the system sscanf. (Though those uses
+      of sscanf were safe, sscanf itself is generally error prone, so
+      we want to eliminate when we can.) Fixes ticket 4195 and Coverity
+      CID 448.
diff --git a/src/common/util.c b/src/common/util.c
index d63acea..099cdd3 100644
--- a/src/common/util.c
+++ b/src/common/util.c
@@ -2694,9 +2694,9 @@ digit_to_num(char d)
  * success, store the result in <b>out</b>, advance bufp to the next
  * character, and return 0.  On failure, return -1. */
 static int
-scan_unsigned(const char **bufp, unsigned *out, int width, int base)
+scan_unsigned(const char **bufp, unsigned long *out, int width, int base)
 {
-  unsigned result = 0;
+  unsigned long result = 0;
   int scanned_so_far = 0;
   const int hex = base==16;
   tor_assert(base == 10 || base == 16);
@@ -2708,8 +2708,8 @@ scan_unsigned(const char **bufp, unsigned *out, int width, int base)
   while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp))
          && scanned_so_far < width) {
     int digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++);
-    unsigned new_result = result * base + digit;
-    if (new_result > UINT32_MAX || new_result < result)
+    unsigned long new_result = result * base + digit;
+    if (new_result < result)
       return -1; /* over/underflow. */
     result = new_result;
     ++scanned_so_far;
@@ -2722,6 +2722,89 @@ scan_unsigned(const char **bufp, unsigned *out, int width, int base)
   return 0;
 }
 
+/** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b>
+ * characters.  (Handle arbitrary width if <b>width</b> is less than 0.)  On
+ * success, store the result in <b>out</b>, advance bufp to the next
+ * character, and return 0.  On failure, return -1. */
+static int
+scan_signed(const char **bufp, long *out, int width)
+{
+  int neg = 0;
+  unsigned long result = 0;
+
+  if (!bufp || !*bufp || !out)
+    return -1;
+  if (width<0)
+    width=MAX_SCANF_WIDTH;
+
+  if (**bufp == '-') {
+    neg = 1;
+    ++*bufp;
+    --width;
+  }
+
+  if (scan_unsigned(bufp, &result, width, 10) < 0)
+    return -1;
+
+  if (neg) {
+    if (result > ((unsigned long)LONG_MAX) + 1)
+      return -1; /* Underflow */
+    *out = -(long)result;
+  } else {
+    if (result > LONG_MAX)
+      return -1; /* Overflow */
+    *out = (long)result;
+  }
+
+  return 0;
+}
+
+/** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to
+ * <b>width</b> characters.  (Handle arbitrary width if <b>width</b> is less
+ * than 0.)  On success, store the result in <b>out</b>, advance bufp to the
+ * next character, and return 0.  On failure, return -1. */
+static int
+scan_double(const char **bufp, double *out, int width)
+{
+  int neg = 0;
+  double result = 0;
+  int scanned_so_far = 0;
+
+  if (!bufp || !*bufp || !out)
+    return -1;
+  if (width<0)
+    width=MAX_SCANF_WIDTH;
+
+  if (**bufp == '-') {
+    neg = 1;
+    ++*bufp;
+  }
+
+  while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
+    const int digit = digit_to_num(*(*bufp)++);
+    result = result * 10 + digit;
+    ++scanned_so_far;
+  }
+  if (**bufp == '.') {
+    double fracval = 0, denominator = 1;
+    ++*bufp;
+    ++scanned_so_far;
+    while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
+      const int digit = digit_to_num(*(*bufp)++);
+      fracval = fracval * 10 + digit;
+      denominator *= 10;
+      ++scanned_so_far;
+    }
+    result += fracval / denominator;
+  }
+
+  if (!scanned_so_far) /* No actual digits scanned */
+    return -1;
+
+  *out = neg ? -result : result;
+  return 0;
+}
+
 /** Helper: copy up to <b>width</b> non-space characters from <b>bufp</b> to
  * <b>out</b>.  Make sure <b>out</b> is nul-terminated. Advance <b>bufp</b>
  * to the next non-space character or the EOS. */
@@ -2758,6 +2841,7 @@ tor_vsscanf(const char *buf, const char *pattern, va_list ap)
       }
     } else {
       int width = -1;
+      int longmod = 0;
       ++pattern;
       if (TOR_ISDIGIT(*pattern)) {
         width = digit_to_num(*pattern++);
@@ -2770,17 +2854,57 @@ tor_vsscanf(const char *buf, const char *pattern, va_list ap)
         if (!width) /* No zero-width things. */
           return -1;
       }
+      if (*pattern == 'l') {
+        longmod = 1;
+        ++pattern;
+      }
       if (*pattern == 'u' || *pattern == 'x') {
-        unsigned *u = va_arg(ap, unsigned *);
+        unsigned long u;
         const int base = (*pattern == 'u') ? 10 : 16;
         if (!*buf)
           return n_matched;
-        if (scan_unsigned(&buf, u, width, base)<0)
+        if (scan_unsigned(&buf, &u, width, base)<0)
+          return n_matched;
+        if (longmod) {
+          unsigned long *out = va_arg(ap, unsigned long *);
+          *out = u;
+        } else {
+          unsigned *out = va_arg(ap, unsigned *);
+          if (u > UINT_MAX)
+            return n_matched;
+          *out = u;
+        }
+        ++pattern;
+        ++n_matched;
+      } else if (*pattern == 'f') {
+        double *d = va_arg(ap, double *);
+        if (!longmod)
+          return -1; /* float not supported */
+        if (!*buf)
+          return n_matched;
+        if (scan_double(&buf, d, width)<0)
           return n_matched;
         ++pattern;
         ++n_matched;
+      } else if (*pattern == 'd') {
+        long lng=0;
+        if (scan_signed(&buf, &lng, width)<0)
+          return n_matched;
+        if (longmod) {
+          long *out = va_arg(ap, long *);
+          *out = lng;
+        } else {
+          int *out = va_arg(ap, int *);
+          if (lng < INT_MIN || lng > INT_MAX)
+            return n_matched;
+          *out = (int)lng;
+        }
+        ++pattern;
+        ++n_matched;
       } else if (*pattern == 's') {
         char *s = va_arg(ap, char *);
+        if (longmod)
+          return -1;
         if (width < 0)
           return -1;
         if (scan_string(&buf, s, width)<0)
@@ -2789,6 +2913,8 @@ tor_vsscanf(const char *buf, const char *pattern, va_list ap)
         ++n_matched;
       } else if (*pattern == 'c') {
         char *ch = va_arg(ap, char *);
+        if (longmod)
+          return -1;
         if (width != -1)
           return -1;
         if (!*buf)
@@ -2799,6 +2925,8 @@ tor_vsscanf(const char *buf, const char *pattern, va_list ap)
       } else if (*pattern == '%') {
         if (*buf != '%')
           return n_matched;
+        if (longmod)
+          return -1;
         ++buf;
         ++pattern;
       } else {
@@ -2812,9 +2940,14 @@ tor_vsscanf(const char *buf, const char *pattern, va_list ap)
 
 /** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>
  * and store the results in the corresponding argument fields.  Differs from
- * sscanf in that it: Only handles %u, %x, %c and %Ns.  Does not handle
- * arbitrarily long widths. %u and %x do not consume any space.  Is
- * locale-independent.  Returns -1 on malformed patterns.
+ * sscanf in that:
+ * <ul><li>It only handles %u, %lu, %x, %lx, %<NUM>s, %d, %ld, %lf, and %c.
+ *     <li>It only handles decimal inputs for %lf. (12.3, not 1.23e1)
+ *     <li>It does not handle arbitrarily long widths.
+ *     <li>Numbers do not consume any space characters.
+ *     <li>It is locale-independent.
+ *     <li>%u and %x do not consume any space.
+ *     <li>It returns -1 on malformed patterns.</ul>
  *
  * (As with other locale-independent functions, we need this to parse data that
  * is in ASCII without worrying that the C library's locale-handling will make
diff --git a/src/or/rephist.c b/src/or/rephist.c
index 720d14c..fa02f98 100644
--- a/src/or/rephist.c
+++ b/src/or/rephist.c
@@ -1136,7 +1136,7 @@ rep_hist_load_mtbf_data(time_t now)
     wfu_timebuf[0] = '\0';
 
     if (format == 1) {
-      n = sscanf(line, "%40s %ld %lf S=%10s %8s",
+      n = tor_sscanf(line, "%40s %ld %lf S=%10s %8s",
                  hexbuf, &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11);
       if (n != 3 && n != 5) {
         log_warn(LD_HIST, "Couldn't scan line %s", escaped(line));
@@ -1153,7 +1153,7 @@ rep_hist_load_mtbf_data(time_t now)
       wfu_idx = find_next_with(lines, i+1, "+WFU ");
       if (mtbf_idx >= 0) {
         const char *mtbfline = smartlist_get(lines, mtbf_idx);
-        n = sscanf(mtbfline, "+MTBF %lu %lf S=%10s %8s",
+        n = tor_sscanf(mtbfline, "+MTBF %lu %lf S=%10s %8s",
                    &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11);
         if (n == 2 || n == 4) {
           have_mtbf = 1;
@@ -1164,7 +1164,7 @@ rep_hist_load_mtbf_data(time_t now)
       }
       if (wfu_idx >= 0) {
         const char *wfuline = smartlist_get(lines, wfu_idx);
-        n = sscanf(wfuline, "+WFU %lu %lu S=%10s %8s",
+        n = tor_sscanf(wfuline, "+WFU %lu %lu S=%10s %8s",
                    &wt_uptime, &total_wt_time,
                    wfu_timebuf, wfu_timebuf+11);
         if (n == 2 || n == 4) {
diff --git a/src/test/test_util.c b/src/test/test_util.c
index 632ef68..b07fa3b 100644
--- a/src/test/test_util.c
+++ b/src/test/test_util.c
@@ -1461,12 +1461,28 @@ test_util_control_formats(void)
   tor_free(out);
 }
 
+#define test_feq(value1,value2) do {                               \
+    double v1 = (value1), v2=(value2);                             \
+    double tf_diff = v1-v2;                                        \
+    double tf_tolerance = ((v1+v2)/2.0)/1e8;                       \
+    if (tf_diff<0) tf_diff=-tf_diff;                               \
+    if (tf_tolerance<0) tf_tolerance=-tf_tolerance;                \
+    if (tf_diff<tf_tolerance) {                                    \
+      TT_BLATHER(("%s ~~ %s: %f ~~ %f",#value1,#value2,v1,v2));  \
+    } else {                                                       \
+      TT_FAIL(("%s ~~ %s: %f != %f",#value1,#value2,v1,v2)); \
+    }                                                              \
+  } while(0)
+
 static void
 test_util_sscanf(void)
 {
   unsigned u1, u2, u3;
   char s1[20], s2[10], s3[10], ch;
   int r;
+  long lng1,lng2;
+  int int1, int2;
+  double d1,d2,d3,d4;
 
   /* Simple tests (malformed patterns, literal matching, ...) */
   test_eq(-1, tor_sscanf("123", "%i", &r)); /* %i is not supported */
@@ -1595,6 +1611,65 @@ test_util_sscanf(void)
   test_eq(4, tor_sscanf("1.2.3 foobar", "%u.%u.%u%c", &u1, &u2, &u3, &ch));
   test_eq(' ', ch);
 
+  r = tor_sscanf("12345 -67890 -1", "%d %ld %d", &int1, &lng1, &int2);
+  test_eq(r,3);
+  test_eq(int1, 12345);
+  test_eq(lng1, -67890);
+  test_eq(int2, -1);
+
+#if SIZEOF_INT == 4
+  r = tor_sscanf("-2147483648. 2147483647.", "%d. %d.", &int1, &int2);
+  test_eq(r,2);
+  test_eq(int1, -2147483648);
+  test_eq(int2, 2147483647);
+
+  r = tor_sscanf("-2147483679.", "%d.", &int1);
+  test_eq(r,0);
+
+  r = tor_sscanf("2147483678.", "%d.", &int1);
+  test_eq(r,0);
+#elif SIZEOF_INT == 8
+  r = tor_sscanf("-9223372036854775808. 9223372036854775807.",
+                 "%d. %d.", &int1, &int2);
+  test_eq(r,2);
+  test_eq(int1, -9223372036854775808);
+  test_eq(int2, 9223372036854775807);
+
+  r = tor_sscanf("-9223372036854775809.", "%d.", &int1);
+  test_eq(r,0);
+
+  r = tor_sscanf("9223372036854775808.", "%d.", &int1);
+  test_eq(r,0);
+#endif
+
+#if SIZEOF_LONG == 4
+  r = tor_sscanf("-2147483648. 2147483647.", "%ld. %ld.", &lng1, &lng2)
+  test_eq(r,2);
+  test_eq(lng1, -2147483647 - 1);
+  test_eq(lng2, 2147483647);
+#elif SIZEOF_LONG == 8
+  r = tor_sscanf("-9223372036854775808. 9223372036854775807.",
+                 "%ld. %ld.", &lng1, &lng2);
+  test_eq(r,2);
+  test_eq(lng1, -9223372036854775807L - 1);
+  test_eq(lng2, 9223372036854775807L);
+
+  r = tor_sscanf("-9223372036854775808. 9223372036854775808.",
+                 "%ld. %ld.", &lng1, &lng2);
+  test_eq(r,1);
+  r = tor_sscanf("-9223372036854775809. 9223372036854775808.",
+                 "%ld. %ld.", &lng1, &lng2);
+  test_eq(r,0);
+#endif
+
+  r = tor_sscanf("123.456 .000007 -900123123.2000787 00003.2",
+                 "%lf %lf %lf %lf", &d1,&d2,&d3,&d4);
+  test_eq(r,4);
+  test_feq(d1, 123.456);
+  test_feq(d2, .000007);
+  test_feq(d3, -900123123.2000787);
+  test_feq(d4, 3.2);
+
  done:
   ;
 }





More information about the tor-commits mailing list