[tor-commits] [tor/master] Refactor out command line formatting

nickm at torproject.org nickm at torproject.org
Tue Aug 30 19:58:40 UTC 2011


commit bc97f410802d5b9c66bfba6aebeae1ecd70f8857
Author: Steven Murdoch <Steven.Murdoch at cl.cam.ac.uk>
Date:   Mon Aug 29 12:26:55 2011 +0100

    Refactor out command line formatting
    
    Now correctly handles whitespace, quotes and backslashes. Passes all unit tests.
---
 src/common/util.c    |  108 ++++++++++++++++++++++++++++++++++++++++++++++----
 src/common/util.h    |    1 +
 src/test/test_util.c |   49 ++++++++++++++++++++++
 3 files changed, 150 insertions(+), 8 deletions(-)

diff --git a/src/common/util.c b/src/common/util.c
index dd2d111..87c6fb5 100644
--- a/src/common/util.c
+++ b/src/common/util.c
@@ -2953,6 +2953,105 @@ load_windows_system_library(const TCHAR *library_name)
 }
 #endif
 
+/* Format a single argument for being put on a Windows command line, so that
+ * a parser following the rules in "Parsing C++ Command-Line Arguments" (MSDN)
+ * recovers exactly <b>arg</b>.
+ *
+ * The result is wrapped in double quotes if <b>arg</b> is empty or contains a
+ * space or tab. Each '"' is emitted as \" and the backslashes immediately
+ * before it are doubled; backslashes before any other character are copied
+ * unchanged. A trailing run of backslashes is doubled only when a closing
+ * quote is appended, because the parser would otherwise read the final
+ * backslash as escaping that quote.
+ *
+ * <b>arg</b> must be a non-NULL NUL-terminated string. Returns a newly
+ * allocated string that the caller must release with tor_free(). */
+static char *
+format_cmdline_argument(const char *arg)
+{
+  char *formatted_arg;
+  char *out;
+  const char *c;
+  int need_quotes;
+  size_t bs_counter = 0;
+  size_t i;
+
+  /* Quote string if it contains whitespace or is empty */
+  need_quotes = (strchr(arg, ' ') || strchr(arg, '\t') || '\0' == arg[0]);
+
+  /* Each input character expands to at most two output characters (an
+   * escaped quote or a doubled backslash), so twice the input length plus
+   * room for the two enclosing quotes and the terminator always suffices */
+  formatted_arg = tor_malloc(strlen(arg) * 2 + 2 + 1);
+  out = formatted_arg;
+
+  /* Add leading quote */
+  if (need_quotes)
+    *out++ = '"';
+
+  /* Copy the argument, holding back backslashes until the next character */
+  for (c = arg; *c != '\0'; c++) {
+    if ('\\' == *c) {
+      /* Count backslashes until we know whether to double up */
+      bs_counter++;
+      continue;
+    }
+    if ('"' == *c) {
+      /* Double up backslashes preceding a quote, plus one to escape it */
+      bs_counter = bs_counter * 2 + 1;
+    }
+    /* Emit the pending backslashes (undoubled before a non-quote) */
+    for (i = 0; i < bs_counter; i++)
+      *out++ = '\\';
+    bs_counter = 0;
+    *out++ = *c;
+  }
+
+  /* Trailing backslashes stay single when unquoted, but directly before
+   * the closing quote they must be doubled or the last would escape it */
+  if (need_quotes)
+    bs_counter *= 2;
+  for (i = 0; i < bs_counter; i++)
+    *out++ = '\\';
+
+  /* Add trailing quote */
+  if (need_quotes)
+    *out++ = '"';
+  *out = '\0';
+
+  return formatted_arg;
+}
+
+/* Build the single command-line string that Windows takes in place of a
+ * string array, from the NULL-terminated array <b>argv</b>. Each element is
+ * escaped per "Parsing C++ Command-Line Arguments" in MSDN (algorithm based
+ * on list2cmdline in the Python subprocess module) and the results are
+ * joined with single spaces. Returns a newly allocated string */
+char *
+tor_join_cmdline(const char *argv[])
+{
+  const char **cur;
+  char *cmdline;
+  smartlist_t *pieces = smartlist_create();
+
+  /* Collect an escaped copy of every argument, preserving order */
+  for (cur = argv; *cur != NULL; cur++) {
+    smartlist_add(pieces, (void *)format_cmdline_argument(*cur));
+  }
+
+  /* Glue the copies together with one space between neighbours */
+  cmdline = smartlist_join_strings(pieces, " ", 0, NULL);
+
+  /* Only the joined string is returned: release the copies and the list */
+  SMARTLIST_FOREACH(pieces, char *, piece,
+  {
+    tor_free(piece);
+  });
+  smartlist_free(pieces);
+
+  return cmdline;
+}
+
 /** Format <b>child_state</b> and <b>saved_errno</b> as a hex string placed in
  * <b>hex_errno</b>.  Called between fork and _exit, so must be signal-handler
  * safe.
@@ -3068,9 +3167,7 @@ tor_spawn_background(const char *const filename, const char **argv,
   BOOL retval = FALSE;
 
   SECURITY_ATTRIBUTES saAttr;
-  smartlist_t *argv_list;
   char *joined_argv;
-  int i;
 
   /* process_handle must not be NULL */
   tor_assert(process_handle != NULL);
@@ -3116,12 +3213,7 @@ tor_spawn_background(const char *const filename, const char **argv,
 
   /* Windows expects argv to be a whitespace delimited string, so join argv up
    */
-  argv_list = smartlist_create();
-  for (i=0; argv[i] != NULL; i++) {
-    smartlist_add(argv_list, (void *)argv[i]);
-  }
-
-  joined_argv = smartlist_join_strings(argv_list, " ", 0, NULL);
+  joined_argv = tor_join_cmdline(argv);
 
   ZeroMemory(&(process_handle->pid), sizeof(PROCESS_INFORMATION));
   ZeroMemory(&siStartInfo, sizeof(STARTUPINFO));
diff --git a/src/common/util.h b/src/common/util.h
index d8c7370..9cdd8cb 100644
--- a/src/common/util.h
+++ b/src/common/util.h
@@ -393,6 +393,7 @@ ssize_t tor_read_all_from_process_stdout(const process_handle_t process_handle,
                                         char *buf, size_t count);
 ssize_t tor_read_all_from_process_stderr(const process_handle_t process_handle,
                                          char *buf, size_t count);
+char *tor_join_cmdline(const char *argv[]);
 void format_helper_exit_status(unsigned char child_state,
                                int saved_errno, char *hex_errno);
 
diff --git a/src/test/test_util.c b/src/test/test_util.c
index 4568fde..9df7bc6 100644
--- a/src/test/test_util.c
+++ b/src/test/test_util.c
@@ -1555,6 +1555,54 @@ test_util_spawn_background_partial_read(void *ptr)
 }
 
 static void
+test_util_join_cmdline(void *ptr)
+{
+  /* Check that tor_join_cmdline() turns argvs[i] into cmdlines[i]. Based on
+   * some test cases from "Parsing C++ Command-Line Arguments" in MSDN, but
+   * not all quoting rules are exercised because tor_join_cmdline will only
+   * generate simple cases; i.e. we never embed quoted strings in arguments. */
+  const char *argvs[][4] = {
+    {"a", "bb", "CCC", NULL}, /* Normal */
+    {NULL, NULL, NULL, NULL}, /* Empty argument list */
+    {"", NULL, NULL, NULL}, /* Empty argument */
+    {"\"a", "b\"b", "CCC\"", NULL}, /* Quotes */
+    {"a\tbc", "dd  dd", "E", NULL}, /* Whitespace */
+    {"a\\\\\\b", "de fg", "H", NULL}, /* Backslashes */
+    {"a\\\"b", "\\c", "D\\", NULL}, /* Backslashes before quote */
+    {"a\\\\b c", "d", "E", NULL}, /* Backslashes not before quote */
+    {NULL, NULL, NULL, NULL} /* Terminator (explicit: "{}" is not ISO C) */
+  };
+
+  const char *cmdlines[] = {
+    "a bb CCC",
+    "",
+    "\"\"",
+    "\\\"a b\\\"b CCC\\\"",
+    "\"a\tbc\" \"dd  dd\" E",
+    "a\\\\\\b \"de fg\" H",
+    "a\\\\\\\"b \\c D\\",
+    "\"a\\\\b c\" d E",
+    NULL /* Terminator; ends the loop below */
+  };
+
+  int i;
+  char *joined_argv = NULL;
+
+  (void)ptr;
+
+  for (i=0; cmdlines[i]!=NULL; i++) {
+    log_info(LD_GENERAL, "Joining argvs[%d], expecting <%s>", i, cmdlines[i]);
+    joined_argv = tor_join_cmdline(argvs[i]);
+    tt_str_op(joined_argv, ==, cmdlines[i]);
+    tor_free(joined_argv);
+    joined_argv = NULL;
+  }
+
+ done:
+  tor_free(joined_argv); /* non-NULL only if a check above bailed out early */
+}
+
+static void
 test_util_di_ops(void)
 {
 #define LT -1
@@ -1642,6 +1690,7 @@ struct testcase_t util_tests[] = {
   UTIL_TEST(spawn_background_ok, 0),
   UTIL_TEST(spawn_background_fail, 0),
   UTIL_TEST(spawn_background_partial_read, 0),
+  UTIL_TEST(join_cmdline, 0),
   END_OF_TESTCASES
 };
 





More information about the tor-commits mailing list