commit bc97f410802d5b9c66bfba6aebeae1ecd70f8857
Author: Steven Murdoch <Steven.Murdoch@cl.cam.ac.uk>
Date:   Mon Aug 29 12:26:55 2011 +0100
Refactor out command line formatting
Now correctly handles whitespace, quotes and backslashes. Passes all unit tests. --- src/common/util.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++---- src/common/util.h | 1 + src/test/test_util.c | 49 ++++++++++++++++++++++ 3 files changed, 150 insertions(+), 8 deletions(-)
diff --git a/src/common/util.c b/src/common/util.c index dd2d111..87c6fb5 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -2953,6 +2953,105 @@ load_windows_system_library(const TCHAR *library_name) } #endif
+/* Format a single argument for being put on a Windows command line: escape
+ * quotes, quote if empty or whitespace. Returns a newly allocated string */
+static char *
+format_cmdline_argument(const char *arg)
+{
+  char *formatted_arg;
+  char need_quotes;
+  const char *c;
+  int i;
+  int bs_counter = 0;
+  /* Backslash we can point to when one is inserted into the string */
+  const char backslash = '\\';
+
+  /* Smartlist of *char: each entry points at one output character */
+  smartlist_t *arg_chars;
+  arg_chars = smartlist_create();
+
+  /* Quote string if it contains whitespace or is empty */
+  need_quotes = (strchr(arg, ' ') || strchr(arg, '\t') || '\0' == arg[0]);
+
+  /* Build up smartlist of *chars */
+  for (c=arg; *c != '\0'; c++) {
+    if ('"' == *c) {
+      /* Double up backslashes preceding a quote */
+      for (i=0; i<(bs_counter*2); i++)
+        smartlist_add(arg_chars, (void*)&backslash);
+      bs_counter = 0;
+      /* Escape the quote */
+      smartlist_add(arg_chars, (void*)&backslash);
+      smartlist_add(arg_chars, (void*)c);
+    } else if ('\\' == *c) {
+      /* Count backslashes until we know whether to double up */
+      bs_counter++;
+    } else {
+      /* Don't double up slashes preceding a non-quote */
+      for (i=0; i<bs_counter; i++)
+        smartlist_add(arg_chars, (void*)&backslash);
+      bs_counter = 0;
+      smartlist_add(arg_chars, (void*)c);
+    }
+  }
+  /* Double up trailing backslashes only if a closing quote will follow */
+  for (i=0; i<(need_quotes ? bs_counter*2 : bs_counter); i++)
+    smartlist_add(arg_chars, (void*)&backslash);
+
+  /* Allocate space for argument, quotes (if needed), and terminator */
+  formatted_arg = tor_malloc(sizeof(char) *
+      (smartlist_len(arg_chars) + (need_quotes?2:0) + 1));
+
+  /* Add leading quote */
+  i=0;
+  if (need_quotes)
+    formatted_arg[i++] = '"';
+
+  /* Add characters */
+  SMARTLIST_FOREACH(arg_chars, char*, c,
+  {
+    formatted_arg[i++] = *c;
+  });
+
+  /* Add trailing quote */
+  if (need_quotes)
+    formatted_arg[i++] = '"';
+  formatted_arg[i] = '\0';
+
+  smartlist_free(arg_chars);
+  return formatted_arg;
+}
+
+/* Format a command line for use on Windows, which takes the command as a
+ * string rather than string array. Follows the rules from "Parsing C++
+ * Command-Line Arguments" in MSDN. Algorithm based on list2cmdline in the
+ * Python subprocess module. Returns a newly allocated string */
+char *
+tor_join_cmdline(const char *argv[])
+{
+  smartlist_t *argv_list;
+  char *joined_argv;
+  int i;
+
+  /* Format each argument and put the result in a smartlist */
+  argv_list = smartlist_create();
+  for (i=0; argv[i] != NULL; i++) {
+    smartlist_add(argv_list, (void *)format_cmdline_argument(argv[i]));
+  }
+
+  /* Join the arguments with whitespace */
+  joined_argv = smartlist_join_strings(argv_list, " ", 0, NULL);
+
+  /* Free the newly allocated arguments, and the smartlist */
+  SMARTLIST_FOREACH(argv_list, char *, arg,
+  {
+    tor_free(arg);
+  });
+  smartlist_free(argv_list);
+
+  return joined_argv;
+}
+
 /** Format <b>child_state</b> and <b>saved_errno</b> as a hex string placed in
  * <b>hex_errno</b>. Called between fork and _exit, so must be signal-handler
  * safe.
@@ -3068,9 +3167,7 @@ tor_spawn_background(const char *const filename, const char **argv,
   BOOL retval = FALSE;
SECURITY_ATTRIBUTES saAttr; - smartlist_t *argv_list; char *joined_argv; - int i;
/* process_handle must not be NULL */ tor_assert(process_handle != NULL); @@ -3116,12 +3213,7 @@ tor_spawn_background(const char *const filename, const char **argv,
/* Windows expects argv to be a whitespace delimited string, so join argv up */ - argv_list = smartlist_create(); - for (i=0; argv[i] != NULL; i++) { - smartlist_add(argv_list, (void *)argv[i]); - } - - joined_argv = smartlist_join_strings(argv_list, " ", 0, NULL); + joined_argv = tor_join_cmdline(argv);
ZeroMemory(&(process_handle->pid), sizeof(PROCESS_INFORMATION)); ZeroMemory(&siStartInfo, sizeof(STARTUPINFO)); diff --git a/src/common/util.h b/src/common/util.h index d8c7370..9cdd8cb 100644 --- a/src/common/util.h +++ b/src/common/util.h @@ -393,6 +393,7 @@ ssize_t tor_read_all_from_process_stdout(const process_handle_t process_handle, char *buf, size_t count); ssize_t tor_read_all_from_process_stderr(const process_handle_t process_handle, char *buf, size_t count); +char *tor_join_cmdline(const char *argv[]); void format_helper_exit_status(unsigned char child_state, int saved_errno, char *hex_errno);
diff --git a/src/test/test_util.c b/src/test/test_util.c index 4568fde..9df7bc6 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -1555,6 +1555,54 @@ test_util_spawn_background_partial_read(void *ptr) }
static void +test_util_join_cmdline(void *ptr) +{ + /* Based on some test cases from "Parsing C++ Command-Line Arguments" in MSDN + * but we don't exercise all quoting rules because tor_join_cmdline will try + * to only generate simple cases for the child process to parse; i.e. we + * never embed quoted strings in arguments. */ + + const char *argvs[][4] = { + {"a", "bb", "CCC", NULL}, // Normal + {NULL, NULL, NULL, NULL}, // Empty argument list + {"", NULL, NULL, NULL}, // Empty argument + {""a", "b"b", "CCC"", NULL}, // Quotes + {"a\tbc", "dd dd", "E", NULL}, // Whitespace + {"a\\\b", "de fg", "H", NULL}, // Backslashes + {"a\"b", "\c", "D\", NULL}, // Backslashes before quote + {"a\\b c", "d", "E", NULL}, // Backslashes not before quote + {} // Terminator + }; + + const char *cmdlines[] = { + "a bb CCC", + "", + """", + "\"a b\"b CCC\"", + ""a\tbc" "dd dd" E", + "a\\\b "de fg" H", + "a\\\"b \c D\", + ""a\\b c" d E", + NULL // Terminator + }; + + int i; + char *joined_argv; + + (void)ptr; + + for (i=0; cmdlines[i]!=NULL; i++) { + log_info(LD_GENERAL, "Joining argvs[%d], expecting <%s>", i, cmdlines[i]); + joined_argv = tor_join_cmdline(argvs[i]); + tt_str_op(joined_argv, ==, cmdlines[i]); + tor_free(joined_argv); + } + + done: + ; +} + +static void test_util_di_ops(void) { #define LT -1 @@ -1642,6 +1690,7 @@ struct testcase_t util_tests[] = { UTIL_TEST(spawn_background_ok, 0), UTIL_TEST(spawn_background_fail, 0), UTIL_TEST(spawn_background_partial_read, 0), + UTIL_TEST(join_cmdline, 0), END_OF_TESTCASES };