[tor-commits] [stegotorus/master] Add APRAdb registration helper support.

zwol at torproject.org zwol at torproject.org
Fri Jul 20 23:17:08 UTC 2012


commit a6051fba5236f7866a3c0967b08e771224948816
Author: Zack Weinberg <zackw at cmu.edu>
Date:   Sat Jun 2 19:18:10 2012 -0700

    Add APRAdb registration helper support.
---
 Makefile.am            |    1 +
 configure.ac           |    3 +-
 src/audit-globals.sh   |   11 +-
 src/listener.h         |   15 ++
 src/main.cc            |   69 +++++++-
 src/network.cc         |   16 +--
 src/protocol.h         |   25 ++-
 src/protocol/chop.cc   |   12 +-
 src/protocol/null.cc   |    5 +-
 src/steg.h             |    6 +-
 src/subprocess-unix.cc |  468 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/subprocess.h       |   68 +++++++
 12 files changed, 667 insertions(+), 32 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 88939fd..a21f053 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -37,6 +37,7 @@ libstegotorus_a_SOURCES = \
 	src/protocol.cc \
 	src/rng.cc \
 	src/socks.cc \
+	src/subprocess-unix.cc \
 	src/steg.cc \
 	src/util.cc \
 	$(PROTOCOLS) $(STEGANOGRAPHERS)
diff --git a/configure.ac b/configure.ac
index 6dc4fe3..4c23af6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -74,7 +74,8 @@ LIBS="$LIBS $ws32_LIBS"
 
 ### System features ###
 
-AC_CHECK_HEADERS([execinfo.h],,,[/**/])
+AC_CHECK_HEADERS([execinfo.h paths.h],,,[/**/])
+AC_CHECK_FUNCS([closefrom execvpe])
 
 ### Output ###
 
diff --git a/src/audit-globals.sh b/src/audit-globals.sh
index 2a94462..13565aa 100644
--- a/src/audit-globals.sh
+++ b/src/audit-globals.sh
@@ -27,27 +27,28 @@ sed '
   # them.  The above commands have stripped any leading src/ and/or
   # .o or .obj extension.
 
-  # These are genuinely OK.
   /^compression ZLIB_CEILING$/d
   /^compression ZLIB_UINT_MAX$/d
   /^connections circuits$/d
-  /^connections connections$/d
   /^connections closing_all_connections$/d
+  /^connections connections$/d
   /^connections last_ckt_serial$/d
   /^connections last_conn_serial$/d
   /^connections shutting_down$/d
+  /^crypt init_crypto()::initialized$/d
+  /^crypt log_crypto()::initialized$/d
   /^main allow_kq$/d
-  /^main the_event_base$/d
   /^main handle_signal_cb(int, short, void\*)::got_sigint$/d
+  /^main registration_helper$/d
+  /^main the_event_base$/d
   /^network listeners$/d
   /^rng rng$/d
+  /^subprocess-unix already_waited$/d
   /^util log_dest$/d
   /^util log_min_sev$/d
   /^util log_timestamps$/d
   /^util log_ts_base$/d
   /^util the_evdns_base$/d
-  /^crypt log_crypto()::initialized$/d
-  /^crypt init_crypto()::initialized$/d
 ')
 
 if [ -n "$symbols" ]; then
diff --git a/src/listener.h b/src/listener.h
index 6a73d1d..6813686 100644
--- a/src/listener.h
+++ b/src/listener.h
@@ -4,8 +4,23 @@
 #ifndef LISTENER_H
 #define LISTENER_H
 
+#include <vector>
+
+/**
+  This struct defines the state of a listener on a particular address.
+ */
+struct listener_t
+{
+  config_t *cfg;                    // config queried as cfg->get_steg(index)
+  struct evconnlistener *listener;
+  char *address;                    // "x.y.z.w:port" or "[a:b:c...]:port"
+  size_t index;                     // address-set number N within cfg
+};
+
 /* returns 1 on success, 0 on failure */
 int listener_open(struct event_base *base, config_t *cfg);
 void listener_close_all(void);
 
+std::vector<listener_t *> const& get_all_listeners();
+
 #endif
diff --git a/src/main.cc b/src/main.cc
index 8c1407f..9cc1300 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -10,6 +10,8 @@
 #include "crypt.h"
 #include "listener.h"
 #include "protocol.h"
+#include "steg.h"
+#include "subprocess.h"
 
 #include <vector>
 #include <string>
@@ -35,6 +37,7 @@ using std::string;
 
 static struct event_base *the_event_base;
 static bool allow_kq = false;
+static string registration_helper;
 
 /**
    Puts stegotorus's networking subsystem on "closing time" mode. This
@@ -160,6 +163,59 @@ stdin_detect_eof_cb(evutil_socket_t fd, short, void *arg)
 }
 
 /**
+   APRAdb registration hook.
+*/
+static void
+call_registration_helper(string const& helper)
+{
+  vector<string> env = get_environ("ST_");  // parent env minus any ST_* entries
+  env.push_back("ST_SERVER_KEY=placeholder_server_key");
+
+  vector<listener_t*> const& listeners = get_all_listeners();
+  vector<listener_t*>::const_iterator el;
+  unsigned int n = 0;  // advances for skipped listeners too, so numbering may have gaps
+  char buf[512];
+
+  for (el = listeners.begin(); el != listeners.end(); el++, n++) {
+    const steg_config_t *sc = (*el)->cfg->get_steg((*el)->index);
+    if (!sc)
+      continue;  // get_steg returned NULL: no steg module to advertise
+
+    // The address is in the form x.y.z.w:port or [a:b:c...]:port.
+    // We want IP and port in separate strings.  Also, in the latter
+    // case, we want to get rid of the square brackets.
+    string ap((*el)->address);
+    size_t colon = ap.rfind(':');  // last ':' precedes the port in both forms
+    string addr(ap, 0, colon);
+    string port(ap, colon+1);
+
+    if (addr[0] == '[') {
+      addr.erase(addr.size()-1, 1);  // trailing ']'
+      addr.erase(0,1);               // leading '['
+    }
+
+    if (xsnprintf(buf, sizeof buf, "ST_LISTENER_%u=%s,tcp,%s,%s",
+                  n, addr.c_str(), port.c_str(), sc->name()) == -1) {
+      log_warn("listener %u info is too big", n);
+      continue;
+    }
+    env.push_back(buf);
+  }
+
+  vector<string> args;
+  args.push_back(helper);  // the helper receives no arguments besides argv[0]
+  subprocess h = subprocess::call(args, env);  // waits for the helper to finish
+  if (h.state == CLD_DUMPED) {
+    log_warn("%s: %s (core dumped)", helper.c_str(), strsignal(h.returncode));
+  } else if (h.state == CLD_KILLED) {
+    log_warn("%s: %s", helper.c_str(), strsignal(h.returncode));
+  } else if (h.state == CLD_EXITED && h.returncode != 0) {
+    log_warn("%s: exited unsuccessfully, status %d",
+             helper.c_str(), h.returncode);
+  }
+}
+
+/**
    Prints usage instructions then exits.
 */
 static void ATTR_NORETURN
@@ -200,6 +256,7 @@ handle_generic_args(const char *const *argv)
   bool logsev_set = false;
   bool allow_kq_set = false;
   bool timestamps_set = false;
+  bool registration_helper_set=false;
   int i = 1;
 
   while (argv[i] &&
@@ -250,8 +307,15 @@ handle_generic_args(const char *const *argv)
       }
       allow_kq = true;
       allow_kq_set = true;
+    } else if (!strncmp(argv[i], "--registration-helper=", 22)) {
+      if (registration_helper_set) {
+        fprintf(stderr, "you've already set a registration helper!\n");
+        exit(1);
+      }
+      registration_helper = string(argv[i]+22);
+      registration_helper_set = true;
     } else {
-      fprintf(stderr, "unrecognizable argument '%s'", argv[i]);
+      fprintf(stderr, "unrecognizable argument '%s'\n", argv[i]);
       exit(1);
     }
     i++;
@@ -409,6 +473,9 @@ main(int, const char *const *argv)
       log_abort("failed to open listeners for configuration %lu",
                 (unsigned long)(i - configs.begin()) + 1);
 
+  if (!registration_helper.empty())
+    call_registration_helper(registration_helper);
+
   /* We are go for launch. As a signal to any monitoring process that may
      be running, close stdout now. */
   log_info("%s process %lu now initialized", argv[0], (unsigned long)getpid());
diff --git a/src/network.cc b/src/network.cc
index f436ef4..b5f6d4b 100644
--- a/src/network.cc
+++ b/src/network.cc
@@ -20,17 +20,6 @@
 
 using std::vector;
 
-/**
-  This struct defines the state of a listener on a particular address.
- */
-struct listener_t
-{
-  config_t *cfg;
-  struct evconnlistener *listener;
-  char *address;
-  size_t index;
-};
-
 /** All our listeners. */
 static vector<listener_t *> listeners;
 
@@ -57,6 +46,11 @@ static void downstream_event_cb(struct bufferevent *bev, short what, void *arg);
 static void create_outbound_connections(circuit_t *ckt, bool is_socks);
 static void create_outbound_connections_socks(circuit_t *ckt);
 
+vector<listener_t *> const& get_all_listeners()
+{
+  return listeners;  // read-only reference to this file's static listener table
+}
+
 /**
    This function opens listening sockets configured according to the
    provided 'config_t'.  Returns 1 on success, 0 on failure.
diff --git a/src/protocol.h b/src/protocol.h
index fbc459e..a2e467d 100644
--- a/src/protocol.h
+++ b/src/protocol.h
@@ -7,6 +7,7 @@
 #define PROTOCOL_H
 
 struct proto_module;
+struct steg_config_t;
 
 /** A 'config_t' is a set of addresses to listen on, and what to do
     when connections are received.  A protocol module must define a
@@ -29,7 +30,7 @@ struct config_t
   /** Return the name of the protocol associated with this
       configuration.  You do not have to define this method in your
       subclass, PROTO_DEFINE_MODULE does it for you. */
-  virtual const char *name() = 0;
+  virtual const char *name() const = 0;
 
   /** Initialize yourself from a set of command line options.  This is
       separate from the subclass constructor so that it can fail:
@@ -42,7 +43,7 @@ struct config_t
       users of this function should call it repeatedly with successive
       values of N, starting from zero, until it returns NULL, and
       create listeners for every address returned. */
-  virtual evutil_addrinfo *get_listen_addrs(size_t n) = 0;
+  virtual evutil_addrinfo *get_listen_addrs(size_t n) const = 0;
 
   /** Return a set of addresses to attempt an outbound connection to,
       in the form of an 'evutil_addrinfo' linked list.  As with
@@ -50,7 +51,12 @@ struct config_t
       should in general attempt simultaneous connection to at least
       one address from every list.  The maximum N is indicated in the
       same way as for get_listen_addrs.  */
-  virtual evutil_addrinfo *get_target_addrs(size_t n) = 0;
+  virtual evutil_addrinfo *get_target_addrs(size_t n) const = 0;
+
+  /** Return the steganography module associated with either listener
+      or target address set N.  If called on a protocol that doesn't
+      use steganography, will return NULL.  */
+  virtual const steg_config_t *get_steg(size_t n) const = 0;
 
   /** Return an extended 'circuit_t' object for a new socket using
       this configuration.  The 'index' argument is equal to the 'N'
@@ -87,7 +93,7 @@ extern const proto_module *const supported_protos[];
 
 #define PROTO_DEFINE_MODULE(mod)                                \
   /* canned methods */                                          \
-  const char *mod##_config_t::name()                            \
+  const char *mod##_config_t::name() const                      \
   { return #mod; }                                              \
                                                                 \
   static config_t *                                             \
@@ -106,14 +112,19 @@ extern const proto_module *const supported_protos[];
 #define CONFIG_DECLARE_METHODS(mod)                             \
   mod##_config_t();                                             \
   virtual ~mod##_config_t();                                    \
-  virtual const char *name();                                   \
+  virtual const char *name() const;                             \
   virtual bool init(int n_opts, const char *const *opts);       \
-  virtual evutil_addrinfo *get_listen_addrs(size_t n);          \
-  virtual evutil_addrinfo *get_target_addrs(size_t n);          \
+  virtual evutil_addrinfo *get_listen_addrs(size_t n) const;    \
+  virtual evutil_addrinfo *get_target_addrs(size_t n) const;    \
+  virtual const steg_config_t *get_steg(size_t n) const;        \
   virtual circuit_t *circuit_create(size_t index);              \
   virtual conn_t *conn_create(size_t index)                     \
   /* deliberate absence of semicolon */
 
+#define CONFIG_STEG_STUBS(mod)                                  \
+  const steg_config_t *mod##_config_t::get_steg(size_t) const   \
+  { return 0; }  /* stub for protocols that use no steganography */
+
 #define CONN_DECLARE_METHODS(mod)                       \
   mod##_conn_t();                                       \
   virtual ~mod##_conn_t();                              \
diff --git a/src/protocol/chop.cc b/src/protocol/chop.cc
index 5451925..f547303 100644
--- a/src/protocol/chop.cc
+++ b/src/protocol/chop.cc
@@ -475,7 +475,7 @@ chop_config_t::init(int n_options, const char *const *options)
 }
 
 struct evutil_addrinfo *
-chop_config_t::get_listen_addrs(size_t n)
+chop_config_t::get_listen_addrs(size_t n) const 
 {
   if (mode == LSN_SIMPLE_SERVER) {
     if (n < down_addresses.size())
@@ -488,7 +488,7 @@ chop_config_t::get_listen_addrs(size_t n)
 }
 
 struct evutil_addrinfo *
-chop_config_t::get_target_addrs(size_t n)
+chop_config_t::get_target_addrs(size_t n) const
 {
   if (mode == LSN_SIMPLE_SERVER) {
     if (n == 0)
@@ -500,6 +500,14 @@ chop_config_t::get_target_addrs(size_t n)
   return NULL;
 }
 
+const steg_config_t *
+chop_config_t::get_steg(size_t n) const
+{
+  if (n < steg_targets.size())
+    return steg_targets[n];
+  return NULL;  // n is past the end of steg_targets
+}
+
 // Circuit methods
 
 const char passphrase[] =
diff --git a/src/protocol/null.cc b/src/protocol/null.cc
index 90abec2..d8fe98b 100644
--- a/src/protocol/null.cc
+++ b/src/protocol/null.cc
@@ -102,7 +102,7 @@ null_config_t::init(int n_options, const char *const *options)
 
 /** Retrieve the 'n'th set of listen addresses for this configuration. */
 struct evutil_addrinfo *
-null_config_t::get_listen_addrs(size_t n)
+null_config_t::get_listen_addrs(size_t n) const
 {
   if (n > 0)
     return 0;
@@ -111,7 +111,7 @@ null_config_t::get_listen_addrs(size_t n)
 
 /* Retrieve the target address for this configuration. */
 struct evutil_addrinfo *
-null_config_t::get_target_addrs(size_t n)
+null_config_t::get_target_addrs(size_t n) const
 {
   if (n > 0)
     return 0;
@@ -287,4 +287,5 @@ null_conn_t::recv_eof()
   return 0;
 }
 
+CONFIG_STEG_STUBS(null);
 CONN_STEG_STUBS(null);
diff --git a/src/steg.h b/src/steg.h
index fe8ef5e..f042864 100644
--- a/src/steg.h
+++ b/src/steg.h
@@ -26,7 +26,7 @@ struct steg_config_t
 
   /** Report the name of this steg module.  You do not have to define
       this method in your subclass, STEG_DEFINE_MODULE does it for you. */
-  virtual const char *name() = 0;
+  virtual const char *name() const = 0;
 
   /** Create an extended 'steg_t' object (see below) from this
       configuration, associated with connection CONN.  */
@@ -110,7 +110,7 @@ steg_config_t *steg_new(const char *name, config_t *cfg);
   { return new mod##_steg_config_t(cfg); }              \
                                                         \
   /* canned methods */                                  \
-  const char *mod##_steg_config_t::name()               \
+  const char *mod##_steg_config_t::name() const         \
   { return #mod; }                                      \
                                                         \
   /* module object */                                   \
@@ -121,7 +121,7 @@ steg_config_t *steg_new(const char *name, config_t *cfg);
 #define STEG_CONFIG_DECLARE_METHODS(mod)                \
   mod##_steg_config_t(config_t *);                      \
   virtual ~mod##_steg_config_t();                       \
-  virtual const char *name();                           \
+  virtual const char *name() const;                     \
   virtual steg_t *steg_create(conn_t *)                 \
   /* deliberate absence of semicolon */
 
diff --git a/src/subprocess-unix.cc b/src/subprocess-unix.cc
new file mode 100644
index 0000000..7e4b8a2
--- /dev/null
+++ b/src/subprocess-unix.cc
@@ -0,0 +1,468 @@
+/* Copyright 2012 SRI International
+ * Portions copyright 2003-2011 Roger Dingledine, Nick Mathewson,
+ *   and/or The Tor Project, Inc.
+ * Portions copyright 1991-2012 The Regents of the University of California
+ *   and/or various FreeBSD contributors.
+ * See LICENSE for other credits and copying information.
+ */
+
+// N.B. This file will have to be rewritten more-or-less from scratch
+// for the Windows port.  It should be acceptably portable to all Unix
+// implementations still in wide use.
+
+#include "util.h"
+#include "subprocess.h"
+
+#include <map>
+
+#include <sys/stat.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#ifdef HAVE_PATHS_H
+#include <paths.h>
+#endif
+#ifndef _PATH_DEFPATH
+#define _PATH_DEFPATH "/usr/bin:/bin"
+#endif
+
+#ifndef PATH_MAX
+# ifdef MAXPATHLEN
+#  define PATH_MAX MAXPATHLEN
+# else
+#  define PATH_MAX 4096
+# endif
+#endif
+
+#ifndef OPEN_MAX
+# define OPEN_MAX 256
+#endif
+
+extern char **environ;
+
+using std::map;
+using std::vector;
+using std::string;
+
+
+// Space for hex values of child state, a slash, saved_errno (with
+//    leading minus) and newline (no null)
+#define HEX_ERRNO_SIZE (sizeof(int)*2 + 4)
+
+// State codes for the child side of the fork.
+#define CHILD_STATE_REDIRECT_STDIN 1
+#define CHILD_STATE_REDIRECT_STDOUT 2
+#define CHILD_STATE_REDIRECT_STDERR 3
+#define CHILD_STATE_CLOSEFROM 4
+#define CHILD_STATE_EXEC 5
+
+// Some C libraries get very unhappy with you if you ignore the result
+// of a write call, but where it's used in this file, there is nothing
+// whatsoever we can do if it fails.
+#define IGNORE_FAILURE(expr) do { if (expr) {} } while (0)
+
+// We have not prevented copying of |subprocess| objects, so it is
+// possible that |wait| will be called more than once for the same
+// PID, with no state in the object to tell us so.  To prevent
+// problems, maintain a table of processes that we have waited for.
+// We make no attempt to prune this table; its memory requirements
+// should be trivial for the expected uses of this API.
+static map<pid_t, int> already_waited;
+
+// Internal utilities and replacements for system library routines
+// that may or may not exist.
+
+#ifndef HAVE_CLOSEFROM
+static void
+closefrom(int lowfd)  // fallback: close every open descriptor >= lowfd
+{
+#ifdef F_CLOSEM
+  // Try F_CLOSEM if it's defined.  But it might not work.
+  if (fcntl(lowfd, F_CLOSEM, 0) == 0)
+    return;
+#endif
+
+  // If /proc/self/fd is available, use it.
+  // N.B. Theoretically you are not allowed to use opendir() after fork()
+  // as it's not async-signal-safe.  This is overwhelmingly unlikely to
+  // cause problems in practice.
+  DIR *dirp;
+  if ((dirp = opendir("/proc/self/fd")) != 0) {
+    struct dirent *dent;
+    char *endp;
+    while ((dent = readdir(dirp)) != NULL) {
+      unsigned long fd = strtoul(dent->d_name, &endp, 10);
+      if (dent->d_name != endp && *endp == '\0' &&  // name is entirely digits
+          fd <  (unsigned long)INT_MAX &&
+          fd >= (unsigned long)lowfd   &&
+          fd != (unsigned long)dirfd(dirp))  // spare the fd we are reading from
+        close((int)fd);
+    }
+    closedir(dirp);
+    return;
+  }
+
+  // As a last resort, blindly close all possible fd numbers
+  // between lowfd and _SC_OPEN_MAX.
+  unsigned long maxfd = sysconf(_SC_OPEN_MAX);
+  if (maxfd == (unsigned long)(-1L))  // sysconf failed; use the compile-time guess
+    maxfd = OPEN_MAX;
+  for (unsigned long fd = lowfd; fd < maxfd; fd++)
+    close((int)fd);
+}
+#endif
+
+#ifndef HAVE_EXECVPE
+// Implementation courtesy FreeBSD 9.0 src/lib/libc/gen/exec.c
+// some adjustments made with reference to the glibc implementation
+static int
+execvpe(const char *name, char * const argv[], char * const envp[])
+{
+  const char *path;
+  const char *p, *q;  // p: start of the current $PATH element; q: its end
+  size_t lp, ln;      // lengths of the current element and of |name|
+  bool eacces = false;
+  char buf[PATH_MAX];
+
+  // If it's an empty path name, fail immediately.
+  if (*name == '\0') {
+    errno = ENOENT;
+    return -1;
+  }
+
+  // If it's an absolute or relative pathname, do not search $PATH.
+  if (strchr(name, '/')) {
+    execve(name, argv, envp);
+    return -1;
+  }
+  ln = strlen(name);
+
+  // Get the path to search.  Intentionally uses the parent
+  // environment, not 'envp'.
+  if (!(path = getenv("PATH")))
+    path = _PATH_DEFPATH;
+
+  q = path;
+  do {
+    p = q;
+    while (*q != '\0' && *q != ':')
+      q++;
+
+    // Double, leading and trailing colons mean the current directory.
+    if (q == p) {
+      p = ".";
+      lp = 1;
+    } else
+      lp = q - p;
+    // q stays on the delimiter; the loop condition below steps past it.
+
+    // If the path is too long, complain and skip it.  This is a
+    // possible security issue; given a way to make the path too long
+    // the user may execute the wrong program.
+    if (lp + ln + 2 > sizeof(buf)) {
+      IGNORE_FAILURE(write(2, "execvpe: ", 9));
+      IGNORE_FAILURE(write(2, p, lp));
+      IGNORE_FAILURE(write(2, ": path too long\n", 16));
+      continue;
+    }
+
+    memcpy(buf, p, lp);
+    buf[lp] = '/';
+    memcpy(buf + lp + 1, name, ln);
+    buf[lp + ln + 1] = '\0';
+
+    execve(buf, argv, envp);
+    switch (errno) {
+      // These errors all indicate that we should try the next directory.
+    case EACCES:
+      // Remember that at least one failure was due to a permission check;
+      // this will be preferentially reported, unless we hit something even
+      // more serious.
+      eacces = true;  // deliberate fall through
+    case ELOOP:
+    case ENAMETOOLONG:
+    case ENOENT:
+    case ENOTDIR:
+    case ESTALE:
+    case ETIMEDOUT:
+      continue;
+
+    default:
+      // On any other error, give up.
+      // Shell fallback for ENOEXEC deliberately removed, as it is a
+      // historical vestige and involves allocating memory.
+      return -1;
+    }
+  } while (*q++ != '\0');  // test the delimiter first: never read past the NUL
+
+  if (eacces)
+    errno = EACCES;
+  return -1;
+}
+#endif
+
+/** Format <b>child_state</b> and <b>saved_errno</b> as a hex string placed in
+ * <b>hex_errno</b>.  Called between fork and _exit, so must be signal-handler
+ * safe.
+ *
+ * <b>hex_errno</b> must have at least HEX_ERRNO_SIZE bytes available.
+ *
+ * The format of <b>hex_errno</b> is: "CHILD_STATE/ERRNO\n", left-padded
+ * with spaces. Note that there is no trailing \0. CHILD_STATE indicates where
+ * in the process of starting the child the failure occurred (see the
+ * CHILD_STATE_* macros for definitions), and ERRNO is the value of
+ * <b>saved_errno</b>, i.e. errno when the failure occurred.
+ */
+static void
+format_helper_exit_status(unsigned char child_state, int saved_errno,
+                          char *hex_errno)
+{
+  unsigned int unsigned_errno;
+  char *cur;
+  size_t i;
+
+  /* Fill hex_errno with spaces, and a trailing newline (memset may
+     not be signal handler safe, so we can't use it) */
+  for (i = 0; i < (HEX_ERRNO_SIZE - 1); i++)
+    hex_errno[i] = ' ';
+  hex_errno[HEX_ERRNO_SIZE - 1] = '\n';
+
+  /* Convert errno to be unsigned for hex conversion */
+  if (saved_errno < 0) {
+    unsigned_errno = (unsigned int) -saved_errno;
+  } else {
+    unsigned_errno = (unsigned int) saved_errno;
+  }
+
+  /* Digits are written right to left, starting just before the \n */
+  cur = hex_errno + HEX_ERRNO_SIZE - 2;
+
+  /* Check for overflow on first iteration of the loop */
+  if (cur < hex_errno)
+    return;
+
+  do {
+    *cur-- = "0123456789ABCDEF"[unsigned_errno % 16];
+    unsigned_errno /= 16;
+  } while (unsigned_errno != 0 && cur >= hex_errno);
+
+  /* Prepend the minus sign if errno was negative */
+  if (saved_errno < 0 && cur >= hex_errno)
+    *cur-- = '-';
+
+  /* Separator between CHILD_STATE and ERRNO */
+  if (cur >= hex_errno)
+    *cur-- = '/';
+
+  /* Check for overflow on first iteration of the loop */
+  if (cur < hex_errno)
+    return;
+
+  /* Convert child_state to hex */
+  do {
+    *cur-- = "0123456789ABCDEF"[child_state % 16];
+    child_state /= 16;
+  } while (child_state != 0 && cur >= hex_errno);
+}
+
+/** Start a program in the background. If <b>filename</b> contains a '/',
+ * then it will be treated as an absolute or relative path.  Otherwise the
+ * system path will be searched for <b>filename</b>. The strings in
+ * <b>argv</b> will be passed as the command line arguments of the child
+ * program (following convention, argv[0] should normally be the filename of
+ * the executable), and the strings in <b>envp</b> will be passed as its
+ * environment variables.
+ *
+ * The child's standard input and output will both be /dev/null;
+ * the child's standard error will be whatever it is in the parent
+ * (unless it is closed in the parent, in which case it will also be
+ * /dev/null)
+ *
+ * All file descriptors numbered higher than 2 will be closed.
+ *
+ * On success, returns the PID of the child; on failure, returns -1.
+ */
+static pid_t
+do_fork_exec(const char *const filename,
+             const char **argv,
+             const char **envp)
+{
+  pid_t pid = fork();
+
+  if (pid == -1) {
+    log_warn("Failed to fork child process: %s", strerror(errno));
+    return -1;
+  }
+
+  if (pid != 0) {
+    // In parent.
+    // If we spawn a child, wait for it, the PID counter wraps
+    // completely around, and then we spawn another child which
+    // happens to get exactly the same PID as the first one, we had
+    // better remove the old record from the already_waited table or
+    // we won't ever actually wait for the new child.  The odds of
+    // this are small, but not ridiculously small.
+    already_waited.erase(pid);
+    return pid;
+  }
+
+  // In child
+  char hex_errno[HEX_ERRNO_SIZE];
+  unsigned int child_state = CHILD_STATE_REDIRECT_STDIN;
+
+  close(0);
+  if (open("/dev/null", O_RDONLY) != 0)  // must land on the fd just closed
+    goto error;
+
+  child_state = CHILD_STATE_REDIRECT_STDOUT;
+
+  close(1);
+  if (open("/dev/null", O_WRONLY) != 1)  // likewise for stdout
+    goto error;
+
+  child_state = CHILD_STATE_REDIRECT_STDERR;
+  if (!isatty(2) && errno == EBADF) {  // fd 2 is not open at all
+    if (open("/dev/null", O_WRONLY) != 2)
+      goto error;
+  }
+
+  child_state = CHILD_STATE_CLOSEFROM;
+  closefrom(3);
+
+  child_state = CHILD_STATE_EXEC;
+
+  // We need the casts because execvpe doesn't declare argv or envp
+  // as const, even though it does not modify them.
+  execvpe(filename, (char *const *) argv, (char *const *)envp);
+
+ error:  // also reached by falling through when execvpe returns
+  format_helper_exit_status(child_state, errno, hex_errno);
+
+#define error_message "ERR: Failed to spawn child process: code "
+
+  IGNORE_FAILURE(write(2, error_message, sizeof error_message - 1));
+  IGNORE_FAILURE(write(2, hex_errno, sizeof hex_errno));
+
+#undef error_message
+
+  _exit(255);  // the parent decodes this as CLD_EXITED with status 255
+}
+
+// Wrapper: marshal the C++-y vectors into the NULL-terminated arrays
+// the kernel expects.  Returns -1 without forking if |args| is empty.
+static pid_t
+do_fork_exec(vector<string> const& args,
+             vector<string> const& env)
+{
+  if (args.empty()) {
+    log_warn("Failed to spawn child process: empty argument vector");
+    return -1;
+  }
+  // Value-initialized, so each array already ends with a NULL entry.
+  vector<char const*> argv(args.size() + 1);
+  vector<char const*> envp(env.size() + 1);
+  for (size_t i = 0; i < args.size(); i++)
+    argv[i] = args[i].c_str();
+  for (size_t i = 0; i < env.size(); i++)
+    envp[i] = env[i].c_str();
+  return do_fork_exec(argv[0], &argv[0], &envp[0]);
+}
+
+static void
+decode_status(int status, int& state, int& rc)  // split a wait status in two
+{
+  if (WIFEXITED(status)) {
+    rc = WEXITSTATUS(status);
+    state = CLD_EXITED;
+  } else if (WIFSIGNALED(status)) {
+    rc = WTERMSIG(status);  // here rc is the terminating signal number
+#ifdef WCOREDUMP
+    if (WCOREDUMP(status))
+      state = CLD_DUMPED;
+    else  // pairs with the assignment after the #endif
+#endif
+      state = CLD_KILLED;
+  } else {
+    // we do not use WUNTRACED, WCONTINUED, or ptrace, so the other
+    // WIF* possibilities should never happen
+    log_abort("impossible wait status %04x", (unsigned int)status);
+  }
+}
+
+// Reap |pid| for poll()/wait().  True, with |state| and |rc| filled in,
+// once the child is done; false if still running or waitpid() failed.
+static bool
+wait_common(pid_t pid, int& state, int& rc, bool wnohang)
+{
+  if (pid == -1) {
+    // Failure to fork is reported like a failure between fork and exec.
+    state = CLD_EXITED;
+    rc = 255;
+    return true;
+  }
+  map<pid_t, int>::iterator p = already_waited.find(pid);
+  if (p != already_waited.end()) {
+    decode_status(p->second, state, rc);
+    return true;
+  }
+
+  int status;
+  pid_t rv;
+  do rv = waitpid(pid, &status, wnohang ? WNOHANG : 0);
+  while (rv == -1 && errno == EINTR);  // retry: a caught signal is no failure
+  if (rv == pid) {
+    decode_status(status, state, rc);
+    already_waited.insert(std::make_pair(pid, status));
+    return true;
+  }
+  if (rv != 0 || !wnohang)
+    log_warn("waitpid(%d) failed: %s", pid, strerror(errno));
+  return false;
+}
+
+// subprocess methods
+
+subprocess::subprocess(vector<string> const& args,
+                       vector<string> const& env)
+  : pid(do_fork_exec(args, env)),  // child is spawned here; -1 if fork failed
+    state(0),                      // 0 until poll()/wait() reaps the child
+    returncode(-1)                 // no exit status or signal recorded yet
+{
+}
+
+subprocess
+subprocess::call(vector<string> const& args, vector<string> const& env)
+{
+  subprocess proc(args, env);
+  proc.wait();   // blocking wait for the child just spawned
+  return proc;   // returned by value, carrying state and returncode
+}
+
+bool
+subprocess::poll()
+{
+  return wait_common(pid, state, returncode, true);  // true selects WNOHANG
+}
+
+void
+subprocess::wait()
+{
+  wait_common(pid, state, returncode, false);  // result dropped; inspect state
+}
+
+// public utilities
+
+vector<string>
+get_environ(const char *exclude)
+{
+  vector<string> result;
+  size_t exlen = exclude ? strlen(exclude) : 0;
+
+  for (char **p = environ; *p; p++)
+    if (!exclude || strncmp(exclude, *p, exlen))  // keep unless prefix matches
+      result.push_back(*p);                       // copies the whole entry
+
+  return result;
+}
diff --git a/src/subprocess.h b/src/subprocess.h
new file mode 100644
index 0000000..95fe848
--- /dev/null
+++ b/src/subprocess.h
@@ -0,0 +1,68 @@
+/* Copyright 2012 SRI International
+ * See LICENSE for other credits and copying information
+ */
+
+#ifndef SUBPROCESS_H
+#define SUBPROCESS_H
+
+#include <string>
+#include <vector>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+
+// This API is inspired by the Python subprocess module.  While it
+// could be extended to do everything that that does, at present it
+// does much less.  If you add features, please consider matching
+// Python's presentation of same.
+
+#ifndef CLD_EXITED
+#define CLD_EXITED 1
+#endif
+#ifndef CLD_KILLED
+#define CLD_KILLED 2
+#endif
+#ifndef CLD_DUMPED
+#define CLD_DUMPED 3
+#endif
+
+struct subprocess
+{
+  // Start a new subprocess with argument vector |args| and environment
+  // vector |env|.  stdin and stdout are /dev/null.  stderr is inherited
+  // (or /dev/null if closed in the parent).  Descriptors 3 and higher
+  // are closed.  The current working directory is inherited.
+  subprocess(std::vector<std::string> const& args,
+             std::vector<std::string> const& env);
+
+  // Convenience: spawn a subprocess and wait for it to terminate.
+  static subprocess call(std::vector<std::string> const& args,
+                         std::vector<std::string> const& env);
+
+  // Check whether the child process has terminated.  Returns true if it
+  // has, false otherwise; sets 'state' and 'returncode'.
+  bool poll();
+
+  // Wait for the child process to terminate.
+  void wait();
+
+  // Process ID of the child.  -1 on failure to spawn, in which case
+  // an error message has already been logged.
+  const pid_t pid;
+
+  // Child state, either 0 (running) or one of the <signal.h> constants
+  // CLD_EXITED, CLD_KILLED, or CLD_DUMPED.
+  int state;
+
+  // Exit status (if state == CLD_EXITED) or signal that terminated the
+  // process (if state == CLD_KILLED or CLD_DUMPED); -1 otherwise.
+  int returncode;
+};
+
+// Convert the global environment vector to a C++ vector.
+// If 'exclude' is not NULL, then any environment variable whose name
+// begins with those characters will be excluded from the result.
+extern std::vector<std::string> get_environ(const char *exclude = 0);
+
+#endif





More information about the tor-commits mailing list