commit a6051fba5236f7866a3c0967b08e771224948816
Author: Zack Weinberg <zackw(a)cmu.edu>
Date: Sat Jun 2 19:18:10 2012 -0700
Add APRAdb registration helper support.
---
Makefile.am | 1 +
configure.ac | 3 +-
src/audit-globals.sh | 11 +-
src/listener.h | 15 ++
src/main.cc | 69 +++++++-
src/network.cc | 16 +--
src/protocol.h | 25 ++-
src/protocol/chop.cc | 12 +-
src/protocol/null.cc | 5 +-
src/steg.h | 6 +-
src/subprocess-unix.cc | 468 ++++++++++++++++++++++++++++++++++++++++++++++++
src/subprocess.h | 68 +++++++
12 files changed, 667 insertions(+), 32 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index 88939fd..a21f053 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -37,6 +37,7 @@ libstegotorus_a_SOURCES = \
src/protocol.cc \
src/rng.cc \
src/socks.cc \
+ src/subprocess-unix.cc \
src/steg.cc \
src/util.cc \
$(PROTOCOLS) $(STEGANOGRAPHERS)
diff --git a/configure.ac b/configure.ac
index 6dc4fe3..4c23af6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -74,7 +74,8 @@ LIBS="$LIBS $ws32_LIBS"
### System features ###
-AC_CHECK_HEADERS([execinfo.h],,,[/**/])
+AC_CHECK_HEADERS([execinfo.h paths.h],,,[/**/])
+AC_CHECK_FUNCS([closefrom execvpe])
### Output ###
diff --git a/src/audit-globals.sh b/src/audit-globals.sh
index 2a94462..13565aa 100644
--- a/src/audit-globals.sh
+++ b/src/audit-globals.sh
@@ -27,27 +27,28 @@ sed '
# them. The above commands have stripped any leading src/ and/or
# .o or .obj extension.
- # These are genuinely OK.
/^compression ZLIB_CEILING$/d
/^compression ZLIB_UINT_MAX$/d
/^connections circuits$/d
- /^connections connections$/d
/^connections closing_all_connections$/d
+ /^connections connections$/d
/^connections last_ckt_serial$/d
/^connections last_conn_serial$/d
/^connections shutting_down$/d
+ /^crypt init_crypto()::initialized$/d
+ /^crypt log_crypto()::initialized$/d
/^main allow_kq$/d
- /^main the_event_base$/d
/^main handle_signal_cb(int, short, void\*)::got_sigint$/d
+ /^main registration_helper$/d
+ /^main the_event_base$/d
/^network listeners$/d
/^rng rng$/d
+ /^subprocess-unix already_waited$/d
/^util log_dest$/d
/^util log_min_sev$/d
/^util log_timestamps$/d
/^util log_ts_base$/d
/^util the_evdns_base$/d
- /^crypt log_crypto()::initialized$/d
- /^crypt init_crypto()::initialized$/d
')
if [ -n "$symbols" ]; then
diff --git a/src/listener.h b/src/listener.h
index 6a73d1d..6813686 100644
--- a/src/listener.h
+++ b/src/listener.h
@@ -4,8 +4,23 @@
#ifndef LISTENER_H
#define LISTENER_H
+#include <vector>
+
+/**
+ This struct defines the state of a listener on a particular address.
+ It is declared in this header so that callers of get_all_listeners()
+ can read the fields of the entries they are handed.
+ */
+struct listener_t
+{
+ config_t *cfg;                    /* configuration associated with this listener */
+ struct evconnlistener *listener;  /* presumably the libevent listener handle -- confirm in network.cc */
+ char *address;                    /* printable address; main.cc expects "x.y.z.w:port" or "[a:b:c...]:port" */
+ size_t index;                     /* main.cc passes this as N to cfg->get_steg() */
+};
+
/* returns 1 on success, 0 on failure */
int listener_open(struct event_base *base, config_t *cfg);
void listener_close_all(void);
+std::vector<listener_t *> const& get_all_listeners();
+
#endif
diff --git a/src/main.cc b/src/main.cc
index 8c1407f..9cc1300 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -10,6 +10,8 @@
#include "crypt.h"
#include "listener.h"
#include "protocol.h"
+#include "steg.h"
+#include "subprocess.h"
#include <vector>
#include <string>
@@ -35,6 +37,7 @@ using std::string;
static struct event_base *the_event_base;
static bool allow_kq = false;
+static string registration_helper;
/**
Puts stegotorus's networking subsystem on "closing time" mode. This
@@ -160,6 +163,59 @@ stdin_detect_eof_cb(evutil_socket_t fd, short, void *arg)
}
/**
+   APRAdb registration hook.
+
+   Runs HELPER as a subprocess and waits for it to terminate.  The
+   helper is given a copy of our environment with every existing ST_*
+   variable removed, plus:
+
+     ST_SERVER_KEY     currently a hard-coded placeholder value
+     ST_LISTENER_<n>   "<ip>,tcp,<port>,<steg module name>", one per
+                       listener whose configuration reports a steg
+                       module for it
+
+   <n> is the listener's position in the table returned by
+   get_all_listeners(), so the numbering has gaps wherever a listener
+   was skipped.  An abnormal termination of the helper is logged as a
+   warning; nothing is reported back to the caller.
+*/
+static void
+call_registration_helper(string const& helper)
+{
+  vector<string> env = get_environ("ST_");
+  env.push_back("ST_SERVER_KEY=placeholder_server_key");
+
+  vector<listener_t*> const& listeners = get_all_listeners();
+  vector<listener_t*>::const_iterator el;
+  unsigned int n = 0;
+  char buf[512];
+
+  // N.B. 'n' advances in the loop header so that it keeps tracking the
+  // listener's position even when an iteration bails out early.
+  for (el = listeners.begin(); el != listeners.end(); ++el, ++n) {
+    const steg_config_t *sc = (*el)->cfg->get_steg((*el)->index);
+    if (!sc)
+      continue;
+
+    // The address is in the form x.y.z.w:port or [a:b:c...]:port.
+    // We want IP and port in separate strings.  Also, in the latter
+    // case, we want to get rid of the square brackets.
+    string ap((*el)->address);
+    size_t colon = ap.rfind(':');
+    if (colon == string::npos) {
+      // With no colon, 'colon+1' would wrap around to zero and the
+      // whole address would be reported as the port.
+      log_warn("listener %u address '%s' has no port", n, ap.c_str());
+      continue;
+    }
+    string addr(ap, 0, colon);
+    string port(ap, colon+1);
+
+    // Only strip brackets that are really there, and as a pair; this
+    // also keeps us from indexing into an empty string.
+    if (addr.size() >= 2 && addr[0] == '[' && addr[addr.size()-1] == ']') {
+      addr.erase(addr.size()-1, 1);
+      addr.erase(0,1);
+    }
+
+    if (xsnprintf(buf, sizeof buf, "ST_LISTENER_%u=%s,tcp,%s,%s",
+                  n, addr.c_str(), port.c_str(), sc->name()) == -1) {
+      log_warn("listener %u info is too big", n);
+      continue;
+    }
+    env.push_back(buf);
+  }
+
+  // The helper gets no arguments beyond its own name; everything it
+  // needs to know travels in the environment built above.
+  vector<string> args;
+  args.push_back(helper);
+  subprocess h = subprocess::call(args, env);
+  if (h.state == CLD_DUMPED) {
+    log_warn("%s: %s (core dumped)", helper.c_str(), strsignal(h.returncode));
+  } else if (h.state == CLD_KILLED) {
+    log_warn("%s: %s", helper.c_str(), strsignal(h.returncode));
+  } else if (h.state == CLD_EXITED && h.returncode != 0) {
+    log_warn("%s: exited unsuccessfully, status %d",
+             helper.c_str(), h.returncode);
+  }
+}
+
+/**
Prints usage instructions then exits.
*/
static void ATTR_NORETURN
@@ -200,6 +256,7 @@ handle_generic_args(const char *const *argv)
bool logsev_set = false;
bool allow_kq_set = false;
bool timestamps_set = false;
+ bool registration_helper_set=false;
int i = 1;
while (argv[i] &&
@@ -250,8 +307,15 @@ handle_generic_args(const char *const *argv)
}
allow_kq = true;
allow_kq_set = true;
+ } else if (!strncmp(argv[i], "--registration-helper=", 22)) {
+ if (registration_helper_set) {
+ fprintf(stderr, "you've already set a registration helper!\n");
+ exit(1);
+ }
+ registration_helper = string(argv[i]+22);
+ registration_helper_set = true;
} else {
- fprintf(stderr, "unrecognizable argument '%s'", argv[i]);
+ fprintf(stderr, "unrecognizable argument '%s'\n", argv[i]);
exit(1);
}
i++;
@@ -409,6 +473,9 @@ main(int, const char *const *argv)
log_abort("failed to open listeners for configuration %lu",
(unsigned long)(i - configs.begin()) + 1);
+ if (!registration_helper.empty())
+ call_registration_helper(registration_helper);
+
/* We are go for launch. As a signal to any monitoring process that may
be running, close stdout now. */
log_info("%s process %lu now initialized", argv[0], (unsigned long)getpid());
diff --git a/src/network.cc b/src/network.cc
index f436ef4..b5f6d4b 100644
--- a/src/network.cc
+++ b/src/network.cc
@@ -20,17 +20,6 @@
using std::vector;
-/**
- This struct defines the state of a listener on a particular address.
- */
-struct listener_t
-{
- config_t *cfg;
- struct evconnlistener *listener;
- char *address;
- size_t index;
-};
-
/** All our listeners. */
static vector<listener_t *> listeners;
@@ -57,6 +46,11 @@ static void downstream_event_cb(struct bufferevent *bev, short what, void *arg);
static void create_outbound_connections(circuit_t *ckt, bool is_socks);
static void create_outbound_connections_socks(circuit_t *ckt);
+/** Read-only access to the table of all currently open listeners. */
+const vector<listener_t *> &
+get_all_listeners()
+{
+  return listeners;
+}
+
/**
This function opens listening sockets configured according to the
provided 'config_t'. Returns 1 on success, 0 on failure.
diff --git a/src/protocol.h b/src/protocol.h
index fbc459e..a2e467d 100644
--- a/src/protocol.h
+++ b/src/protocol.h
@@ -7,6 +7,7 @@
#define PROTOCOL_H
struct proto_module;
+struct steg_config_t;
/** A 'config_t' is a set of addresses to listen on, and what to do
when connections are received. A protocol module must define a
@@ -29,7 +30,7 @@ struct config_t
/** Return the name of the protocol associated with this
configuration. You do not have to define this method in your
subclass, PROTO_DEFINE_MODULE does it for you. */
- virtual const char *name() = 0;
+ virtual const char *name() const = 0;
/** Initialize yourself from a set of command line options. This is
separate from the subclass constructor so that it can fail:
@@ -42,7 +43,7 @@ struct config_t
users of this function should call it repeatedly with successive
values of N, starting from zero, until it returns NULL, and
create listeners for every address returned. */
- virtual evutil_addrinfo *get_listen_addrs(size_t n) = 0;
+ virtual evutil_addrinfo *get_listen_addrs(size_t n) const = 0;
/** Return a set of addresses to attempt an outbound connection to,
in the form of an 'evutil_addrinfo' linked list. As with
@@ -50,7 +51,12 @@ struct config_t
should in general attempt simultaneous connection to at least
one address from every list. The maximum N is indicated in the
same way as for get_listen_addrs. */
- virtual evutil_addrinfo *get_target_addrs(size_t n) = 0;
+ virtual evutil_addrinfo *get_target_addrs(size_t n) const = 0;
+
+ /** Return the steganography module associated with either listener
+ or target address set N. If called on a protocol that doesn't
+ use steganography, will return NULL. */
+ virtual const steg_config_t *get_steg(size_t n) const = 0;
/** Return an extended 'circuit_t' object for a new socket using
this configuration. The 'index' argument is equal to the 'N'
@@ -87,7 +93,7 @@ extern const proto_module *const supported_protos[];
#define PROTO_DEFINE_MODULE(mod) \
/* canned methods */ \
- const char *mod##_config_t::name() \
+ const char *mod##_config_t::name() const \
{ return #mod; } \
\
static config_t * \
@@ -106,14 +112,19 @@ extern const proto_module *const supported_protos[];
#define CONFIG_DECLARE_METHODS(mod) \
mod##_config_t(); \
virtual ~mod##_config_t(); \
- virtual const char *name(); \
+ virtual const char *name() const; \
virtual bool init(int n_opts, const char *const *opts); \
- virtual evutil_addrinfo *get_listen_addrs(size_t n); \
- virtual evutil_addrinfo *get_target_addrs(size_t n); \
+ virtual evutil_addrinfo *get_listen_addrs(size_t n) const; \
+ virtual evutil_addrinfo *get_target_addrs(size_t n) const; \
+ virtual const steg_config_t *get_steg(size_t n) const; \
virtual circuit_t *circuit_create(size_t index); \
virtual conn_t *conn_create(size_t index) \
/* deliberate absence of semicolon */
+/* Defines mod##_config_t::get_steg() as a stub that always returns a null pointer, for protocols that don't use steganography. */
+#define CONFIG_STEG_STUBS(mod) \
+ const steg_config_t *mod##_config_t::get_steg(size_t) const \
+ { return 0; }
+
#define CONN_DECLARE_METHODS(mod) \
mod##_conn_t(); \
virtual ~mod##_conn_t(); \
diff --git a/src/protocol/chop.cc b/src/protocol/chop.cc
index 5451925..f547303 100644
--- a/src/protocol/chop.cc
+++ b/src/protocol/chop.cc
@@ -475,7 +475,7 @@ chop_config_t::init(int n_options, const char *const *options)
}
struct evutil_addrinfo *
-chop_config_t::get_listen_addrs(size_t n)
+chop_config_t::get_listen_addrs(size_t n) const
{
if (mode == LSN_SIMPLE_SERVER) {
if (n < down_addresses.size())
@@ -488,7 +488,7 @@ chop_config_t::get_listen_addrs(size_t n)
}
struct evutil_addrinfo *
-chop_config_t::get_target_addrs(size_t n)
+chop_config_t::get_target_addrs(size_t n) const
{
if (mode == LSN_SIMPLE_SERVER) {
if (n == 0)
@@ -500,6 +500,14 @@ chop_config_t::get_target_addrs(size_t n)
return NULL;
}
+/** Return the steg configuration stored at position 'n' of
+    steg_targets, or NULL when 'n' is past the end of that table. */
+const steg_config_t *
+chop_config_t::get_steg(size_t n) const
+{
+  if (n >= steg_targets.size())
+    return NULL;
+
+  return steg_targets[n];
+}
+
// Circuit methods
const char passphrase[] =
diff --git a/src/protocol/null.cc b/src/protocol/null.cc
index 90abec2..d8fe98b 100644
--- a/src/protocol/null.cc
+++ b/src/protocol/null.cc
@@ -102,7 +102,7 @@ null_config_t::init(int n_options, const char *const *options)
/** Retrieve the 'n'th set of listen addresses for this configuration. */
struct evutil_addrinfo *
-null_config_t::get_listen_addrs(size_t n)
+null_config_t::get_listen_addrs(size_t n) const
{
if (n > 0)
return 0;
@@ -111,7 +111,7 @@ null_config_t::get_listen_addrs(size_t n)
/* Retrieve the target address for this configuration. */
struct evutil_addrinfo *
-null_config_t::get_target_addrs(size_t n)
+null_config_t::get_target_addrs(size_t n) const
{
if (n > 0)
return 0;
@@ -287,4 +287,5 @@ null_conn_t::recv_eof()
return 0;
}
+CONFIG_STEG_STUBS(null);
CONN_STEG_STUBS(null);
diff --git a/src/steg.h b/src/steg.h
index fe8ef5e..f042864 100644
--- a/src/steg.h
+++ b/src/steg.h
@@ -26,7 +26,7 @@ struct steg_config_t
/** Report the name of this steg module. You do not have to define
this method in your subclass, STEG_DEFINE_MODULE does it for you. */
- virtual const char *name() = 0;
+ virtual const char *name() const = 0;
/** Create an extended 'steg_t' object (see below) from this
configuration, associated with connection CONN. */
@@ -110,7 +110,7 @@ steg_config_t *steg_new(const char *name, config_t *cfg);
{ return new mod##_steg_config_t(cfg); } \
\
/* canned methods */ \
- const char *mod##_steg_config_t::name() \
+ const char *mod##_steg_config_t::name() const \
{ return #mod; } \
\
/* module object */ \
@@ -121,7 +121,7 @@ steg_config_t *steg_new(const char *name, config_t *cfg);
#define STEG_CONFIG_DECLARE_METHODS(mod) \
mod##_steg_config_t(config_t *); \
virtual ~mod##_steg_config_t(); \
- virtual const char *name(); \
+ virtual const char *name() const; \
virtual steg_t *steg_create(conn_t *) \
/* deliberate absence of semicolon */
diff --git a/src/subprocess-unix.cc b/src/subprocess-unix.cc
new file mode 100644
index 0000000..7e4b8a2
--- /dev/null
+++ b/src/subprocess-unix.cc
@@ -0,0 +1,468 @@
+/* Copyright 2012 SRI International
+ * Portions copyright 2003-2011 Roger Dingledine, Nick Mathewson,
+ * and/or The Tor Project, Inc.
+ * Portions copyright 1991-2012 The Regents of the University of California
+ * and/or various FreeBSD contributors.
+ * See LICENSE for other credits and copying information.
+ */
+
+// N.B. This file will have to be rewritten more-or-less from scratch
+// for the Windows port. It should be acceptably portable to all Unix
+// implementations still in wide use.
+
+#include "util.h"
+#include "subprocess.h"
+
+#include <map>
+
+#include <sys/stat.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#ifdef HAVE_PATHS_H
+#include <paths.h>
+#endif
+#ifndef _PATH_DEFPATH
+#define _PATH_DEFPATH "/usr/bin:/bin"
+#endif
+
+#ifndef PATH_MAX
+# ifdef MAXPATHLEN
+# define PATH_MAX MAXPATHLEN
+# else
+# define PATH_MAX 4096
+# endif
+#endif
+
+#ifndef OPEN_MAX
+# define OPEN_MAX 256
+#endif
+
+extern char **environ;
+
+using std::map;
+using std::vector;
+using std::string;
+
+
+// Space for hex values of child state, a slash, saved_errno (with
+// leading minus) and newline (no null)
+#define HEX_ERRNO_SIZE (sizeof(int)*2 + 4)
+
+// State codes for the child side of the fork.
+#define CHILD_STATE_REDIRECT_STDIN 1
+#define CHILD_STATE_REDIRECT_STDOUT 2
+#define CHILD_STATE_REDIRECT_STDERR 3
+#define CHILD_STATE_CLOSEFROM 4
+#define CHILD_STATE_EXEC 5
+
+// Some C libraries get very unhappy with you if you ignore the result
+// of a write call, but where it's used in this file, there is nothing
+// whatsoever we can do if it fails.
+#define IGNORE_FAILURE(expr) do { if (expr) {} } while (0)
+
+// We have not prevented copying of |subprocess| objects, so it is
+// possible that |wait| will be called more than once for the same
+// PID, with no state in the object to tell us so. To prevent
+// problems, maintain a table of processes that we have waited for.
+// We make no attempt to prune this table; its memory requirements
+// should be trivial for the expected uses of this API.
+static map<pid_t, int> already_waited;
+
+// Internal utilities and replacements for system library routines
+// that may or may not exist.
+
+#ifndef HAVE_CLOSEFROM
+// Fallback for systems without closefrom(): close every open file
+// descriptor numbered 'lowfd' or higher.  Three strategies are tried
+// in turn; failures of individual close() calls are ignored.
+static void
+closefrom(int lowfd)
+{
+#ifdef F_CLOSEM
+  // Where F_CLOSEM is defined, one fcntl call may do the whole job.
+  // It is not guaranteed to work, so fall through if it reports failure.
+  if (fcntl(lowfd, F_CLOSEM, 0) == 0)
+    return;
+#endif
+
+  // Next choice: walk /proc/self/fd, if it can be opened.
+  // N.B. Theoretically you are not allowed to use opendir() after fork()
+  // as it's not async-signal-safe.  This is overwhelmingly unlikely to
+  // cause problems in practice.
+  DIR *fd_dir = opendir("/proc/self/fd");
+  if (fd_dir != 0) {
+    struct dirent *entry;
+    while ((entry = readdir(fd_dir)) != NULL) {
+      char *tail;
+      unsigned long candidate = strtoul(entry->d_name, &tail, 10);
+
+      // Ignore names that are not entirely a decimal number.
+      if (tail == entry->d_name || *tail != '\0')
+        continue;
+
+      // Ignore numbers that cannot be a descriptor we were asked to close.
+      if (candidate >= (unsigned long)INT_MAX ||
+          candidate < (unsigned long)lowfd)
+        continue;
+
+      // Leave the directory stream's own descriptor alone; closedir()
+      // takes care of it below.
+      if (candidate == (unsigned long)dirfd(fd_dir))
+        continue;
+
+      close((int)candidate);
+    }
+    closedir(fd_dir);
+    return;
+  }
+
+  // As a last resort, blindly close all possible fd numbers
+  // between lowfd and _SC_OPEN_MAX.
+  unsigned long limit = sysconf(_SC_OPEN_MAX);
+  if (limit == (unsigned long)(-1L))
+    limit = OPEN_MAX;
+
+  unsigned long fd = lowfd;
+  while (fd < limit) {
+    close((int)fd);
+    fd++;
+  }
+}
+#endif
+
+#ifndef HAVE_EXECVPE
+// Implementation courtesy FreeBSD 9.0 src/lib/libc/gen/exec.c
+// some adjustments made with reference to the glibc implementation
+//
+// Fallback for systems without execvpe(): execute 'name' with
+// argument vector 'argv' and environment 'envp'.  If 'name' contains
+// no slash, each directory of the *parent's* $PATH (or _PATH_DEFPATH
+// if $PATH is unset) is tried in order.  Returns only on failure,
+// with -1 and errno set.  Like the rest of the child-side code it
+// avoids allocating memory.
+static int
+execvpe(const char *name, char * const argv[], char * const envp[])
+{
+  const char *path;
+  const char *p, *q;
+  size_t lp, ln;
+  bool eacces = false;
+  bool more = true;
+  char buf[PATH_MAX];
+
+  // If it's an empty path name, fail immediately.
+  if (*name == '\0') {
+    errno = ENOENT;
+    return -1;
+  }
+
+  // If it's an absolute or relative pathname, do not search $PATH.
+  if (strchr(name, '/')) {
+    execve(name, argv, envp);
+    return -1;
+  }
+  ln = strlen(name);
+
+  // Get the path to search.  Intentionally uses the parent
+  // environment, not 'envp'.
+  if (!(path = getenv("PATH")))
+    path = _PATH_DEFPATH;
+
+  q = path;
+  while (more) {
+    p = q;
+    while (*q != '\0' && *q != ':')
+      q++;
+
+    // A colon means another element follows, possibly an empty one
+    // (a trailing colon).  Only ever step over a colon, never over
+    // the terminating NUL, so that nothing past the end of the
+    // string is examined.
+    more = (*q == ':');
+
+    // Double, leading and trailing colons mean the current directory.
+    if (q == p) {
+      p = ".";
+      lp = 1;
+    } else
+      lp = q - p;
+    if (more)
+      q++;
+
+    // If the path is too long, complain and skip it.  This is a
+    // possible security issue; given a way to make the path too long
+    // the user may execute the wrong program.
+    if (lp + ln + 2 > sizeof(buf)) {
+      // Lengths are computed from the literals so the message cannot
+      // be truncated by a miscounted byte count.
+      IGNORE_FAILURE(write(2, "execvpe: ", sizeof "execvpe: " - 1));
+      IGNORE_FAILURE(write(2, p, lp));
+      IGNORE_FAILURE(write(2, ": path too long\n",
+                           sizeof ": path too long\n" - 1));
+      continue;
+    }
+
+    memcpy(buf, p, lp);
+    buf[lp] = '/';
+    memcpy(buf + lp + 1, name, ln);
+    buf[lp + ln + 1] = '\0';
+
+    execve(buf, argv, envp);
+    switch (errno) {
+      // These errors all indicate that we should try the next directory.
+    case EACCES:
+      // Remember that at least one failure was due to a permission check;
+      // this will be preferentially reported, unless we hit something even
+      // more serious.
+      eacces = true;
+      // fall through
+    case ELOOP:
+    case ENAMETOOLONG:
+    case ENOENT:
+    case ENOTDIR:
+    case ESTALE:
+    case ETIMEDOUT:
+      continue;
+
+    default:
+      // On any other error, give up.
+      // Shell fallback for ENOEXEC deliberately removed, as it is a
+      // historical vestige and involves allocating memory.
+      return -1;
+    }
+  }
+
+  if (eacces)
+    errno = EACCES;
+  return -1;
+}
+#endif
+
+/** Render <b>child_state</b> and <b>saved_errno</b> as hexadecimal text in
+ * <b>hex_errno</b>. Called between fork and _exit, so must be signal-handler
+ * safe.
+ *
+ * <b>hex_errno</b> must have at least HEX_ERRNO_SIZE bytes available.
+ *
+ * The text written has the form "CHILD_STATE/ERRNO\n", left-padded with
+ * spaces to exactly HEX_ERRNO_SIZE bytes. Note that there is no trailing \0.
+ * CHILD_STATE indicates where in the process of starting the child the
+ * failure occurred (see the CHILD_STATE_* macros), and ERRNO is the value
+ * of errno when the failure occurred, preceded by a minus sign if it was
+ * negative.
+ */
+static void
+format_helper_exit_status(unsigned char child_state, int saved_errno,
+                          char *hex_errno)
+{
+  const char *const digits = "0123456789ABCDEF";
+  char *const newline_at = hex_errno + HEX_ERRNO_SIZE - 1;
+
+  /* Blank everything before the final byte by hand (memset may not be
+     signal handler safe, so we can't use it), then add the newline. */
+  for (char *fill = hex_errno; fill != newline_at; fill++)
+    *fill = ' ';
+  *newline_at = '\n';
+
+  /* Work with the magnitude of errno; the sign is put back further down. */
+  unsigned int magnitude = (saved_errno < 0) ? (unsigned int) -saved_errno
+                                             : (unsigned int) saved_errno;
+
+  /* Digits are produced least significant first, so write right to left,
+     starting just before the newline. */
+  char *out = newline_at - 1;
+
+  /* Nothing can be written if the buffer has no room before the newline. */
+  if (out < hex_errno)
+    return;
+
+  do {
+    *out = digits[magnitude % 16];
+    out--;
+    magnitude /= 16;
+  } while (magnitude != 0 && out >= hex_errno);
+
+  /* Minus sign, only for a negative errno and only if it still fits. */
+  if (saved_errno < 0 && out >= hex_errno) {
+    *out = '-';
+    out--;
+  }
+
+  /* Separator between the two fields. */
+  if (out >= hex_errno) {
+    *out = '/';
+    out--;
+  }
+
+  /* Stop here if the errno field used up all the space. */
+  if (out < hex_errno)
+    return;
+
+  /* Finally the child state, again least significant digit first. */
+  do {
+    *out = digits[child_state % 16];
+    out--;
+    child_state /= 16;
+  } while (child_state != 0 && out >= hex_errno);
+}
+
+/** Start a program in the background. If <b>filename</b> contains a '/',
+ * then it will be treated as an absolute or relative path. Otherwise the
+ * system path will be searched for <b>filename</b>. The strings in
+ * <b>argv</b> will be passed as the command line arguments of the child
+ * program (following convention, argv[0] should normally be the filename of
+ * the executable), and the strings in <b>envp</b> will be passed as its
+ * environment variables. Both arrays must end with a null pointer.
+ *
+ * The child's standard input and output will both be /dev/null;
+ * the child's standard error will be whatever it is in the parent
+ * (unless it is closed in the parent, in which case it will also be
+ * /dev/null)
+ *
+ * All file descriptors numbered higher than 2 will be closed.
+ *
+ * On success, returns the PID of the child; on failure to fork, logs a
+ * warning and returns -1.
+ *
+ * Failures that happen after the fork (redirecting the standard
+ * descriptors, or the exec itself) cannot be reported through the return
+ * value. In that case the child writes
+ * "ERR: Failed to spawn child process: code " followed by the output of
+ * format_helper_exit_status() to its standard error and calls _exit(255).
+ */
+static pid_t
+do_fork_exec(const char *const filename,
+ const char **argv,
+ const char **envp)
+{
+ pid_t pid = fork();
+
+ if (pid == -1) {
+ log_warn("Failed to fork child process: %s", strerror(errno));
+ return -1;
+ }
+
+ if (pid != 0) {
+ // In parent.
+ // If we spawn a child, wait for it, the PID counter wraps
+ // completely around, and then we spawn another child which
+ // happens to get exactly the same PID as the first one, we had
+ // better remove the old record from the already_waited table or
+ // we won't ever actually wait for the new child. The odds of
+ // this are small, but not ridiculously small.
+ already_waited.erase(pid);
+ return pid;
+ }
+
+ // In child. From here on, child_state records which step is in
+ // progress so that the error path can report where things went wrong.
+ char hex_errno[HEX_ERRNO_SIZE];
+ unsigned int child_state = CHILD_STATE_REDIRECT_STDIN;
+
+ close(0); // free descriptor 0; the open() below must be handed exactly that number
+ if (open("/dev/null", O_RDONLY) != 0)
+ goto error;
+
+ child_state = CHILD_STATE_REDIRECT_STDOUT;
+
+ close(1); // same trick for descriptor 1
+ if (open("/dev/null", O_WRONLY) != 1)
+ goto error;
+
+ child_state = CHILD_STATE_REDIRECT_STDERR;
+ if (!isatty(2) && errno == EBADF) { // EBADF: descriptor 2 is not open at all
+ if (open("/dev/null", O_WRONLY) != 2)
+ goto error;
+ }
+
+ child_state = CHILD_STATE_CLOSEFROM;
+ closefrom(3);
+
+ child_state = CHILD_STATE_EXEC;
+
+ // We need the casts because execvpe doesn't declare argv or envp
+ // as const, even though it does not modify them.
+ execvpe(filename, (char *const *) argv, (char *const *)envp);
+
+ error: // also reached by falling through, when execvpe() returns
+ format_helper_exit_status(child_state, errno, hex_errno);
+
+#define error_message "ERR: Failed to spawn child process: code "
+
+ IGNORE_FAILURE(write(2, error_message, sizeof error_message - 1));
+ IGNORE_FAILURE(write(2, hex_errno, sizeof hex_errno));
+
+#undef error_message
+
+ _exit(255); // the same 255 that wait_common() reports for a failed fork
+}
+
+// Wrapper: marshal the C++-y argument and environment vectors into
+// the null-terminated arrays of C strings that the kernel expects.
+// args[0] doubles as the name of the program to run.  Returns the
+// PID of the child, or -1 (after logging a warning) if 'args' is
+// empty or the fork fails.
+static pid_t
+do_fork_exec(vector<string> const& args,
+             vector<string> const& env)
+{
+  // With no arguments there is no program name; going ahead would hand
+  // a null filename to the child, which dereferences it.
+  if (args.empty()) {
+    log_warn("Failed to spawn child process: empty argument vector");
+    return -1;
+  }
+
+  // Heap-backed arrays instead of variable-length arrays: VLAs are a
+  // compiler extension in C++, and a large environment could overrun
+  // the stack.  The extra slot holds the terminating null pointer.
+  vector<const char *> argv(args.size() + 1);
+  vector<const char *> envp(env.size() + 1);
+
+  for (size_t i = 0; i < args.size(); i++)
+    argv[i] = args[i].c_str();
+  argv[args.size()] = 0;
+
+  for (size_t i = 0; i < env.size(); i++)
+    envp[i] = env[i].c_str();
+  envp[env.size()] = 0;
+
+  return do_fork_exec(argv[0], &argv[0], &envp[0]);
+}
+
+// Translate a raw wait status into the (state, rc) pair exposed by
+// |subprocess|: CLD_EXITED with the exit status, or CLD_KILLED /
+// CLD_DUMPED with the number of the terminating signal.  Any other
+// kind of status aborts the program.
+static void
+decode_status(int status, int& state, int& rc)
+{
+  if (WIFEXITED(status)) {
+    state = CLD_EXITED;
+    rc = WEXITSTATUS(status);
+    return;
+  }
+
+  if (WIFSIGNALED(status)) {
+    rc = WTERMSIG(status);
+    state = CLD_KILLED;
+#ifdef WCOREDUMP
+    // Not every system can tell us whether a core file was written.
+    if (WCOREDUMP(status))
+      state = CLD_DUMPED;
+#endif
+    return;
+  }
+
+  // we do not use WUNTRACED, WCONTINUED, or ptrace, so the other
+  // WIF* possibilities should never happen
+  log_abort("impossible wait status %04x", (unsigned int)status);
+}
+
+// Shared implementation of subprocess::poll and subprocess::wait.
+// Collects the termination status of 'pid' and decodes it into
+// 'state' and 'rc'.  With 'wnohang' set the call does not block.
+// Returns true if 'state' and 'rc' were filled in, false if the
+// child is still running or waitpid failed (a warning is logged in
+// the latter case); on false both outputs are left untouched.
+static bool
+wait_common(pid_t pid, int& state, int& rc, bool wnohang)
+{
+  if (pid == -1) {
+    // Map failure to fork into the same exit state that we get if
+    // there's a failure in between fork and exec.
+    state = CLD_EXITED;
+    rc = 255;
+    return true;
+  }
+
+  // A copy of the subprocess object may already have reaped this
+  // child; if so, replay the status recorded at that time.
+  map<pid_t, int>::iterator p = already_waited.find(pid);
+  if (p != already_waited.end()) {
+    decode_status(p->second, state, rc);
+    return true;
+  }
+
+  // Being interrupted by an unrelated signal is not a failure of the
+  // wait itself; simply try again.  Giving up here would leave the
+  // child unreaped and the caller believing it is still running.
+  int status;
+  pid_t rv;
+  do {
+    rv = waitpid(pid, &status, wnohang ? WNOHANG : 0);
+  } while (rv == -1 && errno == EINTR);
+
+  if (rv == pid) {
+    decode_status(status, state, rc);
+    already_waited.insert(std::make_pair(pid, status));
+    return true;
+  } else if (rv == 0 && wnohang) {
+    // Child has not terminated yet.
+    return false;
+  } else {
+    // pid_t is not necessarily int; cast to match the format.
+    log_warn("waitpid(%d) failed: %s", (int)pid, strerror(errno));
+    return false;
+  }
+}
+
+// subprocess methods
+
+// The child is spawned as part of construction; 'pid' is -1 if that
+// failed.  'state' and 'returncode' start out as 0 and -1 and are
+// only updated by poll() and wait().
+subprocess::subprocess(vector<string> const& args, vector<string> const& env)
+  : pid(do_fork_exec(args, env)), state(0), returncode(-1)
+{}
+
+// Spawn a child, block until it has terminated, and hand back the
+// object describing how it ended.
+subprocess
+subprocess::call(vector<string> const& args, vector<string> const& env)
+{
+  subprocess child(args, env);
+  child.wait();
+  return child;
+}
+
+// Non-blocking check on the child; forwards whatever wait_common
+// reports.
+bool
+subprocess::poll()
+{
+  const bool nonblocking = true;
+  return wait_common(pid, state, returncode, nonblocking);
+}
+
+// Blocking counterpart of poll(); the boolean result of wait_common
+// is deliberately discarded.
+void
+subprocess::wait()
+{
+  const bool nonblocking = false;
+  (void) wait_common(pid, state, returncode, nonblocking);
+}
+
+// public utilities
+
+// Copy the process environment into a vector of "NAME=value" strings.
+// If 'exclude' is not a null pointer, entries that begin with that
+// prefix are left out.  Returns an empty vector if there is no
+// environment at all.
+vector<string>
+get_environ(const char *exclude)
+{
+  vector<string> result;
+
+  // 'environ' itself can be a null pointer (glibc's clearenv() leaves
+  // it that way), in which case there is nothing to walk.
+  if (!environ)
+    return result;
+
+  size_t exlen = exclude ? strlen(exclude) : 0;
+
+  for (char **p = environ; *p; p++)
+    if (!exclude || strncmp(exclude, *p, exlen) != 0)
+      result.push_back(*p);
+
+  return result;
+}
diff --git a/src/subprocess.h b/src/subprocess.h
new file mode 100644
index 0000000..95fe848
--- /dev/null
+++ b/src/subprocess.h
@@ -0,0 +1,68 @@
+/* Copyright 2012 SRI International
+ * See LICENSE for other credits and copying information
+ */
+
+#ifndef SUBPROCESS_H
+#define SUBPROCESS_H
+
+#include <string>
+#include <vector>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+
+// This API is inspired by the Python subprocess module. While it
+// could be extended to do everything that that does, at present it
+// does much less. If you add features, please consider matching
+// Python's presentation of same.
+
+#ifndef CLD_EXITED
+#define CLD_EXITED 1
+#endif
+#ifndef CLD_KILLED
+#define CLD_KILLED 2
+#endif
+#ifndef CLD_DUMPED
+#define CLD_DUMPED 3
+#endif
+
+struct subprocess
+{
+ // Start a new subprocess with argument vector |args| and environment
+ // vector |env|. args[0] names the program to run. stdin and stdout
+ // are /dev/null. stderr is inherited (or /dev/null if it was closed).
+ // All file descriptors numbered 3 and higher are closed.
+ // The current working directory is inherited.
+ subprocess(std::vector<std::string> const& args,
+ std::vector<std::string> const& env);
+
+ // Convenience: spawn a subprocess and wait for it to terminate.
+ // The returned object has 'state' and 'returncode' already filled in.
+ static subprocess call(std::vector<std::string> const& args,
+ std::vector<std::string> const& env);
+
+ // Check, without blocking, whether the child process has terminated.
+ // Returns true if it has, and in that case sets 'state' and
+ // 'returncode'; returns false otherwise.
+ bool poll();
+
+ // Wait for the child process to terminate, then set 'state' and
+ // 'returncode'.
+ void wait();
+
+ // Process ID of the child. -1 on failure to spawn, in which case
+ // an error message has already been logged.
+ const pid_t pid;
+
+ // Child state, either 0 (not yet known to have terminated) or one of
+ // the <signal.h> constants CLD_EXITED, CLD_KILLED, or CLD_DUMPED.
+ // Only poll() and wait() update it.
+ int state;
+
+ // Exit status (if state == CLD_EXITED) or signal that terminated the
+ // process (if state == CLD_KILLED or CLD_DUMPED); -1 otherwise.
+ int returncode;
+};
+
+// Convert the global environment vector to a C++ vector.
+// If 'exclude' is not NULL, then any environment variable whose name
+// begins with those characters will be excluded from the result.
+extern std::vector<std::string> get_environ(const char *exclude = 0);
+
+#endif