[tor-commits] [torsocks/master] Lookup symbols in libc in constructor

dgoulet at torproject.org dgoulet at torproject.org
Fri Apr 4 22:40:26 UTC 2014


commit d0f4415714af4d36f019ba50a3d368e532303010
Author: David Goulet <dgoulet at ev0ke.net>
Date:   Thu Oct 31 15:52:09 2013 -0400

    Lookup symbols in libc in constructor
    
    This makes sure that the libc symbols are looked up in the system
    library before main() runs.
    
    This commit also adds mmap/munmap support for syscall(). A comment in
    src/lib/syscall.c explains the specific case in which this is needed.
    
    Finally, stop looking up symbols that were already looked up in the
    constructor.
    
    Signed-off-by: David Goulet <dgoulet at ev0ke.net>
---
 configure.ac        |  153 ++++++++-------------------------------------------
 src/common/compat.h |   10 ++++
 src/lib/close.c     |    5 +-
 src/lib/connect.c   |    3 -
 src/lib/socket.c    |    3 -
 src/lib/syscall.c   |   70 ++++++++++++++++++++---
 src/lib/torsocks.c  |   39 +++++++++++--
 7 files changed, 132 insertions(+), 151 deletions(-)

diff --git a/configure.ac b/configure.ac
index 6c9c5fd..23fc6c8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -65,26 +65,6 @@ AC_CHECK_FUNCS(strcspn strdup strerror strspn strtol mmap strcasecmp \
 #    in torsocks.c and elsewhere is platform-dependent.
 ##############################################################################
 
-dnl First find the library that contains connect() (obviously
-dnl the most important library for us). Once we've found it
-dnl we chuck it on the end of LIBS, that lib may end up there
-dnl more than once (since we do our search with an empty libs
-dnl list) but that isn't a problem
-OLDLIBS="${LIBS}"
-LIBS=
-CONNECTLIB=
-for LIB in c socket; do
-  AC_CHECK_LIB("${LIB}",connect,[
-    CONNECTLIB="${LIB}"
-    break
-  ],)
-done
-LIBS="${OLDLIBS} -l${CONNECTLIB}"
-if test "${CONNECTLIB}" = ""; then
-  AC_MSG_ERROR('Could not find library containing connect()')
-fi
-
-
 dnl Check for socket
 AC_CHECK_FUNC(socket,, [
   AC_CHECK_LIB(socket, socket,,AC_MSG_ERROR("socket function not found"))])
@@ -131,124 +111,39 @@ case $host in
     ;;
 esac
 
-
-##############################################################################
-# 3. Check if we need to use --enable-oldmethod, regardless of what was
-#    given on the ./configure command line.
-##############################################################################
-
-dnl If we're using gcc here define _GNU_SOURCE
-AC_MSG_CHECKING(for RTLD_NEXT from dlfcn.h)
-AC_EGREP_CPP(yes,
-[
- #include <dlfcn.h>
- #ifdef RTLD_NEXT
-	yes
- #endif
-], [
-  AC_MSG_RESULT(yes)
-], [
-  AC_MSG_RESULT(no)
-  AC_MSG_CHECKING(for RTLD_NEXT from dlfcn.h with _GNU_SOURCE)
-  AC_EGREP_CPP(yes,
-  [
-   #define _GNU_SOURCE
-   #include <dlfcn.h>
-   #ifdef RTLD_NEXT
-	yes
-   #endif
-  ], [
-    AC_MSG_RESULT(yes)
-    AC_DEFINE([USE_GNU_SOURCE],[],[Description])
-  ], [
-    AC_MSG_RESULT(no)
-    AC_DEFINE([USE_OLD_DLSYM],[],[Description])
-    oldmethod="yes"
-  ])   
-])
-
 if test "x${enable_envconf}" = "x"; then
   AC_DEFINE([ALLOW_ENV_CONFIG],[],[Description])
 fi
 
-##############################################################################
-# 3. If --enable-oldmethod was requested, perform the necessary checks
-##############################################################################
+dnl Check that find is available, it should be somewhere
+dnl in the path
+AC_CHECK_PROG(FIND, find, find)
+if test "${FIND}" = ""; then
+	AC_MSG_ERROR('find not found in path')
+fi
 
-if test "${enable_oldmethod}" = "yes"; then
-  AC_DEFINE([USE_OLD_DLSYM],[],[Description])
-  oldmethod="yes"
+dnl Find tail, it should always be somewhere in the path
+dnl but for safety's sake
+AC_CHECK_PROG(TAIL, tail, tail)
+if test "${TAIL}" = ""; then
+	AC_MSG_ERROR('tail not found in path')
 fi
 
-dnl If we have to use the old method of overriding connect (i.e no
-dnl RTLD_NEXT) we need to know the location of the library that
-dnl contains connect(), select(), poll() and close()
-if test "${oldmethod}" = "yes"; then
-  dnl We need to find the path to the library, to do
-  dnl this we use find on the usual suspects, i.e /lib and
-  dnl /usr/lib
-
-  dnl Check that find is available, it should be somehere
-  dnl in the path
-  AC_CHECK_PROG(FIND, find, find)
-  if test "${FIND}" = ""; then
-    AC_MSG_ERROR('find not found in path')
-  fi
-
-  dnl Find tail, it should always be somewhere in the path
-  dnl but for safety's sake
-  AC_CHECK_PROG(TAIL, tail, tail)
-  if test "${TAIL}" = ""; then
-    AC_MSG_ERROR('tail not found in path')
-  fi
-
-  dnl Now find the library we need
-  AC_MSG_CHECKING(location of lib${CONNECTLIB}.so)
-  LIBCONNECT=
-  for DIR in '/lib' '/usr/lib'; do
-    if test "${LIBCONNECT}" = ""; then
-      LIBCONNECT=`$FIND $DIR -name "lib${CONNECTLIB}.so.?" 2>/dev/null | $TAIL -1`
-    fi
-  done
-  AC_DEFINE_UNQUOTED([LIBCONNECT],["${LIBCONNECT}"],[Description])
-  if test "${LIBCONNECT}" = ""; then
-     AC_MSG_ERROR("not found!")
-  fi
-
-  AC_MSG_RESULT($LIBCONNECT)
-
-  dnl Now find the resolve library we need
-  AC_MSG_CHECKING(location of libresolv.so)
-  LIBRESOLV=
-  for DIR in '/lib' '/usr/lib'; do
-    if test "${LIBRESOLV}" = ""; then
-      LIBRESOLV=`$FIND $DIR -name "libresolv.so.?" 2>/dev/null | $TAIL -1`
-    fi
-  done
-  AC_DEFINE_UNQUOTED([LIBRESOLV],["${LIBRESOLV}"],[Description])
-  if test "${LIBRESOLV}" = ""; then
-     AC_MSG_ERROR("not found!")
-  fi
-
-  AC_MSG_RESULT($LIBRESOLV)
-
-  dnl close() should be in libc, find it
-  AC_MSG_CHECKING(location of libc.so)
-  LIBC=
-  for DIR in '/lib' '/usr/lib'; do
-    if test "${LIBC}" = ""; then
-      LIBC=`$FIND $DIR -name "libc.so.?" 2>/dev/null | $TAIL -1`
-    fi
-  done
-
-  AC_DEFINE_UNQUOTED([LIBC],["${LIBC}"],[Description])
-  if test "${LIBC}" = ""; then
-     AC_MSG_ERROR("not found!")
-  fi
-
-  AC_MSG_RESULT($LIBC)
+dnl Get the full system path of libc. Use the prefix or some hardcoded
+dnl standard locations found on Unix-like systems.
+AC_MSG_CHECKING(location of libc.so)
+for DIR in "$prefix/lib" "$prefix/usr/lib" '/lib' '/usr/lib'; do
+	if test "${LIBC_PATH}" = ""; then
+		LIBC_PATH=`$FIND $DIR -name "libc.so.?" 2>/dev/null | $TAIL -1`
+	fi
+done
+AC_DEFINE_UNQUOTED([LIBC_PATH],["${LIBC_PATH}"],[Description])
+if test "${LIBC_PATH}" = ""; then
+	AC_MSG_ERROR("not found!")
 fi
 
+AC_MSG_RESULT($LIBC_PATH)
+
 ##############################################################################
 # 5. Determine how to preload libtorsocks.so on this system.
 #    On Linux this is with the LD_PRELOAD variable, on OSX
diff --git a/src/common/compat.h b/src/common/compat.h
index 8207fc0..2eacb35 100644
--- a/src/common/compat.h
+++ b/src/common/compat.h
@@ -69,10 +69,18 @@ void tsocks_mutex_unlock(tsocks_mutex_t *m);
 #ifndef __NR_close
 #define __NR_close -1
 #endif
+#ifndef __NR_mmap
+#define __NR_mmap -1
+#endif
+#ifndef __NR_munmap
+#define __NR_munmap -1
+#endif
 
 #define TSOCKS_NR_SOCKET    __NR_socket
 #define TSOCKS_NR_CONNECT   __NR_connect
 #define TSOCKS_NR_CLOSE     __NR_close
+#define TSOCKS_NR_MMAP      __NR_mmap
+#define TSOCKS_NR_MUNMAP    __NR_munmap
 
 #endif /* __linux__ */
 
@@ -84,6 +92,8 @@ void tsocks_mutex_unlock(tsocks_mutex_t *m);
 #define TSOCKS_NR_SOCKET    SYS_socket
 #define TSOCKS_NR_CONNECT   SYS_connect
 #define TSOCKS_NR_CLOSE     SYS_close
+#define TSOCKS_NR_MMAP      SYS_mmap
+#define TSOCKS_NR_MUNMAP    SYS_munmap
 
 #endif /* __FreeBSD__, __FreeBSD_kernel__, __darwin__ */
 
diff --git a/src/lib/close.c b/src/lib/close.c
index 699b93c..b8a6364 100644
--- a/src/lib/close.c
+++ b/src/lib/close.c
@@ -20,6 +20,9 @@
 
 #include "torsocks.h"
 
+/* close(2) */
+TSOCKS_LIBC_DECL(close, LIBC_CLOSE_RET_TYPE, LIBC_CLOSE_SIG)
+
 /*
  * Torsocks call for close(2).
  */
@@ -58,7 +61,5 @@ LIBC_CLOSE_RET_TYPE tsocks_close(LIBC_CLOSE_SIG)
  */
 LIBC_CLOSE_DECL
 {
-	tsocks_libc_close = tsocks_find_libc_symbol(LIBC_CLOSE_NAME_STR,
-			TSOCKS_SYM_EXIT_NOT_FOUND);
 	return tsocks_close(LIBC_CLOSE_ARGS);
 }
diff --git a/src/lib/connect.c b/src/lib/connect.c
index 3d75a94..bc3ac9d 100644
--- a/src/lib/connect.c
+++ b/src/lib/connect.c
@@ -143,8 +143,5 @@ error:
  */
 LIBC_CONNECT_DECL
 {
-	/* Find symbol if not already set. Exit if not found. */
-	tsocks_libc_connect = tsocks_find_libc_symbol(LIBC_CONNECT_NAME_STR,
-			TSOCKS_SYM_EXIT_NOT_FOUND);
 	return tsocks_connect(LIBC_CONNECT_ARGS);
 }
diff --git a/src/lib/socket.c b/src/lib/socket.c
index aa5297e..8a4c0d1 100644
--- a/src/lib/socket.c
+++ b/src/lib/socket.c
@@ -53,8 +53,5 @@ LIBC_SOCKET_RET_TYPE tsocks_socket(LIBC_SOCKET_SIG)
  */
 LIBC_SOCKET_DECL
 {
-	/* Find symbol if not already set. Exit if not found. */
-	tsocks_libc_socket = tsocks_find_libc_symbol(LIBC_SOCKET_NAME_STR,
-			TSOCKS_SYM_EXIT_NOT_FOUND);
 	return tsocks_socket(LIBC_SOCKET_ARGS);
 }
diff --git a/src/lib/syscall.c b/src/lib/syscall.c
index b06e2e3..674ee6e 100644
--- a/src/lib/syscall.c
+++ b/src/lib/syscall.c
@@ -17,6 +17,7 @@
 
 #include <assert.h>
 #include <stdarg.h>
+#include <sys/mman.h>
 
 #include <common/log.h>
 
@@ -25,9 +26,6 @@
 /* syscall(2) */
 TSOCKS_LIBC_DECL(syscall, LIBC_SYSCALL_RET_TYPE, LIBC_SYSCALL_SIG)
 
-/* close(2) */
-TSOCKS_LIBC_DECL(close, LIBC_CLOSE_RET_TYPE, LIBC_CLOSE_SIG)
-
 /*
  * Handle close syscall to be called with tsocks call.
  */
@@ -71,11 +69,45 @@ static LIBC_CONNECT_RET_TYPE handle_connect(va_list args)
 }
 
 /*
+ * Handle mmap(2) syscall.
+ */
+static LIBC_SYSCALL_RET_TYPE handle_mmap(va_list args)
+{
+	void *addr;
+	size_t len;
+	int prot, flags, fd;
+	off_t offset;
+
+	addr = va_arg(args, __typeof__(addr));
+	len = va_arg(args, __typeof__(len));
+	prot = va_arg(args, __typeof__(prot));
+	flags = va_arg(args, __typeof__(flags));
+	fd = va_arg(args, __typeof__(fd));
+	offset = va_arg(args, __typeof__(offset));
+
+	return (LIBC_SYSCALL_RET_TYPE) mmap(addr, len, prot, flags, fd, offset);
+}
+
+/*
+ * Handle munmap(2) syscall.
+ */
+static LIBC_SYSCALL_RET_TYPE handle_munmap(va_list args)
+{
+	void *addr;
+	size_t len;
+
+	addr = va_arg(args, __typeof__(addr));
+	len = va_arg(args, __typeof__(len));
+
+	return (LIBC_SYSCALL_RET_TYPE) munmap(addr, len);
+}
+
+/*
  * Torsocks call for syscall(2)
  */
 LIBC_SYSCALL_RET_TYPE tsocks_syscall(long int __number, va_list args)
 {
-	long int ret;
+	LIBC_SYSCALL_RET_TYPE ret;
 
 	DBG("[syscall] Syscall libc wrapper number %ld called", __number);
 
@@ -89,6 +121,32 @@ LIBC_SYSCALL_RET_TYPE tsocks_syscall(long int __number, va_list args)
 	case TSOCKS_NR_CLOSE:
 		ret = handle_close(args);
 		break;
+	case TSOCKS_NR_MMAP:
+		/*
+		 * The mmap/munmap syscalls are handled here for a very specific case so
+		 * buckle up here for the explanation :).
+		 *
+		 * Considering an application that handles its own memory using a
+		 * malloc(3) hook for instance *AND* mmap() is called with syscall(),
+		 * we have to route the call to the libc in order to complete the
+		 * syscall() symbol lookup.
+		 *
+		 * The lookup process of the libdl (using dlsym(3)) calls at some point
+		 * malloc for a temporary buffer so we end up in this torsocks wrapper
+		 * when mmap() is called to create a new memory region for the
+		 * application (remember the malloc hook). When getting here, the libc
+		 * syscall() symbol is NOT yet populated because we are in the lookup
+		 * code path. For this, we directly call mmap/munmap using the libc so
+		 * the lookup can be completed.
+		 *
+		 * This crazy situation is present in Mozilla Firefox which handles its
+		 * own memory using mmap() called by syscall(). Same for munmap().
+		 */
+		ret = handle_mmap(args);
+		break;
+	case TSOCKS_NR_MUNMAP:
+		ret = handle_munmap(args);
+		break;
 	default:
 		/*
 		 * Deny call since we have no idea if this call can leak or not data
@@ -112,10 +170,6 @@ LIBC_SYSCALL_DECL
 	LIBC_SYSCALL_RET_TYPE ret;
 	va_list args;
 
-	/* Find symbol if not already set. Exit if not found. */
-	tsocks_libc_syscall = tsocks_find_libc_symbol(LIBC_SYSCALL_NAME_STR,
-			TSOCKS_SYM_EXIT_NOT_FOUND);
-
 	va_start(args, __number);
 	ret = tsocks_syscall(__number, args);
 	va_end(args);
diff --git a/src/lib/torsocks.c b/src/lib/torsocks.c
index 833a472..55ae98b 100644
--- a/src/lib/torsocks.c
+++ b/src/lib/torsocks.c
@@ -119,12 +119,39 @@ static void init_config(void)
  */
 static void init_libc_symbols(void)
 {
-	tsocks_libc_connect = tsocks_find_libc_symbol(LIBC_CONNECT_NAME_STR,
-			TSOCKS_SYM_EXIT_NOT_FOUND);
-	tsocks_libc_close = tsocks_find_libc_symbol(LIBC_CLOSE_NAME_STR,
-			TSOCKS_SYM_EXIT_NOT_FOUND);
-	tsocks_libc_socket = tsocks_find_libc_symbol(LIBC_SOCKET_NAME_STR,
-			TSOCKS_SYM_EXIT_NOT_FOUND);
+	int ret;
+	void *libc_ptr;
+
+	dlerror();
+	libc_ptr = dlopen(LIBC_PATH, RTLD_LAZY);
+	if (!libc_ptr) {
+		ERR("Unable to dlopen() library " LIBC_PATH "(%s)", dlerror());
+		goto error;
+	}
+
+	dlerror();
+	tsocks_libc_connect = dlsym(libc_ptr, LIBC_CONNECT_NAME_STR);
+	tsocks_libc_close = dlsym(libc_ptr, LIBC_CLOSE_NAME_STR);
+	tsocks_libc_socket = dlsym(libc_ptr, LIBC_SOCKET_NAME_STR);
+	tsocks_libc_syscall = dlsym(libc_ptr, LIBC_SYSCALL_NAME_STR);
+	if (!tsocks_libc_connect || !tsocks_libc_close || !tsocks_libc_socket
+			|| !tsocks_libc_syscall) {
+		ERR("Unable to lookup symbols in " LIBC_PATH "(%s)", dlerror());
+		goto error;
+	}
+
+	ret = dlclose(libc_ptr);
+	if (ret != 0) {
+		ERR("dlclose: %s", dlerror());
+	}
+	return;
+
+error:
+	ret = dlclose(libc_ptr);
+	if (ret != 0) {
+		ERR("dlclose: %s", dlerror());
+	}
+	clean_exit(EXIT_FAILURE);
 }
 
 /*





More information about the tor-commits mailing list