[tor-commits] [stegotorus/master] Finish bm-mcurl

zwol at torproject.org zwol at torproject.org
Fri Jul 20 23:17:06 UTC 2012


commit 3ca55ce2498c8b946fcdcdd52539f2dd90c08ee1
Author: Zack Weinberg <zackw at cmu.edu>
Date:   Mon Feb 13 16:32:08 2012 -0800

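    Finish bm-mcurl

    bm-mcurl.c:
     - Replace the unfinished process_events_once()/queue_one() pair with
       process_urls(), a single select()-based loop that starts at most one
       new transfer per interval (1/cps seconds), keeps at most 'limit'
       transfers outstanding, and recycles easy handles as transfers finish.
     - Add struct url_iter with url_prep()/url_next() so URL glob patterns
       are expanded one URL at a time instead of all up front.
     - run() now preallocates 'limit' easy handles, passes CURL_GLOBAL_ALL to
       curl_global_init(), and calls curl_global_cleanup() when done.
     - setup_curl_easy_handle() now returns the handle it creates.
     - Comment out CURLOPT_VERBOSE and use CURLOPT_ENCODING in place of
       CURLOPT_ACCEPT_ENCODING.
     - Pass argv[0] to usage(), rename 'rate' to 'cps', and reject a limit
       of zero.

    tool_urlglob.c:
     - Define _XOPEN_SOURCE for strdup, add local ISALPHA/ISDIGIT helpers,
       and replace Curl_safefree() with plain free().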
---
 scripts/bm-mcurl.c     |  266 ++++++++++++++++++++++++++++++++++--------------
 scripts/tool_urlglob.c |   46 ++++++---
 2 files changed, 219 insertions(+), 93 deletions(-)

diff --git a/scripts/bm-mcurl.c b/scripts/bm-mcurl.c
index ac24f3a..890b9e4 100644
--- a/scripts/bm-mcurl.c
+++ b/scripts/bm-mcurl.c
@@ -15,6 +15,8 @@
 #include <stdbool.h>
 #include <stddef.h>
 
+#include <errno.h>
+#include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
@@ -23,18 +25,73 @@
 #include <curl/curl.h>
 #include "tool_urlglob.h"
 
-#define NORETURN __attribute__((noreturn))
+#define NORETURN void __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
 
 static bool verbose = false;
 
+static inline double
+timevaldiff(const struct timeval *start, const struct timeval *finish)
+{
+  double s = finish->tv_sec - start->tv_sec;
+  s += ((double)(finish->tv_usec - start->tv_usec)) / 1.0e6;
+  return s;
+}
+
+struct url_iter
+{
+  char **upats;
+  URLGlob *uglob;
+  int nglob;
+};
+
+static inline struct url_iter
+url_prep(char **upats)
+{
+  struct url_iter it;
+  it.upats = upats;
+  it.uglob = NULL;
+  it.nglob = -1;
+  return it;
+}
+
+static char *
+url_next(struct url_iter *it)
+{
+  char *url;
+
+  if (!it->uglob) {
+    for (;;) {
+      if (!*it->upats)
+        return 0;
+      if (!glob_url(&it->uglob, *it->upats, &it->nglob, stderr)) {
+        if (verbose)
+          fprintf(stderr, "# %s\n", *it->upats);
+        break;
+      }
+      it->upats++;
+    }
+  }
+
+  if (glob_next_url(&url, it->uglob))
+    abort();
+  if (--it->nglob == 0) {
+    glob_cleanup(it->uglob);
+    it->uglob = 0;
+    it->upats++;
+  }
+  return url;
+}
+
 static size_t
-discard_data(char *ptr, size_t size, size_t nmemb, void *userdata)
+discard_data(char *ptr UNUSED, size_t size, size_t nmemb, void *userdata UNUSED)
 {
   return size * nmemb;
 }
 
 static size_t
-read_abort(void *ptr, size_t size, size_t nmemb, void *userdata)
+read_abort(void *ptr UNUSED, size_t size UNUSED, size_t nmemb UNUSED,
+           void *userdata UNUSED)
 {
   /* we don't do anything that should require this to be called,
      so if it does get called, something is wrong */
@@ -50,11 +107,11 @@ setup_curl_easy_handle(char *proxy)
 #define SET_OR_CRASH(h, opt, param) \
   do { if (curl_easy_setopt(h, opt, param)) abort(); } while (0)
 
-  SET_OR_CRASH(h, CURLOPT_VERBOSE,         (unsigned long)verbose);
+  /*SET_OR_CRASH(h, CURLOPT_VERBOSE,         (unsigned long)verbose);*/
   SET_OR_CRASH(h, CURLOPT_NOPROGRESS,      1L);
   SET_OR_CRASH(h, CURLOPT_FAILONERROR,     1L);
   SET_OR_CRASH(h, CURLOPT_USERAGENT,       "bm-mcurl/0.1");
-  SET_OR_CRASH(h, CURLOPT_ACCEPT_ENCODING, "");
+  SET_OR_CRASH(h, CURLOPT_ENCODING,        "");
   SET_OR_CRASH(h, CURLOPT_AUTOREFERER,     1L);
   SET_OR_CRASH(h, CURLOPT_FOLLOWLOCATION,  1L);
   SET_OR_CRASH(h, CURLOPT_MAXREDIRS,       30L);
@@ -69,99 +126,148 @@ setup_curl_easy_handle(char *proxy)
     SET_OR_CRASH(h, CURLOPT_PROXYTYPE,     CURLPROXY_SOCKS5_HOSTNAME);
   }
 #undef SET_OR_CRASH
+
+  return h;
 }
 
-static bool
-process_events_once(CURLM *multi, unsigned long timeout_max)
+static void
+process_urls(struct url_iter *it, CURLM *multi, CURL **handles,
+             unsigned long limit, double interval)
 {
-  struct timeval tv;
-  int rc; /* select() return code */
-
+  struct timeval last, now, timeout;
   fd_set fdread;
   fd_set fdwrite;
   fd_set fdexcept;
   int maxfd = -1;
-
-  unsigned long timeout = 1000000; /* one second - ultimate default */
-  long curl_tout_ms = -1;
-
-  /* get fd sets for all pending transfers */
-  FD_ZERO(&fdread);
-  FD_ZERO(&fdwrite);
-  FD_ZERO(&fdexcept);
-  curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcept, &maxfd);
-
-  /* timeout */
-  if (timeout_max > 0 && timeout_max < timeout)
-    timeout = timeout_max;
-
-  curl_multi_timeout(multi_handle, &curl_tout_ms);
-
-  if (curl_tout_ms >= 0) {
-    unsigned long curl_tout_us = ((unsigned long)curl_tout_ms) * 1000;
-    if (timeout > curl_tout_us)
-      timeout = curl_tout_us;
+  int rc;
+  int still_running;
+  int dummy;
+  unsigned long maxh = 0;
+  unsigned long i;
+  CURLMsg *msg;
+  CURL *h;
+  char *url;
+  double d_timeout;
+  long curl_timeout;
+  bool no_more_urls = false;
+
+  last.tv_sec = 0;
+  last.tv_usec = 0;
+
+  for (;;) {
+    /* possibly queue another URL for download */
+    if (!no_more_urls) {
+      gettimeofday(&now, 0);
+      if (timevaldiff(&last, &now) >= interval && maxh < limit) {
+        last = now;
+        url = url_next(it);
+        if (url) {
+          if (curl_easy_setopt(handles[maxh], CURLOPT_URL, url) != CURLM_OK)
+            abort();
+          if (curl_multi_add_handle(multi, handles[maxh]) != CURLM_OK)
+            abort();
+          maxh++;
+          free(url); /* curl takes a copy */
+        } else
+          no_more_urls = true;
+      }
+    }
+
+    /* call curl_multi_perform as many times as it wants */
+  again:
+    switch (curl_multi_perform(multi, &still_running)) {
+    case CURLM_OK: break;
+    case CURLM_CALL_MULTI_PERFORM: goto again;
+    default:
+      abort();
+    }
+    if (no_more_urls && still_running == 0)
+      break;
+
+    /* clean up finished downloads */
+    while ((msg = curl_multi_info_read(multi, &dummy))) {
+      if (msg->msg != CURLMSG_DONE)
+        abort(); /* no other messages are defined as of Feb 2012 */
+      h = msg->easy_handle;
+      if (verbose) {
+        double rqtime = 0.0;
+        char *url = "<?>";
+        curl_easy_getinfo(h, CURLINFO_EFFECTIVE_URL, &url);
+        curl_easy_getinfo(h, CURLINFO_TOTAL_TIME, &rqtime);
+        fprintf(stderr, "%f %s\n", rqtime, url);
+      }
+      if (curl_multi_remove_handle(multi, h) != CURLM_OK)
+        abort();
+      for (i = 0; i < maxh; i++) {
+        if (handles[i] == h)
+          goto found;
+      }
+      abort();
+    found:
+      /* shuffle 'h' to the beginning of the set of handles not
+         currently in use */
+      handles[i] = handles[--maxh];
+      handles[maxh] = h;
+    }
+
+    /* wait for external event or timeout */
+    FD_ZERO(&fdread);
+    FD_ZERO(&fdwrite);
+    FD_ZERO(&fdexcept);
+    curl_multi_fdset(multi, &fdread, &fdwrite, &fdexcept, &maxfd);
+
+    curl_multi_timeout(multi, &curl_timeout);
+    if (curl_timeout >= 0)
+      d_timeout = ((double)curl_timeout) / 1000.0;
+    else
+      d_timeout = 1;
+    if (d_timeout > interval)
+      d_timeout = interval;
+
+    timeout.tv_sec = floor(d_timeout);
+    timeout.tv_usec = lrint((d_timeout - timeout.tv_sec) * 1e6);
+
+    do
+      rc = select(maxfd+1, &fdread, &fdwrite, &fdexcept, &timeout);
+    while (rc == -1 && errno == EINTR);
+    if (rc == -1)
+      abort();
   }
-
-  tv.tv_sec = timeout / 1000000;
-  if(tv.tv_sec >= 1)
-    tv.tv_sec = 1;
-  else
-    tv.tv_usec = timeout % 1000000;
-
-  do {
-    rc = select(maxfd+1, &fdread, &fdwrite, &fdexcept, &tv);
-  } while (rc == -1 && errno == EINTR);
-
-  if (rc > 0) {
-    int still_running;
-    curl_multi_perform(multi_handle, &still_running);
-    return !!still_running;
-  } else
-    abort();
 }
 
-/* Note: this function must not return until we are ready to start
-   another connection. */
 static void
-queue_one(CURLM *multi, unsigned long rate, unsigned long limit,
-          char *proxy, char *url)
-{
-
-}
-
-static void
-run(unsigned long rate, unsigned long limit, char *proxy, char **urls)
+run(double interval, unsigned long limit, char *proxy, char **upats)
 {
+  struct url_iter it;
   CURLM *multi;
-  curl_global_init();
+  CURL **handles;
+  unsigned long n;
+
+  curl_global_init(CURL_GLOBAL_ALL);
   multi = curl_multi_init();
   if (!multi) abort();
 
-  for (char **upat = urls; *upat; url++) {
-    URLGlob *uglob;
-    int *n;
-    if (glob_url(&uglob, *upat, &n, stderr))
-      continue;
-    do {
-      char *url;
-      if (glob_next_url(&url, uglob)) abort();
-      queue_one(multi, rate, limit, proxy, url); /* takes ownership */
-    } while (--n);
-    glob_cleanup(uglob);
+  handles = calloc(limit, sizeof(CURL *));
+  for (n = 0; n < limit; n++) {
+    handles[n] = setup_curl_easy_handle(proxy);
   }
 
-  /* spin the event loop until all outstanding transfers complete */
-  while (process_events_once(multi, 0));
+  it = url_prep(upats);
+  process_urls(&it, multi, handles, limit, interval);
 
+  for (n = 0; n < limit; n++) {
+    curl_easy_cleanup(handles[n]);
+  }
+  free(handles);
   curl_multi_cleanup(multi);
+  curl_global_cleanup();
 }
 
 static NORETURN
 usage(const char *av0, const char *complaint)
 {
   fprintf(stderr,
-          "%s\nusage: %s [-v] rate limit proxy url [url...]\n",
+          "%s\nusage: %s [-v] cps limit proxy url [url...]\n",
           complaint, av0);
   exit(2);
 }
@@ -169,7 +275,7 @@ usage(const char *av0, const char *complaint)
 int
 main(int argc, char **argv)
 {
-  unsigned long rate;
+  unsigned long cps;
   unsigned long limit;
   char *endp;
 
@@ -181,16 +287,20 @@ main(int argc, char **argv)
   }
 
   if (argc < 5)
-    usage("not enough arguments");
+    usage(argv[0], "not enough arguments");
 
-  rate = strtoul(argv[1], &endp, 10);
+  cps = strtoul(argv[1], &endp, 10);
   if (endp == argv[1] || *endp)
-    usage("rate must be a positive integer (connections per second)");
+    usage(argv[0], "cps must be a positive integer (connections per second)");
 
   limit = strtoul(argv[2], &endp, 10);
   if (endp == argv[2] || *endp)
-    usage("limit must be a positive integer (max outstanding requests)");
+    usage(argv[0],
+          "limit must be a positive integer (max outstanding requests)");
+
+  if (limit == 0)
+    usage(argv[0], "minimum number of outstanding requests is 1");
 
-  run(rate, limit, argv[3], argv+4);
+  run(1./(double)cps, limit, argv[3], argv+4);
   return 0;
 }
diff --git a/scripts/tool_urlglob.c b/scripts/tool_urlglob.c
index d714971..b160188 100644
--- a/scripts/tool_urlglob.c
+++ b/scripts/tool_urlglob.c
@@ -20,6 +20,8 @@
  *
  ***************************************************************************/
 
+#define _XOPEN_SOURCE 600 /* strdup */
+
 #include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
@@ -33,6 +35,20 @@ typedef enum {
   GLOB_ERROR
 } GlobCode;
 
+/* assumes ASCII */
+static inline bool
+ISALPHA(unsigned char c)
+{
+  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+}
+
+/* C90 guarantees '0' through '9' are consecutive */
+static inline bool
+ISDIGIT(unsigned char c)
+{
+  return c >= '0' && c <= '9';
+}
+
 /*
  * glob_word()
  *
@@ -86,8 +102,8 @@ static GlobCode glob_set(URLGlob *glob, char *pattern,
         if(!new_arr) {
           short elem;
           for(elem = 0; elem < pat->content.Set.size; elem++)
-            Curl_safefree(pat->content.Set.elements[elem]);
-          Curl_safefree(pat->content.Set.elements);
+            free(pat->content.Set.elements[elem]);
+          free(pat->content.Set.elements);
           pat->content.Set.ptr_s = 0;
           pat->content.Set.size = 0;
         }
@@ -104,8 +120,8 @@ static GlobCode glob_set(URLGlob *glob, char *pattern,
       if(!pat->content.Set.elements[pat->content.Set.size]) {
         short elem;
         for(elem = 0; elem < pat->content.Set.size; elem++)
-          Curl_safefree(pat->content.Set.elements[elem]);
-        Curl_safefree(pat->content.Set.elements);
+          free(pat->content.Set.elements[elem]);
+        free(pat->content.Set.elements);
         pat->content.Set.ptr_s = 0;
         pat->content.Set.size = 0;
         snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
@@ -122,8 +138,8 @@ static GlobCode glob_set(URLGlob *glob, char *pattern,
         if(res) {
           short elem;
           for(elem = 0; elem < pat->content.Set.size; elem++)
-            Curl_safefree(pat->content.Set.elements[elem]);
-          Curl_safefree(pat->content.Set.elements);
+            free(pat->content.Set.elements[elem]);
+          free(pat->content.Set.elements);
           pat->content.Set.ptr_s = 0;
           pat->content.Set.size = 0;
           return res;
@@ -337,7 +353,7 @@ static GlobCode glob_word(URLGlob *glob, char *pattern,
   }
 
   if(res)
-    Curl_safefree(glob->literal[litindex]);
+    free(glob->literal[litindex]);
 
   return res;
 }
@@ -361,7 +377,7 @@ int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
 
   glob_expand = calloc(1, sizeof(URLGlob));
   if(!glob_expand) {
-    Curl_safefree(glob_buffer);
+    free(glob_buffer);
     return CURLE_OUT_OF_MEMORY;
   }
   glob_expand->size = 0;
@@ -380,8 +396,8 @@ int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
               glob_expand->errormsg);
     }
     /* it failed, we cleanup */
-    Curl_safefree(glob_buffer);
-    Curl_safefree(glob_expand);
+    free(glob_buffer);
+    free(glob_expand);
     *urlnum = 1;
     return (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
   }
@@ -397,7 +413,7 @@ void glob_cleanup(URLGlob* glob)
 
   for(i = glob->size - 1; i < glob->size; --i) {
     if(!(i & 1)) {     /* even indexes contain literals */
-      Curl_safefree(glob->literal[i/2]);
+      free(glob->literal[i/2]);
     }
     else {              /* odd indexes contain sets or ranges */
       if((glob->pattern[i/2].type == UPTSet) &&
@@ -405,14 +421,14 @@ void glob_cleanup(URLGlob* glob)
         for(elem = glob->pattern[i/2].content.Set.size - 1;
              elem >= 0;
              --elem) {
-          Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]);
+          free(glob->pattern[i/2].content.Set.elements[elem]);
         }
-        Curl_safefree(glob->pattern[i/2].content.Set.elements);
+        free(glob->pattern[i/2].content.Set.elements);
       }
     }
   }
-  Curl_safefree(glob->glob_buffer);
-  Curl_safefree(glob);
+  free(glob->glob_buffer);
+  free(glob);
 }
 
 int glob_next_url(char **globbed, URLGlob *glob)





More information about the tor-commits mailing list