commit 3ca55ce2498c8b946fcdcdd52539f2dd90c08ee1
Author: Zack Weinberg <zackw(a)cmu.edu>
Date: Mon Feb 13 16:32:08 2012 -0800
Finish bm-mcurl
---
scripts/bm-mcurl.c | 266 ++++++++++++++++++++++++++++++++++--------------
scripts/tool_urlglob.c | 46 ++++++---
2 files changed, 219 insertions(+), 93 deletions(-)
diff --git a/scripts/bm-mcurl.c b/scripts/bm-mcurl.c
index ac24f3a..890b9e4 100644
--- a/scripts/bm-mcurl.c
+++ b/scripts/bm-mcurl.c
@@ -15,6 +15,8 @@
#include <stdbool.h>
#include <stddef.h>
+#include <errno.h>
+#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
@@ -23,18 +25,73 @@
#include <curl/curl.h>
#include "tool_urlglob.h"
-#define NORETURN __attribute__((noreturn))
+#define NORETURN void __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
static bool verbose = false;
+static inline double
+timevaldiff(const struct timeval *start, const struct timeval *finish)
+{ /* Returns the time from *start to *finish in seconds, as a double.
+     The seconds and microseconds fields are subtracted separately. */
+ double s = finish->tv_sec - start->tv_sec; /* whole-second part */
+ s += ((double)(finish->tv_usec - start->tv_usec)) / 1.0e6; /* microsecond part scaled to seconds; a negative difference borrows from s */
+ return s;
+}
+/* Cursor over an array of URL glob patterns whose end is marked by a
+   null entry (url_next stops when *upats is null).
+   upats - the pattern to expand next; advanced as patterns are used up
+   uglob - glob state of the pattern being expanded, or NULL if none
+   nglob - count handed to glob_url() and decremented once per URL */
+struct url_iter
+{
+ char **upats;
+ URLGlob *uglob;
+ int nglob;
+};
+/* Build an iterator positioned at the first entry of 'upats'.  Nothing
+   is expanded here: uglob starts out NULL and url_next() calls
+   glob_url() the first time it is asked for a URL. */
+static inline struct url_iter
+url_prep(char **upats)
+{
+ struct url_iter it;
+ it.upats = upats;
+ it.uglob = NULL;
+ it.nglob = -1; /* placeholder; url_next() passes &nglob to glob_url() before reading it */
+ return it;
+}
+/* Return the next URL from the iterator, or a null pointer once the
+   pattern list is exhausted.  Patterns that glob_url() rejects are
+   skipped.  The string comes from glob_next_url(); process_urls()
+   free()s it after passing it to curl. */
+static char *
+url_next(struct url_iter *it)
+{
+ char *url;
+
+ if (!it->uglob) { /* no pattern is being expanded right now */
+ for (;;) {
+ if (!*it->upats)
+ return 0; /* reached the null entry that ends the pattern list */
+ if (!glob_url(&it->uglob, *it->upats, &it->nglob, stderr)) { /* glob_url's error paths return nonzero codes */
+ if (verbose)
+ fprintf(stderr, "# %s\n", *it->upats);
+ break;
+ }
+ it->upats++; /* glob_url failed: move on to the next pattern */
+ }
+ }
+
+ if (glob_next_url(&url, it->uglob))
+ abort();
+ if (--it->nglob == 0) { /* that was the last URL of this pattern */
+ glob_cleanup(it->uglob);
+ it->uglob = 0;
+ it->upats++;
+ }
+ return url;
+}
+
static size_t
-discard_data(char *ptr, size_t size, size_t nmemb, void *userdata)
+discard_data(char *ptr UNUSED, size_t size, size_t nmemb, void *userdata UNUSED) /* ignores the data and returns the full size*nmemb byte count; presumably installed as curl's write callback - confirm in setup_curl_easy_handle */
{
return size * nmemb;
}
static size_t
-read_abort(void *ptr, size_t size, size_t nmemb, void *userdata)
+read_abort(void *ptr UNUSED, size_t size UNUSED, size_t nmemb UNUSED,
+ void *userdata UNUSED)
{
/* we don't do anything that should require this to be called,
so if it does get called, something is wrong */
@@ -50,11 +107,11 @@ setup_curl_easy_handle(char *proxy)
#define SET_OR_CRASH(h, opt, param) \
do { if (curl_easy_setopt(h, opt, param)) abort(); } while (0)
- SET_OR_CRASH(h, CURLOPT_VERBOSE, (unsigned long)verbose);
+ /*SET_OR_CRASH(h, CURLOPT_VERBOSE, (unsigned long)verbose);*/
SET_OR_CRASH(h, CURLOPT_NOPROGRESS, 1L);
SET_OR_CRASH(h, CURLOPT_FAILONERROR, 1L);
SET_OR_CRASH(h, CURLOPT_USERAGENT, "bm-mcurl/0.1");
- SET_OR_CRASH(h, CURLOPT_ACCEPT_ENCODING, "");
+ SET_OR_CRASH(h, CURLOPT_ENCODING, "");
SET_OR_CRASH(h, CURLOPT_AUTOREFERER, 1L);
SET_OR_CRASH(h, CURLOPT_FOLLOWLOCATION, 1L);
SET_OR_CRASH(h, CURLOPT_MAXREDIRS, 30L);
@@ -69,99 +126,148 @@ setup_curl_easy_handle(char *proxy)
SET_OR_CRASH(h, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
}
#undef SET_OR_CRASH
+
+ return h;
}
-static bool
-process_events_once(CURLM *multi, unsigned long timeout_max)
+static void
+process_urls(struct url_iter *it, CURLM *multi, CURL **handles,
+ unsigned long limit, double interval) /* Download every URL produced
+   by 'it' through the multi handle 'multi'.  handles[0..maxh) are
+   attached to 'multi' and the remaining entries are idle; maxh never
+   exceeds 'limit'.  At most one new transfer is started per 'interval'
+   seconds.  Returns once the iterator is exhausted and curl reports no
+   running transfers.  Any libcurl or select() failure calls abort(). */
{
- struct timeval tv;
- int rc; /* select() return code */
-
+ struct timeval last, now, timeout;
fd_set fdread;
fd_set fdwrite;
fd_set fdexcept;
int maxfd = -1;
-
- unsigned long timeout = 1000000; /* one second - ultimate default */
- long curl_tout_ms = -1;
-
- /* get fd sets for all pending transfers */
- FD_ZERO(&fdread);
- FD_ZERO(&fdwrite);
- FD_ZERO(&fdexcept);
- curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcept, &maxfd);
-
- /* timeout */
- if (timeout_max > 0 && timeout_max < timeout)
- timeout = timeout_max;
-
- curl_multi_timeout(multi_handle, &curl_tout_ms);
-
- if (curl_tout_ms >= 0) {
- unsigned long curl_tout_us = ((unsigned long)curl_tout_ms) * 1000;
- if (timeout > curl_tout_us)
- timeout = curl_tout_us;
+ int rc;
+ int still_running;
+ int dummy; /* out-parameter required by curl_multi_info_read; value unused */
+ unsigned long maxh = 0; /* number of handles currently attached to 'multi' */
+ unsigned long i;
+ CURLMsg *msg;
+ CURL *h;
+ char *url;
+ double d_timeout;
+ long curl_timeout;
+ bool no_more_urls = false;
+
+ last.tv_sec = 0; /* time zero, so the first interval check passes for any finite interval */
+ last.tv_usec = 0;
+
+ for (;;) {
+ /* possibly queue another URL for download: only when at least
+    'interval' seconds have passed since the previous attempt and
+    an idle handle is available (maxh < limit) */
+ if (!no_more_urls) {
+ gettimeofday(&now, 0);
+ if (timevaldiff(&last, &now) >= interval && maxh < limit) {
+ last = now;
+ url = url_next(it);
+ if (url) {
+ if (curl_easy_setopt(handles[maxh], CURLOPT_URL, url) != CURLM_OK) /* NOTE(review): curl_easy_setopt returns a CURLcode, so CURLE_OK is the matching constant; both are 0 */
+ abort();
+ if (curl_multi_add_handle(multi, handles[maxh]) != CURLM_OK)
+ abort();
+ maxh++;
+ free(url); /* curl takes a copy */
+ } else
+ no_more_urls = true; /* iterator exhausted; stop asking it */
+ }
+ }
+
+ /* call curl_multi_perform as many times as it wants */
+ again:
+ switch (curl_multi_perform(multi, &still_running)) {
+ case CURLM_OK: break;
+ case CURLM_CALL_MULTI_PERFORM: goto again;
+ default:
+ abort();
+ }
+ if (no_more_urls && still_running == 0) /* NOTE(review): this leaves the loop before the curl_multi_info_read pass below, so transfers that finished in the last perform call are neither reported in verbose mode nor removed from 'multi' - confirm intended */
+ break;
+
+ /* clean up finished downloads */
+ while ((msg = curl_multi_info_read(multi, &dummy))) {
+ if (msg->msg != CURLMSG_DONE)
+ abort(); /* no other messages are defined as of Feb 2012 */
+ h = msg->easy_handle;
+ if (verbose) {
+ double rqtime = 0.0;
+ char *url = "<?>"; /* shadows the outer 'url'; default shown if getinfo fails */
+ curl_easy_getinfo(h, CURLINFO_EFFECTIVE_URL, &url);
+ curl_easy_getinfo(h, CURLINFO_TOTAL_TIME, &rqtime);
+ fprintf(stderr, "%f %s\n", rqtime, url);
+ }
+ if (curl_multi_remove_handle(multi, h) != CURLM_OK)
+ abort();
+ for (i = 0; i < maxh; i++) {
+ if (handles[i] == h)
+ goto found;
+ }
+ abort(); /* curl reported a handle that is not in the active range */
+ found:
+ /* shuffle 'h' to the beginning of the set of handles not
+ currently in use */
+ handles[i] = handles[--maxh]; /* last active handle takes h's slot */
+ handles[maxh] = h;
+ }
+
+ /* wait for external event or timeout */
+ FD_ZERO(&fdread);
+ FD_ZERO(&fdwrite);
+ FD_ZERO(&fdexcept);
+ curl_multi_fdset(multi, &fdread, &fdwrite, &fdexcept, &maxfd);
+
+ curl_multi_timeout(multi, &curl_timeout); /* NOTE(review): return value unchecked; curl_timeout has no initializer, so it may be read unset if this call fails */
+ if (curl_timeout >= 0)
+ d_timeout = ((double)curl_timeout) / 1000.0; /* milliseconds to seconds */
+ else
+ d_timeout = 1; /* negative value from curl: fall back to one second */
+ if (d_timeout > interval)
+ d_timeout = interval; /* cap the wait at 'interval' */
+
+ timeout.tv_sec = floor(d_timeout);
+ timeout.tv_usec = lrint((d_timeout - timeout.tv_sec) * 1e6); /* NOTE(review): rounding can yield 1000000 for fractions just under one second, which select() may reject - confirm */
+
+ do
+ rc = select(maxfd+1, &fdread, &fdwrite, &fdexcept, &timeout);
+ while (rc == -1 && errno == EINTR); /* retry when interrupted by a signal */
+ if (rc == -1)
+ abort();
}
-
- tv.tv_sec = timeout / 1000000;
- if(tv.tv_sec >= 1)
- tv.tv_sec = 1;
- else
- tv.tv_usec = timeout % 1000000;
-
- do {
- rc = select(maxfd+1, &fdread, &fdwrite, &fdexcept, &tv);
- } while (rc == -1 && errno == EINTR);
-
- if (rc > 0) {
- int still_running;
- curl_multi_perform(multi_handle, &still_running);
- return !!still_running;
- } else
- abort();
}
-/* Note: this function must not return until we are ready to start
- another connection. */
static void
-queue_one(CURLM *multi, unsigned long rate, unsigned long limit,
- char *proxy, char *url)
-{
-
-}
-
-static void
-run(unsigned long rate, unsigned long limit, char *proxy, char **urls)
+run(double interval, unsigned long limit, char *proxy, char **upats) /* Top-level
+   driver: initialise libcurl, create 'limit' easy handles with
+   setup_curl_easy_handle(proxy), let process_urls() fetch everything
+   named by the patterns in 'upats' (one new request per 'interval'
+   seconds at most), then release the handles and libcurl itself. */
{
+ struct url_iter it;
CURLM *multi;
- curl_global_init();
+ CURL **handles;
+ unsigned long n;
+
+ curl_global_init(CURL_GLOBAL_ALL);
multi = curl_multi_init();
if (!multi) abort();
- for (char **upat = urls; *upat; url++) {
- URLGlob *uglob;
- int *n;
- if (glob_url(&uglob, *upat, &n, stderr))
- continue;
- do {
- char *url;
- if (glob_next_url(&url, uglob)) abort();
- queue_one(multi, rate, limit, proxy, url); /* takes ownership */
- } while (--n);
- glob_cleanup(uglob);
+ handles = calloc(limit, sizeof(CURL *)); /* NOTE(review): result is not checked for NULL before the loop below writes to it */
+ for (n = 0; n < limit; n++) {
+ handles[n] = setup_curl_easy_handle(proxy);
}
- /* spin the event loop until all outstanding transfers complete */
- while (process_events_once(multi, 0));
+ it = url_prep(upats);
+ process_urls(&it, multi, handles, limit, interval);
+ for (n = 0; n < limit; n++) {
+ curl_easy_cleanup(handles[n]); /* NOTE(review): process_urls can return with the last finished handles still attached to 'multi'; check libcurl's required cleanup order */
+ }
+ free(handles);
curl_multi_cleanup(multi);
+ curl_global_cleanup();
}
static NORETURN
usage(const char *av0, const char *complaint)
{
fprintf(stderr,
- "%s\nusage: %s [-v] rate limit proxy url [url...]\n",
+ "%s\nusage: %s [-v] cps limit proxy url [url...]\n", /* cps: connections per second; limit: max outstanding requests (wording from main's error messages) */
complaint, av0);
exit(2);
}
@@ -169,7 +275,7 @@ usage(const char *av0, const char *complaint)
int
main(int argc, char **argv)
{
- unsigned long rate;
+ unsigned long cps;
unsigned long limit;
char *endp;
@@ -181,16 +287,20 @@ main(int argc, char **argv)
}
if (argc < 5)
- usage("not enough arguments");
+ usage(argv[0], "not enough arguments");
- rate = strtoul(argv[1], &endp, 10);
+ cps = strtoul(argv[1], &endp, 10);
if (endp == argv[1] || *endp)
- usage("rate must be a positive integer (connections per second)");
+ usage(argv[0], "cps must be a positive integer (connections per second)");
limit = strtoul(argv[2], &endp, 10);
if (endp == argv[2] || *endp)
- usage("limit must be a positive integer (max outstanding requests)");
+ usage(argv[0],
+ "limit must be a positive integer (max outstanding requests)");
+
+ if (limit == 0)
+ usage(argv[0], "minimum number of outstanding requests is 1");
- run(rate, limit, argv[3], argv+4);
+ run(1./(double)cps, limit, argv[3], argv+4);
return 0;
}
diff --git a/scripts/tool_urlglob.c b/scripts/tool_urlglob.c
index d714971..b160188 100644
--- a/scripts/tool_urlglob.c
+++ b/scripts/tool_urlglob.c
@@ -20,6 +20,8 @@
*
***************************************************************************/
+#define _XOPEN_SOURCE 600 /* strdup */
+
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
@@ -33,6 +35,20 @@ typedef enum {
GLOB_ERROR
} GlobCode;
+/* True when 'c' is one of the letters A-Z or a-z.  The two range
+   comparisons assume ASCII, where each case is a contiguous run. */
+static inline bool
+ISALPHA(unsigned char c)
+{
+ return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+}
+
+/* True when 'c' is a decimal digit.  A single range comparison is
+   enough: C90 guarantees '0' through '9' are consecutive. */
+static inline bool
+ISDIGIT(unsigned char c)
+{
+ return c >= '0' && c <= '9';
+}
+
/*
* glob_word()
*
@@ -86,8 +102,8 @@ static GlobCode glob_set(URLGlob *glob, char *pattern,
if(!new_arr) {
short elem;
for(elem = 0; elem < pat->content.Set.size; elem++)
- Curl_safefree(pat->content.Set.elements[elem]);
- Curl_safefree(pat->content.Set.elements);
+ free(pat->content.Set.elements[elem]);
+ free(pat->content.Set.elements);
pat->content.Set.ptr_s = 0;
pat->content.Set.size = 0;
}
@@ -104,8 +120,8 @@ static GlobCode glob_set(URLGlob *glob, char *pattern,
if(!pat->content.Set.elements[pat->content.Set.size]) {
short elem;
for(elem = 0; elem < pat->content.Set.size; elem++)
- Curl_safefree(pat->content.Set.elements[elem]);
- Curl_safefree(pat->content.Set.elements);
+ free(pat->content.Set.elements[elem]);
+ free(pat->content.Set.elements);
pat->content.Set.ptr_s = 0;
pat->content.Set.size = 0;
snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n");
@@ -122,8 +138,8 @@ static GlobCode glob_set(URLGlob *glob, char *pattern,
if(res) {
short elem;
for(elem = 0; elem < pat->content.Set.size; elem++)
- Curl_safefree(pat->content.Set.elements[elem]);
- Curl_safefree(pat->content.Set.elements);
+ free(pat->content.Set.elements[elem]);
+ free(pat->content.Set.elements);
pat->content.Set.ptr_s = 0;
pat->content.Set.size = 0;
return res;
@@ -337,7 +353,7 @@ static GlobCode glob_word(URLGlob *glob, char *pattern,
}
if(res)
- Curl_safefree(glob->literal[litindex]);
+ free(glob->literal[litindex]);
return res;
}
@@ -361,7 +377,7 @@ int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
glob_expand = calloc(1, sizeof(URLGlob));
if(!glob_expand) {
- Curl_safefree(glob_buffer);
+ free(glob_buffer);
return CURLE_OUT_OF_MEMORY;
}
glob_expand->size = 0;
@@ -380,8 +396,8 @@ int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
glob_expand->errormsg);
}
/* it failed, we cleanup */
- Curl_safefree(glob_buffer);
- Curl_safefree(glob_expand);
+ free(glob_buffer);
+ free(glob_expand);
*urlnum = 1;
return (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
}
@@ -397,7 +413,7 @@ void glob_cleanup(URLGlob* glob)
for(i = glob->size - 1; i < glob->size; --i) {
if(!(i & 1)) { /* even indexes contain literals */
- Curl_safefree(glob->literal[i/2]);
+ free(glob->literal[i/2]);
}
else { /* odd indexes contain sets or ranges */
if((glob->pattern[i/2].type == UPTSet) &&
@@ -405,14 +421,14 @@ void glob_cleanup(URLGlob* glob)
for(elem = glob->pattern[i/2].content.Set.size - 1;
elem >= 0;
--elem) {
- Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]);
+ free(glob->pattern[i/2].content.Set.elements[elem]);
}
- Curl_safefree(glob->pattern[i/2].content.Set.elements);
+ free(glob->pattern[i/2].content.Set.elements);
}
}
}
- Curl_safefree(glob->glob_buffer);
- Curl_safefree(glob);
+ free(glob->glob_buffer);
+ free(glob);
}
int glob_next_url(char **globbed, URLGlob *glob)