commit 3ca55ce2498c8b946fcdcdd52539f2dd90c08ee1 Author: Zack Weinberg zackw@cmu.edu Date: Mon Feb 13 16:32:08 2012 -0800
Finish bm-mcurl --- scripts/bm-mcurl.c | 266 ++++++++++++++++++++++++++++++++++-------------- scripts/tool_urlglob.c | 46 ++++++--- 2 files changed, 219 insertions(+), 93 deletions(-)
diff --git a/scripts/bm-mcurl.c b/scripts/bm-mcurl.c index ac24f3a..890b9e4 100644 --- a/scripts/bm-mcurl.c +++ b/scripts/bm-mcurl.c @@ -15,6 +15,8 @@ #include <stdbool.h> #include <stddef.h>
+#include <errno.h> +#include <math.h> #include <stdlib.h> #include <string.h> #include <time.h> @@ -23,18 +25,73 @@ #include <curl/curl.h> #include "tool_urlglob.h"
-#define NORETURN __attribute__((noreturn)) +#define NORETURN void __attribute__((noreturn)) +#define UNUSED __attribute__((unused))
static bool verbose = false;
+/* Elapsed time from start to finish, in seconds: the whole-second difference plus the microsecond difference divided by 1e6. */ static inline double +timevaldiff(const struct timeval *start, const struct timeval *finish) +{ + double s = finish->tv_sec - start->tv_sec; + s += ((double)(finish->tv_usec - start->tv_usec)) / 1.0e6; + return s; +} + +/* Cursor over the URLs produced by an array of glob patterns. */ struct url_iter +{ + char **upats; /* current position in the pattern array; a null entry ends the list */ + URLGlob *uglob; /* glob state of the pattern being expanded, or NULL when none is open */ + int nglob; /* count filled in by glob_url and decremented once per URL returned; presumably the URLs left in the current pattern - confirm against glob_url */ +}; + +/* Returns an iterator positioned on the first entry of upats, with no glob open yet. */ static inline struct url_iter +url_prep(char **upats) +{ + struct url_iter it; + it.upats = upats; + it.uglob = NULL; + it.nglob = -1; + return it; +} + +/* Returns the next URL string, or 0 once every pattern has been consumed. Patterns for which glob_url returns nonzero are skipped; aborts if glob_next_url returns nonzero. The caller in process_urls frees the returned string. */ static char * +url_next(struct url_iter *it) +{ + char *url; + + if (!it->uglob) { /* no pattern open: advance to the next one that glob_url accepts */ + for (;;) { + if (!*it->upats) + return 0; + if (!glob_url(&it->uglob, *it->upats, &it->nglob, stderr)) { + if (verbose) + fprintf(stderr, "# %s\n", *it->upats); + break; + } + it->upats++; /* glob_url reported failure: skip this pattern */ + } + } + + if (glob_next_url(&url, it->uglob)) + abort(); + if (--it->nglob == 0) { /* counter reached zero: close this pattern and step to the next */ + glob_cleanup(it->uglob); + it->uglob = 0; + it->upats++; + } + return url; +} + static size_t -discard_data(char *ptr, size_t size, size_t nmemb, void *userdata) +discard_data(char *ptr UNUSED, size_t size, size_t nmemb, void *userdata UNUSED) /* ignores the data and returns size * nmemb; presumably installed as the curl write callback - confirm in setup_curl_easy_handle */ { return size * nmemb; }
static size_t -read_abort(void *ptr, size_t size, size_t nmemb, void *userdata) +read_abort(void *ptr UNUSED, size_t size UNUSED, size_t nmemb UNUSED, + void *userdata UNUSED) { /* we don't do anything that should require this to be called, so if it does get called, something is wrong */ @@ -50,11 +107,11 @@ setup_curl_easy_handle(char *proxy) #define SET_OR_CRASH(h, opt, param) \ do { if (curl_easy_setopt(h, opt, param)) abort(); } while (0)
- SET_OR_CRASH(h, CURLOPT_VERBOSE, (unsigned long)verbose); + /*SET_OR_CRASH(h, CURLOPT_VERBOSE, (unsigned long)verbose);*/ SET_OR_CRASH(h, CURLOPT_NOPROGRESS, 1L); SET_OR_CRASH(h, CURLOPT_FAILONERROR, 1L); SET_OR_CRASH(h, CURLOPT_USERAGENT, "bm-mcurl/0.1"); - SET_OR_CRASH(h, CURLOPT_ACCEPT_ENCODING, ""); + SET_OR_CRASH(h, CURLOPT_ENCODING, ""); SET_OR_CRASH(h, CURLOPT_AUTOREFERER, 1L); SET_OR_CRASH(h, CURLOPT_FOLLOWLOCATION, 1L); SET_OR_CRASH(h, CURLOPT_MAXREDIRS, 30L); @@ -69,99 +126,148 @@ setup_curl_easy_handle(char *proxy) SET_OR_CRASH(h, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME); } #undef SET_OR_CRASH + + return h; }
-static bool -process_events_once(CURLM *multi, unsigned long timeout_max) +/* Event loop. Pulls URLs from it and starts them on multi using the pre-built easy handles: a new transfer is started only when at least interval seconds have passed since the previous start and fewer than limit handles are in use. Returns once the iterator is exhausted and curl_multi_perform reports no running transfers. Calls abort() on any unexpected curl or select() result. */ static void +process_urls(struct url_iter *it, CURLM *multi, CURL **handles, + unsigned long limit, double interval) { - struct timeval tv; - int rc; /* select() return code */ - + struct timeval last, now, timeout; fd_set fdread; fd_set fdwrite; fd_set fdexcept; int maxfd = -1; - - unsigned long timeout = 1000000; /* one second - ultimate default */ - long curl_tout_ms = -1; - - /* get fd sets for all pending transfers */ - FD_ZERO(&fdread); - FD_ZERO(&fdwrite); - FD_ZERO(&fdexcept); - curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcept, &maxfd); - - /* timeout */ - if (timeout_max > 0 && timeout_max < timeout) - timeout = timeout_max; - - curl_multi_timeout(multi_handle, &curl_tout_ms); - - if (curl_tout_ms >= 0) { - unsigned long curl_tout_us = ((unsigned long)curl_tout_ms) * 1000; - if (timeout > curl_tout_us) - timeout = curl_tout_us; + int rc; + int still_running; + int dummy; + unsigned long maxh = 0; /* handles[0..maxh) are attached to multi; the rest are idle */ + unsigned long i; + CURLMsg *msg; + CURL *h; + char *url; + double d_timeout; + long curl_timeout; + bool no_more_urls = false; + + last.tv_sec = 0; + last.tv_usec = 0; /* last starts at zero, so the first elapsed-time value is the full gettimeofday reading */ + + for (;;) { + /* possibly queue another URL for download */ + if (!no_more_urls) { + gettimeofday(&now, 0); + if (timevaldiff(&last, &now) >= interval && maxh < limit) { /* interval has elapsed and an idle handle exists */ + last = now; + url = url_next(it); + if (url) { + if (curl_easy_setopt(handles[maxh], CURLOPT_URL, url) != CURLM_OK) /* NOTE(review): curl_easy_setopt returns a CURLcode; this comparison relies on CURLM_OK and CURLE_OK both being 0 - confirm and prefer CURLE_OK */ + abort(); + if (curl_multi_add_handle(multi, handles[maxh]) != CURLM_OK) + abort(); + maxh++; + free(url); /* curl takes a copy */ + } else + no_more_urls = true; /* iterator exhausted: stop asking it for URLs */ + } + } + + /* call curl_multi_perform as many times as it wants */ + again: + switch (curl_multi_perform(multi, &still_running)) { + case CURLM_OK: break; + case CURLM_CALL_MULTI_PERFORM: goto again; + default: + abort(); + } + if (no_more_urls && still_running == 0) /* NOTE(review): this exit precedes the message loop below, so transfers that finished in this final pass are neither reported nor removed from multi here */ + break; + + /* clean up finished downloads */ + while ((msg = curl_multi_info_read(multi,
&dummy))) { + if (msg->msg != CURLMSG_DONE) + abort(); /* no other messages are defined as of Feb 2012 */ + h = msg->easy_handle; /* NOTE(review): the result code carried by the message is not examined; failed and successful transfers are treated alike */ + if (verbose) { + double rqtime = 0.0; + char *url = "<?>"; /* shadows the outer url; placeholder printed if getinfo leaves it unchanged */ + curl_easy_getinfo(h, CURLINFO_EFFECTIVE_URL, &url); + curl_easy_getinfo(h, CURLINFO_TOTAL_TIME, &rqtime); + fprintf(stderr, "%f %s\n", rqtime, url); + } + if (curl_multi_remove_handle(multi, h) != CURLM_OK) + abort(); + for (i = 0; i < maxh; i++) { /* linear search for h among the in-use handles */ + if (handles[i] == h) + goto found; + } + abort(); + found: + /* shuffle 'h' to the beginning of the set of handles not + currently in use */ + handles[i] = handles[--maxh]; + handles[maxh] = h; + } + + /* wait for external event or timeout */ + FD_ZERO(&fdread); + FD_ZERO(&fdwrite); + FD_ZERO(&fdexcept); + curl_multi_fdset(multi, &fdread, &fdwrite, &fdexcept, &maxfd); + + curl_multi_timeout(multi, &curl_timeout); + if (curl_timeout >= 0) + d_timeout = ((double)curl_timeout) / 1000.0; /* scale the value from curl by 1/1000 to get seconds */ + else + d_timeout = 1; /* negative value from curl: fall back to one second */ + if (d_timeout > interval) + d_timeout = interval; /* never sleep longer than one queueing interval */ + + timeout.tv_sec = floor(d_timeout); + timeout.tv_usec = lrint((d_timeout - timeout.tv_sec) * 1e6); /* fractional part converted to microseconds */ + + do + rc = select(maxfd+1, &fdread, &fdwrite, &fdexcept, &timeout); + while (rc == -1 && errno == EINTR); /* retry when interrupted by a signal */ + if (rc == -1) + abort(); } - - tv.tv_sec = timeout / 1000000; - if(tv.tv_sec >= 1) - tv.tv_sec = 1; - else - tv.tv_usec = timeout % 1000000; - - do { - rc = select(maxfd+1, &fdread, &fdwrite, &fdexcept, &tv); - } while (rc == -1 && errno == EINTR); - - if (rc > 0) { - int still_running; - curl_multi_perform(multi_handle, &still_running); - return !!still_running; - } else - abort(); }
-/* Note: this function must not return until we are ready to start - another connection. */ static void -queue_one(CURLM *multi, unsigned long rate, unsigned long limit, - char *proxy, char *url) -{ - -} - -static void -run(unsigned long rate, unsigned long limit, char *proxy, char **urls) +run(double interval, unsigned long limit, char *proxy, char **upats) /* Initialises curl, creates one multi handle and limit easy handles built by setup_curl_easy_handle(proxy), drives process_urls over upats, then releases everything. interval and limit are passed straight through to process_urls. Aborts if curl_multi_init fails. */ { + struct url_iter it; CURLM *multi; - curl_global_init(); + CURL **handles; + unsigned long n; + + curl_global_init(CURL_GLOBAL_ALL); multi = curl_multi_init(); if (!multi) abort();
- for (char **upat = urls; *upat; url++) { - URLGlob *uglob; - int *n; - if (glob_url(&uglob, *upat, &n, stderr)) - continue; - do { - char *url; - if (glob_next_url(&url, uglob)) abort(); - queue_one(multi, rate, limit, proxy, url); /* takes ownership */ - } while (--n); - glob_cleanup(uglob); + handles = calloc(limit, sizeof(CURL *)); /* NOTE(review): the calloc result is not checked before the loop below writes through it */ + for (n = 0; n < limit; n++) { + handles[n] = setup_curl_easy_handle(proxy); /* NOTE(review): the returned handle is not checked here; confirm setup_curl_easy_handle cannot return NULL */ }
- /* spin the event loop until all outstanding transfers complete */ - while (process_events_once(multi, 0)); + it = url_prep(upats); + process_urls(&it, multi, handles, limit, interval); /* returns only after all URLs have been started and no transfer is running */
+ for (n = 0; n < limit; n++) { + curl_easy_cleanup(handles[n]); + } + free(handles); curl_multi_cleanup(multi); + curl_global_cleanup(); }
static NORETURN usage(const char *av0, const char *complaint) { fprintf(stderr, - "%s\nusage: %s [-v] rate limit proxy url [url...]\n", + /* prints the complaint, then the usage line with av0 as the program name; exit(2) follows */ "%s\nusage: %s [-v] cps limit proxy url [url...]\n", complaint, av0); exit(2); } @@ -169,7 +275,7 @@ usage(const char *av0, const char *complaint) int main(int argc, char **argv) { - unsigned long rate; + unsigned long cps; /* connections per second, parsed from argv[1] */ unsigned long limit; char *endp;
@@ -181,16 +287,20 @@ main(int argc, char **argv) }
if (argc < 5) - usage("not enough arguments"); + usage(argv[0], "not enough arguments");
- rate = strtoul(argv[1], &endp, 10); + cps = strtoul(argv[1], &endp, 10); if (endp == argv[1] || *endp) - usage("rate must be a positive integer (connections per second)"); + usage(argv[0], "cps must be a positive integer (connections per second)");
limit = strtoul(argv[2], &endp, 10); if (endp == argv[2] || *endp) - usage("limit must be a positive integer (max outstanding requests)"); + usage(argv[0], + "limit must be a positive integer (max outstanding requests)"); + + if (limit == 0) + usage(argv[0], "minimum number of outstanding requests is 1");
- run(rate, limit, argv[3], argv+4); + run(1./(double)cps, limit, argv[3], argv+4); return 0; } diff --git a/scripts/tool_urlglob.c b/scripts/tool_urlglob.c index d714971..b160188 100644 --- a/scripts/tool_urlglob.c +++ b/scripts/tool_urlglob.c @@ -20,6 +20,8 @@ * ***************************************************************************/
+#define _XOPEN_SOURCE 600 /* strdup */ + #include <stdbool.h> #include <stdlib.h> #include <string.h> @@ -33,6 +35,20 @@ typedef enum { GLOB_ERROR } GlobCode;
+/* True when c is one of the letters A-Z or a-z. The two range comparisons assume an ASCII-compatible character set. */ +static inline bool +ISALPHA(unsigned char c) +{ + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +} + +/* True when c is a decimal digit. C90 guarantees '0' through '9' are consecutive, so a single range comparison is enough. */ +static inline bool +ISDIGIT(unsigned char c) +{ + return c >= '0' && c <= '9'; +} + /* * glob_word() * @@ -86,8 +102,8 @@ static GlobCode glob_set(URLGlob *glob, char *pattern, if(!new_arr) { short elem; for(elem = 0; elem < pat->content.Set.size; elem++) - Curl_safefree(pat->content.Set.elements[elem]); - Curl_safefree(pat->content.Set.elements); + free(pat->content.Set.elements[elem]); + free(pat->content.Set.elements); pat->content.Set.ptr_s = 0; pat->content.Set.size = 0; } @@ -104,8 +120,8 @@ static GlobCode glob_set(URLGlob *glob, char *pattern, if(!pat->content.Set.elements[pat->content.Set.size]) { short elem; for(elem = 0; elem < pat->content.Set.size; elem++) - Curl_safefree(pat->content.Set.elements[elem]); - Curl_safefree(pat->content.Set.elements); + free(pat->content.Set.elements[elem]); + free(pat->content.Set.elements); pat->content.Set.ptr_s = 0; pat->content.Set.size = 0; snprintf(glob->errormsg, sizeof(glob->errormsg), "out of memory\n"); @@ -122,8 +138,8 @@ static GlobCode glob_set(URLGlob *glob, char *pattern, if(res) { short elem; for(elem = 0; elem < pat->content.Set.size; elem++) - Curl_safefree(pat->content.Set.elements[elem]); - Curl_safefree(pat->content.Set.elements); + free(pat->content.Set.elements[elem]); + free(pat->content.Set.elements); pat->content.Set.ptr_s = 0; pat->content.Set.size = 0; return res; @@ -337,7 +353,7 @@ static GlobCode glob_word(URLGlob *glob, char *pattern, }
if(res) - Curl_safefree(glob->literal[litindex]); + free(glob->literal[litindex]);
return res; } @@ -361,7 +377,7 @@ int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error)
glob_expand = calloc(1, sizeof(URLGlob)); if(!glob_expand) { - Curl_safefree(glob_buffer); + free(glob_buffer); return CURLE_OUT_OF_MEMORY; } glob_expand->size = 0; @@ -380,8 +396,8 @@ int glob_url(URLGlob** glob, char* url, int *urlnum, FILE *error) glob_expand->errormsg); } /* it failed, we cleanup */ - Curl_safefree(glob_buffer); - Curl_safefree(glob_expand); + free(glob_buffer); + free(glob_expand); *urlnum = 1; return (res == GLOB_NO_MEM) ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT; } @@ -397,7 +413,7 @@ void glob_cleanup(URLGlob* glob)
for(i = glob->size - 1; i < glob->size; --i) { if(!(i & 1)) { /* even indexes contain literals */ - Curl_safefree(glob->literal[i/2]); + free(glob->literal[i/2]); } else { /* odd indexes contain sets or ranges */ if((glob->pattern[i/2].type == UPTSet) && @@ -405,14 +421,14 @@ void glob_cleanup(URLGlob* glob) for(elem = glob->pattern[i/2].content.Set.size - 1; elem >= 0; --elem) { - Curl_safefree(glob->pattern[i/2].content.Set.elements[elem]); + free(glob->pattern[i/2].content.Set.elements[elem]); } - Curl_safefree(glob->pattern[i/2].content.Set.elements); + free(glob->pattern[i/2].content.Set.elements); } } } - Curl_safefree(glob->glob_buffer); - Curl_safefree(glob); + free(glob->glob_buffer); + free(glob); }
int glob_next_url(char **globbed, URLGlob *glob)