commit ff595f26a6be2c4ca58637e04c012b804e69617e
Author: David Fifield <david(a)bamsoftware.com>
Date: Sat May 10 17:41:22 2014 -0700
Retry the HTTP roundtrip a few times if it fails the first time.
Try sending a request up to 10 times, with 30 seconds in between each
try.
App Engine seems to return a run of 500 errors a few times a day, for
reasons that are not obvious. It appears that most of the time, just
trying a request again after a few seconds makes it start working again.
Previously, we were giving up on a circuit the first time a request
failed.
Retrying a request doesn't make conceptual sense, because we don't know
if the remote server received the bytes we sent already. (We don't know
whether the error happened on the way out or on the way back.) But it
seems that in practice the error usually happens on the way out.
Retrying a few times is working better for me for long-lived
connections. My system tor is getting disconnected from IRC only about
zero or one time a day, rather than the five or six times it was getting
without retries.
A retry looks like this:
2014/05/27 08:58:07 status code was 500, not 200; trying again after 30 seconds (9)
Occasionally all the retries will still fail. It looks like:
2014/05/28 00:02:21 status code was 500, not 200; trying again after 30 seconds (9)
2014/05/28 00:02:51 status code was 500, not 200; trying again after 30 seconds (8)
2014/05/28 00:03:22 status code was 500, not 200; trying again after 30 seconds (7)
2014/05/28 00:03:54 status code was 500, not 200; trying again after 30 seconds (6)
2014/05/28 00:04:24 status code was 500, not 200; trying again after 30 seconds (5)
2014/05/28 00:04:54 status code was 500, not 200; trying again after 30 seconds (4)
2014/05/28 00:05:25 status code was 500, not 200; trying again after 30 seconds (3)
2014/05/28 00:05:55 status code was 500, not 200; trying again after 30 seconds (2)
2014/05/28 00:06:25 status code was 500, not 200; trying again after 30 seconds (1)
2014/05/28 00:06:55 error in handling request: status code was 500, not 200
---
meek-client/meek-client.go | 45 +++++++++++++++++++++++++++++++++++---------
1 file changed, 36 insertions(+), 9 deletions(-)
diff --git a/meek-client/meek-client.go b/meek-client/meek-client.go
index 0093cc0..441348d 100644
--- a/meek-client/meek-client.go
+++ b/meek-client/meek-client.go
@@ -70,6 +70,10 @@ const (
// Geometric increase in the polling interval each time we fail to read
// data.
pollIntervalMultiplier = 1.5
+ // Try an HTTP roundtrip at most this many times.
+ maxTries = 10
+ // Wait this long between retries.
+ retryDelay = 30 * time.Second
// Safety limits on interaction with the HTTP helper.
maxHelperResponseLength = 10000000
helperReadTimeout = 60 * time.Second
@@ -127,23 +131,46 @@ func roundTripWithHTTP(buf []byte, info *RequestInfo) (*http.Response, error) {
return tr.RoundTrip(req)
}
-// Send the data in buf to the remote URL, wait for a reply, and feed the reply
-// body back into conn.
-func sendRecv(buf []byte, conn net.Conn, info *RequestInfo) (int64, error) {
+// Do a roundtrip, trying at most limit times if there is an HTTP status other
+// than 200. In case all tries result in error, returns the last error seen.
+//
+// Retrying the request immediately is a bit bogus, because we don't know if the
+// remote server received our bytes or not, so we may be sending duplicates,
+// which will cause the connection to die. The alternative, though, is to just
+// kill the connection immediately. A better solution would be a system of
+// acknowledgements so we know what to resend after an error.
+func roundTripRetries(buf []byte, info *RequestInfo, limit int) (*http.Response, error) {
roundTrip := roundTripWithHTTP
if options.HelperAddr != nil {
roundTrip = roundTripWithHelper
}
- resp, err := roundTrip(buf, info)
+ var resp *http.Response
+ var err error
+again:
+ limit--
+ resp, err = roundTrip(buf, info)
+ // Retry only if the HTTP roundtrip completed without error, but
+ // returned a status other than 200. Other kinds of errors and success
+ // with 200 always return immediately.
+ if err == nil && resp.StatusCode != http.StatusOK {
+ err = errors.New(fmt.Sprintf("status code was %d, not %d", resp.StatusCode, http.StatusOK))
+ if limit > 0 {
+ log.Printf("%s; trying again after %.f seconds (%d)", err, retryDelay.Seconds(), limit)
+ time.Sleep(retryDelay);
+ goto again
+ }
+ }
+ return resp, err
+}
+
+// Send the data in buf to the remote URL, wait for a reply, and feed the reply
+// body back into conn.
+func sendRecv(buf []byte, conn net.Conn, info *RequestInfo) (int64, error) {
+ resp, err := roundTripRetries(buf, info, maxTries)
if err != nil {
return 0, err
}
defer resp.Body.Close()
-
- if resp.StatusCode != http.StatusOK {
- return 0, errors.New(fmt.Sprintf("status code was %d, not %d", resp.StatusCode, http.StatusOK))
- }
-
return io.Copy(conn, io.LimitReader(resp.Body, maxPayloadLength))
}