mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-29 04:12:25 -05:00 
			
		
		
		
	[performance] massively improved ActivityPub delivery worker efficiency (#2812)
* add delivery worker type that pulls from queue to httpclient package
* finish up some code commenting, bodge a vendored activity library change, integrate the deliverypool changes into transportcontroller
* hook up queue deletion logic
* support deleting queued http requests by target ID
* don't index APRequest by hostname in the queue
* use gorun
* use the original context's values when wrapping msg type as delivery{}
* actually log in the AP delivery worker ...
* add uncommitted changes
* use errors.AsV2()
* use errorsv2.AsV2()
* finish adding some code comments, add bad host handling to delivery workers
* slightly tweak deliveryworkerpool API, use advanced sender multiplier
* remove PopCtx() method, let others instead rely on Wait()
* shuffle things around to move delivery stuff into transport/ subpkg
* remove dead code
* formatting
* validate request before queueing for delivery
* finish adding code comments, fix up backoff code
* finish adding more code comments
* clamp minimum no. senders to 1
* add start/stop logging to delivery worker, some slight changes
* remove double logging
* use worker ptrs
* expose the embedded log fields in httpclient.Request{}
* ensure request context values are preserved when updating ctx
* add delivery worker tests
* fix linter issues
* ensure delivery worker gets inited in testrig
* fix tests to delivering messages to check worker delivery queue
* update error type to use ptr instead of value receiver
* fix test calling Workers{}.Start() instead of testrig.StartWorkers()
* update docs for advanced-sender-multiplier
* update to the latest activity library version
* add comment about not using httptest.Server{}
	
	
This commit is contained in:
		
					parent
					
						
							
								15733cddb2
							
						
					
				
			
			
				commit
				
					
						a483bd9e38
					
				
			
		
					 32 changed files with 1285 additions and 374 deletions
				
			
		|  | @ -35,7 +35,6 @@ import ( | |||
| 	"codeberg.org/gruf/go-cache/v3" | ||||
| 	errorsv2 "codeberg.org/gruf/go-errors/v2" | ||||
| 	"codeberg.org/gruf/go-iotools" | ||||
| 	"codeberg.org/gruf/go-kv" | ||||
| 	"github.com/superseriousbusiness/gotosocial/internal/gtscontext" | ||||
| 	"github.com/superseriousbusiness/gotosocial/internal/gtserror" | ||||
| 	"github.com/superseriousbusiness/gotosocial/internal/log" | ||||
|  | @ -109,11 +108,13 @@ type Client struct { | |||
| 	client   http.Client | ||||
| 	badHosts cache.TTLCache[string, struct{}] | ||||
| 	bodyMax  int64 | ||||
| 	retries  uint | ||||
| } | ||||
| 
 | ||||
| // New returns a new instance of Client initialized using configuration. | ||||
| func New(cfg Config) *Client { | ||||
| 	var c Client | ||||
| 	c.retries = 5 | ||||
| 
 | ||||
| 	d := &net.Dialer{ | ||||
| 		Timeout:   15 * time.Second, | ||||
|  | @ -177,7 +178,7 @@ func New(cfg Config) *Client { | |||
| 	}} | ||||
| 
 | ||||
| 	// Initiate outgoing bad hosts lookup cache. | ||||
| 	c.badHosts = cache.NewTTL[string, struct{}](0, 1000, 0) | ||||
| 	c.badHosts = cache.NewTTL[string, struct{}](0, 512, 0) | ||||
| 	c.badHosts.SetTTL(time.Hour, false) | ||||
| 	if !c.badHosts.Start(time.Minute) { | ||||
| 		log.Panic(nil, "failed to start transport controller cache") | ||||
|  | @ -187,154 +188,184 @@ func New(cfg Config) *Client { | |||
| } | ||||
| 
 | ||||
| // Do will essentially perform http.Client{}.Do() with retry-backoff functionality. | ||||
| func (c *Client) Do(r *http.Request) (*http.Response, error) { | ||||
| 	return c.DoSigned(r, func(r *http.Request) error { | ||||
| 		return nil // no request signing | ||||
| 	}) | ||||
| } | ||||
| 
 | ||||
| // DoSigned will essentially perform http.Client{}.Do() with retry-backoff functionality and requesting signing.. | ||||
| func (c *Client) DoSigned(r *http.Request, sign SignFunc) (rsp *http.Response, err error) { | ||||
| 	const ( | ||||
| 		// max no. attempts. | ||||
| 		maxRetries = 5 | ||||
| 
 | ||||
| 		// starting backoff duration. | ||||
| 		baseBackoff = 2 * time.Second | ||||
| 	) | ||||
| func (c *Client) Do(r *http.Request) (rsp *http.Response, err error) { | ||||
| 
 | ||||
| 	// First validate incoming request. | ||||
| 	if err := ValidateRequest(r); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	// Get request hostname. | ||||
| 	host := r.URL.Hostname() | ||||
| 	// Wrap in our own request | ||||
| 	// type for retry-backoff. | ||||
| 	req := WrapRequest(r) | ||||
| 
 | ||||
| 	// Check whether request should fast fail. | ||||
| 	fastFail := gtscontext.IsFastfail(r.Context()) | ||||
| 	if !fastFail { | ||||
| 		// Check if recently reached max retries for this host | ||||
| 		// so we don't bother with a retry-backoff loop. The only | ||||
| 		// errors that are retried upon are server failure, TLS | ||||
| 		// and domain resolution type errors, so this cached result | ||||
| 		// indicates this server is likely having issues. | ||||
| 		fastFail = c.badHosts.Has(host) | ||||
| 		defer func() { | ||||
| 			if err != nil { | ||||
| 				// On error return mark as bad-host. | ||||
| 				c.badHosts.Set(host, struct{}{}) | ||||
| 			} | ||||
| 		}() | ||||
| 	if gtscontext.IsFastfail(r.Context()) { | ||||
| 		// If the fast-fail flag was set, just | ||||
| 		// attempt a single iteration instead of | ||||
| 		// following the below retry-backoff loop. | ||||
| 		rsp, _, err = c.DoOnce(&req) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("%w (fast fail)", err) | ||||
| 		} | ||||
| 		return rsp, nil | ||||
| 	} | ||||
| 
 | ||||
| 	// Start a log entry for this request | ||||
| 	l := log.WithContext(r.Context()). | ||||
| 		WithFields(kv.Fields{ | ||||
| 			{"method", r.Method}, | ||||
| 			{"url", r.URL.String()}, | ||||
| 		}...) | ||||
| 	for { | ||||
| 		var retry bool | ||||
| 
 | ||||
| 	for i := 0; i < maxRetries; i++ { | ||||
| 		var backoff time.Duration | ||||
| 		// Perform the http request. | ||||
| 		rsp, retry, err = c.DoOnce(&req) | ||||
| 		if err == nil { | ||||
| 			return rsp, nil | ||||
| 		} | ||||
| 
 | ||||
| 		l.Info("performing request") | ||||
| 		if !retry { | ||||
| 			// reached max retries, don't further backoff | ||||
| 			return nil, fmt.Errorf("%w (max retries)", err) | ||||
| 		} | ||||
| 
 | ||||
| 		// Perform the request. | ||||
| 		rsp, err = c.do(r) | ||||
| 		if err == nil { //nolint:gocritic | ||||
| 		// Start new backoff sleep timer. | ||||
| 		backoff := time.NewTimer(req.BackOff()) | ||||
| 
 | ||||
| 			// TooManyRequest means we need to slow | ||||
| 			// down and retry our request. Codes over | ||||
| 			// 500 generally indicate temp. outages. | ||||
| 			if code := rsp.StatusCode; code < 500 && | ||||
| 				code != http.StatusTooManyRequests { | ||||
| 				return rsp, nil | ||||
| 			} | ||||
| 		select { | ||||
| 		// Request ctx cancelled. | ||||
| 		case <-r.Context().Done(): | ||||
| 			backoff.Stop() | ||||
| 
 | ||||
| 			// Create loggable error from response status code. | ||||
| 			err = fmt.Errorf(`http response: %s`, rsp.Status) | ||||
| 			// Return context error. | ||||
| 			err = r.Context().Err() | ||||
| 			return nil, err | ||||
| 
 | ||||
| 			// Search for a provided "Retry-After" header value. | ||||
| 			if after := rsp.Header.Get("Retry-After"); after != "" { | ||||
| 		// Backoff for time. | ||||
| 		case <-backoff.C: | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| 				// Get current time. | ||||
| 				now := time.Now() | ||||
| // DoOnce wraps an underlying http.Client{}.Do() to perform our wrapped request type: | ||||
| // rewinding response body to permit reuse, signing request data when SignFunc provided, | ||||
| // marking erroring hosts, updating retry attempt counts and setting backoff from header. | ||||
| func (c *Client) DoOnce(r *Request) (rsp *http.Response, retry bool, err error) { | ||||
| 	if r.attempts > c.retries { | ||||
| 		// Ensure request hasn't reached max number of attempts. | ||||
| 		err = fmt.Errorf("httpclient: reached max retries (%d)", c.retries) | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 				if u, _ := strconv.ParseUint(after, 10, 32); u != 0 { | ||||
| 					// An integer number of backoff seconds was provided. | ||||
| 					backoff = time.Duration(u) * time.Second | ||||
| 				} else if at, _ := http.ParseTime(after); !at.Before(now) { | ||||
| 					// An HTTP formatted future date-time was provided. | ||||
| 					backoff = at.Sub(now) | ||||
| 				} | ||||
| 	// Update no. | ||||
| 	// attempts. | ||||
| 	r.attempts++ | ||||
| 
 | ||||
| 				// Don't let their provided backoff exceed our max. | ||||
| 				if max := baseBackoff * maxRetries; backoff > max { | ||||
| 					backoff = max | ||||
| 				} | ||||
| 			} | ||||
| 	// Reset backoff. | ||||
| 	r.backoff = 0 | ||||
| 
 | ||||
| 			// Close + unset rsp. | ||||
| 			_ = rsp.Body.Close() | ||||
| 			rsp = nil | ||||
| 	// Perform main routine. | ||||
| 	rsp, retry, err = c.do(r) | ||||
| 
 | ||||
| 		} else if errorsv2.IsV2(err, | ||||
| 	if rsp != nil { | ||||
| 		// Log successful rsp. | ||||
| 		r.Entry.Info(rsp.Status) | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	// Log any errors. | ||||
| 	r.Entry.Error(err) | ||||
| 
 | ||||
| 	switch { | ||||
| 	case !retry: | ||||
| 		// If they were told not to | ||||
| 		// retry, also set number of | ||||
| 		// attempts to prevent retry. | ||||
| 		r.attempts = c.retries + 1 | ||||
| 
 | ||||
| 	case r.attempts > c.retries: | ||||
| 		// On max retries, mark this as | ||||
| 		// a "badhost", i.e. is erroring. | ||||
| 		c.badHosts.Set(r.Host, struct{}{}) | ||||
| 
 | ||||
| 		// Ensure retry flag is unset | ||||
| 		// when reached max attempts. | ||||
| 		retry = false | ||||
| 
 | ||||
| 	case c.badHosts.Has(r.Host): | ||||
| 		// When retry is still permitted, | ||||
| 		// check host hasn't been marked | ||||
| 		// as a "badhost", i.e. erroring. | ||||
| 		r.attempts = c.retries + 1 | ||||
| 		retry = false | ||||
| 	} | ||||
| 
 | ||||
| 	return | ||||
| } | ||||
| 
 | ||||
| // do performs the "meat" of DoOnce(), but it's separated out to allow | ||||
| // easier wrapping of the response, retry, error returns with further logic. | ||||
| func (c *Client) do(r *Request) (rsp *http.Response, retry bool, err error) { | ||||
| 	// Perform the HTTP request. | ||||
| 	rsp, err = c.client.Do(r.Request) | ||||
| 	if err != nil { | ||||
| 
 | ||||
| 		if errorsv2.IsV2(err, | ||||
| 			context.DeadlineExceeded, | ||||
| 			context.Canceled, | ||||
| 			ErrBodyTooLarge, | ||||
| 			ErrReservedAddr, | ||||
| 		) { | ||||
| 			// Non-retryable errors. | ||||
| 			return nil, err | ||||
| 		} else if errstr := err.Error(); // nocollapse | ||||
| 			return nil, false, err | ||||
| 		} | ||||
| 
 | ||||
| 		if errstr := err.Error(); // | ||||
| 		strings.Contains(errstr, "stopped after 10 redirects") || | ||||
| 			strings.Contains(errstr, "tls: ") || | ||||
| 			strings.Contains(errstr, "x509: ") { | ||||
| 			// These error types aren't wrapped | ||||
| 			// so we have to check the error string. | ||||
| 			// All are unrecoverable! | ||||
| 			return nil, err | ||||
| 		} else if dnserr := (*net.DNSError)(nil); // nocollapse | ||||
| 		errors.As(err, &dnserr) && dnserr.IsNotFound { | ||||
| 			return nil, false, err | ||||
| 		} | ||||
| 
 | ||||
| 		if dnserr := errorsv2.AsV2[*net.DNSError](err); // | ||||
| 		dnserr != nil && dnserr.IsNotFound { | ||||
| 			// DNS lookup failure, this domain does not exist | ||||
| 			return nil, gtserror.SetNotFound(err) | ||||
| 			return nil, false, gtserror.SetNotFound(err) | ||||
| 		} | ||||
| 
 | ||||
| 		if fastFail { | ||||
| 			// on fast-fail, don't bother backoff/retry | ||||
| 			return nil, fmt.Errorf("%w (fast fail)", err) | ||||
| 		// A retryable error. | ||||
| 		return nil, true, err | ||||
| 
 | ||||
| 	} else if rsp.StatusCode > 500 || | ||||
| 		rsp.StatusCode == http.StatusTooManyRequests { | ||||
| 
 | ||||
| 		// Codes over 500 (and 429: too many requests) | ||||
| 		// are generally temporary errors. For these | ||||
| 		// we replace the response with a loggable error. | ||||
| 		err = fmt.Errorf(`http response: %s`, rsp.Status) | ||||
| 
 | ||||
| 		// Search for a provided "Retry-After" header value. | ||||
| 		if after := rsp.Header.Get("Retry-After"); after != "" { | ||||
| 
 | ||||
| 			// Get cur time. | ||||
| 			now := time.Now() | ||||
| 
 | ||||
| 			if u, _ := strconv.ParseUint(after, 10, 32); u != 0 { | ||||
| 				// An integer no. of backoff seconds was provided. | ||||
| 				r.backoff = time.Duration(u) * time.Second | ||||
| 			} else if at, _ := http.ParseTime(after); !at.Before(now) { | ||||
| 				// An HTTP formatted future date-time was provided. | ||||
| 				r.backoff = at.Sub(now) | ||||
| 			} | ||||
| 
 | ||||
| 			// Don't let their provided backoff exceed our max. | ||||
| 			if max := baseBackoff * time.Duration(c.retries); // | ||||
| 			r.backoff > max { | ||||
| 				r.backoff = max | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		if backoff == 0 { | ||||
| 			// No retry-after found, set our predefined | ||||
| 			// backoff according to a multiplier of 2^n. | ||||
| 			backoff = baseBackoff * 1 << (i + 1) | ||||
| 		} | ||||
| 
 | ||||
| 		l.Errorf("backing off for %s after http request error: %v", backoff, err) | ||||
| 
 | ||||
| 		select { | ||||
| 		// Request ctx cancelled | ||||
| 		case <-r.Context().Done(): | ||||
| 			return nil, r.Context().Err() | ||||
| 
 | ||||
| 		// Backoff for some time | ||||
| 		case <-time.After(backoff): | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	// Set error return to trigger setting "bad host". | ||||
| 	err = errors.New("transport reached max retries") | ||||
| 	return | ||||
| } | ||||
| 
 | ||||
| // do wraps http.Client{}.Do() to provide safely limited response bodies. | ||||
| func (c *Client) do(req *http.Request) (*http.Response, error) { | ||||
| 	// Perform the HTTP request. | ||||
| 	rsp, err := c.client.Do(req) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 		// Unset + close rsp. | ||||
| 		_ = rsp.Body.Close() | ||||
| 		return nil, true, err | ||||
| 	} | ||||
| 
 | ||||
| 	// Seperate the body implementers. | ||||
|  | @ -364,11 +395,10 @@ func (c *Client) do(req *http.Request) (*http.Response, error) { | |||
| 
 | ||||
| 	// Check response body not too large. | ||||
| 	if rsp.ContentLength > c.bodyMax { | ||||
| 		_ = rsp.Body.Close() | ||||
| 		return nil, ErrBodyTooLarge | ||||
| 		return nil, false, ErrBodyTooLarge | ||||
| 	} | ||||
| 
 | ||||
| 	return rsp, nil | ||||
| 	return rsp, true, nil | ||||
| } | ||||
| 
 | ||||
| // cast discard writer to full interface it supports. | ||||
|  |  | |||
							
								
								
									
										69
									
								
								internal/httpclient/request.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								internal/httpclient/request.go
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,69 @@ | |||
| // GoToSocial | ||||
| // Copyright (C) GoToSocial Authors admin@gotosocial.org | ||||
| // SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| // | ||||
| // This program is free software: you can redistribute it and/or modify | ||||
| // it under the terms of the GNU Affero General Public License as published by | ||||
| // the Free Software Foundation, either version 3 of the License, or | ||||
| // (at your option) any later version. | ||||
| // | ||||
| // This program is distributed in the hope that it will be useful, | ||||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| // GNU Affero General Public License for more details. | ||||
| // | ||||
| // You should have received a copy of the GNU Affero General Public License | ||||
| // along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| package httpclient | ||||
| 
 | ||||
| import ( | ||||
| 	"net/http" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/superseriousbusiness/gotosocial/internal/log" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	// starting backoff duration. | ||||
| 	baseBackoff = 2 * time.Second | ||||
| ) | ||||
| 
 | ||||
| // Request wraps an HTTP request | ||||
| // to add our own retry / backoff. | ||||
| type Request struct { | ||||
| 	// Current backoff dur. | ||||
| 	backoff time.Duration | ||||
| 
 | ||||
| 	// Delivery attempts. | ||||
| 	attempts uint | ||||
| 
 | ||||
| 	// log fields. | ||||
| 	log.Entry | ||||
| 
 | ||||
| 	// underlying request. | ||||
| 	*http.Request | ||||
| } | ||||
| 
 | ||||
| // WrapRequest wraps an existing http.Request within | ||||
| // our own httpclient.Request with retry / backoff tracking. | ||||
| func WrapRequest(r *http.Request) Request { | ||||
| 	var rr Request | ||||
| 	rr.Request = r | ||||
| 	rr.Entry = log.WithContext(r.Context()). | ||||
| 		WithField("method", r.Method). | ||||
| 		WithField("url", r.URL.String()). | ||||
| 		WithField("contentType", r.Header.Get("Content-Type")) | ||||
| 	return rr | ||||
| } | ||||
| 
 | ||||
| // GetBackOff returns the currently set backoff duration, | ||||
| // (using a default according to no. attempts if needed). | ||||
| func (r *Request) BackOff() time.Duration { | ||||
| 	if r.backoff <= 0 { | ||||
| 		// No backoff dur found, set our predefined | ||||
| 		// backoff according to a multiplier of 2^n. | ||||
| 		r.backoff = baseBackoff * 1 << (r.attempts + 1) | ||||
| 	} | ||||
| 	return r.backoff | ||||
| } | ||||
|  | @ -32,9 +32,7 @@ type SignFunc func(r *http.Request) error | |||
| // (RoundTripper implementer) to check request | ||||
| // context for a signing function and using for | ||||
| // all subsequent trips through RoundTrip(). | ||||
| type signingtransport struct { | ||||
| 	http.Transport // underlying transport | ||||
| } | ||||
| type signingtransport struct{ http.Transport } | ||||
| 
 | ||||
| func (t *signingtransport) RoundTrip(r *http.Request) (*http.Response, error) { | ||||
| 	// Ensure updated host always set. | ||||
|  |  | |||
|  | @ -38,7 +38,7 @@ func ValidateRequest(r *http.Request) error { | |||
| 		return fmt.Errorf("%w: empty url host", ErrInvalidRequest) | ||||
| 	case r.URL.Scheme != "http" && r.URL.Scheme != "https": | ||||
| 		return fmt.Errorf("%w: unsupported protocol %q", ErrInvalidRequest, r.URL.Scheme) | ||||
| 	case strings.IndexFunc(r.Method, func(r rune) bool { return !httpguts.IsTokenRune(r) }) != -1: | ||||
| 	case strings.IndexFunc(r.Method, isNotTokenRune) != -1: | ||||
| 		return fmt.Errorf("%w: invalid method %q", ErrInvalidRequest, r.Method) | ||||
| 	} | ||||
| 
 | ||||
|  | @ -60,3 +60,8 @@ func ValidateRequest(r *http.Request) error { | |||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // isNotTokenRune wraps IsTokenRune to inverse result. | ||||
| func isNotTokenRune(r rune) bool { | ||||
| 	return !httpguts.IsTokenRune(r) | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue