| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | /* | 
					
						
							|  |  |  |    GoToSocial | 
					
						
							|  |  |  |    Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    This program is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  |    it under the terms of the GNU Affero General Public License as published by | 
					
						
							|  |  |  |    the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  |    (at your option) any later version. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    This program is distributed in the hope that it will be useful, | 
					
						
							|  |  |  |    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  |    GNU Affero General Public License for more details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    You should have received a copy of the GNU Affero General Public License | 
					
						
							|  |  |  |    along with this program.  If not, see <http://www.gnu.org/licenses/>. | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package httpclient | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"errors" | 
					
						
							|  |  |  | 	"io" | 
					
						
							|  |  |  | 	"net" | 
					
						
							|  |  |  | 	"net/http" | 
					
						
							|  |  |  | 	"net/netip" | 
					
						
							|  |  |  | 	"runtime" | 
					
						
							|  |  |  | 	"time" | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	"codeberg.org/gruf/go-bytesize" | 
					
						
							|  |  |  | 	"codeberg.org/gruf/go-kv" | 
					
						
							|  |  |  | 	"github.com/cornelk/hashmap" | 
					
						
							|  |  |  | 	"github.com/superseriousbusiness/gotosocial/internal/log" | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-26 13:38:41 +02:00
										 |  |  | // ErrInvalidRequest is returned if a given HTTP request is invalid and cannot be performed. | 
					
						
							|  |  |  | var ErrInvalidRequest = errors.New("invalid http request") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | // ErrReservedAddr is returned if a dialed address resolves to an IP within a blocked or reserved net. | 
					
						
							|  |  |  | var ErrReservedAddr = errors.New("dial within blocked / reserved IP range") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // ErrBodyTooLarge is returned when a received response body is above predefined limit (default 40MB). | 
					
						
							|  |  |  | var ErrBodyTooLarge = errors.New("body size too large") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Config provides configuration details for setting up a new | 
					
						
							|  |  |  | // instance of httpclient.Client{}. Within are a subset of the | 
					
						
							|  |  |  | // configuration values passed to initialized http.Transport{} | 
					
						
							|  |  |  | // and http.Client{}, along with httpclient.Client{} specific. | 
					
						
							|  |  |  | type Config struct { | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 	// MaxOpenConnsPerHost limits the max number of open connections to a host. | 
					
						
							|  |  |  | 	MaxOpenConnsPerHost int | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	// MaxIdleConns: see http.Transport{}.MaxIdleConns. | 
					
						
							|  |  |  | 	MaxIdleConns int | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// ReadBufferSize: see http.Transport{}.ReadBufferSize. | 
					
						
							|  |  |  | 	ReadBufferSize int | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// WriteBufferSize: see http.Transport{}.WriteBufferSize. | 
					
						
							|  |  |  | 	WriteBufferSize int | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// MaxBodySize determines the maximum fetchable body size. | 
					
						
							|  |  |  | 	MaxBodySize int64 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Timeout: see http.Client{}.Timeout. | 
					
						
							|  |  |  | 	Timeout time.Duration | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// DisableCompression: see http.Transport{}.DisableCompression. | 
					
						
							|  |  |  | 	DisableCompression bool | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// AllowRanges allows outgoing communications to given IP nets. | 
					
						
							|  |  |  | 	AllowRanges []netip.Prefix | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// BlockRanges blocks outgoing communiciations to given IP nets. | 
					
						
							|  |  |  | 	BlockRanges []netip.Prefix | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Client wraps an underlying http.Client{} to provide the following: | 
					
						
							| 
									
										
										
										
											2022-09-28 18:30:40 +01:00
										 |  |  | //   - setting a maximum received request body size, returning error on | 
					
						
							|  |  |  | //     large content lengths, and using a limited reader in all other | 
					
						
							|  |  |  | //     cases to protect against forged / unknown content-lengths | 
					
						
							|  |  |  | //   - protection from server side request forgery (SSRF) by only dialing | 
					
						
							|  |  |  | //     out to known public IP prefixes, configurable with allows/blocks | 
					
						
							|  |  |  | //   - limit number of concurrent requests, else blocking until a slot | 
					
						
							|  |  |  | //     is available (context channels still respected) | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | type Client struct { | 
					
						
							|  |  |  | 	client http.Client | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 	queue  *hashmap.Map[string, chan struct{}] | 
					
						
							|  |  |  | 	bmax   int64 // max response body size | 
					
						
							|  |  |  | 	cmax   int   // max open conns per host | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // New returns a new instance of Client initialized using configuration. | 
					
						
							|  |  |  | func New(cfg Config) *Client { | 
					
						
							|  |  |  | 	var c Client | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-07-18 11:25:26 +02:00
										 |  |  | 	d := &net.Dialer{ | 
					
						
							| 
									
										
										
										
											2022-11-23 22:40:07 +01:00
										 |  |  | 		Timeout:   15 * time.Second, | 
					
						
							| 
									
										
										
										
											2022-07-18 11:25:26 +02:00
										 |  |  | 		KeepAlive: 30 * time.Second, | 
					
						
							|  |  |  | 		Resolver:  &net.Resolver{}, | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 	if cfg.MaxOpenConnsPerHost <= 0 { | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | 		// By default base this value on GOMAXPROCS. | 
					
						
							|  |  |  | 		maxprocs := runtime.GOMAXPROCS(0) | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 		cfg.MaxOpenConnsPerHost = maxprocs * 20 | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if cfg.MaxIdleConns <= 0 { | 
					
						
							|  |  |  | 		// By default base this value on MaxOpenConns | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 		cfg.MaxIdleConns = cfg.MaxOpenConnsPerHost * 10 | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if cfg.MaxBodySize <= 0 { | 
					
						
							|  |  |  | 		// By default set this to a reasonable 40MB | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 		cfg.MaxBodySize = int64(40 * bytesize.MiB) | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Protect dialer with IP range sanitizer | 
					
						
							|  |  |  | 	d.Control = (&sanitizer{ | 
					
						
							|  |  |  | 		allow: cfg.AllowRanges, | 
					
						
							|  |  |  | 		block: cfg.BlockRanges, | 
					
						
							|  |  |  | 	}).Sanitize | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Prepare client fields | 
					
						
							|  |  |  | 	c.client.Timeout = cfg.Timeout | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 	c.cmax = cfg.MaxOpenConnsPerHost | 
					
						
							|  |  |  | 	c.bmax = cfg.MaxBodySize | 
					
						
							|  |  |  | 	c.queue = hashmap.New[string, chan struct{}]() | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	// Set underlying HTTP client roundtripper | 
					
						
							|  |  |  | 	c.client.Transport = &http.Transport{ | 
					
						
							|  |  |  | 		Proxy:                 http.ProxyFromEnvironment, | 
					
						
							|  |  |  | 		ForceAttemptHTTP2:     true, | 
					
						
							|  |  |  | 		DialContext:           d.DialContext, | 
					
						
							|  |  |  | 		MaxIdleConns:          cfg.MaxIdleConns, | 
					
						
							|  |  |  | 		IdleConnTimeout:       90 * time.Second, | 
					
						
							|  |  |  | 		TLSHandshakeTimeout:   10 * time.Second, | 
					
						
							|  |  |  | 		ExpectContinueTimeout: 1 * time.Second, | 
					
						
							|  |  |  | 		ReadBufferSize:        cfg.ReadBufferSize, | 
					
						
							|  |  |  | 		WriteBufferSize:       cfg.WriteBufferSize, | 
					
						
							|  |  |  | 		DisableCompression:    cfg.DisableCompression, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return &c | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Do will perform given request when an available slot in the queue is available, | 
					
						
							|  |  |  | // and block until this time. For returned values, this follows the same semantics | 
					
						
							|  |  |  | // as the standard http.Client{}.Do() implementation except that response body will | 
					
						
							|  |  |  | // be wrapped by an io.LimitReader() to limit response body sizes. | 
					
						
							|  |  |  | func (c *Client) Do(req *http.Request) (*http.Response, error) { | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 	// Get host's wait queue | 
					
						
							|  |  |  | 	wait := c.wait(req.Host) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	var ok bool | 
					
						
							| 
									
										
										
										
											2022-08-27 12:00:19 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | 	select { | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 	// Quickly try grab a spot | 
					
						
							| 
									
										
										
										
											2022-08-27 12:00:19 +02:00
										 |  |  | 	case wait <- struct{}{}: | 
					
						
							|  |  |  | 		// it's our turn! | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 		ok = true | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		// NOTE: | 
					
						
							|  |  |  | 		// Ideally here we would set the slot release to happen either | 
					
						
							|  |  |  | 		// on error return, or via callback from the response body closer. | 
					
						
							|  |  |  | 		// However when implementing this, there appear deadlocks between | 
					
						
							|  |  |  | 		// the channel queue here and the media manager worker pool. So | 
					
						
							|  |  |  | 		// currently we only place a limit on connections dialing out, but | 
					
						
							|  |  |  | 		// there may still be more connections open than len(c.queue) given | 
					
						
							|  |  |  | 		// that connections may not be closed until response body is closed. | 
					
						
							|  |  |  | 		// The current implementation will reduce the viability of denial of | 
					
						
							|  |  |  | 		// service attacks, but if there are future issues heed this advice :] | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 		defer func() { <-wait }() | 
					
						
							|  |  |  | 	default: | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if !ok { | 
					
						
							|  |  |  | 		// No spot acquired, log warning | 
					
						
							|  |  |  | 		log.WithFields(kv.Fields{ | 
					
						
							|  |  |  | 			{K: "queue", V: len(wait)}, | 
					
						
							|  |  |  | 			{K: "method", V: req.Method}, | 
					
						
							|  |  |  | 			{K: "host", V: req.Host}, | 
					
						
							|  |  |  | 			{K: "uri", V: req.URL.RequestURI()}, | 
					
						
							|  |  |  | 		}...).Warn("full request queue") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		select { | 
					
						
							|  |  |  | 		case <-req.Context().Done(): | 
					
						
							|  |  |  | 			// the request was canceled before we | 
					
						
							|  |  |  | 			// got to our turn: no need to release | 
					
						
							|  |  |  | 			return nil, req.Context().Err() | 
					
						
							|  |  |  | 		case wait <- struct{}{}: | 
					
						
							|  |  |  | 			defer func() { <-wait }() | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-26 13:38:41 +02:00
										 |  |  | 	// Firstly, ensure this is a valid request | 
					
						
							|  |  |  | 	if err := ValidateRequest(req); err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-15 10:16:43 +01:00
										 |  |  | 	// Perform the HTTP request | 
					
						
							|  |  |  | 	rsp, err := c.client.Do(req) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Check response body not too large | 
					
						
							|  |  |  | 	if rsp.ContentLength > c.bmax { | 
					
						
							|  |  |  | 		return nil, ErrBodyTooLarge | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Seperate the body implementers | 
					
						
							|  |  |  | 	rbody := (io.Reader)(rsp.Body) | 
					
						
							|  |  |  | 	cbody := (io.Closer)(rsp.Body) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	var limit int64 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if limit = rsp.ContentLength; limit < 0 { | 
					
						
							|  |  |  | 		// If unknown, use max as reader limit | 
					
						
							|  |  |  | 		limit = c.bmax | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Don't trust them, limit body reads | 
					
						
							|  |  |  | 	rbody = io.LimitReader(rbody, limit) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Wrap body with limit | 
					
						
							|  |  |  | 	rsp.Body = &struct { | 
					
						
							|  |  |  | 		io.Reader | 
					
						
							|  |  |  | 		io.Closer | 
					
						
							|  |  |  | 	}{rbody, cbody} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return rsp, nil | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2022-11-08 09:35:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | // wait acquires the 'wait' queue for the given host string, or allocates new. | 
					
						
							|  |  |  | func (c *Client) wait(host string) chan struct{} { | 
					
						
							|  |  |  | 	// Look for an existing queue | 
					
						
							|  |  |  | 	queue, ok := c.queue.Get(host) | 
					
						
							|  |  |  | 	if ok { | 
					
						
							|  |  |  | 		return queue | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Allocate a new host queue (or return a sneaky existing one). | 
					
						
							|  |  |  | 	queue, _ = c.queue.GetOrInsert(host, make(chan struct{}, c.cmax)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return queue | 
					
						
							|  |  |  | } |