mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-31 07:42:26 -05:00 
			
		
		
		
	
		
			
	
	
		
			92 lines
		
	
	
	
		
			2.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			92 lines
		
	
	
	
		
			2.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
|  | // GoToSocial | ||
|  | // Copyright (C) GoToSocial Authors admin@gotosocial.org | ||
|  | // SPDX-License-Identifier: AGPL-3.0-or-later | ||
|  | // | ||
|  | // This program is free software: you can redistribute it and/or modify | ||
|  | // it under the terms of the GNU Affero General Public License as published by | ||
|  | // the Free Software Foundation, either version 3 of the License, or | ||
|  | // (at your option) any later version. | ||
|  | // | ||
|  | // This program is distributed in the hope that it will be useful, | ||
|  | // but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
|  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||
|  | // GNU Affero General Public License for more details. | ||
|  | // | ||
|  | // You should have received a copy of the GNU Affero General Public License | ||
|  | // along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||
|  | 
 | ||
|  | package transport | ||
|  | 
 | ||
|  | import ( | ||
|  | 	"context" | ||
|  | 	"net/http" | ||
|  | 	"net/url" | ||
|  | 
 | ||
|  | 	"codeberg.org/gruf/go-bytesize" | ||
|  | 	"codeberg.org/gruf/go-iotools" | ||
|  | 	apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util" | ||
|  | 	"github.com/superseriousbusiness/gotosocial/internal/gtserror" | ||
|  | 	"github.com/temoto/robotstxt" | ||
|  | ) | ||
|  | 
 | ||
|  | func (t *transport) DereferenceRobots(ctx context.Context, protocol string, host string) (*robotstxt.RobotsData, error) { | ||
|  | 	robotsIRI := &url.URL{ | ||
|  | 		Scheme: protocol, | ||
|  | 		Host:   host, | ||
|  | 		Path:   "robots.txt", | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Build IRI just once | ||
|  | 	iriStr := robotsIRI.String() | ||
|  | 
 | ||
|  | 	// Prepare new HTTP request to endpoint | ||
|  | 	req, err := http.NewRequestWithContext(ctx, "GET", iriStr, nil) | ||
|  | 	if err != nil { | ||
|  | 		return nil, err | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// We want text/plain utf-8 encoding. | ||
|  | 	// | ||
|  | 	// https://www.rfc-editor.org/rfc/rfc9309.html#name-access-method | ||
|  | 	req.Header.Add("Accept", apiutil.TextPlain) | ||
|  | 	req.Header.Add("Accept-Charset", apiutil.UTF8) | ||
|  | 
 | ||
|  | 	// Perform the HTTP request | ||
|  | 	rsp, err := t.GET(req) | ||
|  | 	if err != nil { | ||
|  | 		return nil, err | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Ensure a non-error status response. | ||
|  | 	if rsp.StatusCode != http.StatusOK { | ||
|  | 		err := gtserror.NewFromResponse(rsp) | ||
|  | 		_ = rsp.Body.Close() // close early. | ||
|  | 		return nil, err | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Ensure that the incoming request content-type is expected. | ||
|  | 	if ct := rsp.Header.Get("Content-Type"); !apiutil.TextPlainContentType(ct) { | ||
|  | 		err := gtserror.Newf("non text/plain response: %s", ct) | ||
|  | 		_ = rsp.Body.Close() // close early. | ||
|  | 		return nil, gtserror.SetMalformed(err) | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Limit the robots.txt size to 500KiB | ||
|  | 	// | ||
|  | 	// https://www.rfc-editor.org/rfc/rfc9309.html#name-limits | ||
|  | 	const maxsz = int64(500 * bytesize.KiB) | ||
|  | 
 | ||
|  | 	// Check body claims to be within size limit. | ||
|  | 	if rsp.ContentLength > maxsz { | ||
|  | 		_ = rsp.Body.Close()       // close early. | ||
|  | 		sz := bytesize.Size(maxsz) //nolint:gosec | ||
|  | 		return nil, gtserror.Newf("robots.txt body exceeds max size %s", sz) | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Update response body with maximum size. | ||
|  | 	rsp.Body, _, _ = iotools.UpdateReadCloserLimit(rsp.Body, maxsz) | ||
|  | 	defer rsp.Body.Close() | ||
|  | 
 | ||
|  | 	return robotstxt.FromResponse(rsp) | ||
|  | } |