mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-30 19:52:25 -05:00 
			
		
		
		
	[feature] Beef up our AI opt-outs (#3165)
* [chore] Synchronise our robots.txt with upstream
* [feature] Add headers to escape AI crawlers

This adds two headers that a number of AI crawlers respect to signal that content should not be included in their datasets.
This commit is contained in:
		
					parent
					
						
							
								e5e996b28a
							
						
					
				
			
			
				commit
				
					
						9b50151f17
					
				
			
		
					 2 changed files with 16 additions and 0 deletions
				
			
		|  | @ -44,5 +44,12 @@ func ExtraHeaders() gin.HandlerFunc { | ||||||
| 		// | 		// | ||||||
| 		// See: https://github.com/patcg-individual-drafts/topics | 		// See: https://github.com/patcg-individual-drafts/topics | ||||||
| 		c.Header("Permissions-Policy", "browsing-topics=()") | 		c.Header("Permissions-Policy", "browsing-topics=()") | ||||||
|  | 
 | ||||||
|  | 		// Some AI scrapers respect the following tags to opt out | ||||||
|  | 		// of their crawling and datasets. | ||||||
|  | 		c.Header("X-Robots-Tag", "noimageai") | ||||||
|  | 		// c.Header calls .Set(), but we want to emit the header | ||||||
|  | 		// twice, not override it. | ||||||
|  | 		c.Writer.Header().Add("X-Robots-Tag", "noai") | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -43,15 +43,24 @@ User-agent: Claude-Web | ||||||
| User-agent: cohere-ai | User-agent: cohere-ai | ||||||
| User-agent: Diffbot | User-agent: Diffbot | ||||||
| User-agent: FacebookBot | User-agent: FacebookBot | ||||||
|  | User-agent: facebookexternalhit | ||||||
| User-agent: FriendlyCrawler | User-agent: FriendlyCrawler | ||||||
| User-agent: Google-Extended | User-agent: Google-Extended | ||||||
| User-agent: GoogleOther | User-agent: GoogleOther | ||||||
|  | User-agent: GoogleOther-Image | ||||||
|  | User-agent: GoogleOther-Video | ||||||
| User-agent: GPTBot | User-agent: GPTBot | ||||||
| User-agent: ImagesiftBot | User-agent: ImagesiftBot | ||||||
| User-agent: img2dataset | User-agent: img2dataset | ||||||
|  | User-agent: Meta-ExternalAgent | ||||||
|  | User-agent: OAI-SearchBot | ||||||
| User-agent: omgili | User-agent: omgili | ||||||
| User-agent: omgilibot | User-agent: omgilibot | ||||||
| User-agent: PerplexityBot | User-agent: PerplexityBot | ||||||
|  | User-agent: PetalBot | ||||||
|  | User-agent: Scrapy | ||||||
|  | User-agent: Timpibot | ||||||
|  | User-agent: VelenPublicWebCrawler | ||||||
| User-agent: YouBot | User-agent: YouBot | ||||||
| Disallow: / | Disallow: / | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue