| 
									
										
										
										
											2023-03-12 16:00:57 +01:00
										 |  |  | // GoToSocial | 
					
						
							|  |  |  | // Copyright (C) GoToSocial Authors admin@gotosocial.org | 
					
						
							|  |  |  | // SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // This program is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  | // it under the terms of the GNU Affero General Public License as published by | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  | // (at your option) any later version. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // This program is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  | // GNU Affero General Public License for more details. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // You should have received a copy of the GNU Affero General Public License | 
					
						
							|  |  |  | // along with this program.  If not, see <http://www.gnu.org/licenses/>. | 
					
						
							| 
									
										
										
										
											2021-07-13 16:03:51 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-26 20:25:54 +02:00
										 |  |  | package text | 
					
						
							| 
									
										
										
										
											2021-07-13 16:03:51 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							| 
									
										
										
										
											2022-07-19 15:21:17 +02:00
										 |  |  | 	"html" | 
					
						
							| 
									
										
										
										
											2021-08-16 19:17:56 +02:00
										 |  |  | 	"regexp" | 
					
						
							| 
									
										
										
										
											2022-07-19 15:21:17 +02:00
										 |  |  | 	"strings" | 
					
						
							| 
									
										
										
										
											2021-08-16 19:17:56 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-13 16:03:51 +02:00
										 |  |  | 	"github.com/microcosm-cc/bluemonday" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // '[A]llows a broad selection of HTML elements and attributes that are safe for user generated content. | 
					
						
							|  |  |  | // Note that this policy does not allow iframes, object, embed, styles, script, etc. | 
					
						
							|  |  |  | // An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.' | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // Source: https://github.com/microcosm-cc/bluemonday#usage | 
					
						
							|  |  |  | var regular *bluemonday.Policy = bluemonday.UGCPolicy(). | 
					
						
							|  |  |  | 	RequireNoReferrerOnLinks(true). | 
					
						
							| 
									
										
										
										
											2022-11-07 13:25:36 +00:00
										 |  |  | 	RequireNoFollowOnLinks(false).              // remove the global default which adds rel="nofollow" to all links including local relative | 
					
						
							|  |  |  | 	RequireNoFollowOnFullyQualifiedLinks(true). // add rel="nofollow" on all external links | 
					
						
							| 
									
										
										
										
											2021-07-29 13:18:22 +02:00
										 |  |  | 	RequireCrossOriginAnonymous(true). | 
					
						
							| 
									
										
										
										
											2021-08-16 19:17:56 +02:00
										 |  |  | 	AddTargetBlankToFullyQualifiedLinks(true). | 
					
						
							| 
									
										
										
										
											2021-07-29 13:18:22 +02:00
										 |  |  | 	AllowAttrs("class", "href", "rel").OnElements("a"). | 
					
						
							| 
									
										
										
										
											2021-08-16 19:17:56 +02:00
										 |  |  | 	AllowAttrs("class").OnElements("span"). | 
					
						
							|  |  |  | 	AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code"). | 
					
						
							|  |  |  | 	SkipElementsContent("code", "pre") | 
					
						
							| 
									
										
										
										
											2021-07-13 16:03:51 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | // '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist. | 
					
						
							|  |  |  | // An example usage scenario would be blog post titles where HTML tags are not expected at all | 
					
						
							|  |  |  | // and if they are then the elements and the content of the elements should be stripped. This is a very strict policy.' | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // Source: https://github.com/microcosm-cc/bluemonday#usage | 
					
						
							|  |  |  | var strict *bluemonday.Policy = bluemonday.StrictPolicy() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-26 11:37:13 +02:00
										 |  |  | // removeHTML strictly removes *all* recognized HTML elements from the given string. | 
					
						
							|  |  |  | func removeHTML(in string) string { | 
					
						
							|  |  |  | 	return strict.Sanitize(in) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through. | 
					
						
							| 
									
										
										
										
											2021-07-13 16:03:51 +02:00
										 |  |  | func SanitizeHTML(in string) string { | 
					
						
							|  |  |  | 	return regular.Sanitize(in) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-26 11:37:13 +02:00
										 |  |  | // SanitizePlaintext runs text through basic sanitization. This removes | 
					
						
							|  |  |  | // any html elements that were in the string, and returns clean plaintext. | 
					
						
							|  |  |  | func SanitizePlaintext(in string) string { | 
					
						
							| 
									
										
										
										
											2022-07-19 15:21:17 +02:00
										 |  |  | 	content := html.UnescapeString(in) | 
					
						
							| 
									
										
										
										
											2022-05-26 11:37:13 +02:00
										 |  |  | 	content = removeHTML(content) | 
					
						
							| 
									
										
										
										
											2022-07-19 15:21:17 +02:00
										 |  |  | 	content = html.UnescapeString(content) | 
					
						
							|  |  |  | 	return strings.TrimSpace(content) | 
					
						
							| 
									
										
										
										
											2021-07-13 16:03:51 +02:00
										 |  |  | } |