[feature] Parse content warning as HTML, serialize via API to plaintext

This commit is contained in:
tobi 2025-03-06 15:36:58 +01:00
commit 22ce924129
47 changed files with 947 additions and 553 deletions

View file

@ -163,27 +163,21 @@ var regular *bluemonday.Policy = func() *bluemonday.Policy {
// Source: https://github.com/microcosm-cc/bluemonday#usage
var strict *bluemonday.Policy = bluemonday.StrictPolicy()
// removeHTML runs the given string through the strict
// policy, which strips every HTML element that policy
// recognizes, and returns the sanitized result.
func removeHTML(in string) string {
	stripped := strict.Sanitize(in)
	return stripped
}
// SanitizeToHTML sanitizes only risky html elements
// SanitizeHTML sanitizes only risky html elements
// from the given string, allowing safe ones through.
func SanitizeToHTML(in string) string {
func SanitizeHTML(in string) string {
return regular.Sanitize(in)
}
// SanitizeToPlaintext runs text through basic sanitization.
// This removes any html elements that were in the string,
// and returns clean plaintext.
func SanitizeToPlaintext(in string) string {
// RemoveHTML runs text through strict sanitization.
// This removes any html elements that were in the
// string, and returns pruned plaintext.
func RemoveHTML(in string) string {
// Unescape first to catch any tricky critters.
content := html.UnescapeString(in)
// Remove all detected HTML.
content = removeHTML(content)
content = strict.Sanitize(content)
// Unescape again to return plaintext.
content = html.UnescapeString(content)