mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-11-10 08:17:30 -06:00
rename some of the text functions for clarity
This commit is contained in:
parent
a05d4bac53
commit
c8556cfd97
16 changed files with 145 additions and 124 deletions
|
|
@ -339,7 +339,7 @@ func NormalizeIncomingName(item WithName, rawJSON map[string]interface{}) {
|
|||
//
|
||||
// todo: We probably want to update this to allow
|
||||
// *escaped* HTML markup, but for now just nuke it.
|
||||
name = text.RemoveHTML(name)
|
||||
name = text.StripHTMLFromText(name)
|
||||
|
||||
// Set normalized name property from the raw string; this
|
||||
// will replace any existing name property on the item.
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ func OGBase(instance *apimodel.InstanceV1) *OGMeta {
|
|||
}
|
||||
|
||||
og := &OGMeta{
|
||||
Title: text.RemoveHTML(instance.Title) + " - GoToSocial",
|
||||
Title: text.StripHTMLFromText(instance.Title) + " - GoToSocial",
|
||||
Type: "website",
|
||||
Locale: locale,
|
||||
URL: instance.URI,
|
||||
|
|
@ -161,7 +161,7 @@ func AccountTitle(account *apimodel.WebAccount, accountDomain string) string {
|
|||
// ParseDescription returns a string description which is
|
||||
// safe to use as a template.HTMLAttr inside templates.
|
||||
func ParseDescription(in string) string {
|
||||
i := text.RemoveHTML(in)
|
||||
i := text.StripHTMLFromText(in)
|
||||
i = strings.ReplaceAll(i, "\n", " ")
|
||||
i = strings.Join(strings.Fields(i), " ")
|
||||
i = html.EscapeString(i)
|
||||
|
|
|
|||
|
|
@ -97,8 +97,8 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
|
|||
return nil, gtserror.NewErrorBadRequest(err, err.Error())
|
||||
}
|
||||
|
||||
// Parse new display name (always from plaintext).
|
||||
account.DisplayName = text.RemoveHTML(displayName)
|
||||
// HTML tags not allowed in display name.
|
||||
account.DisplayName = text.StripHTMLFromText(displayName)
|
||||
acctColumns = append(acctColumns, "display_name")
|
||||
}
|
||||
|
||||
|
|
@ -145,7 +145,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
|
|||
}
|
||||
|
||||
if form.AvatarDescription != nil {
|
||||
desc := text.RemoveHTML(*form.AvatarDescription)
|
||||
desc := text.StripHTMLFromText(*form.AvatarDescription)
|
||||
form.AvatarDescription = &desc
|
||||
}
|
||||
|
||||
|
|
@ -175,7 +175,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
|
|||
}
|
||||
|
||||
if form.HeaderDescription != nil {
|
||||
desc := text.RemoveHTML(*form.HeaderDescription)
|
||||
desc := text.StripHTMLFromText(*form.HeaderDescription)
|
||||
form.HeaderDescription = util.Ptr(desc)
|
||||
}
|
||||
|
||||
|
|
@ -265,7 +265,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
|
|||
return nil, gtserror.NewErrorBadRequest(err, err.Error())
|
||||
}
|
||||
|
||||
account.Settings.CustomCSS = text.RemoveHTML(customCSS)
|
||||
account.Settings.CustomCSS = text.StripHTMLFromText(customCSS)
|
||||
settingsColumns = append(settingsColumns, "custom_css")
|
||||
}
|
||||
|
||||
|
|
@ -356,8 +356,8 @@ func (p *Processor) updateFields(
|
|||
|
||||
// Sanitize raw field values.
|
||||
fieldRaw := &gtsmodel.Field{
|
||||
Name: text.RemoveHTML(name),
|
||||
Value: text.RemoveHTML(value),
|
||||
Name: text.StripHTMLFromText(name),
|
||||
Value: text.StripHTMLFromText(value),
|
||||
}
|
||||
fieldsRaw = append(fieldsRaw, fieldRaw)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,8 +53,8 @@ func (p *Processor) createDomainAllow(
|
|||
ID: id.NewULID(),
|
||||
Domain: domain,
|
||||
CreatedByAccountID: adminAcct.ID,
|
||||
PrivateComment: text.RemoveHTML(privateComment),
|
||||
PublicComment: text.RemoveHTML(publicComment),
|
||||
PrivateComment: text.StripHTMLFromText(privateComment),
|
||||
PublicComment: text.StripHTMLFromText(publicComment),
|
||||
Obfuscate: &obfuscate,
|
||||
SubscriptionID: subscriptionID,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,8 +53,8 @@ func (p *Processor) createDomainBlock(
|
|||
ID: id.NewULID(),
|
||||
Domain: domain,
|
||||
CreatedByAccountID: adminAcct.ID,
|
||||
PrivateComment: text.RemoveHTML(privateComment),
|
||||
PublicComment: text.RemoveHTML(publicComment),
|
||||
PrivateComment: text.StripHTMLFromText(privateComment),
|
||||
PublicComment: text.StripHTMLFromText(publicComment),
|
||||
Obfuscate: &obfuscate,
|
||||
SubscriptionID: subscriptionID,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
|
|||
}
|
||||
|
||||
// Don't allow html in site title.
|
||||
instance.Title = text.RemoveHTML(title)
|
||||
instance.Title = text.StripHTMLFromText(title)
|
||||
columns = append(columns, "title")
|
||||
}
|
||||
|
||||
|
|
@ -235,7 +235,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
|
|||
return nil, gtserror.NewErrorBadRequest(err, err.Error())
|
||||
}
|
||||
|
||||
instance.CustomCSS = text.RemoveHTML(customCSS)
|
||||
instance.CustomCSS = text.StripHTMLFromText(customCSS)
|
||||
columns = append(columns, []string{"custom_css"}...)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, media
|
|||
|
||||
// processDescription will sanitize and validate description against server configuration.
|
||||
func processDescription(description string) (string, gtserror.WithCode) {
|
||||
description = text.RemoveHTML(description)
|
||||
description = text.StripHTMLFromText(description)
|
||||
chars := len([]rune(description))
|
||||
|
||||
if min := config.GetMediaDescriptionMinChars(); chars < min {
|
||||
|
|
|
|||
|
|
@ -236,7 +236,7 @@ func (p *Processor) processContent(
|
|||
// Strip each poll option and format.
|
||||
//
|
||||
// For polls just use basic formatting.
|
||||
option = text.RemoveHTML(option)
|
||||
option = text.StripHTMLFromText(option)
|
||||
optionRes := formatInput(p.formatter.FromPlainBasic, option)
|
||||
|
||||
// Gather results of the formatted.
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ func (p *Processor) Create(
|
|||
Username: form.Username,
|
||||
Email: form.Email,
|
||||
Password: form.Password,
|
||||
Reason: text.RemoveHTML(reason),
|
||||
Reason: text.StripHTMLFromText(reason),
|
||||
SignUpIP: form.IP,
|
||||
Locale: form.Locale,
|
||||
AppID: app.ID,
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ package text
|
|||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
gohtml "html"
|
||||
"strings"
|
||||
|
||||
"codeberg.org/gruf/go-byteutil"
|
||||
|
|
@ -193,9 +194,22 @@ func (f *Formatter) fromPlain(
|
|||
return result
|
||||
}
|
||||
|
||||
// HTMLToPlain parses the given HTML and then outputs
|
||||
// it to close-as-possible equivalent plaintext.
|
||||
func HTMLToPlain(html string) string {
|
||||
// ParseHTMLToPlain parses the given HTML string, then
|
||||
// outputs it to equivalent plaintext while trying to
|
||||
// keep as much of the semantic intent of the input
|
||||
// HTML as possible, ie., titles are placed on separate
|
||||
// lines, `<br>`s are converted to newlines, text inside
|
||||
// `<strong>` and `<em>` tags is retained, but without
|
||||
// emphasis, `<a>` links are unnested and the URL they
|
||||
// link to is placed in angle brackets next to them,
|
||||
// lists are replaced with newline-separated indented
|
||||
// items, etc.
|
||||
//
|
||||
// This function is useful when you need to filter on
|
||||
// HTML and want to avoid catching tags in the filter,
|
||||
// or when you want to serve something in a plaintext
|
||||
// format that may contain HTML tags (eg., CWs).
|
||||
func ParseHTMLToPlain(html string) string {
|
||||
plain := html2text.HTML2TextWithOptions(
|
||||
html,
|
||||
html2text.WithLinksInnerText(),
|
||||
|
|
@ -204,3 +218,21 @@ func HTMLToPlain(html string) string {
|
|||
)
|
||||
return strings.TrimSpace(plain)
|
||||
}
|
||||
|
||||
// StripHTMLFromText runs text through strict sanitization
|
||||
// to completely remove any HTML from the input without
|
||||
// trying to preserve the semantic intent of any HTML tags.
|
||||
//
|
||||
// This is useful in cases where the input was not allowed
|
||||
// to contain HTML at all, and the output isn't either.
|
||||
func StripHTMLFromText(text string) string {
|
||||
// Unescape first to catch any tricky critters.
|
||||
content := gohtml.UnescapeString(text)
|
||||
|
||||
// Remove all detected HTML.
|
||||
content = strict.Sanitize(content)
|
||||
|
||||
// Unescape again to return plaintext.
|
||||
content = gohtml.UnescapeString(content)
|
||||
return strings.TrimSpace(content)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -184,7 +184,7 @@ func (suite *PlainTestSuite) TestNumbersAreNotHashtags() {
|
|||
suite.Len(f.Tags, 0)
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestHTMLToPlain() {
|
||||
func (suite *PlainTestSuite) TestParseHTMLToPlain() {
|
||||
for _, t := range []struct {
|
||||
html string
|
||||
expectedPlain string
|
||||
|
|
@ -246,11 +246,88 @@ See the domain permission subscription documentation <https://docs.gotosocial.or
|
|||
Thanks for reading! And seriously back up your database.`,
|
||||
},
|
||||
} {
|
||||
plain := text.HTMLToPlain(t.html)
|
||||
plain := text.ParseHTMLToPlain(t.html)
|
||||
suite.Equal(t.expectedPlain, plain)
|
||||
}
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestStripCaption1() {
|
||||
dodgyCaption := "<script>console.log('haha!')</script>this is just a normal caption ;)"
|
||||
stripped := text.StripHTMLFromText(dodgyCaption)
|
||||
suite.Equal("this is just a normal caption ;)", stripped)
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestStripCaption2() {
|
||||
dodgyCaption := "<em>here's a LOUD caption</em>"
|
||||
stripped := text.StripHTMLFromText(dodgyCaption)
|
||||
suite.Equal("here's a LOUD caption", stripped)
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestStripCaption3() {
|
||||
dodgyCaption := ""
|
||||
stripped := text.StripHTMLFromText(dodgyCaption)
|
||||
suite.Equal("", stripped)
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestStripCaption4() {
|
||||
dodgyCaption := `
|
||||
|
||||
|
||||
here is
|
||||
a multi line
|
||||
caption
|
||||
with some newlines
|
||||
|
||||
|
||||
|
||||
`
|
||||
stripped := text.StripHTMLFromText(dodgyCaption)
|
||||
suite.Equal("here is\na multi line\ncaption\nwith some newlines", stripped)
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestStripCaption5() {
|
||||
// html-escaped: "<script>console.log('aha!')</script> hello world"
|
||||
dodgyCaption := `&lt;script&gt;console.log(&apos;aha!&apos;)&lt;/script&gt; hello world`
|
||||
stripped := text.StripHTMLFromText(dodgyCaption)
|
||||
suite.Equal("hello world", stripped)
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestStripCaption6() {
|
||||
// html-encoded: "<script>console.log('aha!')</script> hello world"
|
||||
dodgyCaption := `&#60;script&#62;console.log(&#39;aha!&#39;)&#60;/script&#62; hello world`
|
||||
stripped := text.StripHTMLFromText(dodgyCaption)
|
||||
suite.Equal("hello world", stripped)
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestStripCustomCSS() {
|
||||
customCSS := `.toot .username {
|
||||
color: var(--link_fg);
|
||||
line-height: 2rem;
|
||||
margin-top: -0.5rem;
|
||||
align-self: start;
|
||||
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}`
|
||||
stripped := text.StripHTMLFromText(customCSS)
|
||||
suite.Equal(customCSS, stripped) // should be the same as it was before
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestStripNaughtyCustomCSS1() {
|
||||
// try to break out of <style> into <head> and change the document title
|
||||
customCSS := "</style><title>pee pee poo poo</title><style>"
|
||||
stripped := text.StripHTMLFromText(customCSS)
|
||||
suite.Empty(stripped)
|
||||
}
|
||||
|
||||
func (suite *PlainTestSuite) TestStripNaughtyCustomCSS2() {
|
||||
// try to break out of <style> into <head> and change the document title
|
||||
customCSS := "pee pee poo poo</style><title></title><style>"
|
||||
stripped := text.StripHTMLFromText(customCSS)
|
||||
suite.Equal("pee pee poo poo", stripped)
|
||||
}
|
||||
|
||||
func TestPlainTestSuite(t *testing.T) {
|
||||
suite.Run(t, new(PlainTestSuite))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,9 +18,7 @@
|
|||
package text
|
||||
|
||||
import (
|
||||
"html"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/microcosm-cc/bluemonday"
|
||||
)
|
||||
|
|
@ -165,21 +163,8 @@ var strict *bluemonday.Policy = bluemonday.StrictPolicy()
|
|||
|
||||
// SanitizeHTML sanitizes only risky html elements
|
||||
// from the given string, allowing safe ones through.
|
||||
func SanitizeHTML(in string) string {
|
||||
return regular.Sanitize(in)
|
||||
}
|
||||
|
||||
// RemoveHTML runs text through strict sanitization.
|
||||
// This removes any html elements that were in the
|
||||
// string, and returns pruned plaintext.
|
||||
func RemoveHTML(in string) string {
|
||||
// Unescape first to catch any tricky critters.
|
||||
content := html.UnescapeString(in)
|
||||
|
||||
// Remove all detected HTML.
|
||||
content = strict.Sanitize(content)
|
||||
|
||||
// Unescape again to return plaintext.
|
||||
content = html.UnescapeString(content)
|
||||
return strings.TrimSpace(content)
|
||||
//
|
||||
// It returns an HTML string.
|
||||
func SanitizeHTML(html string) string {
|
||||
return regular.Sanitize(html)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,83 +45,6 @@ func (suite *SanitizeTestSuite) TestSanitizeHTML() {
|
|||
suite.Equal(sanitizedHTML, s)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCaption1() {
|
||||
dodgyCaption := "<script>console.log('haha!')</script>this is just a normal caption ;)"
|
||||
sanitized := text.RemoveHTML(dodgyCaption)
|
||||
suite.Equal("this is just a normal caption ;)", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCaption2() {
|
||||
dodgyCaption := "<em>here's a LOUD caption</em>"
|
||||
sanitized := text.RemoveHTML(dodgyCaption)
|
||||
suite.Equal("here's a LOUD caption", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCaption3() {
|
||||
dodgyCaption := ""
|
||||
sanitized := text.RemoveHTML(dodgyCaption)
|
||||
suite.Equal("", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCaption4() {
|
||||
dodgyCaption := `
|
||||
|
||||
|
||||
here is
|
||||
a multi line
|
||||
caption
|
||||
with some newlines
|
||||
|
||||
|
||||
|
||||
`
|
||||
sanitized := text.RemoveHTML(dodgyCaption)
|
||||
suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCaption5() {
|
||||
// html-escaped: "<script>console.log('aha!')</script> hello world"
|
||||
dodgyCaption := `&lt;script&gt;console.log(&apos;aha!&apos;)&lt;/script&gt; hello world`
|
||||
sanitized := text.RemoveHTML(dodgyCaption)
|
||||
suite.Equal("hello world", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCaption6() {
|
||||
// html-encoded: "<script>console.log('aha!')</script> hello world"
|
||||
dodgyCaption := `&#60;script&#62;console.log(&#39;aha!&#39;)&#60;/script&#62; hello world`
|
||||
sanitized := text.RemoveHTML(dodgyCaption)
|
||||
suite.Equal("hello world", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCustomCSS() {
|
||||
customCSS := `.toot .username {
|
||||
color: var(--link_fg);
|
||||
line-height: 2rem;
|
||||
margin-top: -0.5rem;
|
||||
align-self: start;
|
||||
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}`
|
||||
sanitized := text.RemoveHTML(customCSS)
|
||||
suite.Equal(customCSS, sanitized) // should be the same as it was before
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeNaughtyCustomCSS1() {
|
||||
// try to break out of <style> into <head> and change the document title
|
||||
customCSS := "</style><title>pee pee poo poo</title><style>"
|
||||
sanitized := text.RemoveHTML(customCSS)
|
||||
suite.Empty(sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeNaughtyCustomCSS2() {
|
||||
// try to break out of <style> into <head> and change the document title
|
||||
customCSS := "pee pee poo poo</style><title></title><style>"
|
||||
sanitized := text.RemoveHTML(customCSS)
|
||||
suite.Equal("pee pee poo poo", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeInlineImg() {
|
||||
withInlineImg := "<p>Here's an inline image: <img class=\"fixed-size-img svelte-uci8eb\" aria-hidden=\"false\" alt=\"A black-and-white photo of an Oblique Strategy card. The card reads: 'Define an area as 'safe' and use it as an anchor'.\" title=\"A black-and-white photo of an Oblique Strategy card. The card reads: 'Define an area as 'safe' and use it as an anchor'.\" width=\"0\" height=\"0\" src=\"https://example.org/fileserver/01H7J83147QMCE17C0RS9P10Y9/attachment/small/01H7J8365XXRTCP6CAMGEM49ZE.jpg\" style=\"object-position: 50% 50%;\"></p>"
|
||||
sanitized := text.SanitizeHTML(withInlineImg)
|
||||
|
|
|
|||
|
|
@ -1376,7 +1376,6 @@ func (c *Converter) baseStatusToFrontend(
|
|||
InReplyToID: nil, // Set below.
|
||||
InReplyToAccountID: nil, // Set below.
|
||||
Sensitive: *s.Sensitive,
|
||||
SpoilerText: text.HTMLToPlain(s.ContentWarning),
|
||||
Visibility: c.VisToAPIVis(ctx, s.Visibility),
|
||||
LocalOnly: s.IsLocalOnly(),
|
||||
Language: nil, // Set below.
|
||||
|
|
@ -1397,6 +1396,11 @@ func (c *Converter) baseStatusToFrontend(
|
|||
Text: s.Text,
|
||||
ContentType: ContentTypeToAPIContentType(s.ContentType),
|
||||
InteractionPolicy: *apiInteractionPolicy,
|
||||
|
||||
// Mastodon API says spoiler_text should be *text*, not HTML, so
|
||||
// parse any HTML back to plaintext when serializing via the API,
|
||||
// attempting to preserve semantic intent to keep it readable.
|
||||
SpoilerText: text.ParseHTMLToPlain(s.ContentWarning),
|
||||
}
|
||||
|
||||
if at := s.EditedAt; !at.IsZero() {
|
||||
|
|
|
|||
|
|
@ -383,7 +383,7 @@ func filterableFields(s *gtsmodel.Status) []string {
|
|||
// remove markdown-formatting characters
|
||||
// and ensure more consistent filtering.
|
||||
if s.Content != "" {
|
||||
text := text.HTMLToPlain(s.Content)
|
||||
text := text.ParseHTMLToPlain(s.Content)
|
||||
if text != "" {
|
||||
fields = append(fields, text)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -361,10 +361,10 @@ func formatNotificationBody(apiNotification *apimodel.Notification) string {
|
|||
if apiNotification.Status.SpoilerText != "" {
|
||||
body = apiNotification.Status.SpoilerText
|
||||
} else {
|
||||
body = text.RemoveHTML(apiNotification.Status.Content)
|
||||
body = text.StripHTMLFromText(apiNotification.Status.Content)
|
||||
}
|
||||
} else {
|
||||
body = text.RemoveHTML(apiNotification.Account.Note)
|
||||
body = text.StripHTMLFromText(apiNotification.Account.Note)
|
||||
}
|
||||
return firstNBytesTrimSpace(body, bodyMaxLen)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue