[bugfix] Fix HTML escaping in instance title (#607)

* move caption sanitization -> sanitize.go

* use sanitizeplaintext rather than removehtml

* rename sanitizecaption to sanitizeplaintext

* avoid removing html twice from statuses

* unexport removeHTML
it's no longer used outside the text package so this
makes it less confusing

* test instance PATCH
This commit is contained in:
tobi 2022-05-26 11:37:13 +02:00 committed by GitHub
commit 5668ce1ec7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 381 additions and 151 deletions

View file

@ -1,29 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text
// SanitizeCaption runs image captions (or indeed any plain text) through basic sanitization.
// It returns plain text rather than HTML, in contrast to other functions in this package.
func SanitizeCaption(in string) string {
content := preformat(in)
content = RemoveHTML(content)
return postformat(content)
}

View file

@ -1,82 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text_test
import (
"testing"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/text"
)
// CaptionTestSuite groups the tests for text.SanitizeCaption.
// It embeds suite.Suite and declares no fields of its own.
type CaptionTestSuite struct {
suite.Suite
}
// TestSanitizeCaption1 checks that a leading <script> element, including its
// contents, is dropped and only the trailing caption text is returned.
func (suite *CaptionTestSuite) TestSanitizeCaption1() {
	const (
		input    = "<script>console.log('haha!')</script>this is just a normal caption ;)"
		expected = "this is just a normal caption ;)"
	)

	suite.Equal(expected, text.SanitizeCaption(input))
}
// TestSanitizeCaption2 checks that wrapping <em> tags are removed while the
// text they enclose (apostrophe included) comes back unchanged.
func (suite *CaptionTestSuite) TestSanitizeCaption2() {
	const (
		input    = "<em>here's a LOUD caption</em>"
		expected = "here's a LOUD caption"
	)

	suite.Equal(expected, text.SanitizeCaption(input))
}
// TestSanitizeCaption3 checks that an empty caption stays empty.
func (suite *CaptionTestSuite) TestSanitizeCaption3() {
	const input = ""

	suite.Equal("", text.SanitizeCaption(input))
}
// TestSanitizeCaption4 checks a multi-line caption: the newlines between the
// lines of text are expected to survive, while the newlines at the very start
// and end of the input are expected to be gone.
func (suite *CaptionTestSuite) TestSanitizeCaption4() {
	// Raw string literal: the line breaks below are part of the input.
	multiline := `
here is
a multi line
caption
with some newlines
`
	const expected = "here is\na multi line\ncaption\nwith some newlines"

	suite.Equal(expected, text.SanitizeCaption(multiline))
}
// TestSanitizeCaption5 checks that a script element written with HTML entity
// escapes is removed just like a literal one.
func (suite *CaptionTestSuite) TestSanitizeCaption5() {
	// The input below is the html-escaped form of:
	// "<script>console.log('aha!')</script> hello world"
	const (
		input    = `&lt;script&gt;console.log(&apos;aha!&apos;)&lt;/script&gt; hello world`
		expected = "hello world"
	)

	suite.Equal(expected, text.SanitizeCaption(input))
}
// TestSanitizeCaption6 checks that a script element written almost entirely
// with numeric and named HTML character references is still removed.
func (suite *CaptionTestSuite) TestSanitizeCaption6() {
	// The input below is the html-encoded form of:
	// "<script>console.log('aha!')</script> hello world"
	const (
		input    = `&lt;&#115;&#99;&#114;&#105;&#112;&#116;&gt;&#99;&#111;&#110;&#115;&#111;&#108;&#101;&period;&#108;&#111;&#103;&lpar;&apos;&#97;&#104;&#97;&excl;&apos;&rpar;&lt;&sol;&#115;&#99;&#114;&#105;&#112;&#116;&gt;&#32;&#104;&#101;&#108;&#108;&#111;&#32;&#119;&#111;&#114;&#108;&#100;`
		expected = "hello world"
	)

	suite.Equal(expected, text.SanitizeCaption(input))
}
// TestCaptionTestSuite is the go test entry point that runs every method of
// CaptionTestSuite.
func TestCaptionTestSuite(t *testing.T) {
	suite.Run(t, &CaptionTestSuite{})
}

View file

@ -35,7 +35,7 @@ func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gts
content := preformat(plain)
// sanitize any html elements
content = RemoveHTML(content)
content = removeHTML(content)
// format links nicely
content = f.ReplaceLinks(ctx, content)

View file

@ -0,0 +1,57 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text
import (
"testing"
"github.com/stretchr/testify/suite"
)
// Fixtures for the removeHTML tests. Each input constant is paired with the
// output the tests expect removeHTML to produce for it.
const (
// HTML status content containing a mention link, a hashtag link and <br/> tags.
test_removeHTML = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>Hashtag</span></a><br/><br/>Text</p>`
// Expected result for test_removeHTML: only the text content remains.
test_removedHTML = `Another test @foss_satan#HashtagText`
// Raw string literal, so `\u0026` stays as six literal characters
// (backslash, 'u', '0', '0', '2', '6') rather than becoming '&'.
test_withEscapedLiteral = `it\u0026amp;#39;s its it is`
// Expected result for test_withEscapedLiteral: identical to the input.
test_withEscapedLiteralExpected = `it\u0026amp;#39;s its it is`
// Interpreted string literal, so `\u0026` is the single character '&' and
// the value begins with "it&amp;#39;s".
test_withEscaped = "it\u0026amp;#39;s its it is"
// Expected result for test_withEscaped: the same text, with '&' spelled out.
test_withEscapedExpected = "it&amp;#39;s its it is"
)
// RemoveHTMLTestSuite groups the tests for the unexported removeHTML function.
// It embeds suite.Suite and declares no fields of its own.
type RemoveHTMLTestSuite struct {
suite.Suite
}
// TestSanitizeWithEscapedLiteral runs removeHTML on the raw-string fixture and
// compares the result against test_withEscapedLiteralExpected.
func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscapedLiteral() {
	suite.Equal(test_withEscapedLiteralExpected, removeHTML(test_withEscapedLiteral))
}
// TestSanitizeWithEscaped runs removeHTML on the interpreted-string fixture
// and compares the result against test_withEscapedExpected.
func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscaped() {
	suite.Equal(test_withEscapedExpected, removeHTML(test_withEscaped))
}
// TestRemoveHTML runs removeHTML on the HTML status fixture and compares the
// result against test_removedHTML.
func (suite *RemoveHTMLTestSuite) TestRemoveHTML() {
	suite.Equal(test_removedHTML, removeHTML(test_removeHTML))
}
// TestRemoveHTMLTestSuite is the go test entry point that runs every method of
// RemoveHTMLTestSuite.
func TestRemoveHTMLTestSuite(t *testing.T) {
	suite.Run(t, new(RemoveHTMLTestSuite))
}

View file

@ -46,12 +46,20 @@ var regular *bluemonday.Policy = bluemonday.UGCPolicy().
// Source: https://github.com/microcosm-cc/bluemonday#usage
var strict *bluemonday.Policy = bluemonday.StrictPolicy()
// SanitizeHTML cleans up HTML in the given string, allowing through only safe HTML elements.
// removeHTML runs the given string through the package's strict bluemonday
// policy, which strips *all* recognized HTML elements, and returns the result.
func removeHTML(in string) string {
	stripped := strict.Sanitize(in)
	return stripped
}
// SanitizeHTML runs the given string through the package's regular bluemonday
// policy and returns the result. Risky HTML elements are removed; only
// elements that policy considers safe are let through.
func SanitizeHTML(in string) string {
	cleaned := regular.Sanitize(in)
	return cleaned
}
// RemoveHTML removes all HTML from the given string.
func RemoveHTML(in string) string {
return strict.Sanitize(in)
// SanitizePlaintext runs text through basic sanitization. This removes
// any html elements that were in the string, and returns clean plaintext.
func SanitizePlaintext(in string) string {
content := preformat(in)
content = removeHTML(content)
return postformat(content)
}

View file

@ -26,17 +26,8 @@ import (
)
const (
removeHTML = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>Hashtag</span></a><br/><br/>Text</p>`
removedHTML = `Another test @foss_satan#HashtagText`
sanitizeHTML = `here's some naughty html: <script>alert(ahhhh)</script> !!!`
sanitizedHTML = `here&#39;s some naughty html: !!!`
withEscapedLiteral = `it\u0026amp;#39;s its it is`
withEscapedLiteralExpected = `it\u0026amp;#39;s its it is`
withEscaped = "it\u0026amp;#39;s its it is"
withEscapedExpected = "it&amp;#39;s its it is"
sanitizeHTML = `here's some naughty html: <script>alert(ahhhh)</script> !!!`
sanitizedHTML = `here&#39;s some naughty html: !!!`
sanitizeOutgoing = `<p>gotta test some fucking &#39;&#39;&#39;&#39;&#39;&#39;&#39;&#39;&#39; marks</p>`
sanitizedOutgoing = `<p>gotta test some fucking &#39;&#39;&#39;&#39;&#39;&#39;&#39;&#39;&#39; marks</p>`
)
@ -45,11 +36,6 @@ type SanitizeTestSuite struct {
suite.Suite
}
// TestRemoveHTML runs text.RemoveHTML on the removeHTML fixture and compares
// the result against the removedHTML fixture.
func (suite *SanitizeTestSuite) TestRemoveHTML() {
	suite.Equal(removedHTML, text.RemoveHTML(removeHTML))
}
func (suite *SanitizeTestSuite) TestSanitizeOutgoing() {
s := text.SanitizeHTML(sanitizeOutgoing)
suite.Equal(sanitizedOutgoing, s)
@ -60,14 +46,52 @@ func (suite *SanitizeTestSuite) TestSanitizeHTML() {
suite.Equal(sanitizedHTML, s)
}
func (suite *SanitizeTestSuite) TestSanitizeWithEscapedLiteral() {
s := text.RemoveHTML(withEscapedLiteral)
suite.Equal(withEscapedLiteralExpected, s)
// TestSanitizeCaption1 checks that a leading <script> element, including its
// contents, is dropped and only the trailing caption text is returned.
func (suite *SanitizeTestSuite) TestSanitizeCaption1() {
	const (
		input    = "<script>console.log('haha!')</script>this is just a normal caption ;)"
		expected = "this is just a normal caption ;)"
	)

	suite.Equal(expected, text.SanitizePlaintext(input))
}
func (suite *SanitizeTestSuite) TestSanitizeWithEscaped() {
s := text.RemoveHTML(withEscaped)
suite.Equal(withEscapedExpected, s)
// TestSanitizeCaption2 checks that wrapping <em> tags are removed while the
// text they enclose (apostrophe included) comes back unchanged.
func (suite *SanitizeTestSuite) TestSanitizeCaption2() {
	const (
		input    = "<em>here's a LOUD caption</em>"
		expected = "here's a LOUD caption"
	)

	suite.Equal(expected, text.SanitizePlaintext(input))
}
// TestSanitizeCaption3 checks that an empty input stays empty.
func (suite *SanitizeTestSuite) TestSanitizeCaption3() {
	const input = ""

	suite.Equal("", text.SanitizePlaintext(input))
}
// TestSanitizeCaption4 checks a multi-line input: the newlines between the
// lines of text are expected to survive, while the newlines at the very start
// and end of the input are expected to be gone.
func (suite *SanitizeTestSuite) TestSanitizeCaption4() {
	// Raw string literal: the line breaks below are part of the input.
	multiline := `
here is
a multi line
caption
with some newlines
`
	const expected = "here is\na multi line\ncaption\nwith some newlines"

	suite.Equal(expected, text.SanitizePlaintext(multiline))
}
// TestSanitizeCaption5 checks that a script element written with HTML entity
// escapes is removed just like a literal one.
func (suite *SanitizeTestSuite) TestSanitizeCaption5() {
	// The input below is the html-escaped form of:
	// "<script>console.log('aha!')</script> hello world"
	const (
		input    = `&lt;script&gt;console.log(&apos;aha!&apos;)&lt;/script&gt; hello world`
		expected = "hello world"
	)

	suite.Equal(expected, text.SanitizePlaintext(input))
}
// TestSanitizeCaption6 checks that a script element written almost entirely
// with numeric and named HTML character references is still removed.
func (suite *SanitizeTestSuite) TestSanitizeCaption6() {
	// The input below is the html-encoded form of:
	// "<script>console.log('aha!')</script> hello world"
	const (
		input    = `&lt;&#115;&#99;&#114;&#105;&#112;&#116;&gt;&#99;&#111;&#110;&#115;&#111;&#108;&#101;&period;&#108;&#111;&#103;&lpar;&apos;&#97;&#104;&#97;&excl;&apos;&rpar;&lt;&sol;&#115;&#99;&#114;&#105;&#112;&#116;&gt;&#32;&#104;&#101;&#108;&#108;&#111;&#32;&#119;&#111;&#114;&#108;&#100;`
		expected = "hello world"
	)

	suite.Equal(expected, text.SanitizePlaintext(input))
}
func TestSanitizeTestSuite(t *testing.T) {