update + test markdown parsing

This commit is contained in:
tsmethurst 2021-08-16 19:05:49 +02:00
commit 04b080303f
13 changed files with 108 additions and 25 deletions

View file

@ -165,7 +165,7 @@ func (suite *StatusCreateTestSuite) TestPostAnotherNewStatus() {
err = json.Unmarshal(b, statusReply)
assert.NoError(suite.T(), err)
assert.Equal(suite.T(), "\u003cp\u003e\u003ca href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\"\u003e#\u003cspan\u003etest\u003c/span\u003e\u003c/a\u003e alright, should be able to post \u003ca href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\"\u003e#\u003cspan\u003elinks\u003c/span\u003e\u003c/a\u003e with fragments in them now, let\u0026#39;s see........\u003cbr/\u003e\u003cbr/\u003e\u003ca href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"noopener nofollow noreferrer\" target=\"_blank\"\u003edocs.gotosocial.org/en/latest/user_guide/posts/#links\u003c/a\u003e\u003cbr/\u003e\u003cbr/\u003e\u003ca href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\"\u003e#\u003cspan\u003egotosocial\u003c/span\u003e\u003c/a\u003e\u003cbr/\u003e\u003cbr/\u003e(tobi remember to pull the docker image challenge)\u003c/p\u003e", statusReply.Content)
assert.Equal(suite.T(), "<p><a href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>test</span></a> alright, should be able to post <a href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>links</span></a> with fragments in them now, let's see........<br><br><a href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"noopener nofollow noreferrer\" target=\"_blank\">docs.gotosocial.org/en/latest/user_guide/posts/#links</a><br><br><a href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>gotosocial</span></a><br><br>(tobi remember to pull the docker image challenge)</p>", statusReply.Content)
}
func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() {
@ -198,7 +198,7 @@ func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() {
assert.NoError(suite.T(), err)
assert.Equal(suite.T(), "", statusReply.SpoilerText)
assert.Equal(suite.T(), "<p>here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow: <br/> here&#39;s an emoji that isn&#39;t in the db: :test_emoji:</p>", statusReply.Content)
assert.Equal(suite.T(), "<p>here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:<br>here's an emoji that isn't in the db: :test_emoji:</p>", statusReply.Content)
assert.Len(suite.T(), statusReply.Emojis, 1)
mastoEmoji := statusReply.Emojis[0]
@ -314,7 +314,7 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() {
assert.NoError(suite.T(), err)
assert.Equal(suite.T(), "", statusResponse.SpoilerText)
assert.Equal(suite.T(), "<p>here&#39;s an image attachment</p>", statusResponse.Content)
assert.Equal(suite.T(), "<p>here's an image attachment</p>", statusResponse.Content)
assert.False(suite.T(), statusResponse.Sensitive)
assert.Equal(suite.T(), model.VisibilityPublic, statusResponse.Visibility)

View file

@ -17,8 +17,8 @@ const statusText1 = `Another test @foss_satan@fossbros-anonymous.io
#Hashtag
Text`
const statusText1ExpectedFull = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>Hashtag</span></a><br/><br/>Text</p>`
const statusText1ExpectedPartial = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/>#Hashtag<br/><br/>Text</p>`
const statusText1ExpectedFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text</p>"
const statusText1ExpectedPartial = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br>#Hashtag<br><br>Text</p>"
const statusText2 = `Another test @foss_satan@fossbros-anonymous.io
@ -26,7 +26,7 @@ const statusText2 = `Another test @foss_satan@fossbros-anonymous.io
#hashTAG`
const status2TextExpectedFull = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>Hashtag</span></a><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>hashTAG</span></a></p>`
const status2TextExpectedFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashTAG</span></a></p>"
type UtilTestSuite struct {
StatusStandardTestSuite

View file

@ -20,6 +20,7 @@ package text
import (
"fmt"
"html"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
@ -29,21 +30,33 @@ import (
// preformat contains some common logic for making a string ready for formatting, which should be used for all user-input text.
// It returns the input with HTML entities unescaped and leading/trailing whitespace trimmed.
// NOTE(review): this span comes from a rendered diff, so the superseded first step and its replacement both appear below — confirm against the real file.
func preformat(in string) string {
// do some preformatting of the text
// 1. Trim all the whitespace
s := strings.TrimSpace(in)
// 1. unescape everything that might be html escaped
s := html.UnescapeString(in)
// 2. trim leading or trailing whitespace
s = strings.TrimSpace(s)
return s
}
// postformat contains some common logic for html sanitization of text, wrapping elements, and trimming newlines and whitespace
// It sanitizes the input, unescapes HTML entities in the sanitized result, and then tries to minify it.
// If minification returns an error, the sanitized and unescaped (but unminified) string is returned instead.
// NOTE(review): this span comes from a rendered diff, so the superseded steps and their replacements both appear below — confirm against the real file.
func postformat(in string) string {
// do some postformatting of the text
// 1. remove any cheeky newlines
s := strings.ReplaceAll(in, "\n", "")
// 2. remove any whitespace added as a result of the formatting
s = strings.TrimSpace(s)
// 3. sanitize
s = regular.Sanitize(s)
return s
// 1. sanitize html to remove potentially dangerous elements
s := SanitizeHTML(in)
// 2. the sanitize step tends to escape characters inside codeblocks, which is behavior we don't want, so unescape everything again
// NOTE(review): the unescape below runs over the whole sanitized string, not only code blocks, so entity-encoded
// markup that the sanitizer left as inert text (e.g. &lt;script&gt;) would come back as live tags — confirm this is safe.
s = html.UnescapeString(s)
// 3. minify html to remove any trailing newlines, spaces, unnecessary elements, etc etc
mini, err := minifyHTML(s)
if err != nil {
// if the minify failed, just return what we have
return s
}
// return minified version of the html
return mini
}
func (f *formatter) ReplaceTags(in string, tags []*gtsmodel.Tag) string {

View file

@ -27,7 +27,7 @@ func (f *formatter) FromMarkdown(md string, mentions []*gtsmodel.Mention, tags [
content := preformat(md)
// do the markdown parsing *first*
contentBytes := blackfriday.Run([]byte(md))
contentBytes := blackfriday.Run([]byte(content))
// format tags nicely
content = f.ReplaceTags(string(contentBytes), tags)

View file

@ -19,6 +19,7 @@
package text_test
import (
"fmt"
"testing"
"github.com/stretchr/testify/suite"
@ -36,13 +37,31 @@ Here's a [link](https://example.org).`
simpleMarkdownExpected = "<h1>Title</h1><p>Heres a simple text in markdown.</p><p>Heres a <a href=\"https://example.org\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">link</a>.</p>"
withCodeBlock = "# Title\n\n``` text\nhere's some code!\n```\n\nthat was some code :)"
withCodeBlockExpected = "<h1>Title</h1><pre><code class=\"language-text\">here&#39;s some code!</code></pre><p>that was some code :)</p>"
withCodeBlockExpected = "<h1>Title</h1><p>Below is some JSON.</p><pre><code class=\"language-json\">{\n \"key\": \"value\",\n \"another_key\": [\n \"value1\",\n \"value2\"\n ]\n}\n</code></pre><p>that was some JSON :)</p>"
withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!"
withHashtagExpected = "<h1>Title</h1><p>heres a simple status that uses hashtag <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a>!</p>"
)
var (
// withCodeBlock is markdown input with a title, a paragraph, a fenced JSON code block, and a closing paragraph.
// The fences are spliced in as interpreted strings because a raw string literal cannot contain backticks.
withCodeBlock = `# Title
Below is some JSON.
` + "```" + `json
{
"key": "value",
"another_key": [
"value1",
"value2"
]
}
` + "```" + `
that was some JSON :)
`
)
// MarkdownTestSuite holds the markdown parsing tests; it embeds TextStandardTestSuite.
type MarkdownTestSuite struct {
TextStandardTestSuite
}
@ -78,6 +97,7 @@ func (suite *MarkdownTestSuite) TestParseSimple() {
}
// TestParseWithCodeBlock formats markdown that contains a fenced code block
// and compares the result against the expected HTML.
func (suite *MarkdownTestSuite) TestParseWithCodeBlock() {
	// print the raw markdown input so it is visible in the test output
	fmt.Println(withCodeBlock)

	rendered := suite.formatter.FromMarkdown(withCodeBlock, nil, nil)
	suite.Equal(withCodeBlockExpected, rendered)
}

39
internal/text/minify.go Normal file
View file

@ -0,0 +1,39 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text
import (
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/html"
)
// m is the package-wide HTML minifier. It is fully configured during package
// initialization, so minifyHTML never has to build it lazily and concurrent
// callers cannot race on assigning it.
var m = func() *minify.M {
	minifier := minify.New()
	minifier.Add("text/html", &html.Minifier{
		KeepQuotes:       true,
		KeepEndTags:      true,
		KeepDocumentTags: true,
	})
	return minifier
}()

// minifyHTML runs html through a minifier, reducing it in size.
//
// It returns the minified markup, or the error reported by the minifier.
func minifyHTML(in string) (string, error) {
	return m.String("text/html", in)
}

View file

@ -46,7 +46,5 @@ func (f *formatter) FromPlain(plain string, mentions []*gtsmodel.Mention, tags [
// wrap the whole thing in a pee
content = fmt.Sprintf(`<p>%s</p>`, content)
content = SanitizeHTML(content)
return postformat(content)
}

View file

@ -34,14 +34,14 @@ const (
simpleExpected = "<p>this is a plain and simple status</p>"
withTag = "here's a simple status that uses hashtag #welcome!"
withTagExpected = "<p>here&#39;s a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>"
withTagExpected = "<p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>"
moreComplex = `Another test @foss_satan@fossbros-anonymous.io
#Hashtag
Text`
moreComplexFull = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>Hashtag</span></a><br/><br/>Text</p>`
moreComplexFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text</p>"
)
type PlainTestSuite struct {

View file

@ -36,7 +36,8 @@ var regular *bluemonday.Policy = bluemonday.UGCPolicy().
AddTargetBlankToFullyQualifiedLinks(true).
AllowAttrs("class", "href", "rel").OnElements("a").
AllowAttrs("class").OnElements("span").
AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code")
AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code").
SkipElementsContent("code", "pre")
// '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist.
// An example usage scenario would be blog post titles where HTML tags are not expected at all