[chore] Text formatting overhaul (#1406)

* Implement goldmark debug print for hashtags and mentions

* Minify HTML in FromPlain

* Convert plaintext status parser to goldmark

* Move mention/tag/emoji finding logic into formatter

* Combine mention and hashtag boundary characters

* Normalize unicode when rendering hashtags
This commit is contained in:
Autumn! 2023-02-03 10:58:58 +00:00 committed by GitHub
commit 49beb17a8f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 826 additions and 1314 deletions

View file

@ -1,112 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text
import (
"bytes"
"context"
"strings"
"unicode"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
// ReplaceTags returns a copy of in where each hashtag span reported by
// util.FindHashtagSpansInText is replaced with an HTML anchor, provided the
// span's text (without its first character, presumably the '#') matches the
// Name of one of the given tags, ignoring case. The anchor points at the
// matching tag's URL and shows the hashtag exactly as it was typed.
//
// Hashtags that match none of the tags are copied through untouched. If no
// hashtag spans are found at all, in is returned as-is.
func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string {
	found := util.FindHashtagSpansInText(in)
	if len(found) == 0 {
		return in
	}

	var out strings.Builder
	cursor := 0

	for _, span := range found {
		// Copy whatever sits between the previous hashtag and this one.
		out.WriteString(in[cursor:span.First])
		cursor = span.Second

		// The hashtag as the user typed it, minus the leading character.
		entered := in[span.First+1 : span.Second]

		// First tag whose name matches case-insensitively wins.
		var matched *gtsmodel.Tag
		for _, candidate := range tags {
			if strings.EqualFold(entered, candidate.Name) {
				matched = candidate
				break
			}
		}

		if matched == nil {
			// Unknown hashtag: keep the original text.
			out.WriteString(in[span.First:span.Second])
			continue
		}

		// <a href="tag.URL" class="mention hashtag" rel="tag">#<span>entered</span></a>
		out.WriteString(`<a href="`)
		out.WriteString(matched.URL)
		out.WriteString(`" class="mention hashtag" rel="tag">#<span>`)
		out.WriteString(entered)
		out.WriteString(`</span></a>`)
	}

	// cursor now sits at the end of the last span; append the remainder.
	out.WriteString(in[cursor:])
	return out.String()
}
// ReplaceMentions runs regexes.MentionFinder over in and, for every match
// whose trimmed text equals the NameString of one of the given mentions
// (ignoring case), substitutes an h-card link to the mentioned account.
//
// If the matching mention has no TargetAccount attached, the account is
// fetched from the database by TargetAccountID and stored on the mention.
// When that lookup fails the error is logged and the match is left as-is.
// Matches that correspond to none of the mentions are also left unchanged.
func (f *formatter) ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string {
	replace := func(match string, buf *bytes.Buffer) string {
		// The raw match may carry surrounding whitespace; compare without it.
		name := strings.TrimSpace(match)

		for _, m := range mentions {
			if !strings.EqualFold(name, m.NameString) {
				continue
			}

			// Make sure an account is attached to this mention.
			if m.TargetAccount == nil {
				account, err := f.db.GetAccountByID(ctx, m.TargetAccountID)
				if err != nil {
					log.Errorf("error getting account with id %s from the db: %s", m.TargetAccountID, err)
					return match
				}
				m.TargetAccount = account
			}
			target := m.TargetAccount

			// Put back a leading whitespace byte that trimming dropped.
			if lead := match[0]; unicode.IsSpace(rune(lead)) {
				buf.WriteByte(lead)
			}

			// <span class="h-card"><a href="target.URL" class="u-url mention">@<span>target.Username</span></a></span>
			buf.WriteString(`<span class="h-card"><a href="`)
			buf.WriteString(target.URL)
			buf.WriteString(`" class="u-url mention">@<span>`)
			buf.WriteString(target.Username)
			buf.WriteString(`</span></a></span>`)
			return buf.String()
		}

		// Not one of the known mentions: hand the match back untouched.
		return match
	}

	return regexes.ReplaceAllStringFunc(regexes.MentionFinder, in, replace)
}

View file

@ -1,106 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text_test
import (
"context"
"testing"
"time"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
// Input texts and expected outputs shared by the CommonTestSuite tests.
const (
// Plain input containing one remote mention and one hashtag.
replaceMentionsString = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText"
// Expected result of ReplaceMentions on replaceMentionsString.
replaceMentionsExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n#Hashtag\n\nText"
// Expected result of ReplaceTags on replaceMentionsString.
replaceHashtagsExpected = "Another test @foss_satan@fossbros-anonymous.io\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText"
// Expected result of ReplaceTags on replaceMentionsExpected.
replaceHashtagsAfterMentionsExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText"
// Input with a mention followed by a status URL that itself contains "@foss_satan".
replaceMentionsWithLinkString = "Another test @foss_satan@fossbros-anonymous.io\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060"
// Expected result of ReplaceMentions on replaceMentionsWithLinkString: the URL is untouched.
replaceMentionsWithLinkStringExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060"
// Input where a local account mentions itself and links to one of its own statuses.
replaceMentionsWithLinkSelfString = "Mentioning myself: @the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR"
// Expected result of ReplaceMentions on replaceMentionsWithLinkSelfString.
// NOTE(review): "Memtions" is a misspelling of "Mentions"; the name is kept because the test below refers to it.
replaceMemtionsWithLinkSelfExpected = "Mentioning myself: <span class=\"h-card\"><a href=\"http://localhost:8080/@the_mighty_zork\" class=\"u-url mention\">@<span>the_mighty_zork</span></a></span>\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR"
)
// CommonTestSuite groups the tests for the formatter's ReplaceMentions and
// ReplaceTags methods. It embeds TextStandardTestSuite for its fixtures.
type CommonTestSuite struct {
TextStandardTestSuite
}
// TestReplaceMentions checks that the mention in replaceMentionsString is
// turned into an h-card link while the rest of the text stays as it was.
func (suite *CommonTestSuite) TestReplaceMentions() {
	mentions := []*gtsmodel.Mention{suite.testMentions["zork_mention_foss_satan"]}

	got := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsString, mentions)

	suite.Equal(replaceMentionsExpected, got)
}
// TestReplaceHashtags checks that the hashtag in replaceMentionsString is
// turned into a tag link while the mention is left as plain text.
func (suite *CommonTestSuite) TestReplaceHashtags() {
	tags := []*gtsmodel.Tag{suite.testTags["Hashtag"]}

	got := suite.formatter.ReplaceTags(context.Background(), replaceMentionsString, tags)

	suite.Equal(replaceHashtagsExpected, got)
}
// TestReplaceHashtagsAfterReplaceMentions feeds text whose mention has
// already been rendered as HTML into ReplaceTags and checks that only the
// hashtag is changed.
func (suite *CommonTestSuite) TestReplaceHashtagsAfterReplaceMentions() {
	tags := []*gtsmodel.Tag{suite.testTags["Hashtag"]}

	got := suite.formatter.ReplaceTags(context.Background(), replaceMentionsExpected, tags)

	suite.Equal(replaceHashtagsAfterMentionsExpected, got)
}
// TestReplaceMentionsWithLink checks that a mention is replaced while a URL
// containing the same "@user" path segment is left alone.
func (suite *CommonTestSuite) TestReplaceMentionsWithLink() {
	mentions := []*gtsmodel.Mention{suite.testMentions["zork_mention_foss_satan"]}

	got := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsWithLinkString, mentions)

	suite.Equal(replaceMentionsWithLinkStringExpected, got)
}
// TestReplaceMentionsWithLinkSelf builds a mention in which local_account_1
// is both origin and target, and checks that the mention is replaced while
// the link to the account's own status is left alone. The mention carries no
// TargetAccount, so ReplaceMentions has to look the account up by ID.
func (suite *CommonTestSuite) TestReplaceMentionsWithLinkSelf() {
	self := suite.testAccounts["local_account_1"]

	selfMention := &gtsmodel.Mention{
		ID:               "01FGXKN5F815DVFVD53PN9NYM6",
		CreatedAt:        time.Now(),
		UpdatedAt:        time.Now(),
		StatusID:         "01FGXKP0S5THQXFC1D9R141DDR",
		OriginAccountID:  self.ID,
		TargetAccountID:  self.ID,
		NameString:       "@the_mighty_zork",
		TargetAccountURI: self.URI,
		TargetAccountURL: self.URL,
	}

	got := suite.formatter.ReplaceMentions(
		context.Background(),
		replaceMentionsWithLinkSelfString,
		[]*gtsmodel.Mention{selfMention},
	)

	suite.Equal(replaceMemtionsWithLinkSelfExpected, got)
}
func TestCommonTestSuite(t *testing.T) {
suite.Run(t, new(CommonTestSuite))
}

View file

@ -0,0 +1,71 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text
import (
"bytes"
"context"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/util"
)
// FromPlainEmojiOnly converts plain text to HTML using goldmark with the
// plaintext block parser, registering the custom extension in emoji-only
// mode. The rendered HTML is sanitized and minified before being returned.
//
// The returned FormatResult always has non-nil (possibly empty) Mentions,
// Tags and Emojis slices. A conversion error is logged, not returned;
// whatever was written to the output buffer is still post-processed.
func (f *formatter) FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
	out := &FormatResult{
		Mentions: []*gtsmodel.Mention{},
		Tags:     []*gtsmodel.Tag{},
		Emojis:   []*gtsmodel.Emoji{},
	}

	// The extension reports what it finds through out; emojiOnly is set so
	// that it is asked to handle emojis only.
	ext := &customRenderer{
		f:            f,
		ctx:          ctx,
		parseMention: pmf,
		accountID:    authorID,
		statusID:     statusID,
		emojiOnly:    true,
		result:       out,
	}

	// Parse the plain text into HTML with the plaintext block parser as the
	// only block parser.
	md := goldmark.New(
		goldmark.WithRendererOptions(
			html.WithXHTML(),
			html.WithHardWraps(),
		),
		goldmark.WithParser(
			parser.NewParser(
				parser.WithBlockParsers(
					util.Prioritized(newPlaintextParser(), 500),
				),
			),
		),
		goldmark.WithExtensions(ext),
	)

	var rendered bytes.Buffer
	if err := md.Convert([]byte(plain), &rendered); err != nil {
		log.Errorf("error formatting plaintext to HTML: %s", err)
	}

	// Clean anything dangerous out of the HTML, then shrink it.
	out.HTML = minifyHTML(SanitizeHTML(rendered.String()))

	return out
}

View file

@ -26,20 +26,19 @@ import (
)
// Formatter wraps some logic and functions for parsing statuses and other text input into nice html.
// Each of the member functions returns a struct containing the formatted HTML and any tags, mentions, and
// emoji that were found in the text.
type Formatter interface {
// FromPlain parses an HTML text from a plaintext.
FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string
FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult
// FromMarkdown parses an HTML text from a markdown-formatted text.
FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string
// ReplaceTags takes a piece of text and a slice of tags, and returns the same text with the tags nicely formatted as hrefs.
ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string
// ReplaceMentions takes a piece of text and a slice of mentions, and returns the same text with the mentions nicely formatted as hrefs.
ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string
// ReplaceLinks takes a piece of text, finds all recognizable links in that text, and replaces them with hrefs.
ReplaceLinks(ctx context.Context, in string) string
FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, md string) *FormatResult
// FromPlainEmojiOnly parses an HTML text from a plaintext, only parsing emojis and not mentions etc.
FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult
}
// FormatFunc is the common signature of the Formatter methods: it takes a
// context, a mention-parsing callback, the author and status IDs and the raw
// text, and returns the formatting result.
type FormatFunc func(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, text string) *FormatResult
// formatter is the package's Formatter implementation; it holds the database
// handle its methods use for lookups.
type formatter struct {
db db.DB
}
@ -50,3 +49,10 @@ func NewFormatter(db db.DB) Formatter {
db: db,
}
}
// FormatResult is what the Formatter methods return: the produced HTML plus
// the mentions, tags and emojis collected while formatting.
type FormatResult struct {
// HTML is the formatted output.
HTML string
// Mentions collected during formatting.
Mentions []*gtsmodel.Mention
// Tags collected during formatting.
Tags []*gtsmodel.Tag
// Emojis collected during formatting.
Emojis []*gtsmodel.Emoji
}

View file

@ -19,9 +19,13 @@
package text_test
import (
"context"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/concurrency"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/messages"
"github.com/superseriousbusiness/gotosocial/internal/processing"
"github.com/superseriousbusiness/gotosocial/internal/text"
"github.com/superseriousbusiness/gotosocial/testrig"
)
@ -29,7 +33,8 @@ import (
type TextStandardTestSuite struct {
// standard suite interfaces
suite.Suite
db db.DB
db db.DB
parseMention gtsmodel.ParseMentionFunc
// standard suite models
testTokens map[string]*gtsmodel.Token
@ -41,6 +46,7 @@ type TextStandardTestSuite struct {
testStatuses map[string]*gtsmodel.Status
testTags map[string]*gtsmodel.Tag
testMentions map[string]*gtsmodel.Mention
testEmojis map[string]*gtsmodel.Emoji
// module being tested
formatter text.Formatter
@ -56,6 +62,7 @@ func (suite *TextStandardTestSuite) SetupSuite() {
suite.testStatuses = testrig.NewTestStatuses()
suite.testTags = testrig.NewTestTags()
suite.testMentions = testrig.NewTestMentions()
suite.testEmojis = testrig.NewTestEmojis()
}
func (suite *TextStandardTestSuite) SetupTest() {
@ -63,6 +70,11 @@ func (suite *TextStandardTestSuite) SetupTest() {
testrig.InitTestConfig()
suite.db = testrig.NewTestDB()
fedWorker := concurrency.NewWorkerPool[messages.FromFederator](-1, -1)
federator := testrig.NewTestFederator(suite.db, testrig.NewTestTransportController(testrig.NewMockHTTPClient(nil, "../../testrig/media"), suite.db, fedWorker), nil, nil, fedWorker)
suite.parseMention = processing.GetParseMentionFunc(suite.db, federator)
suite.formatter = text.NewFormatter(suite.db)
testrig.StandardDBSetup(suite.db, nil)
@ -71,3 +83,11 @@ func (suite *TextStandardTestSuite) SetupTest() {
func (suite *TextStandardTestSuite) TearDownTest() {
testrig.StandardDBTeardown(suite.db)
}
// FromMarkdown is a test helper that formats text as markdown on behalf of
// local_account_1, using a background context, the suite's mention parser
// and the placeholder status ID "status_ID".
func (suite *TextStandardTestSuite) FromMarkdown(text string) *text.FormatResult {
	ctx := context.Background()
	authorID := suite.testAccounts["local_account_1"].ID
	return suite.formatter.FromMarkdown(ctx, suite.parseMention, authorID, "status_ID", text)
}
// FromPlain is a test helper that formats text as plaintext on behalf of
// local_account_1, using a background context, the suite's mention parser
// and the placeholder status ID "status_ID".
func (suite *TextStandardTestSuite) FromPlain(text string) *text.FormatResult {
	ctx := context.Background()
	authorID := suite.testAccounts["local_account_1"].ID
	return suite.formatter.FromPlain(ctx, suite.parseMention, authorID, "status_ID", text)
}

View file

@ -17,8 +17,10 @@ package text
import (
"context"
"unicode"
"fmt"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
@ -46,8 +48,14 @@ type hashtag struct {
Segment text.Segment
}
// emoji is an inline AST node for an emoji shortcode. Segment records which
// part of the source the node covers.
type emoji struct {
ast.BaseInline
Segment text.Segment
}
// Node kinds for the custom inline nodes defined in this file; each Kind()
// method returns the matching value.
var kindMention = ast.NewNodeKind("Mention")
var kindHashtag = ast.NewNodeKind("Hashtag")
var kindEmoji = ast.NewNodeKind("Emoji")
func (n *mention) Kind() ast.NodeKind {
return kindMention
@ -57,14 +65,21 @@ func (n *hashtag) Kind() ast.NodeKind {
return kindHashtag
}
// Dump is used by goldmark for debugging. It is implemented only minimally because
// it is not used in our code.
// Kind returns kindEmoji, the node kind of emoji nodes.
func (n *emoji) Kind() ast.NodeKind {
return kindEmoji
}
// Dump can be used for debugging.
func (n *mention) Dump(source []byte, level int) {
ast.DumpHelper(n, source, level, nil, nil)
fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
}
func (n *hashtag) Dump(source []byte, level int) {
ast.DumpHelper(n, source, level, nil, nil)
fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
}
// Dump prints the emoji's source text to standard output, prefixed with
// "Emoji: " and indented according to level. It can be used for debugging.
func (n *emoji) Dump(source []byte, level int) {
	indent := strings.Repeat(" ", level)
	value := string(n.Segment.Value(source))
	fmt.Printf("%sEmoji: %s\n", indent, value)
}
// newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment.
@ -83,6 +98,13 @@ func newHashtag(s text.Segment) *hashtag {
}
}
// newEmoji creates an emoji node covering the given segment of the source.
func newEmoji(s text.Segment) *emoji {
	node := new(emoji)
	node.Segment = s
	return node
}
// mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface.
// mentionParser is triggered by '@' and carries no state.
type mentionParser struct {
}
@ -90,6 +112,9 @@ type mentionParser struct {
// hashtagParser is the inline parser triggered by '#'. It carries no state.
type hashtagParser struct {
}
// emojiParser is the inline parser triggered by ':'. It carries no state.
type emojiParser struct {
}
// Trigger returns the single byte that starts a mention: '@'.
func (p *mentionParser) Trigger() []byte {
	return []byte("@")
}
@ -98,11 +123,15 @@ func (p *hashtagParser) Trigger() []byte {
return []byte{'#'}
}
// Trigger returns the single byte that starts an emoji shortcode: ':'.
func (p *emojiParser) Trigger() []byte {
	return []byte(":")
}
func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
before := block.PrecendingCharacter()
line, segment := block.PeekLine()
if !unicode.IsSpace(before) {
if !util.IsMentionOrHashtagBoundary(before) {
return nil
}
@ -124,59 +153,88 @@ func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
line, segment := block.PeekLine()
s := string(line)
if !util.IsHashtagBoundary(before) {
if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 {
return nil
}
for i, r := range s {
switch {
case r == '#' && i == 0:
// ignore initial #
continue
case !util.IsPermittedInHashtag(r) && !util.IsHashtagBoundary(r):
case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r):
// Fake hashtag, don't trust it
return nil
case util.IsHashtagBoundary(r):
case util.IsMentionOrHashtagBoundary(r):
if i <= 1 {
// empty
return nil
}
// End of hashtag
block.Advance(i)
return newHashtag(segment.WithStop(segment.Start + i))
}
}
// If we don't find invalid characters before the end of the line then it's good
block.Advance(len(s))
// If we don't find invalid characters before the end of the line then it's all hashtag, babey
block.Advance(segment.Len())
return newHashtag(segment)
}
// Parse tries to read an emoji shortcode at the current reader position.
// It runs regexes.EmojiFinder over the rest of the line and only accepts a
// match that begins at offset 0; in that case the reader is advanced past
// the match and an emoji node covering it is returned. Otherwise it returns
// nil and leaves the reader where it was.
func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	line, segment := block.PeekLine()

	// Unideal for performance, but makes use of the existing regex.
	match := regexes.EmojiFinder.FindIndex(line)
	if match == nil || match[0] != 0 {
		// Nothing found, or not found at the start of the line.
		return nil
	}

	end := match[1]
	block.Advance(end)
	return newEmoji(segment.WithStop(segment.Start + end))
}
// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
// It is created in FromMarkdown to be used a goldmark extension, and the fields are used
// when rendering mentions and tags.
// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the
// fields are used to report tags and mentions to the caller for use as metadata.
type customRenderer struct {
f *formatter
ctx context.Context
mentions []*gtsmodel.Mention
tags []*gtsmodel.Tag
f *formatter
ctx context.Context
parseMention gtsmodel.ParseMentionFunc
accountID string
statusID string
emojiOnly bool
result *FormatResult
}
// RegisterFuncs registers this renderer's methods as the render functions
// for the three custom node kinds: mentions, hashtags and emojis.
func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
reg.Register(kindMention, r.renderMention)
reg.Register(kindHashtag, r.renderHashtag)
reg.Register(kindEmoji, r.renderEmoji)
}
func (r *customRenderer) Extend(m goldmark.Markdown) {
// 1000 is set as the lowest priority, but it's arbitrary
m.Parser().AddOptions(parser.WithInlineParsers(
// 500 is pretty arbitrary here, it was copied from example goldmark extension code.
// https://github.com/yuin/goldmark/blob/75d8cce5b78c7e1d5d9c4ca32c1164f0a1e57b53/extension/strikethrough.go#L111
mdutil.Prioritized(&mentionParser{}, 500),
mdutil.Prioritized(&hashtagParser{}, 500),
mdutil.Prioritized(&emojiParser{}, 1000),
))
if !r.emojiOnly {
m.Parser().AddOptions(parser.WithInlineParsers(
mdutil.Prioritized(&mentionParser{}, 1000),
mdutil.Prioritized(&hashtagParser{}, 1000),
))
}
m.Renderer().AddOptions(renderer.WithNodeRenderers(
mdutil.Prioritized(r, 500),
mdutil.Prioritized(r, 1000),
))
}
// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.
func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
return ast.WalkContinue, nil
return ast.WalkSkipChildren, nil
}
n, ok := node.(*mention) // this function is only registered for kindMention
@ -185,18 +243,18 @@ func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node a
}
text := string(n.Segment.Value(source))
html := r.f.ReplaceMentions(r.ctx, text, r.mentions)
html := r.replaceMention(text)
// we don't have much recourse if this fails
if _, err := w.WriteString(html); err != nil {
log.Errorf("error outputting markdown text: %s", err)
log.Errorf("error writing HTML: %s", err)
}
return ast.WalkContinue, nil
return ast.WalkSkipChildren, nil
}
func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
return ast.WalkContinue, nil
return ast.WalkSkipChildren, nil
}
n, ok := node.(*hashtag) // this function is only registered for kindHashtag
@ -205,11 +263,50 @@ func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node a
}
text := string(n.Segment.Value(source))
html := r.f.ReplaceTags(r.ctx, text, r.tags)
html := r.replaceHashtag(text)
_, err := w.WriteString(html)
// we don't have much recourse if this fails
if err != nil {
log.Errorf("error writing HTML: %s", err)
}
return ast.WalkSkipChildren, nil
}
// renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata.
func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
return ast.WalkSkipChildren, nil
}
n, ok := node.(*emoji) // this function is only registered for kindEmoji
if !ok {
log.Errorf("type assertion failed")
}
text := string(n.Segment.Value(source))
shortcode := text[1 : len(text)-1]
emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "")
if err != nil {
if err != db.ErrNoEntries {
log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err)
}
} else if *emoji.VisibleInPicker && !*emoji.Disabled {
listed := false
for _, e := range r.result.Emojis {
if e.Shortcode == emoji.Shortcode {
listed = true
break
}
}
if !listed {
r.result.Emojis = append(r.result.Emojis, emoji)
}
}
// we don't have much recourse if this fails
if _, err := w.WriteString(html); err != nil {
log.Errorf("error outputting markdown text: %s", err)
if _, err := w.WriteString(text); err != nil {
log.Errorf("error writing HTML: %s", err)
}
return ast.WalkContinue, nil
return ast.WalkSkipChildren, nil
}

View file

@ -0,0 +1,64 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text
import (
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
)
// plaintextParser implements goldmark.parser.BlockParser
// It puts every line it is given into a paragraph node and carries no state.
type plaintextParser struct {
}
// defaultPlaintextParser is the single shared instance handed out by newPlaintextParser.
var defaultPlaintextParser = &plaintextParser{}
// newPlaintextParser returns the shared plaintextParser as a parser.BlockParser.
// No new value is allocated per call.
func newPlaintextParser() parser.BlockParser {
return defaultPlaintextParser
}
// Trigger returns nil: the parser names no specific trigger bytes.
// NOTE(review): presumably goldmark then tries this parser on every line — confirm against the goldmark BlockParser docs.
func (b *plaintextParser) Trigger() []byte {
return nil
}
// Open starts a new paragraph node holding the current line. The reader is
// advanced by one less than the line's length, and NoChildren is returned so
// that no nested blocks are parsed inside the paragraph.
func (b *plaintextParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) {
	_, line := reader.PeekLine()

	para := ast.NewParagraph()
	para.Lines().Append(line)

	reader.Advance(line.Len() - 1)
	return para, parser.NoChildren
}
// Continue appends the current line to the already-open node and advances
// the reader by one less than the line's length. It always asks to keep the
// block open, without children.
func (b *plaintextParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State {
	_, line := reader.PeekLine()

	node.Lines().Append(line)
	reader.Advance(line.Len() - 1)

	return parser.Continue | parser.NoChildren
}
// Close is a no-op; nothing needs finalising when a plaintext paragraph ends.
func (b *plaintextParser) Close(node ast.Node, reader text.Reader, pc parser.Context) {}
// CanInterruptParagraph always reports false.
func (b *plaintextParser) CanInterruptParagraph() bool {
return false
}
// CanAcceptIndentedLine always reports true.
func (b *plaintextParser) CanAcceptIndentedLine() bool {
return true
}

View file

@ -1,86 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text
import (
"bytes"
"context"
"net/url"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
)
// FindLinks scans in for substrings matching regexes.LinkScheme and returns
// them as parsed URLs, in order of first appearance. Matches that fail to
// parse are silently skipped, and URLs that serialise to the same string are
// only returned once. The input string is not modified.
//
// The result is nil when nothing matches, or when no match parses.
func FindLinks(in string) []*url.URL {
	matches := regexes.LinkScheme.FindAllString(in, -1)
	if len(matches) == 0 {
		// Nothing that looks like a link at all.
		return nil
	}

	var links []*url.URL
	seen := map[string]struct{}{}

	for _, raw := range matches {
		parsed, err := url.Parse(raw)
		if err != nil {
			// Not parseable as a URL; ignore this match.
			continue
		}

		// Deduplicate on the serialised form.
		key := parsed.String()
		if _, dup := seen[key]; dup {
			continue
		}
		seen[key] = struct{}{}
		links = append(links, parsed)
	}

	return links
}
// ReplaceLinks wraps every substring of in that matches regexes.LinkScheme
// in an HTML anchor. The href is the parsed URL's string form; the visible
// text is the same string with the scheme and "://" stripped from the front.
// Matches that cannot be parsed as a URL are left untouched.
//
// Note: because Go doesn't allow negative lookbehinds in regex, text that
// already contains hrefs can end up double-formatted. To avoid this,
// sanitize any HTML out of the text before passing it in.
func (f *formatter) ReplaceLinks(ctx context.Context, in string) string {
	linkify := func(urlString string, buf *bytes.Buffer) string {
		parsed, err := url.Parse(urlString)
		if err != nil {
			// We can't parse it as a URL, so don't replace it.
			return urlString
		}

		href := parsed.String()

		// Visible text: the URL without its scheme prefix.
		label := strings.TrimPrefix(href, parsed.Scheme)
		label = strings.TrimPrefix(label, "://")

		// <a href="href" rel="noopener">label</a>
		buf.WriteString(`<a href="`)
		buf.WriteString(href)
		buf.WriteString(`" rel="noopener">`)
		buf.WriteString(label)
		buf.WriteString(`</a>`)
		return buf.String()
	}

	return regexes.ReplaceAllStringFunc(regexes.LinkScheme, in, linkify)
}

View file

@ -1,157 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text_test
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/text"
)
// text1 mixes several http/https links with a scheme-less hostname and two
// URLs that are run together.
const text1 = `
This is a text with some links in it. Here's link number one: https://example.org/link/to/something#fragment
Here's link number two: http://test.example.org?q=bahhhhhhhhhhhh
https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it
really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme
https://example.orghttps://google.com <-- this shouldn't work either, but it does?! OK
`
// text2 contains the same link twice, to exercise deduplication.
const text2 = `
this is one link: https://example.org
this is the same link again: https://example.org
these should be deduplicated
`
// text3 contains only a mailto: link.
const text3 = `
here's a mailto link: mailto:whatever@test.org
`
// text4 contains two links where one is a prefix of the other.
const text4 = `
two similar links:
https://example.org
https://example.org/test
`
// text5 contains a link that is already wrapped in an HTML anchor.
const text5 = `
what happens when we already have a link within an href?
<a href="https://example.org">https://example.org</a>
`
// LinkTestSuite groups the tests for FindLinks and ReplaceLinks. It embeds
// TextStandardTestSuite for its fixtures.
type LinkTestSuite struct {
TextStandardTestSuite
}
// TestParseSimple formats the simple fixture (declared outside this file)
// with FromPlain, passing no mentions or tags, and compares the result with
// simpleExpected.
func (suite *LinkTestSuite) TestParseSimple() {
	got := suite.formatter.FromPlain(context.Background(), simple, nil, nil)

	suite.Equal(simpleExpected, got)
}
// TestParseURLsFromText1 checks the first four URLs FindLinks extracts from
// text1, in order. The scheme-less hostname is not among them; the two
// run-together URLs come back as a single URL.
func (suite *LinkTestSuite) TestParseURLsFromText1() {
	want := []string{
		"https://example.org/link/to/something#fragment",
		"http://test.example.org?q=bahhhhhhhhhhhh",
		"https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it",
		"https://example.orghttps://google.com",
	}

	found := text.FindLinks(text1)

	for i, expected := range want {
		suite.Equal(expected, found[i].String())
	}
}
// TestParseURLsFromText2 checks that the link repeated in text2 is reported
// only once.
func (suite *LinkTestSuite) TestParseURLsFromText2() {
	found := text.FindLinks(text2)

	// Exactly one entry, because the found links are deduplicated.
	assert.Len(suite.T(), found, 1)
}
// TestParseURLsFromText3 checks that a mailto: link is not reported.
func (suite *LinkTestSuite) TestParseURLsFromText3() {
	found := text.FindLinks(text3)

	// No entries, because `mailto:` isn't accepted.
	assert.Len(suite.T(), found, 0)
}
// TestReplaceLinksFromText1 checks that every scheme-prefixed link in text1
// is wrapped in an anchor and that the scheme-less hostname is left alone.
func (suite *LinkTestSuite) TestReplaceLinksFromText1() {
	const want = `
This is a text with some links in it. Here's link number one: <a href="https://example.org/link/to/something#fragment" rel="noopener">example.org/link/to/something#fragment</a>
Here's link number two: <a href="http://test.example.org?q=bahhhhhhhhhhhh" rel="noopener">test.example.org?q=bahhhhhhhhhhhh</a>
<a href="https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it" rel="noopener">another.link.example.org/with/a/pretty/long/path/at/the/end/of/it</a>
really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme
<a href="https://example.orghttps://google.com" rel="noopener">example.orghttps://google.com</a> <-- this shouldn't work either, but it does?! OK
`

	got := suite.formatter.ReplaceLinks(context.Background(), text1)

	suite.Equal(want, got)
}
// TestReplaceLinksFromText2 checks that both occurrences of the repeated
// link in text2 are wrapped in anchors.
func (suite *LinkTestSuite) TestReplaceLinksFromText2() {
	const want = `
this is one link: <a href="https://example.org" rel="noopener">example.org</a>
this is the same link again: <a href="https://example.org" rel="noopener">example.org</a>
these should be deduplicated
`

	got := suite.formatter.ReplaceLinks(context.Background(), text2)

	suite.Equal(want, got)
}
// TestReplaceLinksFromText3 checks that a mailto: link is returned
// unchanged: only http and https links are turned into hrefs.
func (suite *LinkTestSuite) TestReplaceLinksFromText3() {
	const want = `
here's a mailto link: mailto:whatever@test.org
`

	got := suite.formatter.ReplaceLinks(context.Background(), text3)

	suite.Equal(want, got)
}
// TestReplaceLinksFromText4 checks that two URLs sharing a common prefix
// are each replaced with the correct, distinct anchor.
func (suite *LinkTestSuite) TestReplaceLinksFromText4() {
	want := `
two similar links:
<a href="https://example.org" rel="noopener">example.org</a>
<a href="https://example.org/test" rel="noopener">example.org/test</a>
`

	got := suite.formatter.ReplaceLinks(context.Background(), text4)
	suite.Equal(want, got)
}
// TestReplaceLinksFromText5 pins the known-bad output produced when the input
// already contains an anchor tag.
func (suite *LinkTestSuite) TestReplaceLinksFromText5() {
	// This case is known not to work properly: the URL inside the existing href
	// gets wrapped again. That's why html should always be sanitized before
	// being passed into the ReplaceLinks function.
	want := `
what happens when we already have a link within an href?
<a href="<a href="https://example.org" rel="noopener">example.org</a>"><a href="https://example.org" rel="noopener">example.org</a></a>
`

	got := suite.formatter.ReplaceLinks(context.Background(), text5)
	suite.Equal(want, got)
}
// TestLinkTestSuite is the `go test` entry point that runs every LinkTestSuite case.
func TestLinkTestSuite(t *testing.T) {
	suite.Run(t, &LinkTestSuite{})
}

View file

@ -21,32 +21,19 @@ package text
import (
"bytes"
"context"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/tdewolff/minify/v2"
minifyHtml "github.com/tdewolff/minify/v2/html"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/renderer/html"
)
var (
m *minify.M
)
func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string {
// Temporarily replace all found emoji shortcodes in the markdown text with
// their ID so that they're not parsed as anything by the markdown parser -
// this fixes cases where emojis with some underscores in them are parsed as
// words with emphasis, eg `:_some_emoji:` becomes `:<em>some</em>emoji:`
//
// Since the IDs of the emojis are just uppercase letters + numbers they should
// be safe to pass through the markdown parser without unexpected effects.
for _, e := range emojis {
markdownText = strings.ReplaceAll(markdownText, ":"+e.Shortcode+":", ":"+e.ID+":")
func (f *formatter) FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, markdownText string) *FormatResult {
result := &FormatResult{
Mentions: []*gtsmodel.Mention{},
Tags: []*gtsmodel.Tag{},
Emojis: []*gtsmodel.Emoji{},
}
// parse markdown text into html, using custom renderer to add hashtag/mention links
@ -57,7 +44,7 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti
html.WithUnsafe(), // allows raw HTML
),
goldmark.WithExtensions(
&customRenderer{f, ctx, mentions, tags},
&customRenderer{f, ctx, pmf, authorID, statusID, false, result},
extension.Linkify, // turns URLs into links
extension.Strikethrough,
),
@ -66,30 +53,15 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti
var htmlContentBytes bytes.Buffer
err := md.Convert([]byte(markdownText), &htmlContentBytes)
if err != nil {
log.Errorf("error rendering markdown to HTML: %s", err)
log.Errorf("error formatting markdown to HTML: %s", err)
}
htmlContent := htmlContentBytes.String()
result.HTML = htmlContentBytes.String()
// Replace emoji IDs in the parsed html content with their shortcodes again
for _, e := range emojis {
htmlContent = strings.ReplaceAll(htmlContent, ":"+e.ID+":", ":"+e.Shortcode+":")
}
// clean anything dangerous out of the HTML
result.HTML = SanitizeHTML(result.HTML)
// clean anything dangerous out of the html
htmlContent = SanitizeHTML(htmlContent)
// shrink ray
result.HTML = minifyHTML(result.HTML)
if m == nil {
m = minify.New()
m.Add("text/html", &minifyHtml.Minifier{
KeepEndTags: true,
KeepQuotes: true,
})
}
minified, err := m.String("text/html", htmlContent)
if err != nil {
log.Errorf("error minifying markdown text: %s", err)
}
return minified
return result
}

View file

@ -19,11 +19,9 @@
package text_test
import (
"context"
"testing"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
var withCodeBlock = `# Title
@ -77,6 +75,16 @@ const (
mdWithStrikethroughExpected = "<p>I have <del>mdae</del> made an error</p>"
mdWithLink = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial"
mdWithLinkExpected = "<p>Check out this code, i heard it was written by a sloth <a href=\"https://github.com/superseriousbusiness/gotosocial\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://github.com/superseriousbusiness/gotosocial</a></p>"
mdObjectInCodeBlock = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps"
mdObjectInCodeBlockExpected = "<p><span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span> this is how to mention a user</p><pre><code>@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you&#39;ve been writing lately! :rainbow:\n</code></pre><p>hope that helps</p>"
mdItalicHashtag = "_#hashtag_"
mdItalicHashtagExpected = "<p><em><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>"
mdItalicHashtags = "_#hashtag #hashtag #hashtag_"
mdItalicHashtagsExpected = "<p><em><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>"
// BEWARE: sneaky unicode business going on.
// the first ö is one rune, the second ö is an o with a combining diacritic.
mdUnnormalizedHashtag = "#hellöthere #hellöthere"
mdUnnormalizedHashtagExpected = "<p><a href=\"http://localhost:8080/tags/hell%C3%B6there\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hellöthere</span></a> <a href=\"http://localhost:8080/tags/hell%C3%B6there\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hellöthere</span></a></p>"
)
type MarkdownTestSuite struct {
@ -84,101 +92,112 @@ type MarkdownTestSuite struct {
}
func (suite *MarkdownTestSuite) TestParseSimple() {
s := suite.formatter.FromMarkdown(context.Background(), simpleMarkdown, nil, nil, nil)
suite.Equal(simpleMarkdownExpected, s)
formatted := suite.FromMarkdown(simpleMarkdown)
suite.Equal(simpleMarkdownExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithCodeBlock() {
s := suite.formatter.FromMarkdown(context.Background(), withCodeBlock, nil, nil, nil)
suite.Equal(withCodeBlockExpected, s)
formatted := suite.FromMarkdown(withCodeBlock)
suite.Equal(withCodeBlockExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithInlineCode() {
s := suite.formatter.FromMarkdown(context.Background(), withInlineCode, nil, nil, nil)
suite.Equal(withInlineCodeExpected, s)
formatted := suite.FromMarkdown(withInlineCode)
suite.Equal(withInlineCodeExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithInlineCode2() {
s := suite.formatter.FromMarkdown(context.Background(), withInlineCode2, nil, nil, nil)
suite.Equal(withInlineCode2Expected, s)
formatted := suite.FromMarkdown(withInlineCode2)
suite.Equal(withInlineCode2Expected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithHashtag() {
foundTags := []*gtsmodel.Tag{
suite.testTags["Hashtag"],
}
s := suite.formatter.FromMarkdown(context.Background(), withHashtag, nil, foundTags, nil)
suite.Equal(withHashtagExpected, s)
formatted := suite.FromMarkdown(withHashtag)
suite.Equal(withHashtagExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithHTML() {
s := suite.formatter.FromMarkdown(context.Background(), mdWithHTML, nil, nil, nil)
suite.Equal(mdWithHTMLExpected, s)
formatted := suite.FromMarkdown(mdWithHTML)
suite.Equal(mdWithHTMLExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithCheekyHTML() {
s := suite.formatter.FromMarkdown(context.Background(), mdWithCheekyHTML, nil, nil, nil)
suite.Equal(mdWithCheekyHTMLExpected, s)
formatted := suite.FromMarkdown(mdWithCheekyHTML)
suite.Equal(mdWithCheekyHTMLExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithHashtagInitial() {
s := suite.formatter.FromMarkdown(context.Background(), mdWithHashtagInitial, nil, []*gtsmodel.Tag{
suite.testTags["Hashtag"],
suite.testTags["welcome"],
}, nil)
suite.Equal(mdWithHashtagInitialExpected, s)
formatted := suite.FromMarkdown(mdWithHashtagInitial)
suite.Equal(mdWithHashtagInitialExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseCodeBlockWithNewlines() {
s := suite.formatter.FromMarkdown(context.Background(), mdCodeBlockWithNewlines, nil, nil, nil)
suite.Equal(mdCodeBlockWithNewlinesExpected, s)
formatted := suite.FromMarkdown(mdCodeBlockWithNewlines)
suite.Equal(mdCodeBlockWithNewlinesExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithFootnote() {
s := suite.formatter.FromMarkdown(context.Background(), mdWithFootnote, nil, nil, nil)
suite.Equal(mdWithFootnoteExpected, s)
formatted := suite.FromMarkdown(mdWithFootnote)
suite.Equal(mdWithFootnoteExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseWithBlockquote() {
s := suite.formatter.FromMarkdown(context.Background(), mdWithBlockQuote, nil, nil, nil)
suite.Equal(mdWithBlockQuoteExpected, s)
formatted := suite.FromMarkdown(mdWithBlockQuote)
suite.Equal(mdWithBlockQuoteExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseHashtagWithCodeBlock() {
s := suite.formatter.FromMarkdown(context.Background(), mdHashtagAndCodeBlock, nil, []*gtsmodel.Tag{
suite.testTags["Hashtag"],
}, nil)
suite.Equal(mdHashtagAndCodeBlockExpected, s)
formatted := suite.FromMarkdown(mdHashtagAndCodeBlock)
suite.Equal(mdHashtagAndCodeBlockExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseMentionWithCodeBlock() {
s := suite.formatter.FromMarkdown(context.Background(), mdMentionAndCodeBlock, []*gtsmodel.Mention{
suite.testMentions["local_user_2_mention_zork"],
}, nil, nil)
suite.Equal(mdMentionAndCodeBlockExpected, s)
formatted := suite.FromMarkdown(mdMentionAndCodeBlock)
suite.Equal(mdMentionAndCodeBlockExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseSmartypants() {
s := suite.formatter.FromMarkdown(context.Background(), mdWithSmartypants, []*gtsmodel.Mention{
suite.testMentions["local_user_2_mention_zork"],
}, nil, nil)
suite.Equal(mdWithSmartypantsExpected, s)
formatted := suite.FromMarkdown(mdWithSmartypants)
suite.Equal(mdWithSmartypantsExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseAsciiHeart() {
s := suite.formatter.FromMarkdown(context.Background(), mdWithAsciiHeart, nil, nil, nil)
suite.Equal(mdWithAsciiHeartExpected, s)
formatted := suite.FromMarkdown(mdWithAsciiHeart)
suite.Equal(mdWithAsciiHeartExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseStrikethrough() {
s := suite.formatter.FromMarkdown(context.Background(), mdWithStrikethrough, nil, nil, nil)
suite.Equal(mdWithStrikethroughExpected, s)
formatted := suite.FromMarkdown(mdWithStrikethrough)
suite.Equal(mdWithStrikethroughExpected, formatted.HTML)
}
func (suite *MarkdownTestSuite) TestParseLink() {
s := suite.formatter.FromMarkdown(context.Background(), mdWithLink, nil, nil, nil)
suite.Equal(mdWithLinkExpected, s)
formatted := suite.FromMarkdown(mdWithLink)
suite.Equal(mdWithLinkExpected, formatted.HTML)
}
// TestParseObjectInCodeBlock checks that a mention outside a fenced code block
// is picked up, while the mention, hashtag and emoji shortcode inside the
// block are left as literal text and not reported in the result.
func (suite *MarkdownTestSuite) TestParseObjectInCodeBlock() {
	result := suite.FromMarkdown(mdObjectInCodeBlock)
	suite.Equal(mdObjectInCodeBlockExpected, result.HTML)

	// only the mention outside the code block counts
	suite.Len(result.Mentions, 1)
	suite.Equal("@foss_satan@fossbros-anonymous.io", result.Mentions[0].NameString)

	// everything else lives inside the code block
	suite.Empty(result.Tags)
	suite.Empty(result.Emojis)
}
// TestParseItalicHashtag checks that a hashtag wrapped in emphasis markers
// is rendered as a hashtag link inside the <em> element.
func (suite *MarkdownTestSuite) TestParseItalicHashtag() {
	got := suite.FromMarkdown(mdItalicHashtag).HTML
	suite.Equal(mdItalicHashtagExpected, got)
}
// TestParseItalicHashtags checks that several hashtags sharing one emphasis
// span are each rendered as a hashtag link.
func (suite *MarkdownTestSuite) TestParseItalicHashtags() {
	got := suite.FromMarkdown(mdItalicHashtags).HTML
	suite.Equal(mdItalicHashtagsExpected, got)
}
// TestParseUnnormalizedHashtag checks that two visually identical hashtags,
// one using a precomposed character and one using a combining diacritic,
// are rendered with the same tag URL and text.
func (suite *MarkdownTestSuite) TestParseUnnormalizedHashtag() {
	got := suite.FromMarkdown(mdUnnormalizedHashtag).HTML
	suite.Equal(mdUnnormalizedHashtagExpected, got)
}
func TestMarkdownTestSuite(t *testing.T) {

45
internal/text/minify.go Normal file
View file

@ -0,0 +1,45 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text
import (
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/html"
)
// m is the HTML minifier shared by everything in this package.
//
// It is constructed once, during package initialisation, rather than lazily
// on first use: a lazy nil-check-and-assign on a package-level variable is an
// unsynchronised write, and so a data race when formatting runs concurrently.
var m = func() *minify.M {
	minifier := minify.New()
	minifier.Add("text/html", &html.Minifier{
		// keep closing tags and attribute quotes so the
		// output remains conventional, well-formed markup
		KeepEndTags: true,
		KeepQuotes:  true,
	})
	return minifier
}()

// minifyHTML runs the given HTML content through the shared minifier and
// returns the result. If minification fails the error is logged, and whatever
// string the minifier handed back is returned as-is.
func minifyHTML(content string) string {
	minified, err := m.String("text/html", content)
	if err != nil {
		log.Errorf("error minifying HTML: %s", err)
	}
	return minified
}

View file

@ -19,40 +19,56 @@
package text
import (
"bytes"
"context"
"html"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/util"
)
// breakReplacer replaces new-lines with HTML breaks.
var breakReplacer = strings.NewReplacer(
"\r\n", "<br/>",
"\n", "<br/>",
)
func (f *formatter) FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult {
result := &FormatResult{
Mentions: []*gtsmodel.Mention{},
Tags: []*gtsmodel.Tag{},
Emojis: []*gtsmodel.Emoji{},
}
func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string {
// trim any crap
content := strings.TrimSpace(plain)
// parse markdown text into html, using custom renderer to add hashtag/mention links
md := goldmark.New(
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithHardWraps(),
),
goldmark.WithParser(
parser.NewParser(
parser.WithBlockParsers(
util.Prioritized(newPlaintextParser(), 500),
),
),
),
goldmark.WithExtensions(
&customRenderer{f, ctx, pmf, authorID, statusID, false, result},
extension.Linkify, // turns URLs into links
),
)
// clean 'er up
content = html.EscapeString(content)
var htmlContentBytes bytes.Buffer
err := md.Convert([]byte(plain), &htmlContentBytes)
if err != nil {
log.Errorf("error formatting plaintext to HTML: %s", err)
}
result.HTML = htmlContentBytes.String()
// format links nicely
content = f.ReplaceLinks(ctx, content)
// clean anything dangerous out of the HTML
result.HTML = SanitizeHTML(result.HTML)
// format tags nicely
content = f.ReplaceTags(ctx, content, tags)
// shrink ray
result.HTML = minifyHTML(result.HTML)
// format mentions nicely
content = f.ReplaceMentions(ctx, content, mentions)
// replace newlines with breaks
content = breakReplacer.Replace(content)
// wrap the whole thing in a pee
content = `<p>` + content + `</p>`
return SanitizeHTML(content)
return result
}

View file

@ -19,22 +19,21 @@
package text_test
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
const (
simple = "this is a plain and simple status"
simpleExpected = "<p>this is a plain and simple status</p>"
withTag = "here's a simple status that uses hashtag #welcome!"
withTagExpected = "<p>here&#39;s a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>"
withHTML = "<div>blah this should just be html escaped blah</div>"
withHTMLExpected = "<p>&lt;div&gt;blah this should just be html escaped blah&lt;/div&gt;</p>"
moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText"
moreComplexFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br/><br/>Text</p>"
simple = "this is a plain and simple status"
simpleExpected = "<p>this is a plain and simple status</p>"
withTag = "here's a simple status that uses hashtag #welcome!"
withTagExpected = "<p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>"
withHTML = "<div>blah this should just be html escaped blah</div>"
withHTMLExpected = "<p>&lt;div>blah this should just be html escaped blah&lt;/div></p>"
moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText\n\n:rainbow:"
moreComplexExpected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text<br><br>:rainbow:</p>"
)
type PlainTestSuite struct {
@ -42,35 +41,105 @@ type PlainTestSuite struct {
}
func (suite *PlainTestSuite) TestParseSimple() {
f := suite.formatter.FromPlain(context.Background(), simple, nil, nil)
suite.Equal(simpleExpected, f)
formatted := suite.FromPlain(simple)
suite.Equal(simpleExpected, formatted.HTML)
}
func (suite *PlainTestSuite) TestParseWithTag() {
foundTags := []*gtsmodel.Tag{
suite.testTags["welcome"],
}
f := suite.formatter.FromPlain(context.Background(), withTag, nil, foundTags)
suite.Equal(withTagExpected, f)
formatted := suite.FromPlain(withTag)
suite.Equal(withTagExpected, formatted.HTML)
}
func (suite *PlainTestSuite) TestParseWithHTML() {
f := suite.formatter.FromPlain(context.Background(), withHTML, nil, nil)
suite.Equal(withHTMLExpected, f)
formatted := suite.FromPlain(withHTML)
suite.Equal(withHTMLExpected, formatted.HTML)
}
func (suite *PlainTestSuite) TestParseMoreComplex() {
foundTags := []*gtsmodel.Tag{
suite.testTags["Hashtag"],
}
formatted := suite.FromPlain(moreComplex)
suite.Equal(moreComplexExpected, formatted.HTML)
}
foundMentions := []*gtsmodel.Mention{
suite.testMentions["zork_mention_foss_satan"],
}
func (suite *PlainTestSuite) TestLinkNoMention() {
statusText := `here's a link to a post by zork
f := suite.formatter.FromPlain(context.Background(), moreComplex, foundMentions, foundTags)
suite.Equal(moreComplexFull, f)
https://example.com/@the_mighty_zork/statuses/01FGVP55XMF2K6316MQRX6PFG1
that link shouldn't come out formatted as a mention!`
menchies := suite.FromPlain(statusText).Mentions
suite.Empty(menchies)
}
// TestDeriveMentionsEmpty checks that formatting an empty status yields no mentions.
func (suite *PlainTestSuite) TestDeriveMentionsEmpty() {
	mentions := suite.FromPlain("").Mentions
	assert.Len(suite.T(), mentions, 0)
}
// TestDeriveHashtagsOK feeds FromPlain a status containing a mix of valid
// hashtags, duplicates, URL fragments, over-long tags and lone '#' characters,
// and checks exactly which tags are derived and in what order.
func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
	statusText := `weeeeeeee #testing123 #also testing
# testing this one shouldn't work
#thisshouldwork #dupe #dupe!! #dupe
here's a link with a fragment: https://example.org/whatever#ahhh
here's another link with a fragment: https://example.org/whatever/#ahhh
(#ThisShouldAlsoWork) #this_should_be_split
#111111 thisalsoshouldn'twork#### ##
#alimentación, #saúde, #lävistää, #ö, #네
#ThisOneIsThirtyOneCharactersLon... ...ng
#ThisOneIsThirteyCharactersLong
`
	tags := suite.FromPlain(statusText).Tags

	// assert.Len doesn't stop the test on failure, so bail out
	// explicitly rather than panic on an out-of-range index below
	if !assert.Len(suite.T(), tags, 13) {
		return
	}
	assert.Equal(suite.T(), "testing123", tags[0].Name)
	assert.Equal(suite.T(), "also", tags[1].Name)
	assert.Equal(suite.T(), "thisshouldwork", tags[2].Name)
	assert.Equal(suite.T(), "dupe", tags[3].Name)
	assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4].Name)
	assert.Equal(suite.T(), "this", tags[5].Name)
	assert.Equal(suite.T(), "111111", tags[6].Name)
	assert.Equal(suite.T(), "alimentación", tags[7].Name)
	assert.Equal(suite.T(), "saúde", tags[8].Name)
	assert.Equal(suite.T(), "lävistää", tags[9].Name)
	assert.Equal(suite.T(), "ö", tags[10].Name)
	assert.Equal(suite.T(), "네", tags[11].Name)
	assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[12].Name)

	// a hashtag at the very start of the status should be found too
	statusText = `#올빼미 hej`
	tags = suite.FromPlain(statusText).Tags
	if assert.NotEmpty(suite.T(), tags) {
		assert.Equal(suite.T(), "올빼미", tags[0].Name)
	}
}
// TestDeriveMultiple checks that a single status containing one mention and
// one hashtag reports exactly those, and no emojis.
func (suite *PlainTestSuite) TestDeriveMultiple() {
	result := suite.FromPlain(`Another test @foss_satan@fossbros-anonymous.io
#Hashtag
Text`)

	// one mention ...
	assert.Len(suite.T(), result.Mentions, 1)
	assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", result.Mentions[0].NameString)

	// ... one hashtag ...
	assert.Len(suite.T(), result.Tags, 1)
	assert.Equal(suite.T(), "Hashtag", result.Tags[0].Name)

	// ... and nothing else
	assert.Len(suite.T(), result.Emojis, 0)
}
// TestZalgoHashtag checks that a hashtag made of heavily stacked combining
// characters is not accepted as a tag, while an ordinary hashtag in the same
// status still is.
func (suite *PlainTestSuite) TestZalgoHashtag() {
	result := suite.FromPlain(`yo who else loves #praying to #z̸͉̅a̸͚͋l̵͈̊g̸̫͌ỏ̷̪?`)

	// only the plain hashtag should survive
	assert.Len(suite.T(), result.Tags, 1)
	assert.Equal(suite.T(), "praying", result.Tags[0].Name)
}
func TestPlainTestSuite(t *testing.T) {

141
internal/text/replace.go Normal file
View file

@ -0,0 +1,141 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package text
import (
"errors"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/util"
"golang.org/x/text/unicode/norm"
"strings"
)
const (
maximumHashtagLength = 30
)
// given a mention or a hashtag string, the methods in this file will attempt to parse it,
// add it to the database, and render it as HTML. If any of these steps fails, the method
// will just return the original string and log an error.
// replaceMention takes a string in the form @username@domain.com or @localusername,
// parses it into a mention, records that mention on the renderer's result, and
// returns the HTML that should stand in for the original text.
//
// The mention is only written to the database when the renderer has a status ID.
// If parsing, storing, or looking up the target account fails, the error is
// logged and the original text is returned unchanged.
func (r *customRenderer) replaceMention(text string) string {
	mention, err := r.parseMention(r.ctx, text, r.accountID, r.statusID)
	if err != nil {
		log.Errorf("error parsing mention %s from status: %s", text, err)
		return text
	}

	// without a status ID there's nothing to attach the mention to, so skip the db
	if r.statusID != "" {
		if putErr := r.f.db.Put(r.ctx, mention); putErr != nil {
			log.Errorf("error putting mention in db: %s", putErr)
			return text
		}
	}

	// record the mention on the result, unless one with this ID is already there
	alreadyListed := false
	for _, existing := range r.result.Mentions {
		if existing.ID == mention.ID {
			alreadyListed = true
			break
		}
	}
	if !alreadyListed {
		r.result.Mentions = append(r.result.Mentions, mention)
	}

	// the link is rendered from the target account, so
	// fetch it from the db if the mention didn't carry it
	target := mention.TargetAccount
	if target == nil {
		fetched, err := r.f.db.GetAccountByID(r.ctx, mention.TargetAccountID)
		if err != nil {
			log.Errorf("error getting account with id %s from the db: %s", mention.TargetAccountID, err)
			return text
		}
		mention.TargetAccount = fetched
		target = fetched
	}

	// replace the mention with the formatted mention content
	// <span class="h-card"><a href="target.URL" class="u-url mention">@<span>target.Username</span></a></span>
	return `<span class="h-card"><a href="` + target.URL +
		`" class="u-url mention">@<span>` + target.Username +
		`</span></a></span>`
}
// replaceHashtag takes a string in the form #HashedTag, and will normalize it before
// adding it to the db and turning it into HTML.
//
// The original text is returned unchanged when the tag is empty, is longer than
// maximumHashtagLength characters, contains a character not permitted in hashtags,
// or cannot be created/stored (in which case the error is also logged).
func (r *customRenderer) replaceHashtag(text string) string {
	// we need the leading marker plus at least one character
	// of tag name; anything shorter can't be a hashtag
	if len(text) < 2 {
		return text
	}

	// this normalization is specifically to avoid cases where visually-identical
	// hashtags are stored with different unicode representations (e.g. with combining
	// diacritics). It allows a tasteful number of combining diacritics to be used,
	// as long as they can be combined with parent characters to form regular letter
	// symbols.
	normalized := norm.NFC.String(text[1:])

	// Ranging over a string yields byte offsets, not character positions, so
	// count characters separately: otherwise tags written in multi-byte scripts
	// would hit the length limit long before reaching that many characters.
	length := 0
	for _, char := range normalized {
		if length >= maximumHashtagLength || !util.IsPermittedInHashtag(char) {
			return text
		}
		length++
	}

	tag, err := r.f.db.TagStringToTag(r.ctx, normalized, r.accountID)
	if err != nil {
		log.Errorf("error generating hashtags from status: %s", err)
		return text
	}

	// only store + append if it's not been listed yet
	listed := false
	for _, t := range r.result.Tags {
		if tag.ID == t.ID {
			listed = true
			break
		}
	}
	if !listed {
		// a tag that's already in the db is fine; any other error is not
		if err := r.f.db.Put(r.ctx, tag); err != nil && !errors.Is(err, db.ErrAlreadyExists) {
			log.Errorf("error putting tags in db: %s", err)
			return text
		}
		r.result.Tags = append(r.result.Tags, tag)
	}

	var b strings.Builder
	// replace the #tag with the formatted tag content
	// <a href="tag.URL" class="mention hashtag" rel="tag">#<span>normalized</span></a>
	b.WriteString(`<a href="`)
	b.WriteString(tag.URL)
	b.WriteString(`" class="mention hashtag" rel="tag">#<span>`)
	b.WriteString(normalized)
	b.WriteString(`</span></a>`)
	return b.String()
}