mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-29 19:52:24 -05:00 
			
		
		
		
	Updates markdown parser + sanitizer to allow footnote anchors to work properly, with appropriate roles. Footnote anchor IDs and backrefs use the status ID as a prefix to avoid clashes, so that footnotes don't break when multiple footnoted statuses are rendered on the same page (e.g., in a thread or on the account's home page). Closes https://codeberg.org/superseriousbusiness/gotosocial/issues/4296 Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4298 Co-authored-by: tobi <tobi.smethurst@protonmail.com> Co-committed-by: tobi <tobi.smethurst@protonmail.com>
		
			
				
	
	
		
			211 lines
		
	
	
	
		
			5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			211 lines
		
	
	
	
		
			5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // GoToSocial
 | |
| // Copyright (C) GoToSocial Authors admin@gotosocial.org
 | |
| // SPDX-License-Identifier: AGPL-3.0-or-later
 | |
| //
 | |
| // This program is free software: you can redistribute it and/or modify
 | |
| // it under the terms of the GNU Affero General Public License as published by
 | |
| // the Free Software Foundation, either version 3 of the License, or
 | |
| // (at your option) any later version.
 | |
| //
 | |
| // This program is distributed in the hope that it will be useful,
 | |
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| // GNU Affero General Public License for more details.
 | |
| //
 | |
| // You should have received a copy of the GNU Affero General Public License
 | |
| // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | |
| 
 | |
| package text
 | |
| 
 | |
| import (
 | |
| 	"bytes"
 | |
| 	"context"
 | |
| 	"regexp"
 | |
| 	"strings"
 | |
| 
 | |
| 	"code.superseriousbusiness.org/gotosocial/internal/gtsmodel"
 | |
| 	"code.superseriousbusiness.org/gotosocial/internal/id"
 | |
| 	"code.superseriousbusiness.org/gotosocial/internal/log"
 | |
| 	"code.superseriousbusiness.org/gotosocial/internal/regexes"
 | |
| 	"codeberg.org/gruf/go-byteutil"
 | |
| 	"github.com/yuin/goldmark"
 | |
| 	"github.com/yuin/goldmark/extension"
 | |
| 	"github.com/yuin/goldmark/renderer"
 | |
| 	"github.com/yuin/goldmark/renderer/html"
 | |
| )
 | |
| 
 | |
| // FromMarkdown fulfils FormatFunc by parsing
 | |
| // the given markdown input into a FormatResult.
 | |
| //
 | |
| // Inline (aka unsafe) HTML elements are allowed,
 | |
| // as they should be sanitized afterwards anyway.
 | |
| func (f *Formatter) FromMarkdown(
 | |
| 	ctx context.Context,
 | |
| 	parseMention gtsmodel.ParseMentionFunc,
 | |
| 	authorID string,
 | |
| 	statusID string,
 | |
| 	input string,
 | |
| ) *FormatResult {
 | |
| 	return f.fromMarkdown(
 | |
| 		ctx,
 | |
| 		false, // basic = false
 | |
| 		parseMention,
 | |
| 		authorID,
 | |
| 		statusID,
 | |
| 		input,
 | |
| 	)
 | |
| }
 | |
| 
 | |
| // FromMarkdownBasic fulfils FormatFunc by parsing
 | |
| // the given markdown input into a FormatResult.
 | |
| //
 | |
| // Unlike FromMarkdown, it will only parse emojis with
 | |
| // the custom renderer, leaving aside mentions and tags.
 | |
| //
 | |
| // Inline (aka unsafe) HTML elements are not allowed.
 | |
| //
 | |
| // If the result is a single paragraph,
 | |
| // it will not be wrapped in <p> tags.
 | |
| func (f *Formatter) FromMarkdownBasic(
 | |
| 	ctx context.Context,
 | |
| 	parseMention gtsmodel.ParseMentionFunc,
 | |
| 	authorID string,
 | |
| 	statusID string,
 | |
| 	input string,
 | |
| ) *FormatResult {
 | |
| 	res := f.fromMarkdown(
 | |
| 		ctx,
 | |
| 		true, // basic = true
 | |
| 		parseMention,
 | |
| 		authorID,
 | |
| 		statusID,
 | |
| 		input,
 | |
| 	)
 | |
| 
 | |
| 	res.HTML = unwrapParagraph(res.HTML)
 | |
| 	return res
 | |
| }
 | |
| 
 | |
| // fromMarkdown parses the given input text either
 | |
| // with or without emojis, and returns the result.
 | |
| func (f *Formatter) fromMarkdown(
 | |
| 	ctx context.Context,
 | |
| 	basic bool,
 | |
| 	parseMention gtsmodel.ParseMentionFunc,
 | |
| 	authorID string,
 | |
| 	statusID string,
 | |
| 	input string,
 | |
| ) *FormatResult {
 | |
| 	var (
 | |
| 		result = new(FormatResult)
 | |
| 		opts   []renderer.Option
 | |
| 	)
 | |
| 
 | |
| 	if basic {
 | |
| 		// Don't allow raw HTML tags,
 | |
| 		// markdown syntax only.
 | |
| 		opts = []renderer.Option{
 | |
| 			html.WithXHTML(),
 | |
| 			html.WithHardWraps(),
 | |
| 		}
 | |
| 	} else {
 | |
| 		opts = []renderer.Option{
 | |
| 			html.WithXHTML(),
 | |
| 			html.WithHardWraps(),
 | |
| 
 | |
| 			// Allow raw HTML tags, we
 | |
| 			// sanitize at the end anyway.
 | |
| 			html.WithUnsafe(),
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Inject a footnote ID prefix to avoid
 | |
| 	// footnote ID clashes. StatusID isn't
 | |
| 	// always set (eg., when parsing instance
 | |
| 	// description markdown), so take a random
 | |
| 	// ULID if it's not.
 | |
| 	var footnoteIDPrefix string
 | |
| 	if statusID != "" {
 | |
| 		footnoteIDPrefix = statusID + "-"
 | |
| 	} else {
 | |
| 		footnoteIDPrefix = id.NewULID() + "-"
 | |
| 	}
 | |
| 
 | |
| 	// Instantiate goldmark parser for
 | |
| 	// markdown, using custom renderer
 | |
| 	// to add hashtag/mention links.
 | |
| 	md := goldmark.New(
 | |
| 		goldmark.WithRendererOptions(
 | |
| 			opts...,
 | |
| 		),
 | |
| 		goldmark.WithExtensions(
 | |
| 			&customRenderer{
 | |
| 				ctx,
 | |
| 				f.db,
 | |
| 				parseMention,
 | |
| 				authorID,
 | |
| 				statusID,
 | |
| 				// If basic, pass
 | |
| 				// emojiOnly = true.
 | |
| 				basic,
 | |
| 				result,
 | |
| 			},
 | |
| 			// Turns URLs into links.
 | |
| 			extension.NewLinkify(
 | |
| 				extension.WithLinkifyURLRegexp(regexes.URLLike),
 | |
| 			),
 | |
| 			extension.NewFootnote(
 | |
| 				extension.WithFootnoteIDPrefix(footnoteIDPrefix),
 | |
| 			),
 | |
| 			extension.Strikethrough,
 | |
| 		),
 | |
| 	)
 | |
| 
 | |
| 	// Convert input string to bytes
 | |
| 	// without performing any allocs.
 | |
| 	bInput := byteutil.S2B(input)
 | |
| 
 | |
| 	// Parse input into HTML.
 | |
| 	var htmlBytes bytes.Buffer
 | |
| 	if err := md.Convert(
 | |
| 		bInput,
 | |
| 		&htmlBytes,
 | |
| 	); err != nil {
 | |
| 		log.Errorf(ctx, "error formatting markdown input to HTML: %s", err)
 | |
| 	}
 | |
| 
 | |
| 	// Clean and shrink HTML.
 | |
| 	result.HTML = byteutil.B2S(htmlBytes.Bytes())
 | |
| 	result.HTML = SanitizeHTML(result.HTML)
 | |
| 	result.HTML = MinifyHTML(result.HTML)
 | |
| 
 | |
| 	return result
 | |
| }
 | |
| 
 | |
// parasRegexp matches a bare opening or
// closing paragraph tag, ie., <p> or </p>.
var parasRegexp = regexp.MustCompile(`</?p>`)

// unwrapParagraph strips the enclosing <p> and </p>
// tags from the input HTML, but only when the input
// is made up of exactly one paragraph. In every
// other case the input is handed back unchanged.
func unwrapParagraph(html string) string {
	const (
		openTag  = "<p>"
		closeTag = "</p>"
	)

	// Input must both start with an opening
	// tag and finish with a closing tag.
	wrapped := strings.HasPrefix(html, openTag) &&
		strings.HasSuffix(html, closeTag)
	if !wrapped {
		return html
	}

	// Everything between the outermost tags.
	inner := html[len(openTag) : len(html)-len(closeTag)]

	// Any further paragraph tag inside means
	// this wasn't a single paragraph after all.
	if parasRegexp.MatchString(inner) {
		return html
	}

	return inner
}
 |