mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-11-04 03:52:24 -06:00 
			
		
		
		
	* [feature] Parse content warning as HTML, serialize via API to plaintext * tidy up some cruft * whoops * oops * i'm da joker baybee * clemency muy lorde * rename some of the text functions for clarity * jiggle the opts * fiddle de deee * hopefully the last test fix i ever have to do in my beautiful life
		
			
				
	
	
		
			195 lines
		
	
	
	
		
			4.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			195 lines
		
	
	
	
		
			4.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// GoToSocial
 | 
						|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
 | 
						|
// SPDX-License-Identifier: AGPL-3.0-or-later
 | 
						|
//
 | 
						|
// This program is free software: you can redistribute it and/or modify
 | 
						|
// it under the terms of the GNU Affero General Public License as published by
 | 
						|
// the Free Software Foundation, either version 3 of the License, or
 | 
						|
// (at your option) any later version.
 | 
						|
//
 | 
						|
// This program is distributed in the hope that it will be useful,
 | 
						|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
// GNU Affero General Public License for more details.
 | 
						|
//
 | 
						|
// You should have received a copy of the GNU Affero General Public License
 | 
						|
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
						|
 | 
						|
package text
 | 
						|
 | 
						|
import (
 | 
						|
	"bytes"
 | 
						|
	"context"
 | 
						|
	"regexp"
 | 
						|
	"strings"
 | 
						|
 | 
						|
	"codeberg.org/gruf/go-byteutil"
 | 
						|
	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
 | 
						|
	"github.com/superseriousbusiness/gotosocial/internal/log"
 | 
						|
	"github.com/superseriousbusiness/gotosocial/internal/regexes"
 | 
						|
	"github.com/yuin/goldmark"
 | 
						|
	"github.com/yuin/goldmark/extension"
 | 
						|
	"github.com/yuin/goldmark/renderer"
 | 
						|
	"github.com/yuin/goldmark/renderer/html"
 | 
						|
)
 | 
						|
 | 
						|
// FromMarkdown fulfils FormatFunc by parsing
 | 
						|
// the given markdown input into a FormatResult.
 | 
						|
//
 | 
						|
// Inline (aka unsafe) HTML elements are allowed,
 | 
						|
// as they should be sanitized afterwards anyway.
 | 
						|
func (f *Formatter) FromMarkdown(
 | 
						|
	ctx context.Context,
 | 
						|
	parseMention gtsmodel.ParseMentionFunc,
 | 
						|
	authorID string,
 | 
						|
	statusID string,
 | 
						|
	input string,
 | 
						|
) *FormatResult {
 | 
						|
	return f.fromMarkdown(
 | 
						|
		ctx,
 | 
						|
		false, // basic = false
 | 
						|
		parseMention,
 | 
						|
		authorID,
 | 
						|
		statusID,
 | 
						|
		input,
 | 
						|
	)
 | 
						|
}
 | 
						|
 | 
						|
// FromMarkdownBasic fulfils FormatFunc by parsing
 | 
						|
// the given markdown input into a FormatResult.
 | 
						|
//
 | 
						|
// Unlike FromMarkdown, it will only parse emojis with
 | 
						|
// the custom renderer, leaving aside mentions and tags.
 | 
						|
//
 | 
						|
// Inline (aka unsafe) HTML elements are not allowed.
 | 
						|
//
 | 
						|
// If the result is a single paragraph,
 | 
						|
// it will not be wrapped in <p> tags.
 | 
						|
func (f *Formatter) FromMarkdownBasic(
 | 
						|
	ctx context.Context,
 | 
						|
	parseMention gtsmodel.ParseMentionFunc,
 | 
						|
	authorID string,
 | 
						|
	statusID string,
 | 
						|
	input string,
 | 
						|
) *FormatResult {
 | 
						|
	res := f.fromMarkdown(
 | 
						|
		ctx,
 | 
						|
		true, // basic = true
 | 
						|
		parseMention,
 | 
						|
		authorID,
 | 
						|
		statusID,
 | 
						|
		input,
 | 
						|
	)
 | 
						|
 | 
						|
	res.HTML = unwrapParagraph(res.HTML)
 | 
						|
	return res
 | 
						|
}
 | 
						|
 | 
						|
// fromMarkdown parses the given input text either
 | 
						|
// with or without emojis, and returns the result.
 | 
						|
func (f *Formatter) fromMarkdown(
 | 
						|
	ctx context.Context,
 | 
						|
	basic bool,
 | 
						|
	parseMention gtsmodel.ParseMentionFunc,
 | 
						|
	authorID string,
 | 
						|
	statusID string,
 | 
						|
	input string,
 | 
						|
) *FormatResult {
 | 
						|
	var (
 | 
						|
		result = new(FormatResult)
 | 
						|
		opts   []renderer.Option
 | 
						|
	)
 | 
						|
 | 
						|
	if basic {
 | 
						|
		// Don't allow raw HTML tags,
 | 
						|
		// markdown syntax only.
 | 
						|
		opts = []renderer.Option{
 | 
						|
			html.WithXHTML(),
 | 
						|
			html.WithHardWraps(),
 | 
						|
		}
 | 
						|
	} else {
 | 
						|
		opts = []renderer.Option{
 | 
						|
			html.WithXHTML(),
 | 
						|
			html.WithHardWraps(),
 | 
						|
 | 
						|
			// Allow raw HTML tags, we
 | 
						|
			// sanitize at the end anyway.
 | 
						|
			html.WithUnsafe(),
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	// Instantiate goldmark parser for
 | 
						|
	// markdown, using custom renderer
 | 
						|
	// to add hashtag/mention links.
 | 
						|
	md := goldmark.New(
 | 
						|
		goldmark.WithRendererOptions(
 | 
						|
			opts...,
 | 
						|
		),
 | 
						|
		goldmark.WithExtensions(
 | 
						|
			&customRenderer{
 | 
						|
				ctx,
 | 
						|
				f.db,
 | 
						|
				parseMention,
 | 
						|
				authorID,
 | 
						|
				statusID,
 | 
						|
				// If basic, pass
 | 
						|
				// emojiOnly = true.
 | 
						|
				basic,
 | 
						|
				result,
 | 
						|
			},
 | 
						|
			// Turns URLs into links.
 | 
						|
			extension.NewLinkify(
 | 
						|
				extension.WithLinkifyURLRegexp(regexes.LinkScheme),
 | 
						|
			),
 | 
						|
			extension.Strikethrough,
 | 
						|
		),
 | 
						|
	)
 | 
						|
 | 
						|
	// Convert input string to bytes
 | 
						|
	// without performing any allocs.
 | 
						|
	bInput := byteutil.S2B(input)
 | 
						|
 | 
						|
	// Parse input into HTML.
 | 
						|
	var htmlBytes bytes.Buffer
 | 
						|
	if err := md.Convert(
 | 
						|
		bInput,
 | 
						|
		&htmlBytes,
 | 
						|
	); err != nil {
 | 
						|
		log.Errorf(ctx, "error formatting markdown input to HTML: %s", err)
 | 
						|
	}
 | 
						|
 | 
						|
	// Clean and shrink HTML.
 | 
						|
	result.HTML = byteutil.B2S(htmlBytes.Bytes())
 | 
						|
	result.HTML = SanitizeHTML(result.HTML)
 | 
						|
	result.HTML = MinifyHTML(result.HTML)
 | 
						|
 | 
						|
	return result
 | 
						|
}
 | 
						|
 | 
						|
var parasRegexp = regexp.MustCompile(`</?p>`)
 | 
						|
 | 
						|
// unwrapParagraph removes opening and closing paragraph tags
 | 
						|
// of input HTML, if input html is a single paragraph only.
 | 
						|
func unwrapParagraph(html string) string {
 | 
						|
	if !strings.HasPrefix(html, "<p>") {
 | 
						|
		return html
 | 
						|
	}
 | 
						|
 | 
						|
	if !strings.HasSuffix(html, "</p>") {
 | 
						|
		return html
 | 
						|
	}
 | 
						|
 | 
						|
	// Make a substring excluding the
 | 
						|
	// opening and closing paragraph tags.
 | 
						|
	sub := html[3 : len(html)-4]
 | 
						|
 | 
						|
	// If there are still other paragraph tags left
 | 
						|
	// inside the substring, return html unchanged.
 | 
						|
	containsOtherParas := parasRegexp.MatchString(sub)
 | 
						|
	if containsOtherParas {
 | 
						|
		return html
 | 
						|
	}
 | 
						|
 | 
						|
	// Return the substring.
 | 
						|
	return sub
 | 
						|
}
 |