| 
									
										
										
										
											2023-03-12 16:00:57 +01:00
										 |  |  | // GoToSocial | 
					
						
							|  |  |  | // Copyright (C) GoToSocial Authors admin@gotosocial.org | 
					
						
							|  |  |  | // SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // This program is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  | // it under the terms of the GNU Affero General Public License as published by | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  | // (at your option) any later version. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // This program is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  | // GNU Affero General Public License for more details. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // You should have received a copy of the GNU Affero General Public License | 
					
						
							|  |  |  | // along with this program.  If not, see <http://www.gnu.org/licenses/>. | 
					
						
							| 
									
										
										
										
											2021-07-26 20:25:54 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | package text | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							| 
									
										
										
										
											2022-08-31 11:40:11 -04:00
										 |  |  | 	"bytes" | 
					
						
							| 
									
										
										
										
											2021-08-25 15:34:33 +02:00
										 |  |  | 	"context" | 
					
						
							| 
									
										
										
										
											2025-03-07 15:04:34 +01:00
										 |  |  | 	"regexp" | 
					
						
							|  |  |  | 	"strings" | 
					
						
							| 
									
										
										
										
											2021-08-25 15:34:33 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-05 13:22:40 +01:00
										 |  |  | 	"codeberg.org/gruf/go-byteutil" | 
					
						
							| 
									
										
										
										
											2021-07-26 20:25:54 +02:00
										 |  |  | 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" | 
					
						
							| 
									
										
										
										
											2022-08-07 18:19:16 +02:00
										 |  |  | 	"github.com/superseriousbusiness/gotosocial/internal/log" | 
					
						
							| 
									
										
										
										
											2024-03-15 18:26:53 +01:00
										 |  |  | 	"github.com/superseriousbusiness/gotosocial/internal/regexes" | 
					
						
							| 
									
										
										
										
											2022-12-16 11:20:22 +00:00
										 |  |  | 	"github.com/yuin/goldmark" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/extension" | 
					
						
							| 
									
										
										
										
											2025-03-07 15:04:34 +01:00
										 |  |  | 	"github.com/yuin/goldmark/renderer" | 
					
						
							| 
									
										
										
										
											2022-12-16 11:20:22 +00:00
										 |  |  | 	"github.com/yuin/goldmark/renderer/html" | 
					
						
							| 
									
										
										
										
											2021-07-26 20:25:54 +02:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-29 10:39:56 +02:00
										 |  |  | // FromMarkdown fulfils FormatFunc by parsing | 
					
						
							|  |  |  | // the given markdown input into a FormatResult. | 
					
						
							| 
									
										
										
										
											2025-03-07 15:04:34 +01:00
										 |  |  | // | 
					
						
							|  |  |  | // Inline (aka unsafe) HTML elements are allowed, | 
					
						
							|  |  |  | // as they should be sanitized afterwards anyway. | 
					
						
							| 
									
										
										
										
											2023-09-29 10:39:56 +02:00
										 |  |  | func (f *Formatter) FromMarkdown( | 
					
						
							|  |  |  | 	ctx context.Context, | 
					
						
							|  |  |  | 	parseMention gtsmodel.ParseMentionFunc, | 
					
						
							|  |  |  | 	authorID string, | 
					
						
							|  |  |  | 	statusID string, | 
					
						
							|  |  |  | 	input string, | 
					
						
							|  |  |  | ) *FormatResult { | 
					
						
							| 
									
										
										
										
											2025-03-07 15:04:34 +01:00
										 |  |  | 	return f.fromMarkdown( | 
					
						
							|  |  |  | 		ctx, | 
					
						
							|  |  |  | 		false, // basic = false | 
					
						
							|  |  |  | 		parseMention, | 
					
						
							|  |  |  | 		authorID, | 
					
						
							|  |  |  | 		statusID, | 
					
						
							|  |  |  | 		input, | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // FromMarkdownBasic fulfils FormatFunc by parsing | 
					
						
							|  |  |  | // the given markdown input into a FormatResult. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // Unlike FromMarkdown, it will only parse emojis with | 
					
						
							|  |  |  | // the custom renderer, leaving aside mentions and tags. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // Inline (aka unsafe) HTML elements are not allowed. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // If the result is a single paragraph, | 
					
						
							|  |  |  | // it will not be wrapped in <p> tags. | 
					
						
							|  |  |  | func (f *Formatter) FromMarkdownBasic( | 
					
						
							|  |  |  | 	ctx context.Context, | 
					
						
							|  |  |  | 	parseMention gtsmodel.ParseMentionFunc, | 
					
						
							|  |  |  | 	authorID string, | 
					
						
							|  |  |  | 	statusID string, | 
					
						
							|  |  |  | 	input string, | 
					
						
							|  |  |  | ) *FormatResult { | 
					
						
							|  |  |  | 	res := f.fromMarkdown( | 
					
						
							|  |  |  | 		ctx, | 
					
						
							|  |  |  | 		true, // basic = true | 
					
						
							|  |  |  | 		parseMention, | 
					
						
							|  |  |  | 		authorID, | 
					
						
							|  |  |  | 		statusID, | 
					
						
							|  |  |  | 		input, | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	res.HTML = unwrapParagraph(res.HTML) | 
					
						
							|  |  |  | 	return res | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // fromMarkdown parses the given input text either | 
					
						
							|  |  |  | // with or without emojis, and returns the result. | 
					
						
							|  |  |  | func (f *Formatter) fromMarkdown( | 
					
						
							|  |  |  | 	ctx context.Context, | 
					
						
							|  |  |  | 	basic bool, | 
					
						
							|  |  |  | 	parseMention gtsmodel.ParseMentionFunc, | 
					
						
							|  |  |  | 	authorID string, | 
					
						
							|  |  |  | 	statusID string, | 
					
						
							|  |  |  | 	input string, | 
					
						
							|  |  |  | ) *FormatResult { | 
					
						
							|  |  |  | 	var ( | 
					
						
							|  |  |  | 		result = new(FormatResult) | 
					
						
							|  |  |  | 		opts   []renderer.Option | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if basic { | 
					
						
							|  |  |  | 		// Don't allow raw HTML tags, | 
					
						
							|  |  |  | 		// markdown syntax only. | 
					
						
							|  |  |  | 		opts = []renderer.Option{ | 
					
						
							|  |  |  | 			html.WithXHTML(), | 
					
						
							|  |  |  | 			html.WithHardWraps(), | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		opts = []renderer.Option{ | 
					
						
							|  |  |  | 			html.WithXHTML(), | 
					
						
							|  |  |  | 			html.WithHardWraps(), | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			// Allow raw HTML tags, we | 
					
						
							|  |  |  | 			// sanitize at the end anyway. | 
					
						
							|  |  |  | 			html.WithUnsafe(), | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2022-09-27 14:27:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-29 10:39:56 +02:00
										 |  |  | 	// Instantiate goldmark parser for | 
					
						
							|  |  |  | 	// markdown, using custom renderer | 
					
						
							|  |  |  | 	// to add hashtag/mention links. | 
					
						
							| 
									
										
										
										
											2022-12-16 11:20:22 +00:00
										 |  |  | 	md := goldmark.New( | 
					
						
							|  |  |  | 		goldmark.WithRendererOptions( | 
					
						
							| 
									
										
										
										
											2025-03-07 15:04:34 +01:00
										 |  |  | 			opts..., | 
					
						
							| 
									
										
										
										
											2022-12-16 11:20:22 +00:00
										 |  |  | 		), | 
					
						
							|  |  |  | 		goldmark.WithExtensions( | 
					
						
							| 
									
										
										
										
											2023-09-29 10:39:56 +02:00
										 |  |  | 			&customRenderer{ | 
					
						
							|  |  |  | 				ctx, | 
					
						
							|  |  |  | 				f.db, | 
					
						
							|  |  |  | 				parseMention, | 
					
						
							|  |  |  | 				authorID, | 
					
						
							|  |  |  | 				statusID, | 
					
						
							| 
									
										
										
										
											2025-03-07 15:04:34 +01:00
										 |  |  | 				// If basic, pass | 
					
						
							|  |  |  | 				// emojiOnly = true. | 
					
						
							|  |  |  | 				basic, | 
					
						
							| 
									
										
										
										
											2023-09-29 10:39:56 +02:00
										 |  |  | 				result, | 
					
						
							|  |  |  | 			}, | 
					
						
							| 
									
										
										
										
											2024-03-15 18:26:53 +01:00
										 |  |  | 			// Turns URLs into links. | 
					
						
							|  |  |  | 			extension.NewLinkify( | 
					
						
							| 
									
										
										
										
											2025-03-24 14:13:32 +01:00
										 |  |  | 				extension.WithLinkifyURLRegexp(regexes.URLLike), | 
					
						
							| 
									
										
										
										
											2024-03-15 18:26:53 +01:00
										 |  |  | 			), | 
					
						
							| 
									
										
										
										
											2022-12-16 11:20:22 +00:00
										 |  |  | 			extension.Strikethrough, | 
					
						
							|  |  |  | 		), | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-05 13:22:40 +01:00
										 |  |  | 	// Convert input string to bytes | 
					
						
							|  |  |  | 	// without performing any allocs. | 
					
						
							|  |  |  | 	bInput := byteutil.S2B(input) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-29 10:39:56 +02:00
										 |  |  | 	// Parse input into HTML. | 
					
						
							|  |  |  | 	var htmlBytes bytes.Buffer | 
					
						
							|  |  |  | 	if err := md.Convert( | 
					
						
							| 
									
										
										
										
											2023-10-05 13:22:40 +01:00
										 |  |  | 		bInput, | 
					
						
							| 
									
										
										
										
											2023-09-29 10:39:56 +02:00
										 |  |  | 		&htmlBytes, | 
					
						
							|  |  |  | 	); err != nil { | 
					
						
							|  |  |  | 		log.Errorf(ctx, "error formatting markdown input to HTML: %s", err) | 
					
						
							| 
									
										
										
										
											2022-09-27 14:27:53 +02:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2022-08-07 18:19:16 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-29 10:39:56 +02:00
										 |  |  | 	// Clean and shrink HTML. | 
					
						
							| 
									
										
										
										
											2023-10-05 13:22:40 +01:00
										 |  |  | 	result.HTML = byteutil.B2S(htmlBytes.Bytes()) | 
					
						
							| 
									
										
										
										
											2025-03-07 15:04:34 +01:00
										 |  |  | 	result.HTML = SanitizeHTML(result.HTML) | 
					
						
							| 
									
										
										
										
											2023-08-11 14:40:11 +02:00
										 |  |  | 	result.HTML = MinifyHTML(result.HTML) | 
					
						
							| 
									
										
										
										
											2022-08-07 18:19:16 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-02-03 10:58:58 +00:00
										 |  |  | 	return result | 
					
						
							| 
									
										
										
										
											2021-07-26 20:25:54 +02:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2025-03-07 15:04:34 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | var parasRegexp = regexp.MustCompile(`</?p>`) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // unwrapParagraph removes opening and closing paragraph tags | 
					
						
							|  |  |  | // of input HTML, if input html is a single paragraph only. | 
					
						
							|  |  |  | func unwrapParagraph(html string) string { | 
					
						
							|  |  |  | 	if !strings.HasPrefix(html, "<p>") { | 
					
						
							|  |  |  | 		return html | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if !strings.HasSuffix(html, "</p>") { | 
					
						
							|  |  |  | 		return html | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Make a substring excluding the | 
					
						
							|  |  |  | 	// opening and closing paragraph tags. | 
					
						
							|  |  |  | 	sub := html[3 : len(html)-4] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// If there are still other paragraph tags left | 
					
						
							|  |  |  | 	// inside the substring, return html unchanged. | 
					
						
							|  |  |  | 	containsOtherParas := parasRegexp.MatchString(sub) | 
					
						
							|  |  |  | 	if containsOtherParas { | 
					
						
							|  |  |  | 		return html | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Return the substring. | 
					
						
							|  |  |  | 	return sub | 
					
						
							|  |  |  | } |