| 
									
										
										
										
											2023-09-29 10:39:56 +02:00
										 |  |  | // GoToSocial | 
					
						
							|  |  |  | // Copyright (C) GoToSocial Authors admin@gotosocial.org | 
					
						
							|  |  |  | // SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // This program is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  | // it under the terms of the GNU Affero General Public License as published by | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  | // (at your option) any later version. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // This program is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  | // GNU Affero General Public License for more details. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // You should have received a copy of the GNU Affero General Public License | 
					
						
							|  |  |  | // along with this program.  If not, see <http://www.gnu.org/licenses/>. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package text | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"fmt" | 
					
						
							|  |  |  | 	"strings" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-26 15:34:10 +02:00
										 |  |  | 	"code.superseriousbusiness.org/gotosocial/internal/regexes" | 
					
						
							| 
									
										
										
										
											2023-09-29 10:39:56 +02:00
										 |  |  | 	"github.com/yuin/goldmark/ast" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/parser" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/text" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* | 
					
						
							|  |  |  | 	MENTION PARSER STUFF | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // mention fulfils the goldmark | 
					
						
							|  |  |  | // ast.Node interface. | 
					
						
							|  |  |  | type mention struct { | 
					
						
							|  |  |  | 	ast.BaseInline | 
					
						
							|  |  |  | 	Segment text.Segment | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var kindMention = ast.NewNodeKind("Mention") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (n *mention) Kind() ast.NodeKind { | 
					
						
							|  |  |  | 	return kindMention | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (n *mention) Dump(source []byte, level int) { | 
					
						
							|  |  |  | 	fmt.Printf("%sMention: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source))) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // newMention creates a goldmark ast.Node | 
					
						
							|  |  |  | // from a text.Segment. The contained segment | 
					
						
							|  |  |  | // is used in rendering. | 
					
						
							|  |  |  | func newMention(s text.Segment) *mention { | 
					
						
							|  |  |  | 	return &mention{ | 
					
						
							|  |  |  | 		BaseInline: ast.BaseInline{}, | 
					
						
							|  |  |  | 		Segment:    s, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // mentionParser fulfils the goldmark | 
					
						
							|  |  |  | // parser.InlineParser interface. | 
					
						
							|  |  |  | type mentionParser struct{} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Mention parsing is triggered by the `@` symbol | 
					
						
							|  |  |  | // which appears at the beginning of a mention. | 
					
						
							|  |  |  | func (p *mentionParser) Trigger() []byte { | 
					
						
							|  |  |  | 	return []byte{'@'} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (p *mentionParser) Parse( | 
					
						
							|  |  |  | 	_ ast.Node, | 
					
						
							|  |  |  | 	block text.Reader, | 
					
						
							|  |  |  | 	_ parser.Context, | 
					
						
							|  |  |  | ) ast.Node { | 
					
						
							|  |  |  | 	// If preceding character is not a valid boundary | 
					
						
							|  |  |  | 	// character, then this cannot be a valid mention. | 
					
						
							|  |  |  | 	if !isMentionBoundary(block.PrecendingCharacter()) { | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	line, segment := block.PeekLine() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Ascertain location of mention in the line | 
					
						
							|  |  |  | 	// that starts with the trigger character. | 
					
						
							|  |  |  | 	loc := regexes.MentionFinder.FindIndex(line) | 
					
						
							|  |  |  | 	if loc == nil || loc[0] != 0 { | 
					
						
							|  |  |  | 		// Noop if not found or | 
					
						
							|  |  |  | 		// not found at start. | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Advance the block to | 
					
						
							|  |  |  | 	// the end of the mention. | 
					
						
							|  |  |  | 	block.Advance(loc[1]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// mention ast.Node spans from the | 
					
						
							|  |  |  | 	// beginning of this segment up to | 
					
						
							|  |  |  | 	// the last character of the mention. | 
					
						
							|  |  |  | 	return newMention( | 
					
						
							|  |  |  | 		segment.WithStop( | 
					
						
							|  |  |  | 			segment.Start + loc[1], | 
					
						
							|  |  |  | 		), | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* | 
					
						
							|  |  |  | 	HASHTAG PARSER STUFF | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // hashtag fulfils the goldmark | 
					
						
							|  |  |  | // ast.Node interface. | 
					
						
							|  |  |  | type hashtag struct { | 
					
						
							|  |  |  | 	ast.BaseInline | 
					
						
							|  |  |  | 	Segment text.Segment | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var kindHashtag = ast.NewNodeKind("Hashtag") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (n *hashtag) Kind() ast.NodeKind { | 
					
						
							|  |  |  | 	return kindHashtag | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (n *hashtag) Dump(source []byte, level int) { | 
					
						
							|  |  |  | 	fmt.Printf("%sHashtag: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source))) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // newHashtag creates a goldmark ast.Node | 
					
						
							|  |  |  | // from a text.Segment. The contained segment | 
					
						
							|  |  |  | // is used in rendering. | 
					
						
							|  |  |  | func newHashtag(s text.Segment) *hashtag { | 
					
						
							|  |  |  | 	return &hashtag{ | 
					
						
							|  |  |  | 		BaseInline: ast.BaseInline{}, | 
					
						
							|  |  |  | 		Segment:    s, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type hashtagParser struct{} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Hashtag parsing is triggered by a '#' symbol | 
					
						
							|  |  |  | // which appears at the beginning of a hashtag. | 
					
						
							|  |  |  | func (p *hashtagParser) Trigger() []byte { | 
					
						
							|  |  |  | 	return []byte{'#'} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (p *hashtagParser) Parse( | 
					
						
							|  |  |  | 	_ ast.Node, | 
					
						
							|  |  |  | 	block text.Reader, | 
					
						
							|  |  |  | 	_ parser.Context, | 
					
						
							|  |  |  | ) ast.Node { | 
					
						
							|  |  |  | 	// If preceding character is not a valid boundary | 
					
						
							|  |  |  | 	// character, then this cannot be a valid hashtag. | 
					
						
							|  |  |  | 	if !isHashtagBoundary(block.PrecendingCharacter()) { | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	var ( | 
					
						
							|  |  |  | 		line, segment = block.PeekLine() | 
					
						
							|  |  |  | 		lineStr       = string(line) | 
					
						
							|  |  |  | 		lineStrLen    = len(lineStr) | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if lineStrLen <= 1 { | 
					
						
							|  |  |  | 		// This is probably just | 
					
						
							|  |  |  | 		// a lonely '#' char. | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Iterate through the runes in the detected | 
					
						
							|  |  |  | 	// hashtag string until we reach either: | 
					
						
							|  |  |  | 	//   - A weird character (bad). | 
					
						
							|  |  |  | 	//   - The end of the hashtag (ok). | 
					
						
							|  |  |  | 	//   - The end of the string (also ok). | 
					
						
							|  |  |  | 	for i, r := range lineStr { | 
					
						
							|  |  |  | 		switch { | 
					
						
							|  |  |  | 		case r == '#' && i == 0: | 
					
						
							|  |  |  | 			// Ignore initial '#'. | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-31 02:42:55 -08:00
										 |  |  | 		case !isPermittedInHashtag(r) && | 
					
						
							| 
									
										
										
										
											2023-09-29 10:39:56 +02:00
										 |  |  | 			!isHashtagBoundary(r): | 
					
						
							|  |  |  | 			// Weird non-boundary character | 
					
						
							|  |  |  | 			// in the hashtag. Don't trust it. | 
					
						
							|  |  |  | 			return nil | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		case isHashtagBoundary(r): | 
					
						
							|  |  |  | 			// Reached closing hashtag | 
					
						
							|  |  |  | 			// boundary. Advance block | 
					
						
							|  |  |  | 			// to the end of the hashtag. | 
					
						
							|  |  |  | 			block.Advance(i) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			// hashtag ast.Node spans from | 
					
						
							|  |  |  | 			// the beginning of this segment | 
					
						
							|  |  |  | 			// up to the boundary character. | 
					
						
							|  |  |  | 			return newHashtag( | 
					
						
							|  |  |  | 				segment.WithStop( | 
					
						
							|  |  |  | 					segment.Start + i, | 
					
						
							|  |  |  | 				), | 
					
						
							|  |  |  | 			) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// No invalid or boundary characters before the | 
					
						
							|  |  |  | 	// end of the line: it's all hashtag, baby 😎 | 
					
						
							|  |  |  | 	// | 
					
						
							|  |  |  | 	// Advance block to the end of the segment. | 
					
						
							|  |  |  | 	block.Advance(segment.Len()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// hashtag ast.Node spans | 
					
						
							|  |  |  | 	// the entire segment. | 
					
						
							|  |  |  | 	return newHashtag(segment) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* | 
					
						
							|  |  |  | 	EMOJI PARSER STUFF | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // emoji fulfils the goldmark | 
					
						
							|  |  |  | // ast.Node interface. | 
					
						
							|  |  |  | type emoji struct { | 
					
						
							|  |  |  | 	ast.BaseInline | 
					
						
							|  |  |  | 	Segment text.Segment | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var kindEmoji = ast.NewNodeKind("Emoji") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (n *emoji) Kind() ast.NodeKind { | 
					
						
							|  |  |  | 	return kindEmoji | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (n *emoji) Dump(source []byte, level int) { | 
					
						
							|  |  |  | 	fmt.Printf("%sEmoji: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source))) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // newEmoji creates a goldmark ast.Node | 
					
						
							|  |  |  | // from a text.Segment. The contained | 
					
						
							|  |  |  | // segment is used in rendering. | 
					
						
							|  |  |  | func newEmoji(s text.Segment) *emoji { | 
					
						
							|  |  |  | 	return &emoji{ | 
					
						
							|  |  |  | 		BaseInline: ast.BaseInline{}, | 
					
						
							|  |  |  | 		Segment:    s, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type emojiParser struct{} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Emoji parsing is triggered by a ':' char | 
					
						
							|  |  |  | // which appears at the start of the emoji. | 
					
						
							|  |  |  | func (p *emojiParser) Trigger() []byte { | 
					
						
							|  |  |  | 	return []byte{':'} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (p *emojiParser) Parse( | 
					
						
							|  |  |  | 	_ ast.Node, | 
					
						
							|  |  |  | 	block text.Reader, | 
					
						
							|  |  |  | 	_ parser.Context, | 
					
						
							|  |  |  | ) ast.Node { | 
					
						
							|  |  |  | 	line, segment := block.PeekLine() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Ascertain location of emoji in the line | 
					
						
							|  |  |  | 	// that starts with the trigger character. | 
					
						
							|  |  |  | 	loc := regexes.EmojiFinder.FindIndex(line) | 
					
						
							|  |  |  | 	if loc == nil || loc[0] != 0 { | 
					
						
							|  |  |  | 		// Noop if not found or | 
					
						
							|  |  |  | 		// not found at start. | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Advance the block to | 
					
						
							|  |  |  | 	// the end of the emoji. | 
					
						
							|  |  |  | 	block.Advance(loc[1]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// emoji ast.Node spans from the | 
					
						
							|  |  |  | 	// beginning of this segment up to | 
					
						
							|  |  |  | 	// the last character of the emoji. | 
					
						
							|  |  |  | 	return newEmoji( | 
					
						
							|  |  |  | 		segment.WithStop( | 
					
						
							|  |  |  | 			segment.Start + loc[1], | 
					
						
							|  |  |  | 		), | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | } |