mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-11-04 00:42:24 -06:00 
			
		
		
		
	
		
			
	
	
		
			282 lines
		
	
	
	
		
			6.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			282 lines
		
	
	
	
		
			6.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| 
								 | 
							
								// GoToSocial
							 | 
						||
| 
								 | 
							
								// Copyright (C) GoToSocial Authors admin@gotosocial.org
							 | 
						||
| 
								 | 
							
								// SPDX-License-Identifier: AGPL-3.0-or-later
							 | 
						||
| 
								 | 
							
								//
							 | 
						||
| 
								 | 
							
								// This program is free software: you can redistribute it and/or modify
							 | 
						||
| 
								 | 
							
								// it under the terms of the GNU Affero General Public License as published by
							 | 
						||
| 
								 | 
							
								// the Free Software Foundation, either version 3 of the License, or
							 | 
						||
| 
								 | 
							
								// (at your option) any later version.
							 | 
						||
| 
								 | 
							
								//
							 | 
						||
| 
								 | 
							
								// This program is distributed in the hope that it will be useful,
							 | 
						||
| 
								 | 
							
								// but WITHOUT ANY WARRANTY; without even the implied warranty of
							 | 
						||
| 
								 | 
							
								// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
							 | 
						||
| 
								 | 
							
								// GNU Affero General Public License for more details.
							 | 
						||
| 
								 | 
							
								//
							 | 
						||
| 
								 | 
							
								// You should have received a copy of the GNU Affero General Public License
							 | 
						||
| 
								 | 
							
								// along with this program.  If not, see <http://www.gnu.org/licenses/>.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								package text
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import (
							 | 
						||
| 
								 | 
							
									"fmt"
							 | 
						||
| 
								 | 
							
									"strings"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									"github.com/superseriousbusiness/gotosocial/internal/regexes"
							 | 
						||
| 
								 | 
							
									"github.com/yuin/goldmark/ast"
							 | 
						||
| 
								 | 
							
									"github.com/yuin/goldmark/parser"
							 | 
						||
| 
								 | 
							
									"github.com/yuin/goldmark/text"
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
									MENTION PARSER STUFF
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// mention fulfils the goldmark
							 | 
						||
| 
								 | 
							
								// ast.Node interface.
							 | 
						||
| 
								 | 
							
								type mention struct {
							 | 
						||
| 
								 | 
							
									ast.BaseInline
							 | 
						||
| 
								 | 
							
									Segment text.Segment
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								var kindMention = ast.NewNodeKind("Mention")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func (n *mention) Kind() ast.NodeKind {
							 | 
						||
| 
								 | 
							
									return kindMention
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func (n *mention) Dump(source []byte, level int) {
							 | 
						||
| 
								 | 
							
									fmt.Printf("%sMention: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source)))
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// newMention creates a goldmark ast.Node
							 | 
						||
| 
								 | 
							
								// from a text.Segment. The contained segment
							 | 
						||
| 
								 | 
							
								// is used in rendering.
							 | 
						||
| 
								 | 
							
								func newMention(s text.Segment) *mention {
							 | 
						||
| 
								 | 
							
									return &mention{
							 | 
						||
| 
								 | 
							
										BaseInline: ast.BaseInline{},
							 | 
						||
| 
								 | 
							
										Segment:    s,
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// mentionParser is a stateless type that fulfils
								// the goldmark parser.InlineParser interface.
								type mentionParser struct{}

								// Trigger returns the byte that starts mention
								// parsing: the '@' symbol found at the beginning
								// of a mention.
								func (p *mentionParser) Trigger() []byte {
									return []byte("@")
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func (p *mentionParser) Parse(
							 | 
						||
| 
								 | 
							
									_ ast.Node,
							 | 
						||
| 
								 | 
							
									block text.Reader,
							 | 
						||
| 
								 | 
							
									_ parser.Context,
							 | 
						||
| 
								 | 
							
								) ast.Node {
							 | 
						||
| 
								 | 
							
									// If preceding character is not a valid boundary
							 | 
						||
| 
								 | 
							
									// character, then this cannot be a valid mention.
							 | 
						||
| 
								 | 
							
									if !isMentionBoundary(block.PrecendingCharacter()) {
							 | 
						||
| 
								 | 
							
										return nil
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									line, segment := block.PeekLine()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// Ascertain location of mention in the line
							 | 
						||
| 
								 | 
							
									// that starts with the trigger character.
							 | 
						||
| 
								 | 
							
									loc := regexes.MentionFinder.FindIndex(line)
							 | 
						||
| 
								 | 
							
									if loc == nil || loc[0] != 0 {
							 | 
						||
| 
								 | 
							
										// Noop if not found or
							 | 
						||
| 
								 | 
							
										// not found at start.
							 | 
						||
| 
								 | 
							
										return nil
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// Advance the block to
							 | 
						||
| 
								 | 
							
									// the end of the mention.
							 | 
						||
| 
								 | 
							
									block.Advance(loc[1])
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// mention ast.Node spans from the
							 | 
						||
| 
								 | 
							
									// beginning of this segment up to
							 | 
						||
| 
								 | 
							
									// the last character of the mention.
							 | 
						||
| 
								 | 
							
									return newMention(
							 | 
						||
| 
								 | 
							
										segment.WithStop(
							 | 
						||
| 
								 | 
							
											segment.Start + loc[1],
							 | 
						||
| 
								 | 
							
										),
							 | 
						||
| 
								 | 
							
									)
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
									HASHTAG PARSER STUFF
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// hashtag fulfils the goldmark
							 | 
						||
| 
								 | 
							
								// ast.Node interface.
							 | 
						||
| 
								 | 
							
								type hashtag struct {
							 | 
						||
| 
								 | 
							
									ast.BaseInline
							 | 
						||
| 
								 | 
							
									Segment text.Segment
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								var kindHashtag = ast.NewNodeKind("Hashtag")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func (n *hashtag) Kind() ast.NodeKind {
							 | 
						||
| 
								 | 
							
									return kindHashtag
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func (n *hashtag) Dump(source []byte, level int) {
							 | 
						||
| 
								 | 
							
									fmt.Printf("%sHashtag: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source)))
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// newHashtag creates a goldmark ast.Node
							 | 
						||
| 
								 | 
							
								// from a text.Segment. The contained segment
							 | 
						||
| 
								 | 
							
								// is used in rendering.
							 | 
						||
| 
								 | 
							
								func newHashtag(s text.Segment) *hashtag {
							 | 
						||
| 
								 | 
							
									return &hashtag{
							 | 
						||
| 
								 | 
							
										BaseInline: ast.BaseInline{},
							 | 
						||
| 
								 | 
							
										Segment:    s,
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// hashtagParser is a stateless type providing the
								// Trigger and Parse methods of a goldmark inline
								// parser for hashtags.
								type hashtagParser struct{}

								// Trigger returns the byte that starts hashtag
								// parsing: the '#' symbol found at the beginning
								// of a hashtag.
								func (p *hashtagParser) Trigger() []byte {
									return []byte("#")
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func (p *hashtagParser) Parse(
							 | 
						||
| 
								 | 
							
									_ ast.Node,
							 | 
						||
| 
								 | 
							
									block text.Reader,
							 | 
						||
| 
								 | 
							
									_ parser.Context,
							 | 
						||
| 
								 | 
							
								) ast.Node {
							 | 
						||
| 
								 | 
							
									// If preceding character is not a valid boundary
							 | 
						||
| 
								 | 
							
									// character, then this cannot be a valid hashtag.
							 | 
						||
| 
								 | 
							
									if !isHashtagBoundary(block.PrecendingCharacter()) {
							 | 
						||
| 
								 | 
							
										return nil
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									var (
							 | 
						||
| 
								 | 
							
										line, segment = block.PeekLine()
							 | 
						||
| 
								 | 
							
										lineStr       = string(line)
							 | 
						||
| 
								 | 
							
										lineStrLen    = len(lineStr)
							 | 
						||
| 
								 | 
							
									)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									if lineStrLen <= 1 {
							 | 
						||
| 
								 | 
							
										// This is probably just
							 | 
						||
| 
								 | 
							
										// a lonely '#' char.
							 | 
						||
| 
								 | 
							
										return nil
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// Iterate through the runes in the detected
							 | 
						||
| 
								 | 
							
									// hashtag string until we reach either:
							 | 
						||
| 
								 | 
							
									//   - A weird character (bad).
							 | 
						||
| 
								 | 
							
									//   - The end of the hashtag (ok).
							 | 
						||
| 
								 | 
							
									//   - The end of the string (also ok).
							 | 
						||
| 
								 | 
							
									for i, r := range lineStr {
							 | 
						||
| 
								 | 
							
										switch {
							 | 
						||
| 
								 | 
							
										case r == '#' && i == 0:
							 | 
						||
| 
								 | 
							
											// Ignore initial '#'.
							 | 
						||
| 
								 | 
							
											continue
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										case !isPlausiblyInHashtag(r) &&
							 | 
						||
| 
								 | 
							
											!isHashtagBoundary(r):
							 | 
						||
| 
								 | 
							
											// Weird non-boundary character
							 | 
						||
| 
								 | 
							
											// in the hashtag. Don't trust it.
							 | 
						||
| 
								 | 
							
											return nil
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										case isHashtagBoundary(r):
							 | 
						||
| 
								 | 
							
											// Reached closing hashtag
							 | 
						||
| 
								 | 
							
											// boundary. Advance block
							 | 
						||
| 
								 | 
							
											// to the end of the hashtag.
							 | 
						||
| 
								 | 
							
											block.Advance(i)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
											// hashtag ast.Node spans from
							 | 
						||
| 
								 | 
							
											// the beginning of this segment
							 | 
						||
| 
								 | 
							
											// up to the boundary character.
							 | 
						||
| 
								 | 
							
											return newHashtag(
							 | 
						||
| 
								 | 
							
												segment.WithStop(
							 | 
						||
| 
								 | 
							
													segment.Start + i,
							 | 
						||
| 
								 | 
							
												),
							 | 
						||
| 
								 | 
							
											)
							 | 
						||
| 
								 | 
							
										}
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// No invalid or boundary characters before the
							 | 
						||
| 
								 | 
							
									// end of the line: it's all hashtag, baby 😎
							 | 
						||
| 
								 | 
							
									//
							 | 
						||
| 
								 | 
							
									// Advance block to the end of the segment.
							 | 
						||
| 
								 | 
							
									block.Advance(segment.Len())
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// hashtag ast.Node spans
							 | 
						||
| 
								 | 
							
									// the entire segment.
							 | 
						||
| 
								 | 
							
									return newHashtag(segment)
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
									EMOJI PARSER STUFF
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// emoji fulfils the goldmark
							 | 
						||
| 
								 | 
							
								// ast.Node interface.
							 | 
						||
| 
								 | 
							
								type emoji struct {
							 | 
						||
| 
								 | 
							
									ast.BaseInline
							 | 
						||
| 
								 | 
							
									Segment text.Segment
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								var kindEmoji = ast.NewNodeKind("Emoji")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func (n *emoji) Kind() ast.NodeKind {
							 | 
						||
| 
								 | 
							
									return kindEmoji
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func (n *emoji) Dump(source []byte, level int) {
							 | 
						||
| 
								 | 
							
									fmt.Printf("%sEmoji: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source)))
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// newEmoji creates a goldmark ast.Node
							 | 
						||
| 
								 | 
							
								// from a text.Segment. The contained
							 | 
						||
| 
								 | 
							
								// segment is used in rendering.
							 | 
						||
| 
								 | 
							
								func newEmoji(s text.Segment) *emoji {
							 | 
						||
| 
								 | 
							
									return &emoji{
							 | 
						||
| 
								 | 
							
										BaseInline: ast.BaseInline{},
							 | 
						||
| 
								 | 
							
										Segment:    s,
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// emojiParser is a stateless type providing the
								// Trigger and Parse methods of a goldmark inline
								// parser for emojis.
								type emojiParser struct{}

								// Trigger returns the byte that starts emoji
								// parsing: the ':' char found at the start
								// of an emoji.
								func (p *emojiParser) Trigger() []byte {
									return []byte(":")
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func (p *emojiParser) Parse(
							 | 
						||
| 
								 | 
							
									_ ast.Node,
							 | 
						||
| 
								 | 
							
									block text.Reader,
							 | 
						||
| 
								 | 
							
									_ parser.Context,
							 | 
						||
| 
								 | 
							
								) ast.Node {
							 | 
						||
| 
								 | 
							
									line, segment := block.PeekLine()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// Ascertain location of emoji in the line
							 | 
						||
| 
								 | 
							
									// that starts with the trigger character.
							 | 
						||
| 
								 | 
							
									loc := regexes.EmojiFinder.FindIndex(line)
							 | 
						||
| 
								 | 
							
									if loc == nil || loc[0] != 0 {
							 | 
						||
| 
								 | 
							
										// Noop if not found or
							 | 
						||
| 
								 | 
							
										// not found at start.
							 | 
						||
| 
								 | 
							
										return nil
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// Advance the block to
							 | 
						||
| 
								 | 
							
									// the end of the emoji.
							 | 
						||
| 
								 | 
							
									block.Advance(loc[1])
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// emoji ast.Node spans from the
							 | 
						||
| 
								 | 
							
									// beginning of this segment up to
							 | 
						||
| 
								 | 
							
									// the last character of the emoji.
							 | 
						||
| 
								 | 
							
									return newEmoji(
							 | 
						||
| 
								 | 
							
										segment.WithStop(
							 | 
						||
| 
								 | 
							
											segment.Start + loc[1],
							 | 
						||
| 
								 | 
							
										),
							 | 
						||
| 
								 | 
							
									)
							 | 
						||
| 
								 | 
							
								}
							 |