gotosocial/internal/text/goldmark_parsers.go

// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

package text

import (
	"fmt"
	"strings"

	"github.com/superseriousbusiness/gotosocial/internal/regexes"
	"github.com/yuin/goldmark/ast"
	"github.com/yuin/goldmark/parser"
	"github.com/yuin/goldmark/text"
)

/*
	MENTION PARSER STUFF
*/

// mention fulfils the goldmark
// ast.Node interface.
//
// It is the inline node produced
// by mentionParser.Parse.
type mention struct {
	ast.BaseInline
	// Segment is the span of source bytes
	// covered by the mention, starting at
	// the '@' trigger character.
	Segment text.Segment
}

// kindMention is the ast.NodeKind created
// for mention nodes, named "Mention".
var kindMention = ast.NewNodeKind("Mention")

// Kind returns kindMention, identifying
// this node as a mention.
func (n *mention) Kind() ast.NodeKind {
	return kindMention
}

// Dump prints a one-line description of this mention to
// standard output: four spaces of indentation per level,
// followed by the bytes of source covered by the Segment.
func (n *mention) Dump(source []byte, level int) {
	indent := strings.Repeat("    ", level)

	// The %s verb prints a byte slice as its
	// text, so no explicit conversion is needed.
	value := n.Segment.Value(source)

	fmt.Printf("%sMention: %s\n", indent, value)
}

// newMention wraps the given text.Segment
// in a mention node. The segment is kept
// on the node for use during rendering.
func newMention(s text.Segment) *mention {
	// The embedded BaseInline is left at
	// its zero value; only Segment is set.
	return &mention{Segment: s}
}

// mentionParser fulfils the goldmark
// parser.InlineParser interface.
//
// It carries no state; its Trigger and
// Parse methods produce mention nodes.
type mentionParser struct{}

// Trigger returns the single byte that starts
// mention parsing: the '@' symbol that appears
// at the beginning of a mention.
func (p *mentionParser) Trigger() []byte {
	return []byte("@")
}

// Parse tries to read a mention starting at the reader's
// current position. On success it advances the reader past
// the mention and returns a mention node; otherwise it
// returns nil without advancing the reader.
func (p *mentionParser) Parse(
	_ ast.Node,
	block text.Reader,
	_ parser.Context,
) ast.Node {
	// A mention can only begin directly
	// after a valid boundary character.
	if prev := block.PrecendingCharacter(); !isMentionBoundary(prev) {
		return nil
	}

	// Search the remainder of the current line. Only
	// a match anchored at offset 0 (i.e. at the trigger
	// character) counts; anything else is ignored.
	rest, seg := block.PeekLine()
	match := regexes.MentionFinder.FindIndex(rest)
	if match == nil || match[0] != 0 {
		return nil
	}
	end := match[1]

	// Consume the matched bytes.
	block.Advance(end)

	// The node's segment begins where the peeked
	// line begins, and stops where the match ends.
	return newMention(seg.WithStop(seg.Start + end))
}

/*
	HASHTAG PARSER STUFF
*/

// hashtag fulfils the goldmark
// ast.Node interface.
//
// It is the inline node produced
// by hashtagParser.Parse.
type hashtag struct {
	ast.BaseInline
	// Segment is the span of source bytes
	// covered by the hashtag, starting at
	// the '#' trigger character.
	Segment text.Segment
}

// kindHashtag is the ast.NodeKind created
// for hashtag nodes, named "Hashtag".
var kindHashtag = ast.NewNodeKind("Hashtag")

// Kind returns kindHashtag, identifying
// this node as a hashtag.
func (n *hashtag) Kind() ast.NodeKind {
	return kindHashtag
}

// Dump prints a one-line description of this hashtag to
// standard output: four spaces of indentation per level,
// followed by the bytes of source covered by the Segment.
func (n *hashtag) Dump(source []byte, level int) {
	indent := strings.Repeat("    ", level)

	// The %s verb prints a byte slice as its
	// text, so no explicit conversion is needed.
	value := n.Segment.Value(source)

	fmt.Printf("%sHashtag: %s\n", indent, value)
}

// newHashtag wraps the given text.Segment
// in a hashtag node. The segment is kept
// on the node for use during rendering.
func newHashtag(s text.Segment) *hashtag {
	// The embedded BaseInline is left at
	// its zero value; only Segment is set.
	return &hashtag{Segment: s}
}

// hashtagParser is a stateless parser whose
// Trigger and Parse methods produce hashtag
// nodes, mirroring mentionParser's method set.
type hashtagParser struct{}

// Trigger returns the single byte that starts
// hashtag parsing: the '#' symbol that appears
// at the beginning of a hashtag.
func (p *hashtagParser) Trigger() []byte {
	return []byte("#")
}

// Parse tries to read a hashtag starting at the reader's
// current position, which sits on the '#' trigger character.
//
// On success it advances the reader to the end of the hashtag
// and returns a hashtag node whose Segment spans from the '#'
// up to (not including) the closing boundary character, or to
// the end of the peeked line if no boundary is met.
//
// It returns nil, without advancing the reader, if:
//   - the character preceding the '#' is not a hashtag boundary;
//   - nothing follows the '#' on the line;
//   - a boundary character directly follows the '#', so that
//     there is no tag text at all (e.g. "a # b");
//   - a character that is neither plausible inside a hashtag
//     nor a boundary is met before the hashtag ends.
func (p *hashtagParser) Parse(
	_ ast.Node,
	block text.Reader,
	_ parser.Context,
) ast.Node {
	// If preceding character is not a valid boundary
	// character, then this cannot be a valid hashtag.
	if !isHashtagBoundary(block.PrecendingCharacter()) {
		return nil
	}

	var (
		line, segment = block.PeekLine()
		lineStr       = string(line)
		lineStrLen    = len(lineStr)
	)

	if lineStrLen <= 1 {
		// This is probably just
		// a lonely '#' char.
		return nil
	}

	// Iterate through the runes in the detected
	// hashtag string until we reach either:
	//   - A weird character (bad).
	//   - The end of the hashtag (ok).
	//   - The end of the string (also ok).
	//
	// Note that i is a byte offset into
	// lineStr, which is what both Advance
	// and the segment bounds operate on.
	for i, r := range lineStr {
		switch {
		case r == '#' && i == 0:
			// Ignore initial '#'.
			continue

		case !isPlausiblyInHashtag(r) &&
			!isHashtagBoundary(r):
			// Weird non-boundary character
			// in the hashtag. Don't trust it.
			return nil

		case isHashtagBoundary(r):
			if i <= 1 {
				// The boundary comes straight after
				// the '#', so there's no tag text:
				// this is a lonely '#' followed by
				// a space, punctuation, etc. Don't
				// emit a hashtag node for just "#".
				return nil
			}

			// Reached closing hashtag
			// boundary. Advance block
			// to the end of the hashtag.
			block.Advance(i)

			// hashtag ast.Node spans from
			// the beginning of this segment
			// up to the boundary character.
			return newHashtag(
				segment.WithStop(
					segment.Start + i,
				),
			)
		}
	}

	// No invalid or boundary characters before the
	// end of the line: it's all hashtag, baby 😎
	//
	// Advance block to the end of the segment.
	block.Advance(segment.Len())

	// hashtag ast.Node spans
	// the entire segment.
	return newHashtag(segment)
}

/*
	EMOJI PARSER STUFF
*/

// emoji fulfils the goldmark
// ast.Node interface.
//
// It is the inline node produced
// by emojiParser.Parse.
type emoji struct {
	ast.BaseInline
	// Segment is the span of source bytes
	// covered by the emoji, starting at
	// the ':' trigger character.
	Segment text.Segment
}

// kindEmoji is the ast.NodeKind created
// for emoji nodes, named "Emoji".
var kindEmoji = ast.NewNodeKind("Emoji")

// Kind returns kindEmoji, identifying
// this node as an emoji.
func (n *emoji) Kind() ast.NodeKind {
	return kindEmoji
}

// Dump prints a one-line description of this emoji to
// standard output: four spaces of indentation per level,
// followed by the bytes of source covered by the Segment.
func (n *emoji) Dump(source []byte, level int) {
	indent := strings.Repeat("    ", level)

	// The %s verb prints a byte slice as its
	// text, so no explicit conversion is needed.
	value := n.Segment.Value(source)

	fmt.Printf("%sEmoji: %s\n", indent, value)
}

// newEmoji wraps the given text.Segment
// in an emoji node. The segment is kept
// on the node for use during rendering.
func newEmoji(s text.Segment) *emoji {
	// The embedded BaseInline is left at
	// its zero value; only Segment is set.
	return &emoji{Segment: s}
}

// emojiParser is a stateless parser whose
// Trigger and Parse methods produce emoji
// nodes, mirroring mentionParser's method set.
type emojiParser struct{}

// Trigger returns the single byte that starts
// emoji parsing: the ':' char that appears
// at the start of the emoji.
func (p *emojiParser) Trigger() []byte {
	return []byte(":")
}

// Parse tries to read an emoji starting at the reader's
// current position. On success it advances the reader past
// the emoji and returns an emoji node; otherwise it returns
// nil without advancing the reader.
func (p *emojiParser) Parse(
	_ ast.Node,
	block text.Reader,
	_ parser.Context,
) ast.Node {
	// Search the remainder of the current line. Only
	// a match anchored at offset 0 (i.e. at the trigger
	// character) counts; anything else is ignored.
	rest, seg := block.PeekLine()
	match := regexes.EmojiFinder.FindIndex(rest)
	if match == nil || match[0] != 0 {
		return nil
	}
	end := match[1]

	// Consume the matched bytes.
	block.Advance(end)

	// The node's segment begins where the peeked
	// line begins, and stops where the match ends.
	return newEmoji(seg.WithStop(seg.Start + end))
}
[chore/bugfix] Deinterface text.Formatter, allow underscores in hashtags (#2233) 2023-09-29 10:39:56 +02:00			`// GoToSocial`
			`// Copyright (C) GoToSocial Authors admin@gotosocial.org`
			`// SPDX-License-Identifier: AGPL-3.0-or-later`
			`//`
			`// This program is free software: you can redistribute it and/or modify`
			`// it under the terms of the GNU Affero General Public License as published by`
			`// the Free Software Foundation, either version 3 of the License, or`
			`// (at your option) any later version.`
			`//`
			`// This program is distributed in the hope that it will be useful,`
			`// but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`// GNU Affero General Public License for more details.`
			`//`
			`// You should have received a copy of the GNU Affero General Public License`
			`// along with this program. If not, see <http://www.gnu.org/licenses/>.`

			`package text`

			`import (`
			`"fmt"`
			`"strings"`

			`"github.com/superseriousbusiness/gotosocial/internal/regexes"`
			`"github.com/yuin/goldmark/ast"`
			`"github.com/yuin/goldmark/parser"`
			`"github.com/yuin/goldmark/text"`
			`)`

			`/*`
			`MENTION PARSER STUFF`
			`*/`

			`// mention fulfils the goldmark`
			`// ast.Node interface.`
			`type mention struct {`
			`ast.BaseInline`
			`Segment text.Segment`
			`}`

			`var kindMention = ast.NewNodeKind("Mention")`

			`func (n *mention) Kind() ast.NodeKind {`
			`return kindMention`
			`}`

			`func (n *mention) Dump(source []byte, level int) {`
			`fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))`
			`}`

			`// newMention creates a goldmark ast.Node`
			`// from a text.Segment. The contained segment`
			`// is used in rendering.`
			`func newMention(s text.Segment) *mention {`
			`return &mention{`
			`BaseInline: ast.BaseInline{},`
			`Segment: s,`
			`}`
			`}`

			`// mentionParser fulfils the goldmark`
			`// parser.InlineParser interface.`
			`type mentionParser struct{}`

			// Mention parsing is triggered by the `@` symbol
			`// which appears at the beginning of a mention.`
			`func (p *mentionParser) Trigger() []byte {`
			`return []byte{'@'}`
			`}`

			`func (p *mentionParser) Parse(`
			`_ ast.Node,`
			`block text.Reader,`
			`_ parser.Context,`
			`) ast.Node {`
			`// If preceding character is not a valid boundary`
			`// character, then this cannot be a valid mention.`
			`if !isMentionBoundary(block.PrecendingCharacter()) {`
			`return nil`
			`}`

			`line, segment := block.PeekLine()`

			`// Ascertain location of mention in the line`
			`// that starts with the trigger character.`
			`loc := regexes.MentionFinder.FindIndex(line)`
			`if loc == nil || loc[0] != 0 {`
			`// Noop if not found or`
			`// not found at start.`
			`return nil`
			`}`

			`// Advance the block to`
			`// the end of the mention.`
			`block.Advance(loc[1])`

			`// mention ast.Node spans from the`
			`// beginning of this segment up to`
			`// the last character of the mention.`
			`return newMention(`
			`segment.WithStop(`
			`segment.Start + loc[1],`
			`),`
			`)`
			`}`

			`/*`
			`HASHTAG PARSER STUFF`
			`*/`

			`// hashtag fulfils the goldmark`
			`// ast.Node interface.`
			`type hashtag struct {`
			`ast.BaseInline`
			`Segment text.Segment`
			`}`

			`var kindHashtag = ast.NewNodeKind("Hashtag")`

			`func (n *hashtag) Kind() ast.NodeKind {`
			`return kindHashtag`
			`}`

			`func (n *hashtag) Dump(source []byte, level int) {`
			`fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))`
			`}`

			`// newHashtag creates a goldmark ast.Node`
			`// from a text.Segment. The contained segment`
			`// is used in rendering.`
			`func newHashtag(s text.Segment) *hashtag {`
			`return &hashtag{`
			`BaseInline: ast.BaseInline{},`
			`Segment: s,`
			`}`
			`}`

			`type hashtagParser struct{}`

			`// Hashtag parsing is triggered by a '#' symbol`
			`// which appears at the beginning of a hashtag.`
			`func (p *hashtagParser) Trigger() []byte {`
			`return []byte{'#'}`
			`}`

			`func (p *hashtagParser) Parse(`
			`_ ast.Node,`
			`block text.Reader,`
			`_ parser.Context,`
			`) ast.Node {`
			`// If preceding character is not a valid boundary`
			`// character, then this cannot be a valid hashtag.`
			`if !isHashtagBoundary(block.PrecendingCharacter()) {`
			`return nil`
			`}`

			`var (`
			`line, segment = block.PeekLine()`
			`lineStr = string(line)`
			`lineStrLen = len(lineStr)`
			`)`

			`if lineStrLen <= 1 {`
			`// This is probably just`
			`// a lonely '#' char.`
			`return nil`
			`}`

			`// Iterate through the runes in the detected`
			`// hashtag string until we reach either:`
			`// - A weird character (bad).`
			`// - The end of the hashtag (ok).`
			`// - The end of the string (also ok).`
			`for i, r := range lineStr {`
			`switch {`
			`case r == '#' && i == 0:`
			`// Ignore initial '#'.`
			`continue`

			`case !isPlausiblyInHashtag(r) &&`
			`!isHashtagBoundary(r):`
			`// Weird non-boundary character`
			`// in the hashtag. Don't trust it.`
			`return nil`

			`case isHashtagBoundary(r):`
			`// Reached closing hashtag`
			`// boundary. Advance block`
			`// to the end of the hashtag.`
			`block.Advance(i)`

			`// hashtag ast.Node spans from`
			`// the beginning of this segment`
			`// up to the boundary character.`
			`return newHashtag(`
			`segment.WithStop(`
			`segment.Start + i,`
			`),`
			`)`
			`}`
			`}`

			`// No invalid or boundary characters before the`
			`// end of the line: it's all hashtag, baby 😎`
			`//`
			`// Advance block to the end of the segment.`
			`block.Advance(segment.Len())`

			`// hashtag ast.Node spans`
			`// the entire segment.`
			`return newHashtag(segment)`
			`}`

			`/*`
			`EMOJI PARSER STUFF`
			`*/`

			`// emoji fulfils the goldmark`
			`// ast.Node interface.`
			`type emoji struct {`
			`ast.BaseInline`
			`Segment text.Segment`
			`}`

			`var kindEmoji = ast.NewNodeKind("Emoji")`

			`func (n *emoji) Kind() ast.NodeKind {`
			`return kindEmoji`
			`}`

			`func (n *emoji) Dump(source []byte, level int) {`
			`fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))`
			`}`

			`// newEmoji creates a goldmark ast.Node`
			`// from a text.Segment. The contained`
			`// segment is used in rendering.`
			`func newEmoji(s text.Segment) *emoji {`
			`return &emoji{`
			`BaseInline: ast.BaseInline{},`
			`Segment: s,`
			`}`
			`}`

			`type emojiParser struct{}`

			`// Emoji parsing is triggered by a ':' char`
			`// which appears at the start of the emoji.`
			`func (p *emojiParser) Trigger() []byte {`
			`return []byte{':'}`
			`}`

			`func (p *emojiParser) Parse(`
			`_ ast.Node,`
			`block text.Reader,`
			`_ parser.Context,`
			`) ast.Node {`
			`line, segment := block.PeekLine()`

			`// Ascertain location of emoji in the line`
			`// that starts with the trigger character.`
			`loc := regexes.EmojiFinder.FindIndex(line)`
			`if loc == nil || loc[0] != 0 {`
			`// Noop if not found or`
			`// not found at start.`
			`return nil`
			`}`

			`// Advance the block to`
			`// the end of the emoji.`
			`block.Advance(loc[1])`

			`// emoji ast.Node spans from the`
			`// beginning of this segment up to`
			`// the last character of the emoji.`
			`return newEmoji(`
			`segment.WithStop(`
			`segment.Start + loc[1],`
			`),`
			`)`
			`}`