mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-10-29 09:52:26 -05:00
[bugfix] Fix unicode-unaware word boundary check in hashtags (#1049)
* [bugfix] Fix unicode-unaware word boundary check in hashtag regex. Go's `\b` is not Unicode-aware, and without lookahead the workarounds got very ugly, so I replaced the regex with a parser. The parser runs in O(n) time, so performance should not be affected. * [bugfix] Add back hashtag max length and add tests for it.
This commit is contained in:
parent
fece7fa706
commit
52109776f6
4 changed files with 146 additions and 45 deletions
|
|
@@ -47,7 +47,6 @@ const (
|
|||
const (
|
||||
maximumUsernameLength = 64
|
||||
maximumEmojiShortcodeLength = 30
|
||||
maximumHashtagLength = 30
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
@@ -66,17 +65,11 @@ var (
|
|||
// such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols)
|
||||
MentionName = regexp.MustCompile(mentionName)
|
||||
|
||||
// mention regex can be played around with here: https://regex101.com/r/G1oGR0/1
|
||||
// mention regex can be played around with here: https://regex101.com/r/P0vpYG/1
|
||||
mentionFinder = `(?:^|\s)(@\w+(?:@[a-zA-Z0-9_\-\.]+)?)`
|
||||
// MentionFinder extracts mentions from a piece of text.
|
||||
MentionFinder = regexp.MustCompile(mentionFinder)
|
||||
|
||||
// hashtag regex can be played with here: https://regex101.com/r/bpyGlj/1
|
||||
hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[\p{L}\p{N}]{1,%d})(?:#|\b)`, maximumHashtagLength)
|
||||
// HashtagFinder finds possible hashtags in a string.
|
||||
// It returns just the string part of the hashtag, not the # symbol.
|
||||
HashtagFinder = regexp.MustCompile(hashtagFinder)
|
||||
|
||||
emojiShortcode = fmt.Sprintf(`\w{2,%d}`, maximumEmojiShortcodeLength)
|
||||
// EmojiShortcode validates an emoji name.
|
||||
EmojiShortcode = regexp.MustCompile(fmt.Sprintf("^%s$", emojiShortcode))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue