[bugfix] Make hashtag regex work with non-ascii characters (#682)

This commit is contained in:
tobi 2022-07-03 11:03:03 +02:00 committed by GitHub
commit 664713ddd4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 4 deletions

View file

@ -71,8 +71,8 @@ var (
// MentionFinder extracts mentions from a piece of text.
MentionFinder = regexp.MustCompile(mentionFinder)
// hashtag regex can be played with here: https://regex101.com/r/bPxeca/1
hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[a-zA-Z0-9]{1,%d})(?:#|\b)`, maximumHashtagLength)
// hashtag regex can be played with here: https://regex101.com/r/bpyGlj/1
hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[\p{L}\p{N}]{1,%d})(?:#|\b)`, maximumHashtagLength)
// HashtagFinder finds possible hashtags in a string.
// It returns just the string part of the hashtag, not the # symbol.
HashtagFinder = regexp.MustCompile(hashtagFinder)