more work on parsing statuses

This commit is contained in:
tsmethurst 2021-05-12 17:31:48 +02:00
commit 884d0ecc8f
9 changed files with 251 additions and 32 deletions

View file

@ -35,6 +35,9 @@ const (
)
var (
// mentionNameRegexString matches a single mention: "@" plus a username,
// optionally followed by "@" plus a domain. The optional quantifier applies
// to the whole "@domain" suffix, so a local mention such as "@username"
// matches and leaves the domain capture group empty. Capture group 1 is the
// username and capture group 2 is the domain, both without the leading "@".
mentionNameRegexString = `@([a-zA-Z0-9_]+)(?:@([a-zA-Z0-9_\-\.]+))?`
// mentionNameRegex anchors mentionNameRegexString with ^ and $ so that it
// only matches when the entire input string is a mention.
mentionNameRegex = regexp.MustCompile(fmt.Sprintf(`^%s$`, mentionNameRegexString))
// mention regex can be played around with here: https://regex101.com/r/qwM9D3/1
mentionFinderRegexString = `(?: |^|\W)(@[a-zA-Z0-9_]+(?:@[a-zA-Z0-9_\-\.]+)?)(?: |\n)`
mentionFinderRegex = regexp.MustCompile(mentionFinderRegexString)

View file

@ -19,17 +19,18 @@
package util
import (
"fmt"
"strings"
)
// DeriveMentions takes a plaintext (ie., not html-formatted) status,
// DeriveMentionsFromStatus takes a plaintext (ie., not html-formatted) status,
// and applies a regex to it to return a deduplicated list of accounts
// mentioned in that status.
//
// It will look for fully-qualified account names in the form "@user@example.org",
// or the form "@username" for local users.
// The case of the returned mentions will be lowered, for consistency.
func DeriveMentions(status string) []string {
func DeriveMentionsFromStatus(status string) []string {
mentionedAccounts := []string{}
for _, m := range mentionFinderRegex.FindAllStringSubmatch(status, -1) {
mentionedAccounts = append(mentionedAccounts, m[1])
@ -37,11 +38,11 @@ func DeriveMentions(status string) []string {
return lower(unique(mentionedAccounts))
}
// DeriveHashtags takes a plaintext (ie., not html-formatted) status,
// DeriveHashtagsFromStatus takes a plaintext (ie., not html-formatted) status,
// and applies a regex to it to return a deduplicated list of hashtags
// used in that status, without the leading #. The case of the returned
// tags will be lowered, for consistency.
func DeriveHashtags(status string) []string {
func DeriveHashtagsFromStatus(status string) []string {
tags := []string{}
for _, m := range hashtagFinderRegex.FindAllStringSubmatch(status, -1) {
tags = append(tags, m[1])
@ -49,11 +50,11 @@ func DeriveHashtags(status string) []string {
return lower(unique(tags))
}
// DeriveEmojis takes a plaintext (ie., not html-formatted) status,
// DeriveEmojisFromStatus takes a plaintext (ie., not html-formatted) status,
// and applies a regex to it to return a deduplicated list of emojis
// used in that status, without the surrounding ::. The case of the returned
// emojis will be lowered, for consistency.
func DeriveEmojis(status string) []string {
func DeriveEmojisFromStatus(status string) []string {
emojis := []string{}
for _, m := range emojiFinderRegex.FindAllStringSubmatch(status, -1) {
emojis = append(emojis, m[1])
@ -61,6 +62,26 @@ func DeriveEmojis(status string) []string {
return lower(unique(emojis))
}
// ExtractMentionParts splits a mention string such as "@test_user@example.org"
// into its username ("test_user") and domain ("example.org") parts. Both parts
// are returned without the leading "@", because they come from the capture
// groups of mentionNameRegex.
//
// When the regex matches without capturing a domain, domain is returned as
// the empty string.
//
// If the mention does not match mentionNameRegex, an error is returned and
// both username and domain are empty.
func ExtractMentionParts(mention string) (username, domain string, err error) {
	matches := mentionNameRegex.FindStringSubmatch(mention)
	// FindStringSubmatch returns nil on no match; on a match it returns the
	// full match followed by one entry per capture group, with "" for any
	// group that did not participate. Checking for exactly three elements
	// (full match, username, domain) therefore covers the no-match case and
	// guards the indexing below.
	if len(matches) != 3 {
		return "", "", fmt.Errorf("couldn't match mention %s", mention)
	}
	return matches[1], matches[2], nil
}
// unique returns a deduplicated version of a given string slice.
func unique(s []string) []string {
keys := make(map[string]bool)

View file

@ -42,7 +42,7 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() {
here is a duplicate mention: @hello@test.lgbt
`
menchies := util.DeriveMentions(statusText)
menchies := util.DeriveMentionsFromStatus(statusText)
assert.Len(suite.T(), menchies, 4)
assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0])
assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1])
@ -52,7 +52,7 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() {
func (suite *StatusTestSuite) TestDeriveMentionsEmpty() {
statusText := ``
menchies := util.DeriveMentions(statusText)
menchies := util.DeriveMentionsFromStatus(statusText)
assert.Len(suite.T(), menchies, 0)
}
@ -67,7 +67,7 @@ func (suite *StatusTestSuite) TestDeriveHashtagsOK() {
#111111 thisalsoshouldn'twork#### ##`
tags := util.DeriveHashtags(statusText)
tags := util.DeriveHashtagsFromStatus(statusText)
assert.Len(suite.T(), tags, 5)
assert.Equal(suite.T(), "testing123", tags[0])
assert.Equal(suite.T(), "also", tags[1])
@ -90,7 +90,7 @@ Here's some normal text with an :emoji: at the end
:underscores_ok_too:
`
tags := util.DeriveEmojis(statusText)
tags := util.DeriveEmojisFromStatus(statusText)
assert.Len(suite.T(), tags, 7)
assert.Equal(suite.T(), "test", tags[0])
assert.Equal(suite.T(), "another", tags[1])