Link hashtag bug (#121)

* link + hashtag bug

* remove printlns

* tidy up some duplicated code
This commit is contained in:
Tobi Smethurst 2021-07-29 13:18:22 +02:00 committed by GitHub
commit a940a520d3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 349 additions and 97 deletions

View file

@ -30,25 +30,26 @@ const (
)
var (
mentionNameRegexString = `^@([a-zA-Z0-9_]+)(?:@([a-zA-Z0-9_\-\.]+)?)$`
mentionNameRegexString = `^@(\w+)(?:@([a-zA-Z0-9_\-\.]+)?)$`
// mention name regex captures the username and domain part from a mention string
// such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols)
mentionNameRegex = regexp.MustCompile(mentionNameRegexString)
// mention regex can be played around with here: https://regex101.com/r/qwM9D3/1
mentionFinderRegexString = `(?: |^|\W)(@[a-zA-Z0-9_]+(?:@[a-zA-Z0-9_\-\.]+)?)(?:[^a-zA-Z0-9]|\W|$)?`
mentionFinderRegexString = `(?:\B)(@\w+(?:@[a-zA-Z0-9_\-\.]+)?)(?:\B)?`
mentionFinderRegex = regexp.MustCompile(mentionFinderRegexString)
// hashtag regex can be played with here: https://regex101.com/r/Vhy8pg/1
hashtagFinderRegexString = fmt.Sprintf(`(?:\b)?#(\w{1,%d})(?:\b)`, maximumHashtagLength)
hashtagFinderRegex = regexp.MustCompile(hashtagFinderRegexString)
// hashtag regex can be played with here: https://regex101.com/r/bPxeca/1
hashtagFinderRegexString = fmt.Sprintf(`(?:^|\n|\s)(#[a-zA-Z0-9]{1,%d})(?:\b)`, maximumHashtagLength)
// HashtagFinderRegex finds possible hashtags in a string.
// Its capture group includes the leading # symbol, so callers must strip it to get just the tag text.
HashtagFinderRegex = regexp.MustCompile(hashtagFinderRegexString)
// emoji shortcode regex can be played with here: https://regex101.com/r/zMDRaG/1
emojiShortcodeRegexString = fmt.Sprintf(`[a-z0-9_]{2,%d}`, maximumEmojiShortcodeLength)
emojiShortcodeRegexString = fmt.Sprintf(`\w{2,%d}`, maximumEmojiShortcodeLength)
emojiShortcodeValidationRegex = regexp.MustCompile(fmt.Sprintf("^%s$", emojiShortcodeRegexString))
// emoji regex can be played with here: https://regex101.com/r/478XGM/1
emojiFinderRegexString = fmt.Sprintf(`(?: |^|\W)?:(%s):(?:\b|\r)?`, emojiShortcodeRegexString)
emojiFinderRegexString = fmt.Sprintf(`(?:\B)?:(%s):(?:\B)?`, emojiShortcodeRegexString)
emojiFinderRegex = regexp.MustCompile(emojiFinderRegexString)
// usernameRegexString defines an acceptable username on this instance

View file

@ -29,7 +29,6 @@ import (
//
// It will look for fully-qualified account names in the form "@user@example.org".
// or the form "@username" for local users.
// The case of the returned mentions will be lowered, for consistency.
func DeriveMentionsFromStatus(status string) []string {
mentionedAccounts := []string{}
for _, m := range mentionFinderRegex.FindAllStringSubmatch(status, -1) {
@ -44,16 +43,15 @@ func DeriveMentionsFromStatus(status string) []string {
// tags will be lowered, for consistency.
func DeriveHashtagsFromStatus(status string) []string {
tags := []string{}
for _, m := range hashtagFinderRegex.FindAllStringSubmatch(status, -1) {
tags = append(tags, m[1])
for _, m := range HashtagFinderRegex.FindAllStringSubmatch(status, -1) {
// m[1] is the first capture group, which still carries the leading '#'; strip it so only the tag text is kept.
tags = append(tags, strings.TrimPrefix(m[1], "#"))
}
return unique(tags)
// uniqueLower deduplicates the collected tags and converts them to lowercase.
return uniqueLower(tags)
}
// DeriveEmojisFromStatus takes a plaintext (ie., not html-formatted) status,
// and applies a regex to it to return a deduplicated list of emojis
// used in that status, without the surround ::. The case of the returned
// emojis will be lowered, for consistency.
// used in that status, without the surrounding ::.
func DeriveEmojisFromStatus(status string) []string {
emojis := []string{}
for _, m := range emojiFinderRegex.FindAllStringSubmatch(status, -1) {
@ -94,3 +92,17 @@ func unique(s []string) []string {
}
return list
}
// uniqueLower lowercases every entry of s and returns the results with
// duplicates removed, keeping the order in which each lowercased value first
// appears. The returned slice is always non-nil, even when s is empty.
func uniqueLower(s []string) []string {
	seen := make(map[string]bool)
	result := []string{}
	for i := 0; i < len(s); i++ {
		lowered := strings.ToLower(s[i])
		if seen[lowered] {
			// Already emitted this value (compared case-insensitively); skip it.
			continue
		}
		seen[lowered] = true
		result = append(result, lowered)
	}
	return result
}

View file

@ -37,17 +37,22 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() {
@someone_else@testing.best-horse.com can you confirm? @hello@test.lgbt
@thisisalocaluser ! @NORWILL@THIS.one!!
@thisisalocaluser!
here is a duplicate mention: @hello@test.lgbt @hello@test.lgbt
@account1@whatever.com @account2@whatever.com
here is a duplicate mention: @hello@test.lgbt
`
menchies := util.DeriveMentionsFromStatus(statusText)
assert.Len(suite.T(), menchies, 4)
assert.Len(suite.T(), menchies, 6)
assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0])
assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1])
assert.Equal(suite.T(), "@hello@test.lgbt", menchies[2])
assert.Equal(suite.T(), "@thisisalocaluser", menchies[3])
assert.Equal(suite.T(), "@account1@whatever.com", menchies[4])
assert.Equal(suite.T(), "@account2@whatever.com", menchies[5])
}
func (suite *StatusTestSuite) TestDeriveMentionsEmpty() {
@ -57,12 +62,14 @@ func (suite *StatusTestSuite) TestDeriveMentionsEmpty() {
}
func (suite *StatusTestSuite) TestDeriveHashtagsOK() {
statusText := `#testing123 #also testing
statusText := `weeeeeeee #testing123 #also testing
# testing this one shouldn't work
#thisshouldwork
here's a link with a fragment: https://example.org/whatever#ahhh
#ThisShouldAlsoWork #not_this_though
#111111 thisalsoshouldn'twork#### ##`

View file

@ -102,32 +102,32 @@ func (suite *ValidationTestSuite) TestValidateUsername() {
err = util.ValidateUsername(tooLong)
if assert.Error(suite.T(), err) {
assert.Equal(suite.T(), fmt.Errorf("username should be no more than 64 chars but '%s' was 66", tooLong), err)
assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", tooLong), err)
}
err = util.ValidateUsername(withSpaces)
if assert.Error(suite.T(), err) {
assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", withSpaces), err)
assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", withSpaces), err)
}
err = util.ValidateUsername(weirdChars)
if assert.Error(suite.T(), err) {
assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", weirdChars), err)
assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", weirdChars), err)
}
err = util.ValidateUsername(leadingSpace)
if assert.Error(suite.T(), err) {
assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", leadingSpace), err)
assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", leadingSpace), err)
}
err = util.ValidateUsername(trailingSpace)
if assert.Error(suite.T(), err) {
assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", trailingSpace), err)
assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", trailingSpace), err)
}
err = util.ValidateUsername(newlines)
if assert.Error(suite.T(), err) {
assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", newlines), err)
assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", newlines), err)
}
err = util.ValidateUsername(goodUsername)
@ -141,7 +141,6 @@ func (suite *ValidationTestSuite) TestValidateEmail() {
notAnEmailAddress := "this-is-no-email-address!"
almostAnEmailAddress := "@thisisalmostan@email.address"
aWebsite := "https://thisisawebsite.com"
tooLong := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaahhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhh@gmail.com"
emailAddress := "thisis.actually@anemail.address"
var err error
@ -165,11 +164,6 @@ func (suite *ValidationTestSuite) TestValidateEmail() {
assert.Equal(suite.T(), errors.New("mail: missing '@' or angle-addr"), err)
}
err = util.ValidateEmail(tooLong)
if assert.Error(suite.T(), err) {
assert.Equal(suite.T(), fmt.Errorf("email address should be no more than 256 chars but '%s' was 286", tooLong), err)
}
err = util.ValidateEmail(emailAddress)
if assert.NoError(suite.T(), err) {
assert.Equal(suite.T(), nil, err)