Disallow hashtags made up only of numbers, marks, and underscores

This commit is contained in:
Vyr Cossont 2025-01-30 15:15:54 -08:00
commit 636a4d9ad0
2 changed files with 22 additions and 18 deletions

View file

@@ -118,20 +118,20 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
` `
tags := suite.FromPlain(statusText).Tags tags := suite.FromPlain(statusText).Tags
suite.Len(tags, 13) if suite.Len(tags, 12) {
suite.Equal("testing123", tags[0].Name) suite.Equal("testing123", tags[0].Name)
suite.Equal("also", tags[1].Name) suite.Equal("also", tags[1].Name)
suite.Equal("thisshouldwork", tags[2].Name) suite.Equal("thisshouldwork", tags[2].Name)
suite.Equal("dupe", tags[3].Name) suite.Equal("dupe", tags[3].Name)
suite.Equal("ThisShouldAlsoWork", tags[4].Name) suite.Equal("ThisShouldAlsoWork", tags[4].Name)
suite.Equal("this_should_not_be_split", tags[5].Name) suite.Equal("this_should_not_be_split", tags[5].Name)
suite.Equal("111111", tags[6].Name) suite.Equal("alimentación", tags[6].Name)
suite.Equal("alimentación", tags[7].Name) suite.Equal("saúde", tags[7].Name)
suite.Equal("saúde", tags[8].Name) suite.Equal("lävistää", tags[8].Name)
suite.Equal("lävistää", tags[9].Name) suite.Equal("ö", tags[9].Name)
suite.Equal("ö", tags[10].Name) suite.Equal("", tags[10].Name)
suite.Equal("", tags[11].Name) suite.Equal("ThisOneIsThirteyCharactersLong", tags[11].Name)
suite.Equal("ThisOneIsThirteyCharactersLong", tags[12].Name) }
statusText = `#올빼미 hej` statusText = `#올빼미 hej`
tags = suite.FromPlain(statusText).Tags tags = suite.FromPlain(statusText).Tags
@@ -177,6 +177,12 @@ func (suite *PlainTestSuite) TestZalgoHashtag() {
} }
} }
func (suite *PlainTestSuite) TestNumbersAreNotHashtags() {
statusText := `yo who else thinks #19_98 is #1?`
f := suite.FromPlain(statusText)
suite.Len(f.Tags, 0)
}
func TestPlainTestSuite(t *testing.T) { func TestPlainTestSuite(t *testing.T) {
suite.Run(t, new(PlainTestSuite)) suite.Run(t, new(PlainTestSuite))
} }

View file

@@ -20,15 +20,13 @@ package text
import "unicode" import "unicode"
func isPermittedInHashtag(r rune) bool { func isPermittedInHashtag(r rune) bool {
return unicode.IsLetter(r) || return unicode.IsLetter(r) || isPermittedIfNotEntireHashtag(r)
unicode.IsNumber(r) ||
isPermittedIfNotEntireHashtag(r)
} }
// isPermittedIfNotEntireHashtag is true for characters that may be in a hashtag // isPermittedIfNotEntireHashtag is true for characters that may be in a hashtag
// but are not allowed to be the only characters making up the hashtag. // but are not allowed to be the only characters making up the hashtag.
func isPermittedIfNotEntireHashtag(r rune) bool { func isPermittedIfNotEntireHashtag(r rune) bool {
return unicode.IsMark(r) || r == '_' return unicode.IsNumber(r) || unicode.IsMark(r) || r == '_'
} }
// isHashtagBoundary returns true if rune r // isHashtagBoundary returns true if rune r