From ffba8211a8cd91571a120a65001d62906ab59d2d Mon Sep 17 00:00:00 2001 From: tobi Date: Sun, 8 Dec 2024 15:35:02 +0100 Subject: [PATCH] [feature] Allow partial-word hashtags using non-breaking spaces --- internal/text/plain_test.go | 13 +++++++++++++ internal/text/util.go | 30 ++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go index 48280bb44..fac54a38e 100644 --- a/internal/text/plain_test.go +++ b/internal/text/plain_test.go @@ -36,6 +36,8 @@ const ( moreComplexExpected = "

Another test @foss_satan

#Hashtag

Text

:rainbow:

" withUTF8Link = "here's a link with utf-8 characters in it: https://example.org/söme_url" withUTF8LinkExpected = "

here's a link with utf-8 characters in it: https://example.org/söme_url

" + withFunkyTags = "#hashtag1 pee #hashtag2\u200Bpee #hashtag3|poo #hashtag4\uFEFFpoo" + withFunkyTagsExpected = "

#hashtag1 pee #hashtag2\u200bpee #hashtag3|poo #hashtag4\ufeffpoo

" ) type PlainTestSuite struct { @@ -136,6 +138,17 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() { suite.Equal("올빼미", tags[0].Name) } +func (suite *PlainTestSuite) TestFunkyTags() { + formatted := suite.FromPlain(withFunkyTags) + suite.Equal(withFunkyTagsExpected, formatted.HTML) + + tags := formatted.Tags + suite.Equal("hashtag1", tags[0].Name) + suite.Equal("hashtag2", tags[1].Name) + suite.Equal("hashtag3", tags[2].Name) + suite.Equal("hashtag4", tags[3].Name) +} + func (suite *PlainTestSuite) TestDeriveMultiple() { statusText := `Another test @foss_satan@fossbros-anonymous.io diff --git a/internal/text/util.go b/internal/text/util.go index 204c64838..af45cfaf0 100644 --- a/internal/text/util.go +++ b/internal/text/util.go @@ -38,8 +38,34 @@ func isPermittedInHashtag(r rune) bool { // is a recognized break character for before // or after a #hashtag. func isHashtagBoundary(r rune) bool { - return unicode.IsSpace(r) || - (unicode.IsPunct(r) && r != '_') + switch { + + // Zero width space. + case r == '\u200B': + return true + + // Zero width no-break space. + case r == '\uFEFF': + return true + + // Pipe character sometimes + // used as workaround. + case r == '|': + return true + + // Standard Unicode white space. + case unicode.IsSpace(r): + return true + + // Non-underscore punctuation. + case unicode.IsPunct(r) && r != '_': + return true + + // Not recognized + // hashtag boundary. + default: + return false + } } // isMentionBoundary returns true if rune r