mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-10-29 21:02:26 -05:00
[bugfix] Extend parser to handle more non-Latin hashtags (#3700)
* Allow marks after NFC normalization. Includes regression test for the Tamil example from #3618.
* Disallow just numbers + marks + underscore as hashtag.
This commit is contained in:
parent
ab758cc233
commit
b9e0689359
5 changed files with 48 additions and 37 deletions
|
|
@@ -50,17 +50,16 @@ func NormalizeHashtag(text string) (string, bool) {
|
|||
|
||||
// Validate normalized result.
|
||||
var (
|
||||
notJustUnderscores = false
|
||||
onlyPermittedChars = true
|
||||
lengthOK = true
|
||||
atLeastOneRequiredChar = false
|
||||
onlyPermittedChars = true
|
||||
lengthOK = true
|
||||
)
|
||||
|
||||
for i, r := range normalized {
|
||||
if r != '_' {
|
||||
// This isn't an underscore,
|
||||
// so the whole hashtag isn't
|
||||
// just underscores.
|
||||
notJustUnderscores = true
|
||||
if !isPermittedIfNotEntireHashtag(r) {
|
||||
// This isn't an underscore, mark, etc,
|
||||
// so the hashtag contains at least one required character.
|
||||
atLeastOneRequiredChar = true
|
||||
}
|
||||
|
||||
if i >= maximumHashtagLength {
|
||||
|
|
@@ -74,5 +73,5 @@ func NormalizeHashtag(text string) (string, bool) {
|
|||
}
|
||||
}
|
||||
|
||||
return normalized, (lengthOK && onlyPermittedChars && notJustUnderscores)
|
||||
return normalized, lengthOK && onlyPermittedChars && atLeastOneRequiredChar
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue