more work on parsing statuses

This commit is contained in:
tsmethurst 2021-05-12 17:31:48 +02:00
commit 884d0ecc8f
9 changed files with 251 additions and 32 deletions

View file

@ -30,10 +30,22 @@ type Mention struct {
CreatedAt time.Time `pg:"type:timestamp,notnull,default:now()"` CreatedAt time.Time `pg:"type:timestamp,notnull,default:now()"`
// When was this mention last updated? // When was this mention last updated?
UpdatedAt time.Time `pg:"type:timestamp,notnull,default:now()"` UpdatedAt time.Time `pg:"type:timestamp,notnull,default:now()"`
// Who created this mention? // What's the internal account ID of the originator of the mention?
OriginAccountID string `pg:",notnull"` OriginAccountID string `pg:",notnull"`
// Who does this mention target? // What's the AP URI of the originator of the mention?
OriginAccountURI string `pg:",notnull"`
// What's the internal account ID of the mention target?
TargetAccountID string `pg:",notnull"` TargetAccountID string `pg:",notnull"`
// Prevent this mention from generating a notification? // Prevent this mention from generating a notification?
Silent bool Silent bool
// NameString is for putting in the namestring of the mentioned user
// before the mention is dereferenced. Should be in a form along the lines of:
// @whatever_username@example.org
//
// This will not be put in the database, it's just for convenience.
NameString string `pg:"-"`
// Href is the web URL (not AP uri!) of the user mentioned.
//
// This will not be put in the database, it's just for convenience.
Href string `pg:"-"`
} }

View file

@ -179,7 +179,7 @@ func (p *processor) processLanguage(form *apimodel.AdvancedStatusCreateForm, acc
func (p *processor) processMentions(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { func (p *processor) processMentions(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
menchies := []string{} menchies := []string{}
gtsMenchies, err := p.db.MentionStringsToMentions(util.DeriveMentions(form.Status), accountID, status.ID) gtsMenchies, err := p.db.MentionStringsToMentions(util.DeriveMentionsFromStatus(form.Status), accountID, status.ID)
if err != nil { if err != nil {
return fmt.Errorf("error generating mentions from status: %s", err) return fmt.Errorf("error generating mentions from status: %s", err)
} }
@ -198,7 +198,7 @@ func (p *processor) processMentions(form *apimodel.AdvancedStatusCreateForm, acc
func (p *processor) processTags(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { func (p *processor) processTags(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
tags := []string{} tags := []string{}
gtsTags, err := p.db.TagStringsToTags(util.DeriveHashtags(form.Status), accountID, status.ID) gtsTags, err := p.db.TagStringsToTags(util.DeriveHashtagsFromStatus(form.Status), accountID, status.ID)
if err != nil { if err != nil {
return fmt.Errorf("error generating hashtags from status: %s", err) return fmt.Errorf("error generating hashtags from status: %s", err)
} }
@ -217,7 +217,7 @@ func (p *processor) processTags(form *apimodel.AdvancedStatusCreateForm, account
func (p *processor) processEmojis(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { func (p *processor) processEmojis(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {
emojis := []string{} emojis := []string{}
gtsEmojis, err := p.db.EmojiStringsToEmojis(util.DeriveEmojis(form.Status), accountID, status.ID) gtsEmojis, err := p.db.EmojiStringsToEmojis(util.DeriveEmojisFromStatus(form.Status), accountID, status.ID)
if err != nil { if err != nil {
return fmt.Errorf("error generating emojis from status: %s", err) return fmt.Errorf("error generating emojis from status: %s", err)
} }

View file

@ -30,6 +30,7 @@ import (
"github.com/go-fed/activity/pub" "github.com/go-fed/activity/pub"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/util"
) )
func extractPreferredUsername(i withPreferredUsername) (string, error) { func extractPreferredUsername(i withPreferredUsername) (string, error) {
@ -184,12 +185,12 @@ func extractImageURL(i withImage) (*url.URL, error) {
// here in order to find the first one that meets these criteria: // here in order to find the first one that meets these criteria:
// 1. is an image // 1. is an image
// 2. has a URL so we can grab it // 2. has a URL so we can grab it
for imageIter := imageProp.Begin(); imageIter != imageProp.End(); imageIter = imageIter.Next() { for iter := imageProp.Begin(); iter != imageProp.End(); iter = iter.Next() {
// 1. is an image // 1. is an image
if !imageIter.IsActivityStreamsImage() { if !iter.IsActivityStreamsImage() {
continue continue
} }
imageValue := imageIter.GetActivityStreamsImage() imageValue := iter.GetActivityStreamsImage()
if imageValue == nil { if imageValue == nil {
continue continue
} }
@ -210,9 +211,9 @@ func extractSummary(i withSummary) (string, error) {
return "", errors.New("summary property was nil") return "", errors.New("summary property was nil")
} }
for summaryIter := summaryProp.Begin(); summaryIter != summaryProp.End(); summaryIter = summaryIter.Next() { for iter := summaryProp.Begin(); iter != summaryProp.End(); iter = iter.Next() {
if summaryIter.IsXMLSchemaString() && summaryIter.GetXMLSchemaString() != "" { if iter.IsXMLSchemaString() && iter.GetXMLSchemaString() != "" {
return summaryIter.GetXMLSchemaString(), nil return iter.GetXMLSchemaString(), nil
} }
} }
@ -232,9 +233,9 @@ func extractURL(i withURL) (*url.URL, error) {
return nil, errors.New("url property was nil") return nil, errors.New("url property was nil")
} }
for urlIter := urlProp.Begin(); urlIter != urlProp.End(); urlIter = urlIter.Next() { for iter := urlProp.Begin(); iter != urlProp.End(); iter = iter.Next() {
if urlIter.IsIRI() && urlIter.GetIRI() != nil { if iter.IsIRI() && iter.GetIRI() != nil {
return urlIter.GetIRI(), nil return iter.GetIRI(), nil
} }
} }
@ -247,8 +248,8 @@ func extractPublicKeyForOwner(i withPublicKey, forOwner *url.URL) (*rsa.PublicKe
return nil, nil, errors.New("public key property was nil") return nil, nil, errors.New("public key property was nil")
} }
for publicKeyIter := publicKeyProp.Begin(); publicKeyIter != publicKeyProp.End(); publicKeyIter = publicKeyIter.Next() { for iter := publicKeyProp.Begin(); iter != publicKeyProp.End(); iter = iter.Next() {
pkey := publicKeyIter.Get() pkey := iter.Get()
if pkey == nil { if pkey == nil {
continue continue
} }
@ -449,7 +450,79 @@ func extractEmoji(i Emojiable) (*gtsmodel.Emoji, error) {
if idProp == nil || !idProp.IsIRI() { if idProp == nil || !idProp.IsIRI() {
return nil, errors.New("no id for emoji") return nil, errors.New("no id for emoji")
} }
emoji.URI = idProp.GetIRI().String() uri := idProp.GetIRI()
emoji.URI = uri.String()
emoji.Domain = uri.Host
name, err := extractName(i)
if err != nil {
return nil, err
}
emoji.Shortcode = strings.Trim(name, ":")
if i.GetActivityStreamsIcon() == nil {
return nil, errors.New("no icon for emoji")
}
imageURL, err := extractIconURL(i)
if err != nil {
return nil, errors.New("no url for emoji image")
}
emoji.ImageRemoteURL = imageURL.String()
return emoji, nil return emoji, nil
} }
// extractMentions collects every tag of type 'Mention' attached to i and
// converts each one into a *gtsmodel.Mention using extractMention.
//
// Tags that have no type, are not named "Mention", do not satisfy
// Mentionable, or fail conversion are skipped, so that a single bad tag
// does not throw away the remaining mentions.
//
// If i has no tag property at all, an empty (non-nil) slice is returned.
// The returned error is currently always nil.
func extractMentions(i withTag) ([]*gtsmodel.Mention, error) {
	mentions := []*gtsmodel.Mention{}

	tagsProp := i.GetActivityStreamsTag()
	if tagsProp == nil {
		// no tag property set: there is nothing to iterate over, and
		// a nil property cannot be iterated without panicking
		return mentions, nil
	}

	for iter := tagsProp.Begin(); iter != tagsProp.End(); iter = iter.Next() {
		t := iter.GetType()
		if t == nil {
			continue
		}

		// only tags explicitly typed as Mention are of interest here
		if t.GetTypeName() != "Mention" {
			continue
		}

		mentionable, ok := t.(Mentionable)
		if !ok {
			continue
		}

		mention, err := extractMention(mentionable)
		if err != nil {
			// best effort: skip mentions we can't parse
			continue
		}

		mentions = append(mentions, mention)
	}

	return mentions, nil
}
// extractMention converts a single Mentionable tag into a *gtsmodel.Mention.
//
// The tag's name must be a mention string that util.ExtractMentionParts can
// split into a non-empty username and a non-empty domain, and the tag's href
// property must be an IRI. If any of these conditions is not met, an error
// is returned and no mention is produced.
//
// On success the returned mention has NameString set to the tag's name and
// Href set to the string form of the href IRI; no other fields are filled in.
func extractMention(i Mentionable) (*gtsmodel.Mention, error) {
	mentionString, err := extractName(i)
	if err != nil {
		return nil, err
	}

	// just make sure the mention string is valid so we can handle it properly later on...
	username, domain, err := util.ExtractMentionParts(mentionString)
	if err != nil {
		return nil, err
	}
	if username == "" || domain == "" {
		return nil, errors.New("username or domain was empty")
	}

	// the href prop should be the URL of a user we know, eg https://example.org/@whatever_user
	hrefProp := i.GetActivityStreamsHref()
	if hrefProp == nil || !hrefProp.IsIRI() {
		return nil, errors.New("no href prop")
	}

	mention := &gtsmodel.Mention{
		// keep the validated name string so it's still available
		// when the mention gets dereferenced later on
		NameString: mentionString,
		Href:       hrefProp.GetIRI().String(),
	}
	return mention, nil
}

View file

@ -73,13 +73,14 @@ type Attachmentable interface {
withFocalPoint withFocalPoint
} }
// Hashtaggable represents the minimum activitypub interface for representing a 'hashtag'. // Hashtaggable represents the minimum activitypub interface for representing a 'hashtag' tag.
type Hashtaggable interface { type Hashtaggable interface {
withTypeName withTypeName
withHref withHref
withName withName
} }
// Emojiable represents the minimum interface for an 'emoji' tag.
type Emojiable interface { type Emojiable interface {
withJSONLDId withJSONLDId
withTypeName withTypeName
@ -88,6 +89,12 @@ type Emojiable interface {
withIcon withIcon
} }
// Mentionable represents the minimum interface for a 'mention' tag.
type Mentionable interface {
withName
withHref
}
type withJSONLDId interface { type withJSONLDId interface {
GetJSONLDId() vocab.JSONLDIdProperty GetJSONLDId() vocab.JSONLDIdProperty
} }

View file

@ -185,9 +185,15 @@ func (c *converter) ASStatusToStatus(statusable Statusable) (*gtsmodel.Status, e
status.GTSTags = hashtags status.GTSTags = hashtags
} }
// emojis, err := extractEmojis(statusable) emojis, err := extractEmojis(statusable)
if err == nil {
status.GTSEmojis = emojis
}
// mentions, err := extractMentions(statusable) mentions, err := extractMentions(statusable)
if err == nil {
status.GTSMentions = mentions
}
cw, err := extractSummary(statusable) cw, err := extractSummary(statusable)
if err == nil && cw != "" { if err == nil && cw != "" {

View file

@ -37,7 +37,74 @@ type ASToInternalTestSuite struct {
} }
const ( const (
statusAsActivityJson = `{ statusWithMentionsActivityJson = `{
"@context": [
"https://www.w3.org/ns/activitystreams",
{
"ostatus": "http://ostatus.org#",
"atomUri": "ostatus:atomUri",
"inReplyToAtomUri": "ostatus:inReplyToAtomUri",
"conversation": "ostatus:conversation",
"sensitive": "as:sensitive",
"toot": "http://joinmastodon.org/ns#",
"votersCount": "toot:votersCount"
}
],
"id": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/activity",
"type": "Create",
"actor": "https://ondergrond.org/users/dumpsterqueer",
"published": "2021-05-12T09:58:38Z",
"to": [
"https://ondergrond.org/users/dumpsterqueer/followers"
],
"cc": [
"https://www.w3.org/ns/activitystreams#Public",
"https://social.pixie.town/users/f0x"
],
"object": {
"id": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552",
"type": "Note",
"summary": null,
"inReplyTo": "https://social.pixie.town/users/f0x/statuses/106221628567855262",
"published": "2021-05-12T09:58:38Z",
"url": "https://ondergrond.org/@dumpsterqueer/106221634728637552",
"attributedTo": "https://ondergrond.org/users/dumpsterqueer",
"to": [
"https://ondergrond.org/users/dumpsterqueer/followers"
],
"cc": [
"https://www.w3.org/ns/activitystreams#Public",
"https://social.pixie.town/users/f0x"
],
"sensitive": false,
"atomUri": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552",
"inReplyToAtomUri": "https://social.pixie.town/users/f0x/statuses/106221628567855262",
"conversation": "tag:ondergrond.org,2021-05-12:objectId=1132361:objectType=Conversation",
"content": "<p><span class=\"h-card\"><a href=\"https://social.pixie.town/@f0x\" class=\"u-url mention\">@<span>f0x</span></a></span> nice there it is:</p><p><a href=\"https://social.pixie.town/users/f0x/statuses/106221628567855262/activity\" rel=\"nofollow noopener noreferrer\" target=\"_blank\"><span class=\"invisible\">https://</span><span class=\"ellipsis\">social.pixie.town/users/f0x/st</span><span class=\"invisible\">atuses/106221628567855262/activity</span></a></p>",
"contentMap": {
"en": "<p><span class=\"h-card\"><a href=\"https://social.pixie.town/@f0x\" class=\"u-url mention\">@<span>f0x</span></a></span> nice there it is:</p><p><a href=\"https://social.pixie.town/users/f0x/statuses/106221628567855262/activity\" rel=\"nofollow noopener noreferrer\" target=\"_blank\"><span class=\"invisible\">https://</span><span class=\"ellipsis\">social.pixie.town/users/f0x/st</span><span class=\"invisible\">atuses/106221628567855262/activity</span></a></p>"
},
"attachment": [],
"tag": [
{
"type": "Mention",
"href": "https://social.pixie.town/users/f0x",
"name": "@f0x@pixie.town"
}
],
"replies": {
"id": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/replies",
"type": "Collection",
"first": {
"type": "CollectionPage",
"next": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/replies?only_other_accounts=true&page=true",
"partOf": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/replies",
"items": []
}
}
}
}`
statusWithEmojisAndTagsAsActivityJson = `{
"@context": [ "@context": [
"https://www.w3.org/ns/activitystreams", "https://www.w3.org/ns/activitystreams",
{ {
@ -309,7 +376,34 @@ func (suite *ASToInternalTestSuite) TestParseGargron() {
func (suite *ASToInternalTestSuite) TestParseStatus() { func (suite *ASToInternalTestSuite) TestParseStatus() {
m := make(map[string]interface{}) m := make(map[string]interface{})
err := json.Unmarshal([]byte(statusAsActivityJson), &m) err := json.Unmarshal([]byte(statusWithEmojisAndTagsAsActivityJson), &m)
assert.NoError(suite.T(), err)
t, err := streams.ToType(context.Background(), m)
assert.NoError(suite.T(), err)
create, ok := t.(vocab.ActivityStreamsCreate)
assert.True(suite.T(), ok)
obj := create.GetActivityStreamsObject()
assert.NotNil(suite.T(), obj)
first := obj.Begin()
assert.NotNil(suite.T(), first)
rep, ok := first.GetType().(typeutils.Statusable)
assert.True(suite.T(), ok)
status, err := suite.typeconverter.ASStatusToStatus(rep)
assert.NoError(suite.T(), err)
assert.Len(suite.T(), status.GTSEmojis, 3)
// assert.Len(suite.T(), status.GTSTags, 2) TODO: implement this first so that it can pick up tags
}
func (suite *ASToInternalTestSuite) TestParseStatusWithMention() {
m := make(map[string]interface{})
err := json.Unmarshal([]byte(statusWithMentionsActivityJson), &m)
assert.NoError(suite.T(), err) assert.NoError(suite.T(), err)
t, err := streams.ToType(context.Background(), m) t, err := streams.ToType(context.Background(), m)
@ -331,6 +425,9 @@ func (suite *ASToInternalTestSuite) TestParseStatus() {
assert.NoError(suite.T(), err) assert.NoError(suite.T(), err)
fmt.Printf("%+v", status) fmt.Printf("%+v", status)
assert.Len(suite.T(), status.GTSMentions, 1)
fmt.Println(status.GTSMentions[0])
} }
func (suite *ASToInternalTestSuite) TearDownTest() { func (suite *ASToInternalTestSuite) TearDownTest() {

View file

@ -35,6 +35,9 @@ const (
) )
var ( var (
mentionNameRegexString = `@([a-zA-Z0-9_]+)(?:@([a-zA-Z0-9_\-\.]+)?)`
mentionNameRegex = regexp.MustCompile(fmt.Sprintf(`^%s$`, mentionNameRegexString))
// mention regex can be played around with here: https://regex101.com/r/qwM9D3/1 // mention regex can be played around with here: https://regex101.com/r/qwM9D3/1
mentionFinderRegexString = `(?: |^|\W)(@[a-zA-Z0-9_]+(?:@[a-zA-Z0-9_\-\.]+)?)(?: |\n)` mentionFinderRegexString = `(?: |^|\W)(@[a-zA-Z0-9_]+(?:@[a-zA-Z0-9_\-\.]+)?)(?: |\n)`
mentionFinderRegex = regexp.MustCompile(mentionFinderRegexString) mentionFinderRegex = regexp.MustCompile(mentionFinderRegexString)

View file

@ -19,17 +19,18 @@
package util package util
import ( import (
"fmt"
"strings" "strings"
) )
// DeriveMentions takes a plaintext (ie., not html-formatted) status, // DeriveMentionsFromStatus takes a plaintext (ie., not html-formatted) status,
// and applies a regex to it to return a deduplicated list of accounts // and applies a regex to it to return a deduplicated list of accounts
// mentioned in that status. // mentioned in that status.
// //
// It will look for fully-qualified account names in the form "@user@example.org". // It will look for fully-qualified account names in the form "@user@example.org".
// or the form "@username" for local users. // or the form "@username" for local users.
// The case of the returned mentions will be lowered, for consistency. // The case of the returned mentions will be lowered, for consistency.
func DeriveMentions(status string) []string { func DeriveMentionsFromStatus(status string) []string {
mentionedAccounts := []string{} mentionedAccounts := []string{}
for _, m := range mentionFinderRegex.FindAllStringSubmatch(status, -1) { for _, m := range mentionFinderRegex.FindAllStringSubmatch(status, -1) {
mentionedAccounts = append(mentionedAccounts, m[1]) mentionedAccounts = append(mentionedAccounts, m[1])
@ -37,11 +38,11 @@ func DeriveMentions(status string) []string {
return lower(unique(mentionedAccounts)) return lower(unique(mentionedAccounts))
} }
// DeriveHashtags takes a plaintext (ie., not html-formatted) status, // DeriveHashtagsFromStatus takes a plaintext (ie., not html-formatted) status,
// and applies a regex to it to return a deduplicated list of hashtags // and applies a regex to it to return a deduplicated list of hashtags
// used in that status, without the leading #. The case of the returned // used in that status, without the leading #. The case of the returned
// tags will be lowered, for consistency. // tags will be lowered, for consistency.
func DeriveHashtags(status string) []string { func DeriveHashtagsFromStatus(status string) []string {
tags := []string{} tags := []string{}
for _, m := range hashtagFinderRegex.FindAllStringSubmatch(status, -1) { for _, m := range hashtagFinderRegex.FindAllStringSubmatch(status, -1) {
tags = append(tags, m[1]) tags = append(tags, m[1])
@ -49,11 +50,11 @@ func DeriveHashtags(status string) []string {
return lower(unique(tags)) return lower(unique(tags))
} }
// DeriveEmojis takes a plaintext (ie., not html-formatted) status, // DeriveEmojisFromStatus takes a plaintext (ie., not html-formatted) status,
// and applies a regex to it to return a deduplicated list of emojis // and applies a regex to it to return a deduplicated list of emojis
// used in that status, without the surround ::. The case of the returned // used in that status, without the surround ::. The case of the returned
// emojis will be lowered, for consistency. // emojis will be lowered, for consistency.
func DeriveEmojis(status string) []string { func DeriveEmojisFromStatus(status string) []string {
emojis := []string{} emojis := []string{}
for _, m := range emojiFinderRegex.FindAllStringSubmatch(status, -1) { for _, m := range emojiFinderRegex.FindAllStringSubmatch(status, -1) {
emojis = append(emojis, m[1]) emojis = append(emojis, m[1])
@ -61,6 +62,26 @@ func DeriveEmojis(status string) []string {
return lower(unique(emojis)) return lower(unique(emojis))
} }
// ExtractMentionParts splits a mention string like @test_user@example.org
// into its username (test_user) and its domain (example.org). Both parts
// are returned without the leading @.
//
// The mention is matched against mentionNameRegex. If the pattern matches
// but captures no domain, domain is returned as an empty string.
//
// If nothing is matched, it will return an error.
func ExtractMentionParts(mention string) (username, domain string, err error) {
	matches := mentionNameRegex.FindStringSubmatch(mention)
	if matches == nil {
		return "", "", fmt.Errorf("couldn't match mention %s", mention)
	}

	// matches[0] is the whole match, followed by one entry per capture
	// group: username first, then domain.
	username = matches[1]
	if len(matches) > 2 {
		domain = matches[2]
	}
	return username, domain, nil
}
// unique returns a deduplicated version of a given string slice. // unique returns a deduplicated version of a given string slice.
func unique(s []string) []string { func unique(s []string) []string {
keys := make(map[string]bool) keys := make(map[string]bool)

View file

@ -42,7 +42,7 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() {
here is a duplicate mention: @hello@test.lgbt here is a duplicate mention: @hello@test.lgbt
` `
menchies := util.DeriveMentions(statusText) menchies := util.DeriveMentionsFromStatus(statusText)
assert.Len(suite.T(), menchies, 4) assert.Len(suite.T(), menchies, 4)
assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0]) assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0])
assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1]) assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1])
@ -52,7 +52,7 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() {
func (suite *StatusTestSuite) TestDeriveMentionsEmpty() { func (suite *StatusTestSuite) TestDeriveMentionsEmpty() {
statusText := `` statusText := ``
menchies := util.DeriveMentions(statusText) menchies := util.DeriveMentionsFromStatus(statusText)
assert.Len(suite.T(), menchies, 0) assert.Len(suite.T(), menchies, 0)
} }
@ -67,7 +67,7 @@ func (suite *StatusTestSuite) TestDeriveHashtagsOK() {
#111111 thisalsoshouldn'twork#### ##` #111111 thisalsoshouldn'twork#### ##`
tags := util.DeriveHashtags(statusText) tags := util.DeriveHashtagsFromStatus(statusText)
assert.Len(suite.T(), tags, 5) assert.Len(suite.T(), tags, 5)
assert.Equal(suite.T(), "testing123", tags[0]) assert.Equal(suite.T(), "testing123", tags[0])
assert.Equal(suite.T(), "also", tags[1]) assert.Equal(suite.T(), "also", tags[1])
@ -90,7 +90,7 @@ Here's some normal text with an :emoji: at the end
:underscores_ok_too: :underscores_ok_too:
` `
tags := util.DeriveEmojis(statusText) tags := util.DeriveEmojisFromStatus(statusText)
assert.Len(suite.T(), tags, 7) assert.Len(suite.T(), tags, 7)
assert.Equal(suite.T(), "test", tags[0]) assert.Equal(suite.T(), "test", tags[0])
assert.Equal(suite.T(), "another", tags[1]) assert.Equal(suite.T(), "another", tags[1])