From 884d0ecc8fb2ad551c7bf60a84771732c58f1909 Mon Sep 17 00:00:00 2001 From: tsmethurst Date: Wed, 12 May 2021 17:31:48 +0200 Subject: [PATCH] more work on parsing statuses --- internal/gtsmodel/mention.go | 16 +++- internal/message/processorutil.go | 6 +- internal/typeutils/asextractionutil.go | 97 ++++++++++++++++++++--- internal/typeutils/asinterfaces.go | 9 ++- internal/typeutils/astointernal.go | 10 ++- internal/typeutils/astointernal_test.go | 101 +++++++++++++++++++++++- internal/util/regexes.go | 3 + internal/util/statustools.go | 33 ++++++-- internal/util/statustools_test.go | 8 +- 9 files changed, 251 insertions(+), 32 deletions(-) diff --git a/internal/gtsmodel/mention.go b/internal/gtsmodel/mention.go index 18eb11082..c8e94158e 100644 --- a/internal/gtsmodel/mention.go +++ b/internal/gtsmodel/mention.go @@ -30,10 +30,22 @@ type Mention struct { CreatedAt time.Time `pg:"type:timestamp,notnull,default:now()"` // When was this mention last updated? UpdatedAt time.Time `pg:"type:timestamp,notnull,default:now()"` - // Who created this mention? + // What's the internal account ID of the originator of the mention? OriginAccountID string `pg:",notnull"` - // Who does this mention target? + // What's the AP URI of the originator of the mention? + OriginAccountURI string `pg:",notnull"` + // What's the internal account ID of the mention target? TargetAccountID string `pg:",notnull"` // Prevent this mention from generating a notification? Silent bool + // NameString is for putting in the namestring of the mentioned user + // before the mention is dereferenced. Should be in a form along the lines of: + // @whatever_username@example.org + // + // This will not be put in the database, it's just for convenience. + NameString string `pg:"-"` + // Href is the web URL (not AP uri!) of the user mentioned. + // + // This will not be put in the database, it's just for convenience. 
+ Href string `pg:"-"` } diff --git a/internal/message/processorutil.go b/internal/message/processorutil.go index c928eec1a..233a18ad8 100644 --- a/internal/message/processorutil.go +++ b/internal/message/processorutil.go @@ -179,7 +179,7 @@ func (p *processor) processLanguage(form *apimodel.AdvancedStatusCreateForm, acc func (p *processor) processMentions(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { menchies := []string{} - gtsMenchies, err := p.db.MentionStringsToMentions(util.DeriveMentions(form.Status), accountID, status.ID) + gtsMenchies, err := p.db.MentionStringsToMentions(util.DeriveMentionsFromStatus(form.Status), accountID, status.ID) if err != nil { return fmt.Errorf("error generating mentions from status: %s", err) } @@ -198,7 +198,7 @@ func (p *processor) processMentions(form *apimodel.AdvancedStatusCreateForm, acc func (p *processor) processTags(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { tags := []string{} - gtsTags, err := p.db.TagStringsToTags(util.DeriveHashtags(form.Status), accountID, status.ID) + gtsTags, err := p.db.TagStringsToTags(util.DeriveHashtagsFromStatus(form.Status), accountID, status.ID) if err != nil { return fmt.Errorf("error generating hashtags from status: %s", err) } @@ -217,7 +217,7 @@ func (p *processor) processTags(form *apimodel.AdvancedStatusCreateForm, account func (p *processor) processEmojis(form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { emojis := []string{} - gtsEmojis, err := p.db.EmojiStringsToEmojis(util.DeriveEmojis(form.Status), accountID, status.ID) + gtsEmojis, err := p.db.EmojiStringsToEmojis(util.DeriveEmojisFromStatus(form.Status), accountID, status.ID) if err != nil { return fmt.Errorf("error generating emojis from status: %s", err) } diff --git a/internal/typeutils/asextractionutil.go b/internal/typeutils/asextractionutil.go index e692af85a..82628c434 100644 --- 
a/internal/typeutils/asextractionutil.go +++ b/internal/typeutils/asextractionutil.go @@ -30,6 +30,7 @@ import ( "github.com/go-fed/activity/pub" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/util" ) func extractPreferredUsername(i withPreferredUsername) (string, error) { @@ -184,12 +185,12 @@ func extractImageURL(i withImage) (*url.URL, error) { // here in order to find the first one that meets these criteria: // 1. is an image // 2. has a URL so we can grab it - for imageIter := imageProp.Begin(); imageIter != imageProp.End(); imageIter = imageIter.Next() { + for iter := imageProp.Begin(); iter != imageProp.End(); iter = iter.Next() { // 1. is an image - if !imageIter.IsActivityStreamsImage() { + if !iter.IsActivityStreamsImage() { continue } - imageValue := imageIter.GetActivityStreamsImage() + imageValue := iter.GetActivityStreamsImage() if imageValue == nil { continue } @@ -210,9 +211,9 @@ func extractSummary(i withSummary) (string, error) { return "", errors.New("summary property was nil") } - for summaryIter := summaryProp.Begin(); summaryIter != summaryProp.End(); summaryIter = summaryIter.Next() { - if summaryIter.IsXMLSchemaString() && summaryIter.GetXMLSchemaString() != "" { - return summaryIter.GetXMLSchemaString(), nil + for iter := summaryProp.Begin(); iter != summaryProp.End(); iter = iter.Next() { + if iter.IsXMLSchemaString() && iter.GetXMLSchemaString() != "" { + return iter.GetXMLSchemaString(), nil } } @@ -232,9 +233,9 @@ func extractURL(i withURL) (*url.URL, error) { return nil, errors.New("url property was nil") } - for urlIter := urlProp.Begin(); urlIter != urlProp.End(); urlIter = urlIter.Next() { - if urlIter.IsIRI() && urlIter.GetIRI() != nil { - return urlIter.GetIRI(), nil + for iter := urlProp.Begin(); iter != urlProp.End(); iter = iter.Next() { + if iter.IsIRI() && iter.GetIRI() != nil { + return iter.GetIRI(), nil } } @@ -247,8 +248,8 @@ func 
extractPublicKeyForOwner(i withPublicKey, forOwner *url.URL) (*rsa.PublicKe return nil, nil, errors.New("public key property was nil") } - for publicKeyIter := publicKeyProp.Begin(); publicKeyIter != publicKeyProp.End(); publicKeyIter = publicKeyIter.Next() { - pkey := publicKeyIter.Get() + for iter := publicKeyProp.Begin(); iter != publicKeyProp.End(); iter = iter.Next() { + pkey := iter.Get() if pkey == nil { continue } @@ -449,7 +450,79 @@ func extractEmoji(i Emojiable) (*gtsmodel.Emoji, error) { if idProp == nil || !idProp.IsIRI() { return nil, errors.New("no id for emoji") } - emoji.URI = idProp.GetIRI().String() + uri := idProp.GetIRI() + emoji.URI = uri.String() + emoji.Domain = uri.Host + + name, err := extractName(i) + if err != nil { + return nil, err + } + emoji.Shortcode = strings.Trim(name, ":") + + if i.GetActivityStreamsIcon() == nil { + return nil, errors.New("no icon for emoji") + } + imageURL, err := extractIconURL(i) + if err != nil { + return nil, errors.New("no url for emoji image") + } + emoji.ImageRemoteURL = imageURL.String() return emoji, nil } + +func extractMentions(i withTag) ([]*gtsmodel.Mention, error) { + mentions := []*gtsmodel.Mention{} + tagsProp := i.GetActivityStreamsTag() + for iter := tagsProp.Begin(); iter != tagsProp.End(); iter = iter.Next() { + t := iter.GetType() + if t == nil { + continue + } + + if t.GetTypeName() != "Mention" { + continue + } + + mentionable, ok := t.(Mentionable) + if !ok { + continue + } + + mention, err := extractMention(mentionable) + if err != nil { + continue + } + + mentions = append(mentions, mention) + } + return mentions, nil +} + +func extractMention(i Mentionable) (*gtsmodel.Mention, error) { + mention := &gtsmodel.Mention{} + + mentionString, err := extractName(i) + if err != nil { + return nil, err + } + + // just make sure the mention string is valid so we can handle it properly later on... 
+ username, domain, err := util.ExtractMentionParts(mentionString) + if err != nil { + return nil, err + } + if username == "" || domain == "" { + return nil, errors.New("username or domain was empty") + } + + // the href prop should be the URL of a user we know, eg https://example.org/@whatever_user + hrefProp := i.GetActivityStreamsHref() + if hrefProp == nil || !hrefProp.IsIRI() { + return nil, errors.New("no href prop") + } + mention.Href = hrefProp.GetIRI().String() + + return mention, nil +} diff --git a/internal/typeutils/asinterfaces.go b/internal/typeutils/asinterfaces.go index be888249d..fe2306c5b 100644 --- a/internal/typeutils/asinterfaces.go +++ b/internal/typeutils/asinterfaces.go @@ -73,13 +73,14 @@ type Attachmentable interface { withFocalPoint } -// Hashtaggable represents the minimum activitypub interface for representing a 'hashtag'. +// Hashtaggable represents the minimum activitypub interface for representing a 'hashtag' tag. type Hashtaggable interface { withTypeName withHref withName } +// Emojiable represents the minimum interface for an 'emoji' tag. type Emojiable interface { withJSONLDId withTypeName @@ -88,6 +89,12 @@ type Emojiable interface { withIcon } +// Mentionable represents the minimum interface for a 'mention' tag. 
+type Mentionable interface { + withName + withHref +} + type withJSONLDId interface { GetJSONLDId() vocab.JSONLDIdProperty } diff --git a/internal/typeutils/astointernal.go b/internal/typeutils/astointernal.go index 6a47548c0..afaddf964 100644 --- a/internal/typeutils/astointernal.go +++ b/internal/typeutils/astointernal.go @@ -185,9 +185,15 @@ func (c *converter) ASStatusToStatus(statusable Statusable) (*gtsmodel.Status, e status.GTSTags = hashtags } - // emojis, err := extractEmojis(statusable) + emojis, err := extractEmojis(statusable) + if err == nil { + status.GTSEmojis = emojis + } - // mentions, err := extractMentions(statusable) + mentions, err := extractMentions(statusable) + if err == nil { + status.GTSMentions = mentions + } cw, err := extractSummary(statusable) if err == nil && cw != "" { diff --git a/internal/typeutils/astointernal_test.go b/internal/typeutils/astointernal_test.go index 813ac0a3d..f1287e027 100644 --- a/internal/typeutils/astointernal_test.go +++ b/internal/typeutils/astointernal_test.go @@ -37,7 +37,74 @@ type ASToInternalTestSuite struct { } const ( - statusAsActivityJson = `{ + statusWithMentionsActivityJson = `{ + "@context": [ + "https://www.w3.org/ns/activitystreams", + { + "ostatus": "http://ostatus.org#", + "atomUri": "ostatus:atomUri", + "inReplyToAtomUri": "ostatus:inReplyToAtomUri", + "conversation": "ostatus:conversation", + "sensitive": "as:sensitive", + "toot": "http://joinmastodon.org/ns#", + "votersCount": "toot:votersCount" + } + ], + "id": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/activity", + "type": "Create", + "actor": "https://ondergrond.org/users/dumpsterqueer", + "published": "2021-05-12T09:58:38Z", + "to": [ + "https://ondergrond.org/users/dumpsterqueer/followers" + ], + "cc": [ + "https://www.w3.org/ns/activitystreams#Public", + "https://social.pixie.town/users/f0x" + ], + "object": { + "id": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552", + "type": 
"Note", + "summary": null, + "inReplyTo": "https://social.pixie.town/users/f0x/statuses/106221628567855262", + "published": "2021-05-12T09:58:38Z", + "url": "https://ondergrond.org/@dumpsterqueer/106221634728637552", + "attributedTo": "https://ondergrond.org/users/dumpsterqueer", + "to": [ + "https://ondergrond.org/users/dumpsterqueer/followers" + ], + "cc": [ + "https://www.w3.org/ns/activitystreams#Public", + "https://social.pixie.town/users/f0x" + ], + "sensitive": false, + "atomUri": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552", + "inReplyToAtomUri": "https://social.pixie.town/users/f0x/statuses/106221628567855262", + "conversation": "tag:ondergrond.org,2021-05-12:objectId=1132361:objectType=Conversation", + "content": "

@f0x nice there it is:

https://social.pixie.town/users/f0x/statuses/106221628567855262/activity

", + "contentMap": { + "en": "

@f0x nice there it is:

https://social.pixie.town/users/f0x/statuses/106221628567855262/activity

" + }, + "attachment": [], + "tag": [ + { + "type": "Mention", + "href": "https://social.pixie.town/users/f0x", + "name": "@f0x@pixie.town" + } + ], + "replies": { + "id": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/replies", + "type": "Collection", + "first": { + "type": "CollectionPage", + "next": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/replies?only_other_accounts=true&page=true", + "partOf": "https://ondergrond.org/users/dumpsterqueer/statuses/106221634728637552/replies", + "items": [] + } + } + } + }` + statusWithEmojisAndTagsAsActivityJson = `{ "@context": [ "https://www.w3.org/ns/activitystreams", { @@ -309,7 +376,34 @@ func (suite *ASToInternalTestSuite) TestParseGargron() { func (suite *ASToInternalTestSuite) TestParseStatus() { m := make(map[string]interface{}) - err := json.Unmarshal([]byte(statusAsActivityJson), &m) + err := json.Unmarshal([]byte(statusWithEmojisAndTagsAsActivityJson), &m) + assert.NoError(suite.T(), err) + + t, err := streams.ToType(context.Background(), m) + assert.NoError(suite.T(), err) + + create, ok := t.(vocab.ActivityStreamsCreate) + assert.True(suite.T(), ok) + + obj := create.GetActivityStreamsObject() + assert.NotNil(suite.T(), obj) + + first := obj.Begin() + assert.NotNil(suite.T(), first) + + rep, ok := first.GetType().(typeutils.Statusable) + assert.True(suite.T(), ok) + + status, err := suite.typeconverter.ASStatusToStatus(rep) + assert.NoError(suite.T(), err) + + assert.Len(suite.T(), status.GTSEmojis, 3) + // assert.Len(suite.T(), status.GTSTags, 2) TODO: implement this first so that it can pick up tags +} + +func (suite *ASToInternalTestSuite) TestParseStatusWithMention() { + m := make(map[string]interface{}) + err := json.Unmarshal([]byte(statusWithMentionsActivityJson), &m) assert.NoError(suite.T(), err) t, err := streams.ToType(context.Background(), m) @@ -331,6 +425,9 @@ func (suite *ASToInternalTestSuite) TestParseStatus() { assert.NoError(suite.T(), err) 
fmt.Printf("%+v", status) + + assert.Len(suite.T(), status.GTSMentions, 1) + fmt.Println(status.GTSMentions[0]) } func (suite *ASToInternalTestSuite) TearDownTest() { diff --git a/internal/util/regexes.go b/internal/util/regexes.go index a59bd678a..8bdcf8618 100644 --- a/internal/util/regexes.go +++ b/internal/util/regexes.go @@ -35,6 +35,9 @@ const ( ) var ( + mentionNameRegexString = `@([a-zA-Z0-9_]+)(?:@([a-zA-Z0-9_\-\.]+))?` + mentionNameRegex = regexp.MustCompile(fmt.Sprintf(`^%s$`, mentionNameRegexString)) + // mention regex can be played around with here: https://regex101.com/r/qwM9D3/1 mentionFinderRegexString = `(?: |^|\W)(@[a-zA-Z0-9_]+(?:@[a-zA-Z0-9_\-\.]+)?)(?: |\n)` mentionFinderRegex = regexp.MustCompile(mentionFinderRegexString) diff --git a/internal/util/statustools.go b/internal/util/statustools.go index 5591f185a..0a85dda41 100644 --- a/internal/util/statustools.go +++ b/internal/util/statustools.go @@ -19,17 +19,18 @@ package util import ( + "fmt" "strings" ) -// DeriveMentions takes a plaintext (ie., not html-formatted) status, +// DeriveMentionsFromStatus takes a plaintext (ie., not html-formatted) status, // and applies a regex to it to return a deduplicated list of accounts // mentioned in that status. // // It will look for fully-qualified account names in the form "@user@example.org". // or the form "@username" for local users. // The case of the returned mentions will be lowered, for consistency. 
-func DeriveMentions(status string) []string { +func DeriveMentionsFromStatus(status string) []string { mentionedAccounts := []string{} for _, m := range mentionFinderRegex.FindAllStringSubmatch(status, -1) { mentionedAccounts = append(mentionedAccounts, m[1]) @@ -37,11 +38,11 @@ func DeriveMentions(status string) []string { return lower(unique(mentionedAccounts)) } -// DeriveHashtags takes a plaintext (ie., not html-formatted) status, +// DeriveHashtagsFromStatus takes a plaintext (ie., not html-formatted) status, // and applies a regex to it to return a deduplicated list of hashtags // used in that status, without the leading #. The case of the returned // tags will be lowered, for consistency. -func DeriveHashtags(status string) []string { +func DeriveHashtagsFromStatus(status string) []string { tags := []string{} for _, m := range hashtagFinderRegex.FindAllStringSubmatch(status, -1) { tags = append(tags, m[1]) @@ -49,11 +50,11 @@ func DeriveHashtags(status string) []string { return lower(unique(tags)) } -// DeriveEmojis takes a plaintext (ie., not html-formatted) status, +// DeriveEmojisFromStatus takes a plaintext (ie., not html-formatted) status, // and applies a regex to it to return a deduplicated list of emojis // used in that status, without the surround ::. The case of the returned // emojis will be lowered, for consistency. -func DeriveEmojis(status string) []string { +func DeriveEmojisFromStatus(status string) []string { emojis := []string{} for _, m := range emojiFinderRegex.FindAllStringSubmatch(status, -1) { emojis = append(emojis, m[1]) @@ -61,6 +62,26 @@ func DeriveEmojis(status string) []string { return lower(unique(emojis)) } +// ExtractMentionParts extracts the username @test_user and the domain @example.org +// from a mention string like @test_user@example.org. +// +// If no domain is provided, it will return just the username part. +// +// If nothing is matched, it will return an error. 
+func ExtractMentionParts(mention string) (username, domain string, err error) { + matches := mentionNameRegex.FindStringSubmatch(mention) + if matches == nil { + err = fmt.Errorf("couldn't match mention %s", mention) + return + } + // matches[0] is the whole match, [1] the username, [2] the domain (may be empty) + username = matches[1] + if len(matches) == 3 { + domain = matches[2] + } + return +} + // unique returns a deduplicated version of a given string slice. func unique(s []string) []string { keys := make(map[string]bool) diff --git a/internal/util/statustools_test.go b/internal/util/statustools_test.go index 7c9af2cbd..2a12c7690 100644 --- a/internal/util/statustools_test.go +++ b/internal/util/statustools_test.go @@ -42,7 +42,7 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() { here is a duplicate mention: @hello@test.lgbt ` - menchies := util.DeriveMentions(statusText) + menchies := util.DeriveMentionsFromStatus(statusText) assert.Len(suite.T(), menchies, 4) assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0]) assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1]) @@ -52,7 +52,7 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() { func (suite *StatusTestSuite) TestDeriveMentionsEmpty() { statusText := `` - menchies := util.DeriveMentions(statusText) + menchies := util.DeriveMentionsFromStatus(statusText) assert.Len(suite.T(), menchies, 0) } @@ -67,7 +67,7 @@ func (suite *StatusTestSuite) TestDeriveHashtagsOK() { #111111 thisalsoshouldn'twork#### ##` - tags := util.DeriveHashtags(statusText) + tags := util.DeriveHashtagsFromStatus(statusText) assert.Len(suite.T(), tags, 5) assert.Equal(suite.T(), "testing123", tags[0]) assert.Equal(suite.T(), "also", tags[1]) @@ -90,7 +90,7 @@ Here's some normal text with an :emoji: at the end :underscores_ok_too: ` - tags := util.DeriveEmojis(statusText) + tags := util.DeriveEmojisFromStatus(statusText) assert.Len(suite.T(), tags, 7) assert.Equal(suite.T(), "test", tags[0]) assert.Equal(suite.T(), "another", tags[1])