Improve GetRemoteStatus and db.GetStatus() logic (#174)

* only fetch status parents / children if explicity requested when dereferencing

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* Remove recursive DB GetStatus logic, don't fetch parent unless requested

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* StatusCache copies status so there are no thread-safety issues with modified status objects

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* remove sqlite test files

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* fix bugs introduced by previous commit

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* fix not continue on error in loop

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* use our own RunInTx implementation (possible fix for nested tx error)

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* fix cast statement to work with SQLite

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* be less strict about valid status in cache

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* add cache=shared ALWAYS for SQLite db instances

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* Fix EnrichRemoteAccount when updating account fails

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* add nolint tag

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* ensure file: prefixes the filename in sqlite addr

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* add an account cache, add status author account from db

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* Fix incompatible SQLite query

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* *actually* use the new getAccount() function in accountsDB

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* update cache tests to use test suite

Signed-off-by: kim (grufwub) <grufwub@gmail.com>

* add RelationshipTestSuite, add tests for methods with changed SQL

Signed-off-by: kim (grufwub) <grufwub@gmail.com>
This commit is contained in:
kim 2021-09-01 10:08:21 +01:00 committed by GitHub
commit 7d193de25f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
36 changed files with 660 additions and 234 deletions

View file

@ -48,7 +48,6 @@ func instanceAccount(account *gtsmodel.Account) bool {
// EnrichRemoteAccount is mostly useful for calling after an account has been initially created by
// the federatingDB's Create function, or during the federated authorization flow.
func (d *deref) EnrichRemoteAccount(ctx context.Context, username string, account *gtsmodel.Account) (*gtsmodel.Account, error) {
// if we're dealing with an instance account, we don't need to update anything
if instanceAccount(account) {
return account, nil
@ -58,13 +57,13 @@ func (d *deref) EnrichRemoteAccount(ctx context.Context, username string, accoun
return nil, err
}
var err error
account, err = d.db.UpdateAccount(ctx, account)
updated, err := d.db.UpdateAccount(ctx, account)
if err != nil {
d.log.Errorf("EnrichRemoteAccount: error updating account: %s", err)
return account, nil
}
return account, nil
return updated, nil
}
// GetRemoteAccount completely dereferences a remote account, converts it to a GtS model account,

View file

@ -46,7 +46,7 @@ func (d *deref) DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Stat
return fmt.Errorf("DereferenceAnnounce: error dereferencing thread of boosted status: %s", err)
}
boostedStatus, _, _, err := d.GetRemoteStatus(ctx, requestingUsername, boostedStatusURI, false)
boostedStatus, _, _, err := d.GetRemoteStatus(ctx, requestingUsername, boostedStatusURI, false, false, false)
if err != nil {
return fmt.Errorf("DereferenceAnnounce: error dereferencing remote status with id %s: %s", announce.BoostOf.URI, err)
}

View file

@ -38,8 +38,8 @@ type Dereferencer interface {
GetRemoteAccount(ctx context.Context, username string, remoteAccountID *url.URL, refresh bool) (*gtsmodel.Account, bool, error)
EnrichRemoteAccount(ctx context.Context, username string, account *gtsmodel.Account) (*gtsmodel.Account, error)
GetRemoteStatus(ctx context.Context, username string, remoteStatusID *url.URL, refresh bool) (*gtsmodel.Status, ap.Statusable, bool, error)
EnrichRemoteStatus(ctx context.Context, username string, status *gtsmodel.Status) (*gtsmodel.Status, error)
GetRemoteStatus(ctx context.Context, username string, remoteStatusID *url.URL, refresh, includeParent, includeChilds bool) (*gtsmodel.Status, ap.Statusable, bool, error)
EnrichRemoteStatus(ctx context.Context, username string, status *gtsmodel.Status, includeParent, includeChilds bool) (*gtsmodel.Status, error)
GetRemoteInstance(ctx context.Context, username string, remoteInstanceURI *url.URL) (*gtsmodel.Instance, error)

View file

@ -39,8 +39,8 @@ import (
//
// EnrichRemoteStatus is mostly useful for calling after a status has been initially created by
// the federatingDB's Create function, but additional dereferencing is needed on it.
func (d *deref) EnrichRemoteStatus(ctx context.Context, username string, status *gtsmodel.Status) (*gtsmodel.Status, error) {
if err := d.populateStatusFields(ctx, status, username); err != nil {
func (d *deref) EnrichRemoteStatus(ctx context.Context, username string, status *gtsmodel.Status, includeParent, includeChilds bool) (*gtsmodel.Status, error) {
if err := d.populateStatusFields(ctx, status, username, includeParent, includeChilds); err != nil {
return nil, err
}
@ -62,7 +62,7 @@ func (d *deref) EnrichRemoteStatus(ctx context.Context, username string, status
// If a dereference was performed, then the function also returns the ap.Statusable representation for further processing.
//
// SIDE EFFECTS: remote status will be stored in the database, and the remote status owner will also be stored.
func (d *deref) GetRemoteStatus(ctx context.Context, username string, remoteStatusID *url.URL, refresh bool) (*gtsmodel.Status, ap.Statusable, bool, error) {
func (d *deref) GetRemoteStatus(ctx context.Context, username string, remoteStatusID *url.URL, refresh, includeParent, includeChilds bool) (*gtsmodel.Status, ap.Statusable, bool, error) {
new := true
// check if we already have the status in our db
@ -105,7 +105,7 @@ func (d *deref) GetRemoteStatus(ctx context.Context, username string, remoteStat
}
gtsStatus.ID = ulid
if err := d.populateStatusFields(ctx, gtsStatus, username); err != nil {
if err := d.populateStatusFields(ctx, gtsStatus, username, includeParent, includeChilds); err != nil {
return nil, statusable, new, fmt.Errorf("GetRemoteStatus: error populating status fields: %s", err)
}
@ -115,7 +115,7 @@ func (d *deref) GetRemoteStatus(ctx context.Context, username string, remoteStat
} else {
gtsStatus.ID = maybeStatus.ID
if err := d.populateStatusFields(ctx, gtsStatus, username); err != nil {
if err := d.populateStatusFields(ctx, gtsStatus, username, includeParent, includeChilds); err != nil {
return nil, statusable, new, fmt.Errorf("GetRemoteStatus: error populating status fields: %s", err)
}
@ -235,7 +235,7 @@ func (d *deref) dereferenceStatusable(ctx context.Context, username string, remo
// This function will deference all of the above, insert them in the database as necessary,
// and attach them to the status. The status itself will not be added to the database yet,
// that's up the caller to do.
func (d *deref) populateStatusFields(ctx context.Context, status *gtsmodel.Status, requestingUsername string) error {
func (d *deref) populateStatusFields(ctx context.Context, status *gtsmodel.Status, requestingUsername string, includeParent, includeChilds bool) error {
l := d.log.WithFields(logrus.Fields{
"func": "dereferenceStatusFields",
"status": fmt.Sprintf("%+v", status),
@ -275,14 +275,19 @@ func (d *deref) populateStatusFields(ctx context.Context, status *gtsmodel.Statu
// 3. Emojis
// TODO
// 4. Mentions
if err := d.populateStatusMentions(ctx, status, requestingUsername); err != nil {
return fmt.Errorf("populateStatusFields: error populating status mentions: %s", err)
// 4. Mentions (only if requested)
// TODO: do we need to handle removing empty mention objects and just using mention IDs slice?
if includeChilds {
if err := d.populateStatusMentions(ctx, status, requestingUsername); err != nil {
return fmt.Errorf("populateStatusFields: error populating status mentions: %s", err)
}
}
// 5. Replied-to-status.
if err := d.populateStatusRepliedTo(ctx, status, requestingUsername); err != nil {
return fmt.Errorf("populateStatusFields: error populating status repliedTo: %s", err)
// 5. Replied-to-status (only if requested)
if includeParent {
if err := d.populateStatusRepliedTo(ctx, status, requestingUsername); err != nil {
return fmt.Errorf("populateStatusFields: error populating status repliedTo: %s", err)
}
}
return nil
@ -391,7 +396,6 @@ func (d *deref) populateStatusAttachments(ctx context.Context, status *gtsmodel.
attachments := []*gtsmodel.MediaAttachment{}
for _, a := range status.Attachments {
aURL, err := url.Parse(a.RemoteURL)
if err != nil {
l.Errorf("populateStatusAttachments: couldn't parse attachment url %s: %s", a.RemoteURL, err)
@ -401,6 +405,7 @@ func (d *deref) populateStatusAttachments(ctx context.Context, status *gtsmodel.
attachment, err := d.GetRemoteAttachment(ctx, requestingUsername, aURL, status.AccountID, status.ID, a.File.ContentType)
if err != nil {
l.Errorf("populateStatusAttachments: couldn't get remote attachment %s: %s", a.RemoteURL, err)
continue
}
attachmentIDs = append(attachmentIDs, attachment.ID)
@ -420,29 +425,16 @@ func (d *deref) populateStatusRepliedTo(ctx context.Context, status *gtsmodel.St
return err
}
var replyToStatus *gtsmodel.Status
errs := []string{}
// see if we have the status in our db already
if s, err := d.db.GetStatusByURI(ctx, status.InReplyToURI); err != nil {
errs = append(errs, err.Error())
} else {
replyToStatus = s
}
if replyToStatus == nil {
// didn't find the status in our db, try to get it remotely
if s, _, _, err := d.GetRemoteStatus(ctx, requestingUsername, statusURI, false); err != nil {
errs = append(errs, err.Error())
} else {
replyToStatus = s
replyToStatus, err := d.db.GetStatusByURI(ctx, status.InReplyToURI)
if err != nil {
// Status was not in the DB, try fetch
replyToStatus, _, _, err = d.GetRemoteStatus(ctx, requestingUsername, statusURI, false, false, false)
if err != nil {
return fmt.Errorf("populateStatusRepliedTo: couldn't get reply to status with uri %s: %s", status.InReplyToURI, err)
}
}
if replyToStatus == nil {
return fmt.Errorf("populateStatusRepliedTo: couldn't get reply to status with uri %s: %s", statusURI, strings.Join(errs, " : "))
}
// we have the status
status.InReplyToID = replyToStatus.ID
status.InReplyTo = replyToStatus

View file

@ -119,7 +119,7 @@ func (suite *StatusTestSuite) TestDereferenceSimpleStatus() {
fetchingAccount := suite.testAccounts["local_account_1"]
statusURL := testrig.URLMustParse("https://unknown-instance.com/users/brand_new_person/statuses/01FE4NTHKWW7THT67EF10EB839")
status, statusable, new, err := suite.dereferencer.GetRemoteStatus(context.Background(), fetchingAccount.Username, statusURL, false)
status, statusable, new, err := suite.dereferencer.GetRemoteStatus(context.Background(), fetchingAccount.Username, statusURL, false, false, false)
suite.NoError(err)
suite.NotNil(status)
suite.NotNil(statusable)
@ -157,7 +157,7 @@ func (suite *StatusTestSuite) TestDereferenceStatusWithMention() {
fetchingAccount := suite.testAccounts["local_account_1"]
statusURL := testrig.URLMustParse("https://unknown-instance.com/users/brand_new_person/statuses/01FE5Y30E3W4P7TRE0R98KAYQV")
status, statusable, new, err := suite.dereferencer.GetRemoteStatus(context.Background(), fetchingAccount.Username, statusURL, false)
status, statusable, new, err := suite.dereferencer.GetRemoteStatus(context.Background(), fetchingAccount.Username, statusURL, false, false, true)
suite.NoError(err)
suite.NotNil(status)
suite.NotNil(statusable)

View file

@ -49,7 +49,7 @@ func (d *deref) DereferenceThread(ctx context.Context, username string, statusIR
}
// first make sure we have this status in our db
_, statusable, _, err := d.GetRemoteStatus(ctx, username, statusIRI, true)
_, statusable, _, err := d.GetRemoteStatus(ctx, username, statusIRI, true, false, false)
if err != nil {
return fmt.Errorf("DereferenceThread: error getting status with id %s: %s", statusIRI.String(), err)
}
@ -104,7 +104,7 @@ func (d *deref) iterateAncestors(ctx context.Context, username string, statusIRI
// If we reach here, we're looking at a remote status -- make sure we have it in our db by calling GetRemoteStatus
// We call it with refresh to true because we want the statusable representation to parse inReplyTo from.
status, statusable, _, err := d.GetRemoteStatus(ctx, username, &statusIRI, true)
_, statusable, _, err := d.GetRemoteStatus(ctx, username, &statusIRI, true, false, false)
if err != nil {
l.Debugf("error getting remote status: %s", err)
return nil
@ -116,18 +116,6 @@ func (d *deref) iterateAncestors(ctx context.Context, username string, statusIRI
return nil
}
// get the ancestor status into our database if we don't have it yet
if _, _, _, err := d.GetRemoteStatus(ctx, username, inReplyTo, false); err != nil {
l.Debugf("error getting remote status: %s", err)
return nil
}
// now enrich the current status, since we should have the ancestor in the db
if _, err := d.EnrichRemoteStatus(ctx, username, status); err != nil {
l.Debugf("error enriching remote status: %s", err)
return nil
}
// now move up to the next ancestor
return d.iterateAncestors(ctx, username, *inReplyTo)
}
@ -226,7 +214,7 @@ pageLoop:
foundReplies = foundReplies + 1
// get the remote statusable and put it in the db
_, statusable, new, err := d.GetRemoteStatus(ctx, username, itemURI, false)
_, statusable, new, err := d.GetRemoteStatus(ctx, username, itemURI, false, false, false)
if new && err == nil && statusable != nil {
// now iterate descendants of *that* status
if err := d.iterateDescendants(ctx, username, *itemURI, statusable); err != nil {