[feature] status refetch support (#1690)

* revamp http client to not limit requests, instead use sender worker

Signed-off-by: kim <grufwub@gmail.com>

* remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping

Signed-off-by: kim <grufwub@gmail.com>

* improve batch senders to keep popping recipients until remote URL found

Signed-off-by: kim <grufwub@gmail.com>

* fix recipient looping issue

Signed-off-by: kim <grufwub@gmail.com>

* move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP

Signed-off-by: kim <grufwub@gmail.com>

* first draft of status refetching logic

Signed-off-by: kim <grufwub@gmail.com>

* fix testrig to use new federation alloc func signature

Signed-off-by: kim <grufwub@gmail.com>

* fix log format directive

Signed-off-by: kim <grufwub@gmail.com>

* add status fetched_at migration

Signed-off-by: kim <grufwub@gmail.com>

* remove unused / unchecked for error types

Signed-off-by: kim <grufwub@gmail.com>

* add back the used type...

Signed-off-by: kim <grufwub@gmail.com>

* add separate internal getStatus() function for derefThread() that doesn't recurse

Signed-off-by: kim <grufwub@gmail.com>

* improved mention and media attachment error handling

Signed-off-by: kim <grufwub@gmail.com>

* fix log and error format directives

Signed-off-by: kim <grufwub@gmail.com>

* update account deref to match status deref changes

Signed-off-by: kim <grufwub@gmail.com>

* very small code formatting change to make things clearer

Signed-off-by: kim <grufwub@gmail.com>

* add more code comments

Signed-off-by: kim <grufwub@gmail.com>

* improved code commenting

Signed-off-by: kim <grufwub@gmail.com>

* only check for required further derefs if needed

Signed-off-by: kim <grufwub@gmail.com>

* improved cache invalidation

Signed-off-by: kim <grufwub@gmail.com>

* tweak cache restarting to use a (very small) backoff

Signed-off-by: kim <grufwub@gmail.com>

* small readability changes and fixes

Signed-off-by: kim <grufwub@gmail.com>

* fix account sync issues

Signed-off-by: kim <grufwub@gmail.com>

* fix merge conflicts + update account enrichment to accept already-passed accountable

Signed-off-by: kim <grufwub@gmail.com>

* remove secondary function declaration

Signed-off-by: kim <grufwub@gmail.com>

* normalise dereferencer get status / account behaviour, fix remaining tests

Signed-off-by: kim <grufwub@gmail.com>

* fix remaining rebase conflicts, finish commenting code

Signed-off-by: kim <grufwub@gmail.com>

* appease the linter

Signed-off-by: kim <grufwub@gmail.com>

* add source file header

Signed-off-by: kim <grufwub@gmail.com>

* update to use TIMESTAMPTZ column type instead of just TIMESTAMP

Signed-off-by: kim <grufwub@gmail.com>

* don't pass in 'updated_at' to UpdateEmoji()

Signed-off-by: kim <grufwub@gmail.com>

* use new ap.Resolve{Account,Status}able() functions

Signed-off-by: kim <grufwub@gmail.com>

* remove the somewhat confusing rescoping of the same variable names

Signed-off-by: kim <grufwub@gmail.com>

* update migration file name, improved database delete error returns

Signed-off-by: kim <grufwub@gmail.com>

* formatting

Signed-off-by: kim <grufwub@gmail.com>

* improved multi-delete database functions to minimise DB calls

Signed-off-by: kim <grufwub@gmail.com>

* remove unused type

Signed-off-by: kim <grufwub@gmail.com>

* fix delete statements

Signed-off-by: kim <grufwub@gmail.com>

---------

Signed-off-by: kim <grufwub@gmail.com>
This commit is contained in:
kim 2023-05-12 10:15:54 +01:00 committed by GitHub
commit 6c9d8e78eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
55 changed files with 1552 additions and 1118 deletions

View file

@ -35,34 +35,16 @@ import (
// ancestors we are willing to follow before returning error.
const maxIter = 1000
// DereferenceThread takes a statusable (something that has withReplies and withInReplyTo),
// and dereferences statusables in the conversation.
//
// This process involves working up and down the chain of replies, and parsing through the collections of IDs
// presented by remote instances as part of their replies collections, and will likely involve making several calls to
// multiple different hosts.
//
// This does not return error, as for robustness we do not want to error-out on a status because another further up / down has issues.
func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) {
l := log.WithContext(ctx).
WithFields(kv.Fields{
{"username", username},
{"statusIRI", status.URI},
}...)
// Log function start
l.Trace("beginning")
// dereferenceThread will dereference statuses both above and below the given status in a thread. It returns no error and is intended to be called asynchronously.
func (d *deref) dereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) {
// Ensure that ancestors have been fully dereferenced
if err := d.dereferenceStatusAncestors(ctx, username, status); err != nil {
l.Errorf("error dereferencing status ancestors: %v", err)
// we don't return error, we have deref'd as much as we can
log.Errorf(ctx, "error dereferencing status ancestors: %v", err)
}
// Ensure that descendants have been fully dereferenced
if err := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil {
l.Errorf("error dereferencing status descendants: %v", err)
// we don't return error, we have deref'd as much as we can
log.Errorf(ctx, "error dereferencing status descendants: %v", err)
}
}
@ -103,7 +85,7 @@ func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string,
}
// Fetch this status from the database
localStatus, err := d.db.GetStatusByID(ctx, id)
localStatus, err := d.state.DB.GetStatusByID(ctx, id)
if err != nil {
return fmt.Errorf("error fetching local status %q: %w", id, err)
}
@ -115,7 +97,10 @@ func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string,
l.Tracef("following remote status ancestors: %s", status.InReplyToURI)
// Fetch the remote status found at this IRI
remoteStatus, _, err := d.GetStatus(ctx, username, replyIRI, false, false)
remoteStatus, _, err := d.getStatusByURI(ctx,
username,
replyIRI,
)
if err != nil {
return fmt.Errorf("error fetching remote status %q: %w", status.InReplyToURI, err)
}
@ -277,10 +262,15 @@ stackLoop:
continue itemLoop
}
// Dereference the remote status and store in the database
_, statusable, err := d.GetStatus(ctx, username, itemIRI, true, false)
// Dereference the remote status and store in the database.
_, statusable, err := d.getStatusByURI(ctx, username, itemIRI)
if err != nil {
l.Errorf("error dereferencing remote status %q: %s", itemIRI.String(), err)
l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
continue itemLoop
}
if statusable == nil {
// Already up-to-date.
continue itemLoop
}
@ -307,7 +297,10 @@ stackLoop:
}
// Dereference this next collection page by its IRI
collectionPage, err := d.DereferenceCollectionPage(ctx, username, pageNextIRI)
collectionPage, err := d.dereferenceCollectionPage(ctx,
username,
pageNextIRI,
)
if err != nil {
l.Errorf("error dereferencing remote collection page %q: %s", pageNextIRI.String(), err)
continue stackLoop