mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-11-18 10:57:35 -06:00
[bugfix] parent status replied to status not dereferenced sometimes (#2587)
* much simplified DereferenceStatusAncestors(), also handles edge cases now * perform status acceptibility check before handling even as forward * don't further dereference ancestors if they're up to date * call enrichStatusSafely() directly to ensure we get error messages * change getStatusByURI() semantics to return error + old model on failed update, fix deref ancestor to check for staleness before refetch * perform a nil-check on the status.Local variable, in case it hasn't been set on new status attempting refresh * more consistently set returned parent status, don't check if updated * only home-timeline statuses if explicitly visible AND not explicitly invisible! * fix broken test now that status acceptibility checks happen on forwarded statuses
This commit is contained in:
parent
81198fa2d0
commit
0f7a2024c3
6 changed files with 230 additions and 237 deletions
|
|
@ -372,18 +372,18 @@ func (d *Dereferencer) enrichAccountSafely(
|
|||
|
||||
// By default use account.URI
|
||||
// as the per-URI deref lock.
|
||||
var lockKey string
|
||||
var uriStr string
|
||||
if account.URI != "" {
|
||||
lockKey = account.URI
|
||||
uriStr = account.URI
|
||||
} else {
|
||||
// No URI is set yet, instead generate a faux-one from user+domain.
|
||||
lockKey = "https://" + account.Domain + "/users/" + account.Username
|
||||
uriStr = "https://" + account.Domain + "/users/" + account.Username
|
||||
}
|
||||
|
||||
// Acquire per-URI deref lock, wraping unlock
|
||||
// to safely defer in case of panic, while still
|
||||
// performing more granular unlocks when needed.
|
||||
unlock := d.state.FedLocks.Lock(lockKey)
|
||||
unlock := d.state.FedLocks.Lock(uriStr)
|
||||
unlock = doOnce(unlock)
|
||||
defer unlock()
|
||||
|
||||
|
|
@ -395,12 +395,7 @@ func (d *Dereferencer) enrichAccountSafely(
|
|||
accountable,
|
||||
)
|
||||
|
||||
if code := gtserror.StatusCode(err); code >= 400 {
|
||||
// No matter what, log the error
|
||||
// so instance admins have an idea
|
||||
// why something isn't working.
|
||||
log.Info(ctx, err)
|
||||
|
||||
if gtserror.StatusCode(err) >= 400 {
|
||||
if account.IsNew() {
|
||||
// This was a new account enrich
|
||||
// attempt which failed before we
|
||||
|
|
@ -417,7 +412,7 @@ func (d *Dereferencer) enrichAccountSafely(
|
|||
// return the model we had stored already.
|
||||
account.FetchedAt = time.Now()
|
||||
if err := d.state.DB.UpdateAccount(ctx, account, "fetched_at"); err != nil {
|
||||
log.Errorf(ctx, "error updating account fetched_at: %v", err)
|
||||
log.Error(ctx, "error updating %s fetched_at: %v", uriStr, err)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -435,7 +430,7 @@ func (d *Dereferencer) enrichAccountSafely(
|
|||
// in a call to db.Put(Account). Look again in DB by URI.
|
||||
latest, err = d.state.DB.GetAccountByURI(ctx, account.URI)
|
||||
if err != nil {
|
||||
err = gtserror.Newf("error getting account %s from database after race: %w", lockKey, err)
|
||||
err = gtserror.Newf("error getting account %s from database after race: %w", uriStr, err)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1070,11 +1065,17 @@ func (d *Dereferencer) dereferenceAccountFeatured(ctx context.Context, requestUs
|
|||
// we still know it was *meant* to be pinned.
|
||||
statusURIs = append(statusURIs, statusURI)
|
||||
|
||||
// Search for status by URI. Note this may return an existing model
|
||||
// we have stored with an error from attempted update, so check both.
|
||||
status, _, _, err := d.getStatusByURI(ctx, requestUser, statusURI)
|
||||
if err != nil {
|
||||
// We couldn't get the status, bummer. Just log + move on, we can try later.
|
||||
log.Errorf(ctx, "error getting status from featured collection %s: %v", statusURI, err)
|
||||
continue
|
||||
|
||||
if status == nil {
|
||||
// This is only unactionable
|
||||
// if no status was returned.
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// If the status was already pinned, we don't need to do anything.
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ import (
|
|||
// statusUpToDate returns whether the given status model is both updateable
|
||||
// (i.e. remote status) and whether it needs an update based on `fetched_at`.
|
||||
func statusUpToDate(status *gtsmodel.Status, force bool) bool {
|
||||
if *status.Local {
|
||||
if status.Local != nil && *status.Local {
|
||||
// Can't update local statuses.
|
||||
return true
|
||||
}
|
||||
|
|
@ -69,16 +69,24 @@ func statusUpToDate(status *gtsmodel.Status, force bool) bool {
|
|||
// is beyond a certain interval, the status will be dereferenced. In the case of dereferencing, some low-priority status information may be enqueued for asynchronous fetching,
|
||||
// e.g. dereferencing the status thread. Param 'syncParent' = true indicates to fetch status ancestors synchronously. An ActivityPub object indicates the status was dereferenced.
|
||||
func (d *Dereferencer) GetStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, error) {
|
||||
// Fetch and dereference status if necessary.
|
||||
|
||||
// Fetch and dereference / update status if necessary.
|
||||
status, statusable, isNew, err := d.getStatusByURI(ctx,
|
||||
requestUser,
|
||||
uri,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
if statusable != nil {
|
||||
if err != nil {
|
||||
if status == nil {
|
||||
// err with no existing
|
||||
// status for fallback.
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
log.Errorf(ctx, "error updating status %s: %v", uri, err)
|
||||
|
||||
} else if statusable != nil {
|
||||
|
||||
// Deref parents + children.
|
||||
d.dereferenceThread(ctx,
|
||||
requestUser,
|
||||
|
|
@ -92,7 +100,7 @@ func (d *Dereferencer) GetStatusByURI(ctx context.Context, requestUser string, u
|
|||
return status, statusable, nil
|
||||
}
|
||||
|
||||
// getStatusByURI is a package internal form of .GetStatusByURI() that doesn't bother dereferencing the whole thread on update.
|
||||
// getStatusByURI is a package internal form of .GetStatusByURI() that doesn't dereference thread on update, and may return an existing status with error on failed re-fetch.
|
||||
func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, bool, error) {
|
||||
var (
|
||||
status *gtsmodel.Status
|
||||
|
|
@ -100,8 +108,9 @@ func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, u
|
|||
err error
|
||||
)
|
||||
|
||||
// Search the database for existing status with URI.
|
||||
// Search the database for existing by URI.
|
||||
status, err = d.state.DB.GetStatusByURI(
|
||||
|
||||
// request a barebones object, it may be in the
|
||||
// db but with related models not yet dereferenced.
|
||||
gtscontext.SetBarebones(ctx),
|
||||
|
|
@ -112,7 +121,7 @@ func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, u
|
|||
}
|
||||
|
||||
if status == nil {
|
||||
// Else, search the database for existing by URL.
|
||||
// Else, search database for existing by URL.
|
||||
status, err = d.state.DB.GetStatusByURL(
|
||||
gtscontext.SetBarebones(ctx),
|
||||
uriStr,
|
||||
|
|
@ -123,14 +132,16 @@ func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, u
|
|||
}
|
||||
|
||||
if status == nil {
|
||||
// Ensure that this isn't a search for a local status.
|
||||
if uri.Host == config.GetHost() || uri.Host == config.GetAccountDomain() {
|
||||
return nil, nil, false, gtserror.SetUnretrievable(err) // this will be db.ErrNoEntries
|
||||
// Ensure not a failed search for a local
|
||||
// status, if so we know it doesn't exist.
|
||||
if uri.Host == config.GetHost() ||
|
||||
uri.Host == config.GetAccountDomain() {
|
||||
return nil, nil, false, gtserror.SetUnretrievable(err)
|
||||
}
|
||||
|
||||
// Create and pass-through a new bare-bones model for deref.
|
||||
return d.enrichStatusSafely(ctx, requestUser, uri, >smodel.Status{
|
||||
Local: func() *bool { var false bool; return &false }(),
|
||||
Local: util.Ptr(false),
|
||||
URI: uriStr,
|
||||
}, nil)
|
||||
}
|
||||
|
|
@ -145,21 +156,22 @@ func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, u
|
|||
return status, nil, false, nil
|
||||
}
|
||||
|
||||
// Try to update + deref existing status model.
|
||||
// Try to deref and update existing status model.
|
||||
latest, statusable, isNew, err := d.enrichStatusSafely(ctx,
|
||||
requestUser,
|
||||
uri,
|
||||
status,
|
||||
nil,
|
||||
)
|
||||
if err != nil {
|
||||
log.Errorf(ctx, "error enriching remote status: %v", err)
|
||||
|
||||
// Fallback to existing status.
|
||||
return status, nil, false, nil
|
||||
if err != nil {
|
||||
// fallback to the
|
||||
// existing status.
|
||||
latest = status
|
||||
statusable = nil
|
||||
}
|
||||
|
||||
return latest, statusable, isNew, nil
|
||||
return latest, statusable, isNew, err
|
||||
}
|
||||
|
||||
// RefreshStatus is functionally equivalent to GetStatusByURI(), except that it requires a pre
|
||||
|
|
@ -294,12 +306,7 @@ func (d *Dereferencer) enrichStatusSafely(
|
|||
apubStatus,
|
||||
)
|
||||
|
||||
if code := gtserror.StatusCode(err); code >= 400 {
|
||||
// No matter what, log the error
|
||||
// so instance admins have an idea
|
||||
// why something isn't working.
|
||||
log.Info(ctx, err)
|
||||
|
||||
if gtserror.StatusCode(err) >= 400 {
|
||||
if isNew {
|
||||
// This was a new status enrich
|
||||
// attempt which failed before we
|
||||
|
|
@ -316,7 +323,7 @@ func (d *Dereferencer) enrichStatusSafely(
|
|||
// return the model we had stored already.
|
||||
status.FetchedAt = time.Now()
|
||||
if err := d.state.DB.UpdateStatus(ctx, status, "fetched_at"); err != nil {
|
||||
log.Errorf(ctx, "error updating status fetched_at: %v", err)
|
||||
log.Error(ctx, "error updating %s fetched_at: %v", uriStr, err)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -408,19 +415,21 @@ func (d *Dereferencer) enrichStatus(
|
|||
return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err)
|
||||
}
|
||||
|
||||
// Check if we've previously
|
||||
// stored this status in the DB.
|
||||
// If we have, it'll be ID'd.
|
||||
var isNew = (status.ID == "")
|
||||
if isNew {
|
||||
// No ID, we haven't stored this status before.
|
||||
// Generate new status ID from the status publication time.
|
||||
// Based on the original provided
|
||||
// status model, determine whether
|
||||
// this is a new insert / update.
|
||||
var isNew bool
|
||||
|
||||
if isNew = (status.ID == ""); isNew {
|
||||
|
||||
// Generate new status ID from the provided creation date.
|
||||
latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt)
|
||||
if err != nil {
|
||||
log.Errorf(ctx, "invalid created at date (falling back to 'now'): %v", err)
|
||||
latestStatus.ID = id.NewULID() // just use "now"
|
||||
}
|
||||
} else {
|
||||
|
||||
// Reuse existing status ID.
|
||||
latestStatus.ID = status.ID
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ package dereferencing
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
|
|
@ -27,8 +26,6 @@ import (
|
|||
"github.com/superseriousbusiness/activity/pub"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/ap"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/db"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
|
|
@ -101,144 +98,85 @@ func (d *Dereferencer) DereferenceStatusAncestors(ctx context.Context, username
|
|||
return nil
|
||||
}
|
||||
|
||||
// Add new log fields for this iteration.
|
||||
l = l.WithFields(kv.Fields{
|
||||
{"current", current.URI},
|
||||
{"parent", current.InReplyToURI},
|
||||
}...)
|
||||
l.Trace("following status ancestors")
|
||||
l = l.WithField("parent", current.InReplyToURI)
|
||||
l.Trace("following status ancestor")
|
||||
|
||||
// Check whether this parent has already been deref'd.
|
||||
if _, ok := derefdStatuses[current.InReplyToURI]; ok {
|
||||
l.Warn("self referencing status ancestors")
|
||||
// Parse status parent URI for later use.
|
||||
uri, err := url.Parse(current.InReplyToURI)
|
||||
if err != nil {
|
||||
l.Warnf("invalid uri: %v", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add this status URI to map of deref'd.
|
||||
derefdStatuses[current.URI] = struct{}{}
|
||||
|
||||
if current.InReplyToID != "" {
|
||||
// We already have an InReplyToID set. This means
|
||||
// the status's parent has, at some point, been
|
||||
// inserted into the database, either because it
|
||||
// is a status from our instance, or a status from
|
||||
// remote that we've dereferenced before, or found
|
||||
// out about in some other way.
|
||||
//
|
||||
// Working on this assumption, check if the parent
|
||||
// status exists, either as a copy pinned on the
|
||||
// current status, or in the database.
|
||||
|
||||
if current.InReplyTo != nil {
|
||||
// We have the parent already, and the child
|
||||
// doesn't need to be updated; keep iterating
|
||||
// from this parent upwards.
|
||||
current = current.InReplyTo
|
||||
continue
|
||||
}
|
||||
|
||||
// Parent isn't pinned to this status (yet), see
|
||||
// if we can get it from the db (we should be
|
||||
// able to, since it has an ID already).
|
||||
parent, err := d.state.DB.GetStatusByID(
|
||||
gtscontext.SetBarebones(ctx),
|
||||
current.InReplyToID,
|
||||
)
|
||||
if err != nil && !errors.Is(err, db.ErrNoEntries) {
|
||||
// Real db error, stop.
|
||||
return gtserror.Newf("db error getting status %s: %w", current.InReplyToID, err)
|
||||
}
|
||||
|
||||
if parent != nil {
|
||||
// We got the parent from the db, and the child
|
||||
// doesn't need to be updated; keep iterating
|
||||
// from this parent upwards.
|
||||
current.InReplyTo = parent
|
||||
current = parent
|
||||
continue
|
||||
}
|
||||
|
||||
// If we arrive here, we know this child *did* have
|
||||
// a parent at some point, but it no longer exists in
|
||||
// the database, presumably because it's been deleted
|
||||
// by another action.
|
||||
//
|
||||
// TODO: clean this up in a nightly task.
|
||||
l.Warn("orphaned status (parent no longer exists)")
|
||||
return nil // Cannot iterate further.
|
||||
// Check whether this parent has already been deref'd.
|
||||
if _, ok := derefdStatuses[current.InReplyToURI]; ok {
|
||||
l.Warn("self referencing status ancestor")
|
||||
return nil
|
||||
}
|
||||
|
||||
// If we reach this point, we know the status has
|
||||
// an InReplyToURI set, but it doesn't yet have an
|
||||
// InReplyToID, which means that the parent status
|
||||
// has not yet been dereferenced.
|
||||
inReplyToURI, err := url.Parse(current.InReplyToURI)
|
||||
if err != nil || inReplyToURI == nil {
|
||||
// Parent URI is not something we can handle.
|
||||
l.Warn("orphaned status (invalid InReplyToURI)")
|
||||
return nil //nolint:nilerr
|
||||
}
|
||||
// Add this status's parent URI to map of deref'd.
|
||||
derefdStatuses[current.InReplyToURI] = struct{}{}
|
||||
|
||||
// Parent URI is valid, try to get it.
|
||||
// getStatusByURI guards against the following conditions:
|
||||
// - refetching recently fetched statuses (recursion!)
|
||||
// - remote domain is blocked (will return unretrievable)
|
||||
// - any http type error for a new status returns unretrievable
|
||||
parent, _, _, err := d.getStatusByURI(ctx, username, inReplyToURI)
|
||||
if err == nil {
|
||||
// We successfully fetched the parent.
|
||||
// Update current status with new info.
|
||||
current.InReplyToID = parent.ID
|
||||
current.InReplyToAccountID = parent.AccountID
|
||||
if err := d.state.DB.UpdateStatus(
|
||||
ctx, current,
|
||||
// Fetch parent status by current's reply URI, this handles
|
||||
// case of existing (updating if necessary) or a new status.
|
||||
parent, _, _, err := d.getStatusByURI(ctx, username, uri)
|
||||
|
||||
// Check for a returned HTTP code via error.
|
||||
switch code := gtserror.StatusCode(err); {
|
||||
|
||||
// Status codes 404 and 410 incicate the status does not exist anymore.
|
||||
// Gone (410) is the preferred for deletion, but we accept NotFound too.
|
||||
case code == http.StatusNotFound || code == http.StatusGone:
|
||||
l.Trace("status orphaned")
|
||||
current.InReplyToID = ""
|
||||
current.InReplyToURI = ""
|
||||
current.InReplyToAccountID = ""
|
||||
current.InReplyTo = nil
|
||||
current.InReplyToAccount = nil
|
||||
if err := d.state.DB.UpdateStatus(ctx,
|
||||
current,
|
||||
"in_reply_to_id",
|
||||
"in_reply_to_uri",
|
||||
"in_reply_to_account_id",
|
||||
); err != nil {
|
||||
return gtserror.Newf("db error updating status %s: %w", current.ID, err)
|
||||
}
|
||||
return nil
|
||||
|
||||
// Mark parent as next status to
|
||||
// work on, and keep iterating.
|
||||
current = parent
|
||||
continue
|
||||
}
|
||||
// An error was returned for a status during
|
||||
// an attempted NEW dereference, return here.
|
||||
case err != nil && current.InReplyToID == "":
|
||||
return gtserror.Newf("error dereferencing new %s: %w", current.InReplyToURI, err)
|
||||
|
||||
// We could not fetch the parent, check if we can do anything
|
||||
// useful with the error. For example, HTTP status code returned
|
||||
// from remote may indicate that the parent has been deleted.
|
||||
switch code := gtserror.StatusCode(err); {
|
||||
case code == http.StatusGone:
|
||||
// 410 means the status has definitely been deleted.
|
||||
// Update this status to reflect that, then bail.
|
||||
l.Debug("orphaned status: parent returned 410 Gone")
|
||||
// An error was returned for an existing parent,
|
||||
// we simply treat this as a temporary situation.
|
||||
// (we fallback to using existing parent status).
|
||||
case err != nil:
|
||||
l.Errorf("error getting parent: %v", err)
|
||||
|
||||
current.InReplyToURI = ""
|
||||
if err := d.state.DB.UpdateStatus(
|
||||
ctx, current,
|
||||
// The ID has changed for currently stored parent ID
|
||||
// (which may be empty, if new!) and fetched version.
|
||||
//
|
||||
// Update the current's inReplyTo fields to parent.
|
||||
case current.InReplyToID != parent.ID:
|
||||
l.Tracef("parent changed %s => %s", current.InReplyToID, parent.ID)
|
||||
current.InReplyToAccountID = parent.AccountID
|
||||
current.InReplyToAccount = parent.Account
|
||||
current.InReplyToURI = parent.URI
|
||||
current.InReplyToID = parent.ID
|
||||
if err := d.state.DB.UpdateStatus(ctx,
|
||||
current,
|
||||
"in_reply_to_id",
|
||||
"in_reply_to_uri",
|
||||
"in_reply_to_account_id",
|
||||
); err != nil {
|
||||
return gtserror.Newf("db error updating status %s: %w", current.ID, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
case code != 0:
|
||||
// We had a code, but not one indicating deletion, log the code
|
||||
// but don't return error or update the status; we can try again later.
|
||||
l.Warnf("orphaned status: http error dereferencing parent: %v)", err)
|
||||
return nil
|
||||
|
||||
case gtserror.IsUnretrievable(err):
|
||||
// Not retrievable for some other reason, so just
|
||||
// bail for now; we can try again later if necessary.
|
||||
l.Warnf("orphaned status: parent unretrievable: %v)", err)
|
||||
return nil
|
||||
|
||||
default:
|
||||
// Some other error that stops us in our tracks.
|
||||
return gtserror.Newf("error dereferencing parent %s: %w", current.InReplyToURI, err)
|
||||
}
|
||||
|
||||
// Set next parent to use.
|
||||
current.InReplyTo = parent
|
||||
current = current.InReplyTo
|
||||
}
|
||||
|
||||
return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, status.URI)
|
||||
|
|
@ -336,7 +274,7 @@ stackLoop:
|
|||
break itemLoop
|
||||
}
|
||||
|
||||
// Check for available IRI on item
|
||||
// Check for available IRI.
|
||||
itemIRI, _ := pub.ToId(next)
|
||||
if itemIRI == nil {
|
||||
continue itemLoop
|
||||
|
|
@ -354,9 +292,7 @@ stackLoop:
|
|||
// - any http type error for a new status returns unretrievable
|
||||
_, statusable, _, err := d.getStatusByURI(ctx, username, itemIRI)
|
||||
if err != nil {
|
||||
if !gtserror.IsUnretrievable(err) {
|
||||
l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
|
||||
}
|
||||
l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
|
||||
continue itemLoop
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue