[bugfix] Don't return Account or Status if new and dereferencing failed, other small fixes (#2563)

* tidy up account, status, webfingering logic a wee bit

* go fmt

* invert published check

* alter resp initialization

* get Published from account in typeutils

* don't instantiate error for no darn good reason

* shadow err

* don't repeat error codes in wrapped errors

* don't wrap error unnecessarily
This commit is contained in:
tobi 2024-01-26 14:17:10 +01:00 committed by GitHub
commit e3052e8c82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 461 additions and 211 deletions

View file

@ -52,7 +52,7 @@ func accountUpToDate(account *gtsmodel.Account, force bool) bool {
return true
}
if !account.CreatedAt.IsZero() && account.IsInstance() {
if account.IsInstance() && !account.IsNew() {
// Existing instance account. No need for update.
return true
}
@ -232,8 +232,8 @@ func (d *Dereferencer) getAccountByUsernameDomain(
// Create and pass-through a new bare-bones model for dereferencing.
account, accountable, err := d.enrichAccountSafely(ctx, requestUser, nil, &gtsmodel.Account{
ID: id.NewULID(),
Username: username,
Domain: domain,
Username: username,
}, nil)
if err != nil {
return nil, nil, err
@ -372,17 +372,18 @@ func (d *Dereferencer) enrichAccountSafely(
// By default use account.URI
// as the per-URI deref lock.
uriStr := account.URI
if uriStr == "" {
var lockKey string
if account.URI != "" {
lockKey = account.URI
} else {
// No URI is set yet, instead generate a faux-one from user+domain.
uriStr = "https://" + account.Domain + "/user/" + account.Username
lockKey = "https://" + account.Domain + "/users/" + account.Username
}
// Acquire per-URI deref lock, wraping unlock
// to safely defer in case of panic, while still
// performing more granular unlocks when needed.
unlock := d.state.FedLocks.Lock(uriStr)
unlock := d.state.FedLocks.Lock(lockKey)
unlock = doOnce(unlock)
defer unlock()
@ -394,10 +395,30 @@ func (d *Dereferencer) enrichAccountSafely(
accountable,
)
if gtserror.StatusCode(err) >= 400 {
// Update fetch-at to slow re-attempts.
if code := gtserror.StatusCode(err); code >= 400 {
// No matter what, log the error
// so instance admins have an idea
// why something isn't working.
log.Info(ctx, err)
if account.IsNew() {
// This was a new account enrich
// attempt which failed before we
// got to store it, so we can't
// return anything useful.
return nil, nil, err
}
// We had this account stored already
// before this enrichment attempt.
//
// Update fetched_at to slow re-attempts
// but don't return early. We can still
// return the model we had stored already.
account.FetchedAt = time.Now()
_ = d.state.DB.UpdateAccount(ctx, account, "fetched_at")
if err := d.state.DB.UpdateAccount(ctx, account, "fetched_at"); err != nil {
log.Errorf(ctx, "error updating account fetched_at: %v", err)
}
}
// Unlock now
@ -414,7 +435,7 @@ func (d *Dereferencer) enrichAccountSafely(
// in a call to db.Put(Account). Look again in DB by URI.
latest, err = d.state.DB.GetAccountByURI(ctx, account.URI)
if err != nil {
err = gtserror.Newf("error getting account %s from database after race: %w", uriStr, err)
err = gtserror.Newf("error getting account %s from database after race: %w", lockKey, err)
}
}
@ -440,32 +461,44 @@ func (d *Dereferencer) enrichAccount(
if account.Username != "" {
// A username was provided so we can attempt a webfinger, this ensures up-to-date accountdomain info.
accDomain, accURI, err := d.fingerRemoteAccount(ctx, tsport, account.Username, account.Domain)
if err != nil {
if account.URI == "" {
// this is a new account (to us) with username@domain
// but failed webfinger, nothing more we can do.
err := gtserror.Newf("error webfingering account: %w", err)
return nil, nil, gtserror.SetUnretrievable(err)
switch {
case err != nil && account.URI == "":
// This is a new account (to us) with username@domain
// but failed webfinger, nothing more we can do.
err := gtserror.Newf("error webfingering account: %w", err)
return nil, nil, gtserror.SetUnretrievable(err)
case err != nil:
// Simply log this error and move on,
// we already have an account URI.
log.Errorf(ctx,
"error webfingering[1] remote account %s@%s: %v",
account.Username, account.Domain, err,
)
case err == nil && account.Domain != accDomain:
// After webfinger, we now have correct account domain from which we can do a final DB check.
alreadyAcct, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err)
}
// Simply log this error and move on, we already have an account URI.
log.Errorf(ctx, "error webfingering[1] remote account %s@%s: %v", account.Username, account.Domain, err)
}
if err == nil {
if account.Domain != accDomain {
// After webfinger, we now have correct account domain from which we can do a final DB check.
alreadyAccount, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err)
}
if alreadyAccount != nil {
// Enrich existing account.
account = alreadyAccount
}
if alreadyAcct != nil {
// We had this account stored under
// the discovered accountDomain.
// Proceed with this account.
account = alreadyAcct
}
// Whether we had the account or not, we
// now have webfinger info relevant to the
// account, so fallthrough to set webfinger
// info on either the account we just found,
// or the stub account we were passed.
fallthrough
case err == nil:
// Update account with latest info.
account.URI = accURI.String()
account.Domain = accDomain
@ -474,13 +507,31 @@ func (d *Dereferencer) enrichAccount(
}
if uri == nil {
// No URI provided / found, must parse from account.
// No URI provided / found,
// must parse from account.
uri, err = url.Parse(account.URI)
if err != nil {
return nil, nil, gtserror.Newf("invalid uri %q: %w", account.URI, err)
return nil, nil, gtserror.Newf(
"invalid uri %q: %w",
account.URI, gtserror.SetUnretrievable(err),
)
}
if uri.Scheme != "http" && uri.Scheme != "https" {
err = errors.New("account URI scheme must be http or https")
return nil, nil, gtserror.Newf(
"invalid uri %q: %w",
account.URI, gtserror.SetUnretrievable(err),
)
}
}
/*
BY THIS POINT we must have an account URI set,
either provided, pinned to the account, or
obtained via webfinger call.
*/
// Check whether this account URI is a blocked domain / subdomain.
if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil {
return nil, nil, gtserror.Newf("error checking blocked domain: %w", err)
@ -493,27 +544,45 @@ func (d *Dereferencer) enrichAccount(
defer d.stopHandshake(requestUser, uri)
if apubAcc == nil {
// We were not given any (partial) ActivityPub
// version of this account as a parameter.
// Dereference latest version of the account.
b, err := tsport.Dereference(ctx, uri)
if err != nil {
err := gtserror.Newf("error deferencing %s: %w", uri, err)
err := gtserror.Newf("error dereferencing %s: %w", uri, err)
return nil, nil, gtserror.SetUnretrievable(err)
}
// Attempt to resolve ActivityPub acc from data.
apubAcc, err = ap.ResolveAccountable(ctx, b)
if err != nil {
return nil, nil, gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err)
// ResolveAccountable will set Unretrievable/WrongType
// on the returned error, so we don't need to do it here.
err = gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err)
return nil, nil, err
}
}
/*
BY THIS POINT we must have the ActivityPub
representation of the account, either provided,
or obtained via a dereference call.
*/
// Convert the dereferenced AP account object to our GTS model.
//
// We put this in the variable latestAcc because we might want
// to compare the provided account model with this fresh version,
// in order to check if anything changed since we last saw it.
latestAcc, err := d.converter.ASRepresentationToAccount(ctx,
apubAcc,
account.Domain,
)
if err != nil {
return nil, nil, gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err)
// ASRepresentationToAccount will set Malformed on the
// returned error, so we don't need to do it here.
err = gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err)
return nil, nil, err
}
if account.Username == "" {
@ -528,14 +597,15 @@ func (d *Dereferencer) enrichAccount(
// from example.org to somewhere.else: we want to take somewhere.else
// as the accountDomain then, not the example.org we were redirected from.
// Assume the host from the returned ActivityPub representation.
idProp := apubAcc.GetJSONLDId()
if idProp == nil || !idProp.IsIRI() {
// Assume the host from the returned
// ActivityPub representation.
id := ap.GetJSONLDId(apubAcc)
if id == nil {
return nil, nil, gtserror.New("no id property found on person, or id was not an iri")
}
// Get IRI host value.
accHost := idProp.GetIRI().Host
accHost := id.Host
latestAcc.Domain, _, err = d.fingerRemoteAccount(ctx,
tsport,
@ -553,7 +623,7 @@ func (d *Dereferencer) enrichAccount(
}
if latestAcc.Domain == "" {
// ensure we have a domain set by this point,
// Ensure we have a domain set by this point,
// otherwise it gets stored as a local user!
//
// TODO: there is probably a more granular way
@ -562,7 +632,16 @@ func (d *Dereferencer) enrichAccount(
return nil, nil, gtserror.Newf("empty domain for %s", uri)
}
// Ensure ID is set and update fetch time.
/*
BY THIS POINT we have more or less a fullly-formed
representation of the target account, derived from
a combination of webfinger lookups and dereferencing.
Further fetching beyond this point is for peripheral
things like account avatar, header, emojis.
*/
// Ensure internal db ID is
// set and update fetch time.
latestAcc.ID = account.ID
latestAcc.FetchedAt = time.Now()
@ -581,12 +660,14 @@ func (d *Dereferencer) enrichAccount(
log.Errorf(ctx, "error fetching remote emojis for account %s: %v", uri, err)
}
if account.CreatedAt.IsZero() {
// CreatedAt will be zero if no local copy was
// found in one of the GetAccountBy___() functions.
//
// Set time of creation from the last-fetched date.
latestAcc.CreatedAt = latestAcc.FetchedAt
if account.IsNew() {
// Prefer published/created time from
// apubAcc, fall back to FetchedAt value.
if latestAcc.CreatedAt.IsZero() {
latestAcc.CreatedAt = latestAcc.FetchedAt
}
// Set time of update from the last-fetched date.
latestAcc.UpdatedAt = latestAcc.FetchedAt
// This is new, put it in the database.
@ -595,11 +676,16 @@ func (d *Dereferencer) enrichAccount(
return nil, nil, gtserror.Newf("error putting in database: %w", err)
}
} else {
// Prefer published time from apubAcc,
// fall back to previous stored value.
if latestAcc.CreatedAt.IsZero() {
latestAcc.CreatedAt = account.CreatedAt
}
// Set time of update from the last-fetched date.
latestAcc.UpdatedAt = latestAcc.FetchedAt
// Use existing account values.
latestAcc.CreatedAt = account.CreatedAt
// Carry over existing account language.
latestAcc.Language = account.Language
// This is an existing account, update the model in the database.

View file

@ -20,54 +20,109 @@ package dereferencing
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/url"
"strings"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
func (d *Dereferencer) fingerRemoteAccount(ctx context.Context, transport transport.Transport, targetUsername string, targetHost string) (accountDomain string, accountURI *url.URL, err error) {
b, err := transport.Finger(ctx, targetUsername, targetHost)
// fingerRemoteAccount performs a webfinger call for the
// given username and host, using the provided transport.
//
// The webfinger response will be parsed, and the subject
// domain and AP URI will be extracted and returned.
//
// In case the response cannot be parsed, or the response
// does not contain a valid subject string or AP URI, an
// error will be returned instead.
func (d *Dereferencer) fingerRemoteAccount(
ctx context.Context,
transport transport.Transport,
username string,
host string,
) (
string, // discovered account domain
*url.URL, // discovered account URI
error,
) {
// Assemble target namestring for logging.
var target = "@" + username + "@" + host
b, err := transport.Finger(ctx, username, host)
if err != nil {
err = fmt.Errorf("fingerRemoteAccount: error fingering @%s@%s: %s", targetUsername, targetHost, err)
return
err = gtserror.Newf("error webfingering %s: %w", target, err)
return "", nil, err
}
resp := &apimodel.WellKnownResponse{}
if err = json.Unmarshal(b, resp); err != nil {
err = fmt.Errorf("fingerRemoteAccount: could not unmarshal server response as WebfingerAccountResponse while dereferencing @%s@%s: %s", targetUsername, targetHost, err)
return
var resp apimodel.WellKnownResponse
if err := json.Unmarshal(b, &resp); err != nil {
err = gtserror.Newf("error parsing response as JSON for %s: %w", target, err)
return "", nil, err
}
if len(resp.Links) == 0 {
err = fmt.Errorf("fingerRemoteAccount: no links found in webfinger response %s", string(b))
return
err = gtserror.Newf("no links found in response for %s", target)
return "", nil, err
}
if resp.Subject == "" {
err = fmt.Errorf("fingerRemoteAccount: no subject found in webfinger response %s", string(b))
return
err = gtserror.Newf("no subject found in response for %s", target)
return "", nil, err
}
_, accountDomain, err = util.ExtractWebfingerParts(resp.Subject)
_, accountDomain, err := util.ExtractWebfingerParts(resp.Subject)
if err != nil {
err = fmt.Errorf("fingerRemoteAccount: error extracting webfinger subject parts: %s", err)
err = gtserror.Newf("error extracting subject parts for %s: %w", target, err)
return "", nil, err
}
// look through the links for the first one that matches what we need
for _, l := range resp.Links {
if l.Rel == "self" && (strings.EqualFold(l.Type, "application/activity+json") || strings.EqualFold(l.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"")) {
if uri, thiserr := url.Parse(l.Href); thiserr == nil && (uri.Scheme == "http" || uri.Scheme == "https") {
// found it!
accountURI = uri
return
}
// Look through links for the first
// one that matches what we need:
//
// - Must be self link.
// - Must be AP type.
// - Valid https/http URI.
for _, link := range resp.Links {
if link.Rel != "self" {
// Not self link, ignore.
continue
}
if !strings.EqualFold(link.Type, "application/activity+json") &&
!strings.EqualFold(link.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"") {
// Not an AP type, ignore.
continue
}
uri, err := url.Parse(link.Href)
if err != nil {
log.Warnf(ctx,
"invalid href for ActivityPub self link %s for %s: %v",
link.Href, target, err,
)
// Funky URL, ignore.
continue
}
if uri.Scheme != "http" && uri.Scheme != "https" {
log.Warnf(ctx,
"invalid href for ActivityPub self link %s for %s: schema must be http or https",
link.Href, target,
)
// Can't handle this
// schema, ignore.
continue
}
// All looks good, return happily!
return accountDomain, uri, nil
}
return "", nil, errors.New("fingerRemoteAccount: no match found in webfinger response")
return "", nil, gtserror.Newf("no suitable self, AP-type link found in webfinger response for %s", target)
}

View file

@ -294,10 +294,30 @@ func (d *Dereferencer) enrichStatusSafely(
apubStatus,
)
if gtserror.StatusCode(err) >= 400 {
// Update fetch-at to slow re-attempts.
if code := gtserror.StatusCode(err); code >= 400 {
// No matter what, log the error
// so instance admins have an idea
// why something isn't working.
log.Info(ctx, err)
if isNew {
// This was a new status enrich
// attempt which failed before we
// got to store it, so we can't
// return anything useful.
return nil, nil, isNew, err
}
// We had this status stored already
// before this enrichment attempt.
//
// Update fetched_at to slow re-attempts
// but don't return early. We can still
// return the model we had stored already.
status.FetchedAt = time.Now()
_ = d.state.DB.UpdateStatus(ctx, status, "fetched_at")
if err := d.state.DB.UpdateStatus(ctx, status, "fetched_at"); err != nil {
log.Errorf(ctx, "error updating status fetched_at: %v", err)
}
}
// Unlock now
@ -358,7 +378,7 @@ func (d *Dereferencer) enrichStatus(
// Dereference latest version of the status.
b, err := tsport.Dereference(ctx, uri)
if err != nil {
err := gtserror.Newf("error deferencing %s: %w", uri, err)
err := gtserror.Newf("error dereferencing %s: %w", uri, err)
return nil, nil, gtserror.SetUnretrievable(err)
}
@ -388,16 +408,21 @@ func (d *Dereferencer) enrichStatus(
return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err)
}
// Use existing status ID.
latestStatus.ID = status.ID
if latestStatus.ID == "" {
// Generate new status ID from the provided creation date.
// Check if we've previously
// stored this status in the DB.
// If we have, it'll be ID'd.
var isNew = (status.ID == "")
if isNew {
// No ID, we haven't stored this status before.
// Generate new status ID from the status publication time.
latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt)
if err != nil {
log.Errorf(ctx, "invalid created at date (falling back to 'now'): %v", err)
latestStatus.ID = id.NewULID() // just use "now"
}
} else {
// Reuse existing status ID.
latestStatus.ID = status.ID
}
// Carry-over values and set fetch time.
@ -436,10 +461,7 @@ func (d *Dereferencer) enrichStatus(
return nil, nil, gtserror.Newf("error populating emojis for status %s: %w", uri, err)
}
if status.CreatedAt.IsZero() {
// CreatedAt will be zero if no local copy was
// found in one of the GetStatusBy___() functions.
//
if isNew {
// This is new, put the status in the database.
err := d.state.DB.PutStatus(ctx, latestStatus)
if err != nil {