Dereference remote replies (#132)

* decided where to put reply dereferencing

* fiddling with dereferencing threads

* further adventures

* tidy up some stuff

* move dereferencing functionality

* a bunch of refactoring

* go fmt

* more refactoring

* bleep bloop

* docs and linting

* start implementing replies collection on gts side

* fiddling around

* allow dereferencing our replies

* lint, fmt
This commit is contained in:
Tobi Smethurst 2021-08-10 13:32:39 +02:00 committed by GitHub
commit 0f2de6394a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
68 changed files with 2946 additions and 1393 deletions

View file

@ -0,0 +1,243 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dereferencing
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/url"
"github.com/go-fed/activity/streams"
"github.com/go-fed/activity/streams/vocab"
"github.com/sirupsen/logrus"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
"github.com/superseriousbusiness/gotosocial/internal/transport"
)
// EnrichRemoteAccount takes an account that's already been inserted into the database in a minimal form,
// and populates it with additional fields, media, etc.
//
// EnrichRemoteAccount is mostly useful for calling after an account has been initially created by
// the federatingDB's Create function, or during the federated authorization flow.
func (d *deref) EnrichRemoteAccount(username string, account *gtsmodel.Account) (*gtsmodel.Account, error) {
if err := d.populateAccountFields(account, username, false); err != nil {
return nil, err
}
if err := d.db.UpdateByID(account.ID, account); err != nil {
return nil, fmt.Errorf("EnrichRemoteAccount: error updating account: %s", err)
}
return account, nil
}
// GetRemoteAccount completely dereferences a remote account, converts it to a GtS model account,
// puts it in the database, and returns it to a caller. The boolean indicates whether the account is new
// to us or not. If we haven't seen the account before, bool will be true. If we have seen the account before,
// it will be false.
//
// Refresh indicates whether--if the account exists in our db already--it should be refreshed by calling
// the remote instance again.
//
// SIDE EFFECTS: remote account will be stored in the database, or updated if it already exists (and refresh is true).
func (d *deref) GetRemoteAccount(username string, remoteAccountID *url.URL, refresh bool) (*gtsmodel.Account, bool, error) {
new := true
// check if we already have the account in our db
maybeAccount := &gtsmodel.Account{}
if err := d.db.GetWhere([]db.Where{{Key: "uri", Value: remoteAccountID.String()}}, maybeAccount); err == nil {
// we've seen this account before so it's not new
new = false
// if we're not being asked to refresh, we can just return the maybeAccount as-is and avoid doing any external calls
if !refresh {
return maybeAccount, new, nil
}
}
accountable, err := d.dereferenceAccountable(username, remoteAccountID)
if err != nil {
return nil, new, fmt.Errorf("FullyDereferenceAccount: error dereferencing accountable: %s", err)
}
gtsAccount, err := d.typeConverter.ASRepresentationToAccount(accountable, false)
if err != nil {
return nil, new, fmt.Errorf("FullyDereferenceAccount: error converting accountable to account: %s", err)
}
if new {
// generate a new id since we haven't seen this account before, and do a put
ulid, err := id.NewRandomULID()
if err != nil {
return nil, new, fmt.Errorf("FullyDereferenceAccount: error generating new id for account: %s", err)
}
gtsAccount.ID = ulid
if err := d.populateAccountFields(gtsAccount, username, refresh); err != nil {
return nil, new, fmt.Errorf("FullyDereferenceAccount: error populating further account fields: %s", err)
}
if err := d.db.Put(gtsAccount); err != nil {
return nil, new, fmt.Errorf("FullyDereferenceAccount: error putting new account: %s", err)
}
} else {
// take the id we already have and do an update
gtsAccount.ID = maybeAccount.ID
if err := d.populateAccountFields(gtsAccount, username, refresh); err != nil {
return nil, new, fmt.Errorf("FullyDereferenceAccount: error populating further account fields: %s", err)
}
if err := d.db.UpdateByID(gtsAccount.ID, gtsAccount); err != nil {
return nil, new, fmt.Errorf("FullyDereferenceAccount: error updating existing account: %s", err)
}
}
return gtsAccount, new, nil
}
// dereferenceAccountable calls remoteAccountID with a GET request, and tries to parse whatever
// it finds as something that an account model can be constructed out of.
//
// Will work for Person, Application, or Service models.
func (d *deref) dereferenceAccountable(username string, remoteAccountID *url.URL) (ap.Accountable, error) {
d.startHandshake(username, remoteAccountID)
defer d.stopHandshake(username, remoteAccountID)
if blocked, err := d.blockedDomain(remoteAccountID.Host); blocked || err != nil {
return nil, fmt.Errorf("DereferenceAccountable: domain %s is blocked", remoteAccountID.Host)
}
transport, err := d.transportController.NewTransportForUsername(username)
if err != nil {
return nil, fmt.Errorf("DereferenceAccountable: transport err: %s", err)
}
b, err := transport.Dereference(context.Background(), remoteAccountID)
if err != nil {
return nil, fmt.Errorf("DereferenceAccountable: error deferencing %s: %s", remoteAccountID.String(), err)
}
m := make(map[string]interface{})
if err := json.Unmarshal(b, &m); err != nil {
return nil, fmt.Errorf("DereferenceAccountable: error unmarshalling bytes into json: %s", err)
}
t, err := streams.ToType(context.Background(), m)
if err != nil {
return nil, fmt.Errorf("DereferenceAccountable: error resolving json into ap vocab type: %s", err)
}
switch t.GetTypeName() {
case string(gtsmodel.ActivityStreamsPerson):
p, ok := t.(vocab.ActivityStreamsPerson)
if !ok {
return nil, errors.New("DereferenceAccountable: error resolving type as activitystreams person")
}
return p, nil
case string(gtsmodel.ActivityStreamsApplication):
p, ok := t.(vocab.ActivityStreamsApplication)
if !ok {
return nil, errors.New("DereferenceAccountable: error resolving type as activitystreams application")
}
return p, nil
case string(gtsmodel.ActivityStreamsService):
p, ok := t.(vocab.ActivityStreamsService)
if !ok {
return nil, errors.New("DereferenceAccountable: error resolving type as activitystreams service")
}
return p, nil
}
return nil, fmt.Errorf("DereferenceAccountable: type name %s not supported", t.GetTypeName())
}
// populateAccountFields populates any fields on the given account that weren't populated by the initial
// dereferencing. This includes things like header and avatar etc.
func (d *deref) populateAccountFields(account *gtsmodel.Account, requestingUsername string, refresh bool) error {
l := d.log.WithFields(logrus.Fields{
"func": "PopulateAccountFields",
"requestingUsername": requestingUsername,
})
accountURI, err := url.Parse(account.URI)
if err != nil {
return fmt.Errorf("PopulateAccountFields: couldn't parse account URI %s: %s", account.URI, err)
}
if blocked, err := d.blockedDomain(accountURI.Host); blocked || err != nil {
return fmt.Errorf("PopulateAccountFields: domain %s is blocked", accountURI.Host)
}
t, err := d.transportController.NewTransportForUsername(requestingUsername)
if err != nil {
return fmt.Errorf("PopulateAccountFields: error getting transport for user: %s", err)
}
// fetch the header and avatar
if err := d.fetchHeaderAndAviForAccount(account, t, refresh); err != nil {
// if this doesn't work, just skip it -- we can do it later
l.Debugf("error fetching header/avi for account: %s", err)
}
return nil
}
// fetchHeaderAndAviForAccount fetches the header and avatar for a remote account, using a transport
// on behalf of requestingUsername.
//
// targetAccount's AvatarMediaAttachmentID and HeaderMediaAttachmentID will be updated as necessary.
//
// SIDE EFFECTS: remote header and avatar will be stored in local storage.
func (d *deref) fetchHeaderAndAviForAccount(targetAccount *gtsmodel.Account, t transport.Transport, refresh bool) error {
accountURI, err := url.Parse(targetAccount.URI)
if err != nil {
return fmt.Errorf("fetchHeaderAndAviForAccount: couldn't parse account URI %s: %s", targetAccount.URI, err)
}
if blocked, err := d.blockedDomain(accountURI.Host); blocked || err != nil {
return fmt.Errorf("fetchHeaderAndAviForAccount: domain %s is blocked", accountURI.Host)
}
if targetAccount.AvatarRemoteURL != "" && (targetAccount.AvatarMediaAttachmentID == "" || refresh) {
a, err := d.mediaHandler.ProcessRemoteHeaderOrAvatar(t, &gtsmodel.MediaAttachment{
RemoteURL: targetAccount.AvatarRemoteURL,
Avatar: true,
}, targetAccount.ID)
if err != nil {
return fmt.Errorf("error processing avatar for user: %s", err)
}
targetAccount.AvatarMediaAttachmentID = a.ID
}
if targetAccount.HeaderRemoteURL != "" && (targetAccount.HeaderMediaAttachmentID == "" || refresh) {
a, err := d.mediaHandler.ProcessRemoteHeaderOrAvatar(t, &gtsmodel.MediaAttachment{
RemoteURL: targetAccount.HeaderRemoteURL,
Header: true,
}, targetAccount.ID)
if err != nil {
return fmt.Errorf("error processing header for user: %s", err)
}
targetAccount.HeaderMediaAttachmentID = a.ID
}
return nil
}

View file

@ -0,0 +1,65 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dereferencing
import (
"errors"
"fmt"
"net/url"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
func (d *deref) DereferenceAnnounce(announce *gtsmodel.Status, requestingUsername string) error {
if announce.GTSBoostedStatus == nil || announce.GTSBoostedStatus.URI == "" {
// we can't do anything unfortunately
return errors.New("DereferenceAnnounce: no URI to dereference")
}
boostedStatusURI, err := url.Parse(announce.GTSBoostedStatus.URI)
if err != nil {
return fmt.Errorf("DereferenceAnnounce: couldn't parse boosted status URI %s: %s", announce.GTSBoostedStatus.URI, err)
}
if blocked, err := d.blockedDomain(boostedStatusURI.Host); blocked || err != nil {
return fmt.Errorf("DereferenceAnnounce: domain %s is blocked", boostedStatusURI.Host)
}
// dereference statuses in the thread of the boosted status
if err := d.DereferenceThread(requestingUsername, boostedStatusURI); err != nil {
return fmt.Errorf("DereferenceAnnounce: error dereferencing thread of boosted status: %s", err)
}
boostedStatus, _, _, err := d.GetRemoteStatus(requestingUsername, boostedStatusURI, false)
if err != nil {
return fmt.Errorf("DereferenceAnnounce: error dereferencing remote status with id %s: %s", announce.GTSBoostedStatus.URI, err)
}
announce.Content = boostedStatus.Content
announce.ContentWarning = boostedStatus.ContentWarning
announce.ActivityStreamsType = boostedStatus.ActivityStreamsType
announce.Sensitive = boostedStatus.Sensitive
announce.Language = boostedStatus.Language
announce.Text = boostedStatus.Text
announce.BoostOfID = boostedStatus.ID
announce.BoostOfAccountID = boostedStatus.AccountID
announce.Visibility = boostedStatus.Visibility
announce.VisibilityAdvanced = boostedStatus.VisibilityAdvanced
announce.GTSBoostedStatus = boostedStatus
return nil
}

View file

@ -0,0 +1,41 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dereferencing
import (
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
func (d *deref) blockedDomain(host string) (bool, error) {
b := &gtsmodel.DomainBlock{}
err := d.db.GetWhere([]db.Where{{Key: "domain", Value: host, CaseInsensitive: true}}, b)
if err == nil {
// block exists
return true, nil
}
if _, ok := err.(db.ErrNoEntries); ok {
// there are no entries so there's no block
return false, nil
}
// there's an actual error
return false, err
}

View file

@ -0,0 +1,70 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dereferencing
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/url"
"github.com/go-fed/activity/streams"
"github.com/go-fed/activity/streams/vocab"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
// DereferenceCollectionPage returns the activitystreams CollectionPage at the specified IRI, or an error if something goes wrong.
func (d *deref) DereferenceCollectionPage(username string, pageIRI *url.URL) (ap.CollectionPageable, error) {
if blocked, err := d.blockedDomain(pageIRI.Host); blocked || err != nil {
return nil, fmt.Errorf("DereferenceCollectionPage: domain %s is blocked", pageIRI.Host)
}
transport, err := d.transportController.NewTransportForUsername(username)
if err != nil {
return nil, fmt.Errorf("DereferenceCollectionPage: error creating transport: %s", err)
}
b, err := transport.Dereference(context.Background(), pageIRI)
if err != nil {
return nil, fmt.Errorf("DereferenceCollectionPage: error deferencing %s: %s", pageIRI.String(), err)
}
m := make(map[string]interface{})
if err := json.Unmarshal(b, &m); err != nil {
return nil, fmt.Errorf("DereferenceCollectionPage: error unmarshalling bytes into json: %s", err)
}
t, err := streams.ToType(context.Background(), m)
if err != nil {
return nil, fmt.Errorf("DereferenceCollectionPage: error resolving json into ap vocab type: %s", err)
}
if t.GetTypeName() != gtsmodel.ActivityStreamsCollectionPage {
return nil, fmt.Errorf("DereferenceCollectionPage: type name %s not supported", t.GetTypeName())
}
p, ok := t.(vocab.ActivityStreamsCollectionPage)
if !ok {
return nil, errors.New("DereferenceCollectionPage: error resolving type as activitystreams collection page")
}
return p, nil
}

View file

@ -0,0 +1,73 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dereferencing
import (
"net/url"
"sync"
"github.com/sirupsen/logrus"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/internal/typeutils"
)
// Dereferencer wraps logic and functionality for doing dereferencing of remote accounts, statuses, etc, from federated instances.
type Dereferencer interface {
GetRemoteAccount(username string, remoteAccountID *url.URL, refresh bool) (*gtsmodel.Account, bool, error)
EnrichRemoteAccount(username string, account *gtsmodel.Account) (*gtsmodel.Account, error)
GetRemoteStatus(username string, remoteStatusID *url.URL, refresh bool) (*gtsmodel.Status, ap.Statusable, bool, error)
EnrichRemoteStatus(username string, status *gtsmodel.Status) (*gtsmodel.Status, error)
GetRemoteInstance(username string, remoteInstanceURI *url.URL) (*gtsmodel.Instance, error)
DereferenceAnnounce(announce *gtsmodel.Status, requestingUsername string) error
DereferenceThread(username string, statusIRI *url.URL) error
Handshaking(username string, remoteAccountID *url.URL) bool
}
type deref struct {
log *logrus.Logger
db db.DB
typeConverter typeutils.TypeConverter
transportController transport.Controller
mediaHandler media.Handler
config *config.Config
handshakes map[string][]*url.URL
handshakeSync *sync.Mutex // mutex to lock/unlock when checking or updating the handshakes map
}
// NewDereferencer returns a Dereferencer initialized with the given parameters.
func NewDereferencer(config *config.Config, db db.DB, typeConverter typeutils.TypeConverter, transportController transport.Controller, mediaHandler media.Handler, log *logrus.Logger) Dereferencer {
return &deref{
log: log,
db: db,
typeConverter: typeConverter,
transportController: transportController,
mediaHandler: mediaHandler,
config: config,
handshakeSync: &sync.Mutex{},
}
}

View file

@ -0,0 +1,98 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dereferencing
import "net/url"
func (d *deref) Handshaking(username string, remoteAccountID *url.URL) bool {
d.handshakeSync.Lock()
defer d.handshakeSync.Unlock()
if d.handshakes == nil {
// handshakes isn't even initialized yet so we can't be handshaking with anyone
return false
}
remoteIDs, ok := d.handshakes[username]
if !ok {
// user isn't handshaking with anyone, bail
return false
}
for _, id := range remoteIDs {
if id.String() == remoteAccountID.String() {
// we are currently handshaking with the remote account, yep
return true
}
}
// didn't find it which means we're not handshaking
return false
}
func (d *deref) startHandshake(username string, remoteAccountID *url.URL) {
d.handshakeSync.Lock()
defer d.handshakeSync.Unlock()
// lazily initialize handshakes
if d.handshakes == nil {
d.handshakes = make(map[string][]*url.URL)
}
remoteIDs, ok := d.handshakes[username]
if !ok {
// there was nothing in there yet, so just add this entry and return
d.handshakes[username] = []*url.URL{remoteAccountID}
return
}
// add the remote ID to the slice
remoteIDs = append(remoteIDs, remoteAccountID)
d.handshakes[username] = remoteIDs
}
func (d *deref) stopHandshake(username string, remoteAccountID *url.URL) {
d.handshakeSync.Lock()
defer d.handshakeSync.Unlock()
if d.handshakes == nil {
return
}
remoteIDs, ok := d.handshakes[username]
if !ok {
// there was nothing in there yet anyway so just bail
return
}
newRemoteIDs := []*url.URL{}
for _, id := range remoteIDs {
if id.String() != remoteAccountID.String() {
newRemoteIDs = append(newRemoteIDs, id)
}
}
if len(newRemoteIDs) == 0 {
// there are no handshakes so just remove this user entry from the map and save a few bytes
delete(d.handshakes, username)
} else {
// there are still other handshakes ongoing
d.handshakes[username] = newRemoteIDs
}
}

View file

@ -0,0 +1,40 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dereferencing
import (
"context"
"fmt"
"net/url"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
func (d *deref) GetRemoteInstance(username string, remoteInstanceURI *url.URL) (*gtsmodel.Instance, error) {
if blocked, err := d.blockedDomain(remoteInstanceURI.Host); blocked || err != nil {
return nil, fmt.Errorf("GetRemoteInstance: domain %s is blocked", remoteInstanceURI.Host)
}
transport, err := d.transportController.NewTransportForUsername(username)
if err != nil {
return nil, fmt.Errorf("transport err: %s", err)
}
return transport.DereferenceInstance(context.Background(), remoteInstanceURI)
}

View file

@ -0,0 +1,369 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dereferencing
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/url"
"github.com/go-fed/activity/streams"
"github.com/go-fed/activity/streams/vocab"
"github.com/sirupsen/logrus"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
)
// EnrichRemoteStatus takes a status that's already been inserted into the database in a minimal form,
// and populates it with additional fields, media, etc.
//
// EnrichRemoteStatus is mostly useful for calling after a status has been initially created by
// the federatingDB's Create function, but additional dereferencing is needed on it.
func (d *deref) EnrichRemoteStatus(username string, status *gtsmodel.Status) (*gtsmodel.Status, error) {
if err := d.populateStatusFields(status, username); err != nil {
return nil, err
}
if err := d.db.UpdateByID(status.ID, status); err != nil {
return nil, fmt.Errorf("EnrichRemoteStatus: error updating status: %s", err)
}
return status, nil
}
// GetRemoteStatus completely dereferences a remote status, converts it to a GtS model status,
// puts it in the database, and returns it to a caller. The boolean indicates whether the status is new
// to us or not. If we haven't seen the status before, bool will be true. If we have seen the status before,
// it will be false.
//
// If refresh is true, then even if we have the status in our database already, it will be dereferenced from its
// remote representation, as will its owner.
//
// If a dereference was performed, then the function also returns the ap.Statusable representation for further processing.
//
// SIDE EFFECTS: remote status will be stored in the database, and the remote status owner will also be stored.
func (d *deref) GetRemoteStatus(username string, remoteStatusID *url.URL, refresh bool) (*gtsmodel.Status, ap.Statusable, bool, error) {
new := true
// check if we already have the status in our db
maybeStatus := &gtsmodel.Status{}
if err := d.db.GetWhere([]db.Where{{Key: "uri", Value: remoteStatusID.String()}}, maybeStatus); err == nil {
// we've seen this status before so it's not new
new = false
// if we're not being asked to refresh, we can just return the maybeStatus as-is and avoid doing any external calls
if !refresh {
return maybeStatus, nil, new, nil
}
}
statusable, err := d.dereferenceStatusable(username, remoteStatusID)
if err != nil {
return nil, statusable, new, fmt.Errorf("GetRemoteStatus: error dereferencing statusable: %s", err)
}
accountURI, err := ap.ExtractAttributedTo(statusable)
if err != nil {
return nil, statusable, new, fmt.Errorf("GetRemoteStatus: error extracting attributedTo: %s", err)
}
// do this so we know we have the remote account of the status in the db
_, _, err = d.GetRemoteAccount(username, accountURI, false)
if err != nil {
return nil, statusable, new, fmt.Errorf("GetRemoteStatus: couldn't derive status author: %s", err)
}
gtsStatus, err := d.typeConverter.ASStatusToStatus(statusable)
if err != nil {
return nil, statusable, new, fmt.Errorf("GetRemoteStatus: error converting statusable to status: %s", err)
}
if new {
ulid, err := id.NewULIDFromTime(gtsStatus.CreatedAt)
if err != nil {
return nil, statusable, new, fmt.Errorf("GetRemoteStatus: error generating new id for status: %s", err)
}
gtsStatus.ID = ulid
if err := d.populateStatusFields(gtsStatus, username); err != nil {
return nil, statusable, new, fmt.Errorf("GetRemoteStatus: error populating status fields: %s", err)
}
if err := d.db.Put(gtsStatus); err != nil {
return nil, statusable, new, fmt.Errorf("GetRemoteStatus: error putting new status: %s", err)
}
} else {
gtsStatus.ID = maybeStatus.ID
if err := d.populateStatusFields(gtsStatus, username); err != nil {
return nil, statusable, new, fmt.Errorf("GetRemoteStatus: error populating status fields: %s", err)
}
if err := d.db.UpdateByID(gtsStatus.ID, gtsStatus); err != nil {
return nil, statusable, new, fmt.Errorf("GetRemoteStatus: error updating status: %s", err)
}
}
return gtsStatus, statusable, new, nil
}
func (d *deref) dereferenceStatusable(username string, remoteStatusID *url.URL) (ap.Statusable, error) {
if blocked, err := d.blockedDomain(remoteStatusID.Host); blocked || err != nil {
return nil, fmt.Errorf("DereferenceStatusable: domain %s is blocked", remoteStatusID.Host)
}
transport, err := d.transportController.NewTransportForUsername(username)
if err != nil {
return nil, fmt.Errorf("DereferenceStatusable: transport err: %s", err)
}
b, err := transport.Dereference(context.Background(), remoteStatusID)
if err != nil {
return nil, fmt.Errorf("DereferenceStatusable: error deferencing %s: %s", remoteStatusID.String(), err)
}
m := make(map[string]interface{})
if err := json.Unmarshal(b, &m); err != nil {
return nil, fmt.Errorf("DereferenceStatusable: error unmarshalling bytes into json: %s", err)
}
t, err := streams.ToType(context.Background(), m)
if err != nil {
return nil, fmt.Errorf("DereferenceStatusable: error resolving json into ap vocab type: %s", err)
}
// Article, Document, Image, Video, Note, Page, Event, Place, Mention, Profile
switch t.GetTypeName() {
case gtsmodel.ActivityStreamsArticle:
p, ok := t.(vocab.ActivityStreamsArticle)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsArticle")
}
return p, nil
case gtsmodel.ActivityStreamsDocument:
p, ok := t.(vocab.ActivityStreamsDocument)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsDocument")
}
return p, nil
case gtsmodel.ActivityStreamsImage:
p, ok := t.(vocab.ActivityStreamsImage)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsImage")
}
return p, nil
case gtsmodel.ActivityStreamsVideo:
p, ok := t.(vocab.ActivityStreamsVideo)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsVideo")
}
return p, nil
case gtsmodel.ActivityStreamsNote:
p, ok := t.(vocab.ActivityStreamsNote)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsNote")
}
return p, nil
case gtsmodel.ActivityStreamsPage:
p, ok := t.(vocab.ActivityStreamsPage)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsPage")
}
return p, nil
case gtsmodel.ActivityStreamsEvent:
p, ok := t.(vocab.ActivityStreamsEvent)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsEvent")
}
return p, nil
case gtsmodel.ActivityStreamsPlace:
p, ok := t.(vocab.ActivityStreamsPlace)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsPlace")
}
return p, nil
case gtsmodel.ActivityStreamsProfile:
p, ok := t.(vocab.ActivityStreamsProfile)
if !ok {
return nil, errors.New("DereferenceStatusable: error resolving type as ActivityStreamsProfile")
}
return p, nil
}
return nil, fmt.Errorf("DereferenceStatusable: type name %s not supported", t.GetTypeName())
}
// populateStatusFields fetches all the information we temporarily pinned to an incoming
// federated status, back in the federating db's Create function.
//
// When a status comes in from the federation API, there are certain fields that
// haven't been dereferenced yet, because we needed to provide a snappy synchronous
// response to the caller. By the time it reaches this function though, it's being
// processed asynchronously, so we have all the time in the world to fetch the various
// bits and bobs that are attached to the status, and properly flesh it out, before we
// send the status to any timelines and notify people.
//
// Things to dereference and fetch here:
//
// 1. Media attachments.
// 2. Hashtags.
// 3. Emojis.
// 4. Mentions.
// 5. Posting account.
// 6. Replied-to-status.
//
// SIDE EFFECTS:
// This function will deference all of the above, insert them in the database as necessary,
// and attach them to the status. The status itself will not be added to the database yet,
// that's up the caller to do.
func (d *deref) populateStatusFields(status *gtsmodel.Status, requestingUsername string) error {
l := d.log.WithFields(logrus.Fields{
"func": "dereferenceStatusFields",
"status": fmt.Sprintf("%+v", status),
})
l.Debug("entering function")
// make sure we have a status URI and that the domain in question isn't blocked
statusURI, err := url.Parse(status.URI)
if err != nil {
return fmt.Errorf("DereferenceStatusFields: couldn't parse status URI %s: %s", status.URI, err)
}
if blocked, err := d.blockedDomain(statusURI.Host); blocked || err != nil {
return fmt.Errorf("DereferenceStatusFields: domain %s is blocked", statusURI.Host)
}
// we can continue -- create a new transport here because we'll probably need it
t, err := d.transportController.NewTransportForUsername(requestingUsername)
if err != nil {
return fmt.Errorf("error creating transport: %s", err)
}
// in case the status doesn't have an id yet (ie., it hasn't entered the database yet), then create one
if status.ID == "" {
newID, err := id.NewULIDFromTime(status.CreatedAt)
if err != nil {
return err
}
status.ID = newID
}
// 1. Media attachments.
//
// At this point we should know:
// * the media type of the file we're looking for (a.File.ContentType)
// * the blurhash (a.Blurhash)
// * the file type (a.Type)
// * the remote URL (a.RemoteURL)
// This should be enough to pass along to the media processor.
attachmentIDs := []string{}
for _, a := range status.GTSMediaAttachments {
l.Tracef("dereferencing attachment: %+v", a)
// it might have been processed elsewhere so check first if it's already in the database or not
maybeAttachment := &gtsmodel.MediaAttachment{}
err := d.db.GetWhere([]db.Where{{Key: "remote_url", Value: a.RemoteURL}}, maybeAttachment)
if err == nil {
// we already have it in the db, dereferenced, no need to do it again
l.Tracef("attachment already exists with id %s", maybeAttachment.ID)
attachmentIDs = append(attachmentIDs, maybeAttachment.ID)
continue
}
if _, ok := err.(db.ErrNoEntries); !ok {
// we have a real error
return fmt.Errorf("error checking db for existence of attachment with remote url %s: %s", a.RemoteURL, err)
}
// it just doesn't exist yet so carry on
l.Debug("attachment doesn't exist yet, calling ProcessRemoteAttachment", a)
deferencedAttachment, err := d.mediaHandler.ProcessRemoteAttachment(t, a, status.AccountID)
if err != nil {
l.Errorf("error dereferencing status attachment: %s", err)
continue
}
l.Debugf("dereferenced attachment: %+v", deferencedAttachment)
deferencedAttachment.StatusID = status.ID
deferencedAttachment.Description = a.Description
if err := d.db.Put(deferencedAttachment); err != nil {
return fmt.Errorf("error inserting dereferenced attachment with remote url %s: %s", a.RemoteURL, err)
}
attachmentIDs = append(attachmentIDs, deferencedAttachment.ID)
}
status.Attachments = attachmentIDs
// 2. Hashtags
// 3. Emojis
// 4. Mentions
// At this point, mentions should have the namestring and mentionedAccountURI set on them.
//
// We should dereference any accounts mentioned here which we don't have in our db yet, by their URI.
mentions := []string{}
for _, m := range status.GTSMentions {
if m.ID != "" {
continue
// we've already populated this mention, since it has an ID
}
mID, err := id.NewRandomULID()
if err != nil {
return err
}
m.ID = mID
uri, err := url.Parse(m.MentionedAccountURI)
if err != nil {
l.Debugf("error parsing mentioned account uri %s: %s", m.MentionedAccountURI, err)
continue
}
m.StatusID = status.ID
m.OriginAccountID = status.GTSAuthorAccount.ID
m.OriginAccountURI = status.GTSAuthorAccount.URI
targetAccount, _, err := d.GetRemoteAccount(requestingUsername, uri, false)
if err != nil {
continue
}
// by this point, we know the targetAccount exists in our database with an ID :)
m.TargetAccountID = targetAccount.ID
if err := d.db.Put(m); err != nil {
return fmt.Errorf("error creating mention: %s", err)
}
mentions = append(mentions, m.ID)
}
status.Mentions = mentions
// status has replyToURI but we don't have an ID yet for the status it replies to
if status.InReplyToURI != "" && status.InReplyToID == "" {
replyToStatus := &gtsmodel.Status{}
if err := d.db.GetWhere([]db.Where{{Key: "uri", Value: status.InReplyToURI}}, replyToStatus); err == nil {
// we have the status
status.InReplyToID = replyToStatus.ID
status.InReplyToAccountID = replyToStatus.AccountID
}
}
return nil
}

View file

@ -0,0 +1,250 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dereferencing
import (
"fmt"
"net/url"
"github.com/sirupsen/logrus"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
// DereferenceThread takes a statusable (something that has withReplies and withInReplyTo),
// and dereferences statusables in the conversation.
//
// This process involves working up and down the chain of replies, and parsing through the collections of IDs
// presented by remote instances as part of their replies collections, and will likely involve making several calls to
// multiple different hosts.
func (d *deref) DereferenceThread(username string, statusIRI *url.URL) error {
l := d.log.WithFields(logrus.Fields{
"func": "DereferenceThread",
"username": username,
"statusIRI": statusIRI.String(),
})
l.Debug("entering DereferenceThread")
// if it's our status we already have everything stashed so we can bail early
if statusIRI.Host == d.config.Host {
l.Debug("iri belongs to us, bailing")
return nil
}
// first make sure we have this status in our db
_, statusable, _, err := d.GetRemoteStatus(username, statusIRI, true)
if err != nil {
return fmt.Errorf("DereferenceThread: error getting status with id %s: %s", statusIRI.String(), err)
}
// first iterate up through ancestors, dereferencing if necessary as we go
if err := d.iterateAncestors(username, *statusIRI); err != nil {
return fmt.Errorf("error iterating ancestors of status %s: %s", statusIRI.String(), err)
}
// now iterate down through descendants, again dereferencing as we go
if err := d.iterateDescendants(username, *statusIRI, statusable); err != nil {
return fmt.Errorf("error iterating descendants of status %s: %s", statusIRI.String(), err)
}
return nil
}
// iterateAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way.
func (d *deref) iterateAncestors(username string, statusIRI url.URL) error {
l := d.log.WithFields(logrus.Fields{
"func": "iterateAncestors",
"username": username,
"statusIRI": statusIRI.String(),
})
l.Debug("entering iterateAncestors")
// if it's our status we don't need to dereference anything so we can immediately move up the chain
if statusIRI.Host == d.config.Host {
l.Debug("iri belongs to us, moving up to next ancestor")
// since this is our status, we know we can extract the id from the status path
_, id, err := util.ParseStatusesPath(&statusIRI)
if err != nil {
return err
}
status := &gtsmodel.Status{}
if err := d.db.GetByID(id, status); err != nil {
return err
}
if status.InReplyToURI == "" {
// status doesn't reply to anything
return nil
}
nextIRI, err := url.Parse(status.URI)
if err != nil {
return err
}
return d.iterateAncestors(username, *nextIRI)
}
// If we reach here, we're looking at a remote status -- make sure we have it in our db by calling GetRemoteStatus
// We call it with refresh to true because we want the statusable representation to parse inReplyTo from.
status, statusable, _, err := d.GetRemoteStatus(username, &statusIRI, true)
if err != nil {
l.Debugf("error getting remote status: %s", err)
return nil
}
inReplyTo := ap.ExtractInReplyToURI(statusable)
if inReplyTo == nil || inReplyTo.String() == "" {
// status doesn't reply to anything
return nil
}
// get the ancestor status into our database if we don't have it yet
if _, _, _, err := d.GetRemoteStatus(username, inReplyTo, false); err != nil {
l.Debugf("error getting remote status: %s", err)
return nil
}
// now enrich the current status, since we should have the ancestor in the db
if _, err := d.EnrichRemoteStatus(username, status); err != nil {
l.Debugf("error enriching remote status: %s", err)
return nil
}
// now move up to the next ancestor
return d.iterateAncestors(username, *inReplyTo)
}
func (d *deref) iterateDescendants(username string, statusIRI url.URL, statusable ap.Statusable) error {
l := d.log.WithFields(logrus.Fields{
"func": "iterateDescendants",
"username": username,
"statusIRI": statusIRI.String(),
})
l.Debug("entering iterateDescendants")
// if it's our status we already have descendants stashed so we can bail early
if statusIRI.Host == d.config.Host {
l.Debug("iri belongs to us, bailing")
return nil
}
replies := statusable.GetActivityStreamsReplies()
if replies == nil || !replies.IsActivityStreamsCollection() {
l.Debug("no replies, bailing")
return nil
}
repliesCollection := replies.GetActivityStreamsCollection()
if repliesCollection == nil {
l.Debug("replies collection is nil, bailing")
return nil
}
first := repliesCollection.GetActivityStreamsFirst()
if first == nil {
l.Debug("replies collection has no first, bailing")
return nil
}
firstPage := first.GetActivityStreamsCollectionPage()
if firstPage == nil {
l.Debug("first has no collection page, bailing")
return nil
}
firstPageNext := firstPage.GetActivityStreamsNext()
if firstPageNext == nil || !firstPageNext.IsIRI() {
l.Debug("next is not an iri, bailing")
return nil
}
var foundReplies int
currentPageIRI := firstPageNext.GetIRI()
pageLoop:
for {
l.Debugf("dereferencing page %s", currentPageIRI)
nextPage, err := d.DereferenceCollectionPage(username, currentPageIRI)
if err != nil {
return nil
}
// next items could be either a list of URLs or a list of statuses
nextItems := nextPage.GetActivityStreamsItems()
if nextItems.Len() == 0 {
// no items on this page, which means we're done
break pageLoop
}
// have a look through items and see what we can find
for iter := nextItems.Begin(); iter != nextItems.End(); iter = iter.Next() {
// We're looking for a url to feed to GetRemoteStatus.
// Items can be either an IRI, or a Note.
// If a note, we grab the ID from it and call it, rather than parsing the note.
var itemURI *url.URL
if iter.IsIRI() {
// iri, easy
itemURI = iter.GetIRI()
} else if iter.IsActivityStreamsNote() {
// note, get the id from it to use as iri
n := iter.GetActivityStreamsNote()
id := n.GetJSONLDId()
if id != nil && id.IsIRI() {
itemURI = id.GetIRI()
}
} else {
// if it's not an iri or a note, we don't know how to process it
continue
}
if itemURI.Host == d.config.Host {
// skip if the reply is from us -- we already have it then
continue
}
// we can confidently say now that we found something
foundReplies = foundReplies + 1
// get the remote statusable and put it in the db
_, statusable, new, err := d.GetRemoteStatus(username, itemURI, false)
if new && err == nil && statusable != nil {
// now iterate descendants of *that* status
if err := d.iterateDescendants(username, *itemURI, statusable); err != nil {
continue
}
}
}
next := nextPage.GetActivityStreamsNext()
if next != nil && next.IsIRI() {
l.Debug("setting next page")
currentPageIRI = next.GetIRI()
} else {
l.Debug("no next page, bailing")
break pageLoop
}
}
l.Debugf("foundReplies %d", foundReplies)
return nil
}