rework media processing a little bit (#191)

* rework media processing a little bit

* review changes
This commit is contained in:
tobi 2021-09-04 14:02:01 +02:00 committed by GitHub
commit 2b14b20802
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 486 additions and 166 deletions

View file

@ -28,17 +28,23 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
func (d *deref) GetRemoteAttachment(ctx context.Context, requestingUsername string, remoteAttachmentURI *url.URL, ownerAccountID string, statusID string, expectedContentType string) (*gtsmodel.MediaAttachment, error) {
func (d *deref) GetRemoteAttachment(ctx context.Context, requestingUsername string, minAttachment *gtsmodel.MediaAttachment) (*gtsmodel.MediaAttachment, error) {
if minAttachment.RemoteURL == "" {
return nil, fmt.Errorf("GetRemoteAttachment: minAttachment remote URL was empty")
}
remoteAttachmentURL := minAttachment.RemoteURL
l := d.log.WithFields(logrus.Fields{
"username": requestingUsername,
"remoteAttachmentURI": remoteAttachmentURI,
"remoteAttachmentURL": remoteAttachmentURL,
})
// return early if we already have the attachment somewhere
maybeAttachment := &gtsmodel.MediaAttachment{}
where := []db.Where{
{
Key: "remote_url",
Value: remoteAttachmentURI.String(),
Value: remoteAttachmentURL,
},
}
@ -48,12 +54,11 @@ func (d *deref) GetRemoteAttachment(ctx context.Context, requestingUsername stri
return maybeAttachment, nil
}
a, err := d.RefreshAttachment(ctx, requestingUsername, remoteAttachmentURI, ownerAccountID, expectedContentType)
a, err := d.RefreshAttachment(ctx, requestingUsername, minAttachment)
if err != nil {
return nil, fmt.Errorf("GetRemoteAttachment: error refreshing attachment: %s", err)
}
a.StatusID = statusID
if err := d.db.Put(ctx, a); err != nil {
if err != db.ErrAlreadyExists {
return nil, fmt.Errorf("GetRemoteAttachment: error inserting attachment: %s", err)
@ -63,19 +68,32 @@ func (d *deref) GetRemoteAttachment(ctx context.Context, requestingUsername stri
return a, nil
}
func (d *deref) RefreshAttachment(ctx context.Context, requestingUsername string, remoteAttachmentURI *url.URL, ownerAccountID string, expectedContentType string) (*gtsmodel.MediaAttachment, error) {
func (d *deref) RefreshAttachment(ctx context.Context, requestingUsername string, minAttachment *gtsmodel.MediaAttachment) (*gtsmodel.MediaAttachment, error) {
// it just doesn't exist or we have to refresh
if minAttachment.AccountID == "" {
return nil, fmt.Errorf("RefreshAttachment: minAttachment account ID was empty")
}
if minAttachment.File.ContentType == "" {
return nil, fmt.Errorf("RefreshAttachment: minAttachment.file.contentType was empty")
}
t, err := d.transportController.NewTransportForUsername(ctx, requestingUsername)
if err != nil {
return nil, fmt.Errorf("RefreshAttachment: error creating transport: %s", err)
}
attachmentBytes, err := t.DereferenceMedia(ctx, remoteAttachmentURI, expectedContentType)
derefURI, err := url.Parse(minAttachment.RemoteURL)
if err != nil {
return nil, err
}
attachmentBytes, err := t.DereferenceMedia(ctx, derefURI, minAttachment.File.ContentType)
if err != nil {
return nil, fmt.Errorf("RefreshAttachment: error dereferencing media: %s", err)
}
a, err := d.mediaHandler.ProcessAttachment(ctx, attachmentBytes, ownerAccountID, remoteAttachmentURI.String())
a, err := d.mediaHandler.ProcessAttachment(ctx, attachmentBytes, minAttachment)
if err != nil {
return nil, fmt.Errorf("RefreshAttachment: error processing attachment: %s", err)
}

View file

@ -0,0 +1,106 @@
/*
GoToSocial
Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dereferencing_test
import (
"context"
"testing"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
type AttachmentTestSuite struct {
DereferencerStandardTestSuite
}
func (suite *AttachmentTestSuite) TestDereferenceAttachmentOK() {
fetchingAccount := suite.testAccounts["local_account_1"]
attachmentOwner := "01FENS9F666SEQ6TYQWEEY78GM"
attachmentStatus := "01FENS9NTTVNEX1YZV7GB63MT8"
attachmentContentType := "image/jpeg"
attachmentURL := "https://s3-us-west-2.amazonaws.com/plushcity/media_attachments/files/106/867/380/219/163/828/original/88e8758c5f011439.jpg"
attachmentDescription := "It's a cute plushie."
minAttachment := &gtsmodel.MediaAttachment{
RemoteURL: attachmentURL,
AccountID: attachmentOwner,
StatusID: attachmentStatus,
File: gtsmodel.File{
ContentType: attachmentContentType,
},
Description: attachmentDescription,
}
attachment, err := suite.dereferencer.GetRemoteAttachment(context.Background(), fetchingAccount.Username, minAttachment)
suite.NoError(err)
suite.NotNil(attachment)
suite.Equal(attachmentOwner, attachment.AccountID)
suite.Equal(attachmentStatus, attachment.StatusID)
suite.Equal(attachmentURL, attachment.RemoteURL)
suite.NotEmpty(attachment.URL)
suite.NotEmpty(attachment.Blurhash)
suite.NotEmpty(attachment.ID)
suite.NotEmpty(attachment.CreatedAt)
suite.NotEmpty(attachment.UpdatedAt)
suite.Equal(1.336546184738956, attachment.FileMeta.Original.Aspect)
suite.Equal(2071680, attachment.FileMeta.Original.Size)
suite.Equal(1245, attachment.FileMeta.Original.Height)
suite.Equal(1664, attachment.FileMeta.Original.Width)
suite.Equal("LwQ9yKRP_4t8t7RjWBt7%hozM_ay", attachment.Blurhash)
suite.Equal(gtsmodel.ProcessingStatusProcessed, attachment.Processing)
suite.NotEmpty(attachment.File.Path)
suite.Equal(attachmentContentType, attachment.File.ContentType)
suite.Equal(attachmentDescription, attachment.Description)
suite.NotEmpty(attachment.Thumbnail.Path)
suite.NotEmpty(attachment.Type)
// attachment should also now be in the database
dbAttachment, err := suite.db.GetAttachmentByID(context.Background(), attachment.ID)
suite.NoError(err)
suite.NotNil(dbAttachment)
suite.Equal(attachmentOwner, dbAttachment.AccountID)
suite.Equal(attachmentStatus, dbAttachment.StatusID)
suite.Equal(attachmentURL, dbAttachment.RemoteURL)
suite.NotEmpty(dbAttachment.URL)
suite.NotEmpty(dbAttachment.Blurhash)
suite.NotEmpty(dbAttachment.ID)
suite.NotEmpty(dbAttachment.CreatedAt)
suite.NotEmpty(dbAttachment.UpdatedAt)
suite.Equal(1.336546184738956, dbAttachment.FileMeta.Original.Aspect)
suite.Equal(2071680, dbAttachment.FileMeta.Original.Size)
suite.Equal(1245, dbAttachment.FileMeta.Original.Height)
suite.Equal(1664, dbAttachment.FileMeta.Original.Width)
suite.Equal("LwQ9yKRP_4t8t7RjWBt7%hozM_ay", dbAttachment.Blurhash)
suite.Equal(gtsmodel.ProcessingStatusProcessed, dbAttachment.Processing)
suite.NotEmpty(dbAttachment.File.Path)
suite.Equal(attachmentContentType, dbAttachment.File.ContentType)
suite.Equal(attachmentDescription, dbAttachment.Description)
suite.NotEmpty(dbAttachment.Thumbnail.Path)
suite.NotEmpty(dbAttachment.Type)
}
func TestAttachmentTestSuite(t *testing.T) {
suite.Run(t, new(AttachmentTestSuite))
}

View file

@ -43,8 +43,34 @@ type Dereferencer interface {
GetRemoteInstance(ctx context.Context, username string, remoteInstanceURI *url.URL) (*gtsmodel.Instance, error)
GetRemoteAttachment(ctx context.Context, username string, remoteAttachmentURI *url.URL, ownerAccountID string, statusID string, expectedContentType string) (*gtsmodel.MediaAttachment, error)
RefreshAttachment(ctx context.Context, requestingUsername string, remoteAttachmentURI *url.URL, ownerAccountID string, expectedContentType string) (*gtsmodel.MediaAttachment, error)
// GetRemoteAttachment takes a minimal attachment struct and converts it into a fully fleshed out attachment, stored in the database and instance storage.
//
// The parameter minAttachment must have at least the following fields defined:
// * minAttachment.RemoteURL
// * minAttachment.AccountID
// * minAttachment.File.ContentType
//
// The returned attachment will have an ID generated for it, so no need to generate one beforehand.
// A blurhash will also be generated for the attachment.
//
// Most other fields will be preserved on the passed attachment, including:
// * minAttachment.StatusID
// * minAttachment.CreatedAt
// * minAttachment.UpdatedAt
// * minAttachment.FileMeta
// * minAttachment.AccountID
// * minAttachment.Description
// * minAttachment.ScheduledStatusID
// * minAttachment.Thumbnail.RemoteURL
// * minAttachment.Avatar
// * minAttachment.Header
//
// GetRemoteAttachment will return early if an attachment with the same value as minAttachment.RemoteURL
// is found in the database -- then that attachment will be returned and nothing else will be changed or stored.
GetRemoteAttachment(ctx context.Context, requestingUsername string, minAttachment *gtsmodel.MediaAttachment) (*gtsmodel.MediaAttachment, error)
// RefreshAttachment is like GetRemoteAttachment, but the attachment will always be dereferenced again,
// whether or not it was already stored in the database.
RefreshAttachment(ctx context.Context, requestingUsername string, minAttachment *gtsmodel.MediaAttachment) (*gtsmodel.MediaAttachment, error)
DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error
DereferenceThread(ctx context.Context, username string, statusIRI *url.URL) error

View file

@ -19,24 +19,131 @@
package dereferencing_test
import (
"bytes"
"encoding/json"
"io"
"net/http"
"github.com/go-fed/activity/streams"
"github.com/go-fed/activity/streams/vocab"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/blob"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/federation/dereferencing"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/testrig"
)
type DereferencerStandardTestSuite struct {
suite.Suite
config *config.Config
db db.DB
log *logrus.Logger
config *config.Config
db db.DB
log *logrus.Logger
storage blob.Storage
testRemoteStatuses map[string]vocab.ActivityStreamsNote
testRemoteAccounts map[string]vocab.ActivityStreamsPerson
testAccounts map[string]*gtsmodel.Account
testRemoteStatuses map[string]vocab.ActivityStreamsNote
testRemoteAccounts map[string]vocab.ActivityStreamsPerson
testRemoteAttachments map[string]testrig.RemoteAttachmentFile
testAccounts map[string]*gtsmodel.Account
dereferencer dereferencing.Dereferencer
}
func (suite *DereferencerStandardTestSuite) SetupSuite() {
suite.testAccounts = testrig.NewTestAccounts()
suite.testRemoteStatuses = testrig.NewTestFediStatuses()
suite.testRemoteAccounts = testrig.NewTestFediPeople()
suite.testRemoteAttachments = testrig.NewTestFediAttachments("../../../testrig/media")
}
func (suite *DereferencerStandardTestSuite) SetupTest() {
suite.config = testrig.NewTestConfig()
suite.db = testrig.NewTestDB()
suite.log = testrig.NewTestLog()
suite.storage = testrig.NewTestStorage()
suite.dereferencer = dereferencing.NewDereferencer(suite.config,
suite.db,
testrig.NewTestTypeConverter(suite.db),
suite.mockTransportController(),
testrig.NewTestMediaHandler(suite.db, suite.storage),
suite.log)
testrig.StandardDBSetup(suite.db, nil)
}
func (suite *DereferencerStandardTestSuite) TearDownTest() {
testrig.StandardDBTeardown(suite.db)
}
// mockTransportController returns basically a miniature muxer, which returns a different
// value based on the request URL. It can be used to return remote statuses, profiles, etc,
// as though they were actually being dereferenced. If the URL doesn't correspond to any person
// or note or attachment that we have stored, then just a 200 code will be returned, with an empty body.
func (suite *DereferencerStandardTestSuite) mockTransportController() transport.Controller {
do := func(req *http.Request) (*http.Response, error) {
suite.log.Debugf("received request for %s", req.URL)
responseBytes := []byte{}
responseType := ""
responseLength := 0
note, ok := suite.testRemoteStatuses[req.URL.String()]
if ok {
// the request is for a note that we have stored
noteI, err := streams.Serialize(note)
if err != nil {
panic(err)
}
noteJson, err := json.Marshal(noteI)
if err != nil {
panic(err)
}
responseBytes = noteJson
responseType = "application/activity+json"
}
person, ok := suite.testRemoteAccounts[req.URL.String()]
if ok {
// the request is for a person that we have stored
personI, err := streams.Serialize(person)
if err != nil {
panic(err)
}
personJson, err := json.Marshal(personI)
if err != nil {
panic(err)
}
responseBytes = personJson
responseType = "application/activity+json"
}
attachment, ok := suite.testRemoteAttachments[req.URL.String()]
if ok {
responseBytes = attachment.Data
responseType = attachment.ContentType
}
if len(responseBytes) != 0 {
// we found something, so print what we're going to return
suite.log.Debugf("returning response %s", string(responseBytes))
}
responseLength = len(responseBytes)
reader := bytes.NewReader(responseBytes)
readCloser := io.NopCloser(reader)
response := &http.Response{
StatusCode: 200,
Body: readCloser,
ContentLength: int64(responseLength),
Header: http.Header{
"content-type": {responseType},
},
}
return response, nil
}
mockClient := testrig.NewMockHTTPClient(do)
return testrig.NewTestTransportController(mockClient, suite.db)
}

View file

@ -396,13 +396,10 @@ func (d *deref) populateStatusAttachments(ctx context.Context, status *gtsmodel.
attachments := []*gtsmodel.MediaAttachment{}
for _, a := range status.Attachments {
aURL, err := url.Parse(a.RemoteURL)
if err != nil {
l.Errorf("populateStatusAttachments: couldn't parse attachment url %s: %s", a.RemoteURL, err)
continue
}
a.AccountID = status.AccountID
a.StatusID = status.ID
attachment, err := d.GetRemoteAttachment(ctx, requestingUsername, aURL, status.AccountID, status.ID, a.File.ContentType)
attachment, err := d.GetRemoteAttachment(ctx, requestingUsername, a)
if err != nil {
l.Errorf("populateStatusAttachments: couldn't get remote attachment %s: %s", a.RemoteURL, err)
continue

View file

@ -19,21 +19,14 @@
package dereferencing_test
import (
"bytes"
"context"
"encoding/json"
"io"
"net/http"
"testing"
"time"
"github.com/go-fed/activity/streams"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/federation/dereferencing"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/testrig"
)
@ -41,81 +34,6 @@ type StatusTestSuite struct {
DereferencerStandardTestSuite
}
// mockTransportController returns basically a miniature muxer, which returns a different
// value based on the request URL. It can be used to return remote statuses, profiles, etc,
// as though they were actually being dereferenced. If the URL doesn't correspond to any person
// or note or attachment that we have stored, then just a 200 code will be returned, with an empty body.
func (suite *StatusTestSuite) mockTransportController() transport.Controller {
do := func(req *http.Request) (*http.Response, error) {
suite.log.Debugf("received request for %s", req.URL)
responseBytes := []byte{}
note, ok := suite.testRemoteStatuses[req.URL.String()]
if ok {
// the request is for a note that we have stored
noteI, err := streams.Serialize(note)
if err != nil {
panic(err)
}
noteJson, err := json.Marshal(noteI)
if err != nil {
panic(err)
}
responseBytes = noteJson
}
person, ok := suite.testRemoteAccounts[req.URL.String()]
if ok {
// the request is for a person that we have stored
personI, err := streams.Serialize(person)
if err != nil {
panic(err)
}
personJson, err := json.Marshal(personI)
if err != nil {
panic(err)
}
responseBytes = personJson
}
if len(responseBytes) != 0 {
// we found something, so print what we're going to return
suite.log.Debugf("returning response %s", string(responseBytes))
}
reader := bytes.NewReader(responseBytes)
readCloser := io.NopCloser(reader)
response := &http.Response{
StatusCode: 200,
Body: readCloser,
}
return response, nil
}
mockClient := testrig.NewMockHTTPClient(do)
return testrig.NewTestTransportController(mockClient, suite.db)
}
func (suite *StatusTestSuite) SetupSuite() {
suite.testAccounts = testrig.NewTestAccounts()
suite.testRemoteStatuses = testrig.NewTestFediStatuses()
suite.testRemoteAccounts = testrig.NewTestFediPeople()
}
func (suite *StatusTestSuite) SetupTest() {
suite.config = testrig.NewTestConfig()
suite.db = testrig.NewTestDB()
suite.log = testrig.NewTestLog()
suite.dereferencer = dereferencing.NewDereferencer(suite.config,
suite.db,
testrig.NewTestTypeConverter(suite.db),
suite.mockTransportController(),
testrig.NewTestMediaHandler(suite.db, testrig.NewTestStorage()),
suite.log)
testrig.StandardDBSetup(suite.db, nil)
}
func (suite *StatusTestSuite) TestDereferenceSimpleStatus() {
fetchingAccount := suite.testAccounts["local_account_1"]
@ -205,10 +123,6 @@ func (suite *StatusTestSuite) TestDereferenceStatusWithMention() {
suite.False(m.Silent)
}
func (suite *StatusTestSuite) TearDownTest() {
testrig.StandardDBTeardown(suite.db)
}
func TestStatusTestSuite(t *testing.T) {
suite.Run(t, new(StatusTestSuite))
}