[feature] Start implementing refetch of lost media files via /api/v1/admin/media_refetch (#1221)

* [chore] Move ShortcodeDomain to its own little util func

* [feature] Add RefetchEmojis function to media manager

* [feature] Expose admin media refresh via admin API

* update following review feedback

- change/fix log levels
- make sure not to try to refetch local emojis
- small style refactoring + comments

* log on emoji refetch start

Signed-off-by: kim <grufwub@gmail.com>
Co-authored-by: kim <grufwub@gmail.com>
This commit is contained in:
tobi 2022-12-10 22:43:11 +01:00 committed by GitHub
commit 5e060d0bcb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 499 additions and 28 deletions

View file

@ -40,6 +40,15 @@ const UnusedLocalAttachmentCacheDays = 3
// Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs.
type Manager interface {
// Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
// It will block until workers are finished processing.
Stop() error
/*
PROCESSING FUNCTIONS
*/
// ProcessMedia begins the process of decoding and storing the given data as an attachment.
// It will return a pointer to a ProcessingMedia struct upon which further actions can be performed, such as getting
// the finished media, thumbnail, attachment, etc.
@ -75,6 +84,10 @@ type Manager interface {
// RecacheMedia refetches, reprocesses, and recaches an existing attachment that has been uncached via pruneRemote.
RecacheMedia(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, attachmentID string) (*ProcessingMedia, error)
/*
PRUNING FUNCTIONS
*/
// PruneAllRemote prunes all remote media attachments cached on this instance which are older than the given amount of days.
// 'Pruning' in this context means removing the locally stored data of the attachment (both thumbnail and full size),
// and setting 'cached' to false on the associated attachment.
@ -98,10 +111,18 @@ type Manager interface {
// is returned to the caller.
PruneOrphaned(ctx context.Context, dry bool) (int, error)
// Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
// It will block until workers are finished processing.
Stop() error
/*
REFETCHING FUNCTIONS
Useful when data loss has occurred.
*/
// RefetchEmojis iterates through remote emojis (for the given domain, or all if domain is empty string).
//
// For each emoji, the manager will check whether both the full size and static images are present in storage.
// If not, the manager will refetch and reprocess full size and static images for the emoji.
//
// The provided DereferenceMedia function will be used when it's necessary to refetch something this way.
RefetchEmojis(ctx context.Context, domain string, dereferenceMedia DereferenceMedia) (int, error)
}
type manager struct {

View file

@ -20,22 +20,26 @@ package media_test
import (
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/concurrency"
"github.com/superseriousbusiness/gotosocial/internal/db"
gtsmodel "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/messages"
"github.com/superseriousbusiness/gotosocial/internal/storage"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/testrig"
)
type MediaStandardTestSuite struct {
suite.Suite
db db.DB
storage *storage.Driver
manager media.Manager
testAttachments map[string]*gtsmodel.MediaAttachment
testAccounts map[string]*gtsmodel.Account
testEmojis map[string]*gtsmodel.Emoji
db db.DB
storage *storage.Driver
manager media.Manager
transportController transport.Controller
testAttachments map[string]*gtsmodel.MediaAttachment
testAccounts map[string]*gtsmodel.Account
testEmojis map[string]*gtsmodel.Emoji
}
func (suite *MediaStandardTestSuite) SetupSuite() {
@ -53,6 +57,7 @@ func (suite *MediaStandardTestSuite) SetupTest() {
suite.testAccounts = testrig.NewTestAccounts()
suite.testEmojis = testrig.NewTestEmojis()
suite.manager = testrig.NewTestMediaManager(suite.db, suite.storage)
suite.transportController = testrig.NewTestTransportController(testrig.NewMockHTTPClient(nil, "../../testrig/media"), suite.db, concurrency.NewWorkerPool[messages.FromFederator](0, 0))
}
func (suite *MediaStandardTestSuite) TearDownTest() {

149
internal/media/refetch.go Normal file
View file

@ -0,0 +1,149 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media
import (
"context"
"errors"
"fmt"
"io"
"net/url"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
type DereferenceMedia func(ctx context.Context, iri *url.URL) (io.ReadCloser, int64, error)
func (m *manager) RefetchEmojis(ctx context.Context, domain string, dereferenceMedia DereferenceMedia) (int, error) {
// normalize domain
if domain == "" {
domain = db.EmojiAllDomains
}
var (
maxShortcodeDomain string
refetchIDs []string
)
// page through emojis 20 at a time, looking for those with missing images
for {
// Fetch next block of emojis from database
emojis, err := m.db.GetEmojis(ctx, domain, false, true, "", maxShortcodeDomain, "", 20)
if err != nil {
if !errors.Is(err, db.ErrNoEntries) {
// an actual error has occurred
log.Errorf("error fetching emojis from database: %s", err)
}
break
}
for _, emoji := range emojis {
if emoji.Domain == "" {
// never try to refetch local emojis
continue
}
if refetch, err := m.emojiRequiresRefetch(ctx, emoji); err != nil {
// an error here indicates something is wrong with storage, so we should stop
return 0, fmt.Errorf("error checking refetch requirement for emoji %s: %w", util.ShortcodeDomain(emoji), err)
} else if !refetch {
continue
}
refetchIDs = append(refetchIDs, emoji.ID)
}
// Update next maxShortcodeDomain from last emoji
maxShortcodeDomain = util.ShortcodeDomain(emojis[len(emojis)-1])
}
// bail early if we've got nothing to do
toRefetchCount := len(refetchIDs)
if toRefetchCount == 0 {
log.Debug("no remote emojis require a refetch")
return 0, nil
}
log.Debugf("%d remote emoji(s) require a refetch, doing that now...", toRefetchCount)
var totalRefetched int
for _, emojiID := range refetchIDs {
emoji, err := m.db.GetEmojiByID(ctx, emojiID)
if err != nil {
// this shouldn't happen--since we know we have the emoji--so return if it does
return 0, fmt.Errorf("error getting emoji %s: %w", emojiID, err)
}
shortcodeDomain := util.ShortcodeDomain(emoji)
if emoji.ImageRemoteURL == "" {
log.Errorf("remote emoji %s could not be refreshed because it has no ImageRemoteURL set", shortcodeDomain)
continue
}
emojiImageIRI, err := url.Parse(emoji.ImageRemoteURL)
if err != nil {
log.Errorf("remote emoji %s could not be refreshed because its ImageRemoteURL (%s) is not a valid uri: %s", shortcodeDomain, emoji.ImageRemoteURL, err)
continue
}
dataFunc := func(ctx context.Context) (reader io.ReadCloser, fileSize int64, err error) {
return dereferenceMedia(ctx, emojiImageIRI)
}
processingEmoji, err := m.ProcessEmoji(ctx, dataFunc, nil, emoji.Shortcode, emoji.ID, emoji.URI, &AdditionalEmojiInfo{
Domain: &emoji.Domain,
ImageRemoteURL: &emoji.ImageRemoteURL,
ImageStaticRemoteURL: &emoji.ImageStaticRemoteURL,
Disabled: emoji.Disabled,
VisibleInPicker: emoji.VisibleInPicker,
}, true)
if err != nil {
log.Errorf("emoji %s could not be refreshed because of an error during processing: %s", shortcodeDomain, err)
continue
}
if _, err := processingEmoji.LoadEmoji(ctx); err != nil {
log.Errorf("emoji %s could not be refreshed because of an error during loading: %s", shortcodeDomain, err)
continue
}
log.Tracef("refetched emoji %s successfully from remote", shortcodeDomain)
totalRefetched++
}
return totalRefetched, nil
}
func (m *manager) emojiRequiresRefetch(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
if has, err := m.storage.Has(ctx, emoji.ImagePath); err != nil {
return false, err
} else if !has {
return true, nil
}
if has, err := m.storage.Has(ctx, emoji.ImageStaticPath); err != nil {
return false, err
} else if !has {
return true, nil
}
return false, nil
}

View file

@ -0,0 +1,85 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package media_test
import (
"context"
"testing"
"github.com/stretchr/testify/suite"
)
type RefetchTestSuite struct {
MediaStandardTestSuite
}
func (suite *RefetchTestSuite) TestRefetchEmojisNothingToDo() {
ctx := context.Background()
adminAccount := suite.testAccounts["admin_account"]
transport, err := suite.transportController.NewTransportForUsername(ctx, adminAccount.Username)
if err != nil {
suite.FailNow(err.Error())
}
refetched, err := suite.manager.RefetchEmojis(ctx, "", transport.DereferenceMedia)
suite.NoError(err)
suite.Equal(0, refetched)
}
func (suite *RefetchTestSuite) TestRefetchEmojis() {
ctx := context.Background()
if err := suite.storage.Delete(ctx, suite.testEmojis["yell"].ImagePath); err != nil {
suite.FailNow(err.Error())
}
adminAccount := suite.testAccounts["admin_account"]
transport, err := suite.transportController.NewTransportForUsername(ctx, adminAccount.Username)
if err != nil {
suite.FailNow(err.Error())
}
refetched, err := suite.manager.RefetchEmojis(ctx, "", transport.DereferenceMedia)
suite.NoError(err)
suite.Equal(1, refetched)
}
func (suite *RefetchTestSuite) TestRefetchEmojisLocal() {
ctx := context.Background()
// delete the image for a LOCAL emoji
if err := suite.storage.Delete(ctx, suite.testEmojis["rainbow"].ImagePath); err != nil {
suite.FailNow(err.Error())
}
adminAccount := suite.testAccounts["admin_account"]
transport, err := suite.transportController.NewTransportForUsername(ctx, adminAccount.Username)
if err != nil {
suite.FailNow(err.Error())
}
refetched, err := suite.manager.RefetchEmojis(ctx, "", transport.DereferenceMedia)
suite.NoError(err)
suite.Equal(0, refetched) // shouldn't refetch anything because local
}
func TestRefetchTestSuite(t *testing.T) {
suite.Run(t, &RefetchTestSuite{})
}