[feature] allow uncaching of other media types (#1234)

* simplify pruneRemote, remove unnecessary media trace logging, update RemoteOlderThan() to include headers/avatars

Signed-off-by: kim <grufwub@gmail.com>

* cleanup pruneallmeta, add remote header to pruneremote tests

Signed-off-by: kim <grufwub@gmail.com>

* fix olderthan duration additions

Signed-off-by: kim <grufwub@gmail.com>

* fix broken test now that test model header changed

Signed-off-by: kim <grufwub@gmail.com>

* instead use new remote test account for new header model

Signed-off-by: kim <grufwub@gmail.com>

* use newer generated ULID for remote_account_3 to ensure it is sorted last

Signed-off-by: kim <grufwub@gmail.com>

* reorganize serialized keys to match expected test account model order

Signed-off-by: kim <grufwub@gmail.com>

Signed-off-by: kim <grufwub@gmail.com>
This commit is contained in:
kim 2022-12-12 11:22:19 +00:00 committed by GitHub
commit 58c87bdd7f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 192 additions and 108 deletions

View file

@ -81,10 +81,8 @@ func (p *ProcessingMedia) AttachmentID() string {
// LoadAttachment blocks until the thumbnail and fullsize content
// has been processed, and then returns the completed attachment.
func (p *ProcessingMedia) LoadAttachment(ctx context.Context) (*gtsmodel.MediaAttachment, error) {
log.Tracef("LoadAttachment: getting lock for attachment %s", p.attachment.URL)
p.mu.Lock()
defer p.mu.Unlock()
log.Tracef("LoadAttachment: got lock for attachment %s", p.attachment.URL)
if err := p.store(ctx); err != nil {
return nil, err
@ -98,23 +96,24 @@ func (p *ProcessingMedia) LoadAttachment(ctx context.Context) (*gtsmodel.MediaAt
return nil, err
}
// store the result in the database before returning it
if !p.insertedInDB {
if p.recache {
// if it's a recache we should only need to update
// This is an existing media attachment we're recaching, so only need to update it
if err := p.database.UpdateByID(ctx, p.attachment, p.attachment.ID); err != nil {
return nil, err
}
} else {
// otherwise we need to really PUT it
// This is a new media attachment we're caching for first time
if err := p.database.Put(ctx, p.attachment); err != nil {
return nil, err
}
}
// Mark this as stored in DB
p.insertedInDB = true
}
log.Tracef("LoadAttachment: finished, returning attachment %s", p.attachment.URL)
log.Tracef("finished loading attachment %s", p.attachment.URL)
return p.attachment, nil
}
@ -180,7 +179,7 @@ func (p *ProcessingMedia) loadThumb(ctx context.Context) error {
// we're done processing the thumbnail!
atomic.StoreInt32(&p.thumbState, int32(complete))
log.Tracef("loadThumb: finished processing thumbnail for attachment %s", p.attachment.URL)
log.Tracef("finished processing thumbnail for attachment %s", p.attachment.URL)
fallthrough
case complete:
return nil
@ -241,7 +240,7 @@ func (p *ProcessingMedia) loadFullSize(ctx context.Context) error {
// we're done processing the full-size image
atomic.StoreInt32(&p.fullSizeState, int32(complete))
log.Tracef("loadFullSize: finished processing full size image for attachment %s", p.attachment.URL)
log.Tracef("finished processing full size image for attachment %s", p.attachment.URL)
fallthrough
case complete:
return nil
@ -362,7 +361,7 @@ func (p *ProcessingMedia) store(ctx context.Context) error {
p.attachment.File.FileSize = int(fileSize)
p.read = true
log.Tracef("store: finished storing initial data for attachment %s", p.attachment.URL)
log.Tracef("finished storing initial data for attachment %s", p.attachment.URL)
return nil
}

View file

@ -20,6 +20,7 @@ package media
import (
"context"
"errors"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/db"
@ -28,17 +29,23 @@ import (
)
func (m *manager) PruneAllMeta(ctx context.Context) (int, error) {
var totalPruned int
var maxID string
var attachments []*gtsmodel.MediaAttachment
var err error
var (
totalPruned int
maxID string
)
for {
// select "selectPruneLimit" headers / avatars at a time for pruning
attachments, err := m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return totalPruned, err
} else if len(attachments) == 0 {
break
}
// select 20 attachments at a time and prune them
for attachments, err = m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) {
// use the id of the last attachment in the slice as the next 'maxID' value
l := len(attachments)
log.Tracef("PruneAllMeta: got %d attachments with maxID < %s", l, maxID)
maxID = attachments[l-1].ID
log.Tracef("PruneAllMeta: got %d attachments with maxID < %s", len(attachments), maxID)
maxID = attachments[len(attachments)-1].ID
// prune each attachment that meets one of the following criteria:
// - has no owning account in the database
@ -56,11 +63,6 @@ func (m *manager) PruneAllMeta(ctx context.Context) (int, error) {
}
}
// make sure we don't have a real error when we leave the loop
if err != nil && err != db.ErrNoEntries {
return totalPruned, err
}
log.Infof("PruneAllMeta: finished pruning avatars + headers: pruned %d entries", totalPruned)
return totalPruned, nil
}

View file

@ -20,7 +20,8 @@ package media
import (
"context"
"fmt"
"errors"
"time"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/db"
@ -31,21 +32,23 @@ import (
func (m *manager) PruneAllRemote(ctx context.Context, olderThanDays int) (int, error) {
var totalPruned int
olderThan, err := parseOlderThan(olderThanDays)
if err != nil {
return totalPruned, fmt.Errorf("PruneAllRemote: error parsing olderThanDays %d: %s", olderThanDays, err)
}
olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays))
log.Infof("PruneAllRemote: pruning media older than %s", olderThan)
// select 20 attachments at a time and prune them
for attachments, err := m.db.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.db.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) {
for {
// Select "selectPruneLimit" status attachments at a time for pruning
attachments, err := m.db.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return totalPruned, err
} else if len(attachments) == 0 {
break
}
// use the age of the oldest attachment (the last one in the slice) as the next 'older than' value
l := len(attachments)
log.Tracef("PruneAllRemote: got %d attachments older than %s", l, olderThan)
olderThan = attachments[l-1].CreatedAt
// use the age of the oldest attachment (last in slice) as the next 'olderThan' value
log.Tracef("PruneAllRemote: got %d status attachments older than %s", len(attachments), olderThan)
olderThan = attachments[len(attachments)-1].CreatedAt
// prune each attachment
// prune each status attachment
for _, attachment := range attachments {
if err := m.pruneOneRemote(ctx, attachment); err != nil {
return totalPruned, err
@ -54,11 +57,6 @@ func (m *manager) PruneAllRemote(ctx context.Context, olderThanDays int) (int, e
}
}
// make sure we don't have a real error when we leave the loop
if err != nil && err != db.ErrNoEntries {
return totalPruned, err
}
log.Infof("PruneAllRemote: finished pruning remote media: pruned %d entries", totalPruned)
return totalPruned, nil
}
@ -69,7 +67,7 @@ func (m *manager) pruneOneRemote(ctx context.Context, attachment *gtsmodel.Media
if attachment.File.Path != "" {
// delete the full size attachment from storage
log.Tracef("pruneOneRemote: deleting %s", attachment.File.Path)
if err := m.storage.Delete(ctx, attachment.File.Path); err != nil && err != storage.ErrNotFound {
if err := m.storage.Delete(ctx, attachment.File.Path); err != nil && !errors.Is(err, storage.ErrNotFound) {
return err
}
cached := false
@ -80,7 +78,7 @@ func (m *manager) pruneOneRemote(ctx context.Context, attachment *gtsmodel.Media
if attachment.Thumbnail.Path != "" {
// delete the thumbnail from storage
log.Tracef("pruneOneRemote: deleting %s", attachment.Thumbnail.Path)
if err := m.storage.Delete(ctx, attachment.Thumbnail.Path); err != nil && err != storage.ErrNotFound {
if err := m.storage.Delete(ctx, attachment.Thumbnail.Path); err != nil && !errors.Is(err, storage.ErrNotFound) {
return err
}
cached := false
@ -88,10 +86,10 @@ func (m *manager) pruneOneRemote(ctx context.Context, attachment *gtsmodel.Media
changed = true
}
// update the attachment to reflect that we no longer have it cached
if changed {
return m.db.UpdateByID(ctx, attachment, attachment.ID, "updated_at", "cached")
if !changed {
return nil
}
return nil
// update the attachment to reflect that we no longer have it cached
return m.db.UpdateByID(ctx, attachment, attachment.ID, "updated_at", "cached")
}

View file

@ -27,6 +27,7 @@ import (
"codeberg.org/gruf/go-store/v2/storage"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
type PruneRemoteTestSuite struct {
@ -34,24 +35,29 @@ type PruneRemoteTestSuite struct {
}
func (suite *PruneRemoteTestSuite) TestPruneRemote() {
testAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
suite.True(*testAttachment.Cached)
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
suite.True(*testStatusAttachment.Cached)
testHeader := suite.testAttachments["remote_account_3_header"]
suite.True(*testHeader.Cached)
totalPruned, err := suite.manager.PruneAllRemote(context.Background(), 1)
suite.NoError(err)
suite.Equal(2, totalPruned)
suite.Equal(3, totalPruned)
prunedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testAttachment.ID)
prunedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testStatusAttachment.ID)
suite.NoError(err)
suite.False(*prunedAttachment.Cached)
// the media should no longer be cached
prunedAttachment, err = suite.db.GetAttachmentByID(context.Background(), testHeader.ID)
suite.NoError(err)
suite.False(*prunedAttachment.Cached)
}
func (suite *PruneRemoteTestSuite) TestPruneRemoteTwice() {
totalPruned, err := suite.manager.PruneAllRemote(context.Background(), 1)
suite.NoError(err)
suite.Equal(2, totalPruned)
suite.Equal(3, totalPruned)
// final prune should prune nothing, since the first prune already happened
totalPrunedAgain, err := suite.manager.PruneAllRemote(context.Background(), 1)
@ -61,16 +67,21 @@ func (suite *PruneRemoteTestSuite) TestPruneRemoteTwice() {
func (suite *PruneRemoteTestSuite) TestPruneAndRecache() {
ctx := context.Background()
testAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
testHeader := suite.testAttachments["remote_account_3_header"]
totalPruned, err := suite.manager.PruneAllRemote(ctx, 1)
suite.NoError(err)
suite.Equal(2, totalPruned)
suite.Equal(3, totalPruned)
// media should no longer be stored
_, err = suite.storage.Get(ctx, testAttachment.File.Path)
_, err = suite.storage.Get(ctx, testStatusAttachment.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testAttachment.Thumbnail.Path)
_, err = suite.storage.Get(ctx, testStatusAttachment.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
// now recache the image....
@ -82,34 +93,40 @@ func (suite *PruneRemoteTestSuite) TestPruneAndRecache() {
}
return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil
}
processingRecache, err := suite.manager.RecacheMedia(ctx, data, nil, testAttachment.ID)
suite.NoError(err)
// synchronously load the recached attachment
recachedAttachment, err := processingRecache.LoadAttachment(ctx)
suite.NoError(err)
suite.NotNil(recachedAttachment)
for _, original := range []*gtsmodel.MediaAttachment{
testStatusAttachment,
testHeader,
} {
processingRecache, err := suite.manager.RecacheMedia(ctx, data, nil, original.ID)
suite.NoError(err)
// recachedAttachment should be basically the same as the old attachment
suite.True(*recachedAttachment.Cached)
suite.Equal(testAttachment.ID, recachedAttachment.ID)
suite.Equal(testAttachment.File.Path, recachedAttachment.File.Path) // file should be stored in the same place
suite.Equal(testAttachment.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail
suite.EqualValues(testAttachment.FileMeta, recachedAttachment.FileMeta) // and the filemeta should be the same
// synchronously load the recached attachment
recachedAttachment, err := processingRecache.LoadAttachment(ctx)
suite.NoError(err)
suite.NotNil(recachedAttachment)
// recached files should be back in storage
_, err = suite.storage.Get(ctx, recachedAttachment.File.Path)
suite.NoError(err)
_, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path)
suite.NoError(err)
// recachedAttachment should be basically the same as the old attachment
suite.True(*recachedAttachment.Cached)
suite.Equal(original.ID, recachedAttachment.ID)
suite.Equal(original.File.Path, recachedAttachment.File.Path) // file should be stored in the same place
suite.Equal(original.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail
suite.EqualValues(original.FileMeta, recachedAttachment.FileMeta) // and the filemeta should be the same
// recached files should be back in storage
_, err = suite.storage.Get(ctx, recachedAttachment.File.Path)
suite.NoError(err)
_, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path)
suite.NoError(err)
}
}
func (suite *PruneRemoteTestSuite) TestPruneOneNonExistent() {
ctx := context.Background()
testAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
// Delete this attachment cached on disk
media, err := suite.db.GetAttachmentByID(ctx, testAttachment.ID)
media, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID)
suite.NoError(err)
suite.True(*media.Cached)
err = suite.storage.Delete(ctx, media.File.Path)
@ -118,7 +135,7 @@ func (suite *PruneRemoteTestSuite) TestPruneOneNonExistent() {
// Now attempt to prune remote for item with db entry no file
totalPruned, err := suite.manager.PruneAllRemote(ctx, 1)
suite.NoError(err)
suite.Equal(2, totalPruned)
suite.Equal(3, totalPruned)
}
func TestPruneRemoteTestSuite(t *testing.T) {

View file

@ -20,7 +20,7 @@ package media
import (
"context"
"fmt"
"time"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/db"
@ -34,10 +34,7 @@ func (m *manager) PruneUnusedLocalAttachments(ctx context.Context) (int, error)
var attachments []*gtsmodel.MediaAttachment
var err error
olderThan, err := parseOlderThan(UnusedLocalAttachmentCacheDays)
if err != nil {
return totalPruned, fmt.Errorf("PruneUnusedLocalAttachments: error parsing olderThanDays %d: %s", UnusedLocalAttachmentCacheDays, err)
}
olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(UnusedLocalAttachmentCacheDays))
log.Infof("PruneUnusedLocalAttachments: pruning unused local attachments older than %s", olderThan)
// select 20 attachments at a time and prune them

View file

@ -23,7 +23,6 @@ import (
"errors"
"fmt"
"io"
"time"
"github.com/h2non/filetype"
"github.com/superseriousbusiness/gotosocial/internal/log"
@ -134,22 +133,6 @@ func (l *logrusWrapper) Error(err error, msg string, keysAndValues ...interface{
log.Error("media manager cron logger: ", err, msg, keysAndValues)
}
func parseOlderThan(olderThanDays int) (time.Time, error) {
// convert days into a duration string
olderThanHoursString := fmt.Sprintf("%dh", olderThanDays*24)
// parse the duration string into a duration
olderThanHours, err := time.ParseDuration(olderThanHoursString)
if err != nil {
return time.Time{}, err
}
// 'subtract' that from the time now to give our threshold
olderThan := time.Now().Add(-olderThanHours)
return olderThan, nil
}
// lengthReader wraps a reader and reads the length of total bytes written as it goes.
type lengthReader struct {
source io.Reader