[bugfix/chore] oauth entropy fix + media cleanup tasks rewrite (#1853)

This commit is contained in:
kim 2023-06-22 20:46:36 +01:00 committed by GitHub
commit 9a22102fa8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
38 changed files with 2076 additions and 1090 deletions

View file

@ -25,10 +25,8 @@ import (
"time"
"codeberg.org/gruf/go-iotools"
"codeberg.org/gruf/go-runners"
"codeberg.org/gruf/go-sched"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
"github.com/superseriousbusiness/gotosocial/internal/log"
@ -61,7 +59,6 @@ type Manager struct {
// See internal/concurrency.NewWorkerPool() documentation for further information.
func NewManager(state *state.State) *Manager {
m := &Manager{state: state}
scheduleCleanupJobs(m)
return m
}
@ -214,7 +211,7 @@ func (m *Manager) ProcessMedia(ctx context.Context, data DataFunc, accountID str
func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode string, emojiID string, uri string, ai *AdditionalEmojiInfo, refresh bool) (*ProcessingEmoji, error) {
instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "")
if err != nil {
return nil, fmt.Errorf("preProcessEmoji: error fetching this instance account from the db: %s", err)
return nil, gtserror.Newf("error fetching this instance account from the db: %s", err)
}
var (
@ -227,7 +224,7 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode
// Look for existing emoji by given ID.
emoji, err = m.state.DB.GetEmojiByID(ctx, emojiID)
if err != nil {
return nil, fmt.Errorf("preProcessEmoji: error fetching emoji to refresh from the db: %s", err)
return nil, gtserror.Newf("error fetching emoji to refresh from the db: %s", err)
}
// if this is a refresh, we will end up with new images
@ -260,7 +257,7 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode
newPathID, err = id.NewRandomULID()
if err != nil {
return nil, fmt.Errorf("preProcessEmoji: error generating alternateID for emoji refresh: %s", err)
return nil, gtserror.Newf("error generating alternateID for emoji refresh: %s", err)
}
// store + serve static image at new path ID
@ -356,33 +353,3 @@ func (m *Manager) ProcessEmoji(ctx context.Context, data DataFunc, shortcode str
return emoji, nil
}
func scheduleCleanupJobs(m *Manager) {
const day = time.Hour * 24
// Calculate closest midnight.
now := time.Now()
midnight := now.Round(day)
if midnight.Before(now) {
// since <= 11:59am rounds down.
midnight = midnight.Add(day)
}
// Get ctx associated with scheduler run state.
done := m.state.Workers.Scheduler.Done()
doneCtx := runners.CancelCtx(done)
// TODO: we'll need to do some thinking to make these
// jobs restartable if we want to implement reloads in
// the future that make call to Workers.Stop() -> Workers.Start().
// Schedule the PruneAll task to execute every day at midnight.
m.state.Workers.Scheduler.Schedule(sched.NewJob(func(now time.Time) {
err := m.PruneAll(doneCtx, config.GetMediaRemoteCacheDays(), true)
if err != nil {
log.Errorf(nil, "error during prune: %v", err)
}
log.Infof(nil, "finished pruning all in %s", time.Since(now))
}).EveryAt(midnight, day))
}

View file

@ -214,7 +214,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLarge() {
// do a blocking call to fetch the emoji
emoji, err := processingEmoji.LoadEmoji(ctx)
suite.EqualError(err, "given emoji size 630kiB greater than max allowed 50.0kiB")
suite.EqualError(err, "store: given emoji size 630kiB greater than max allowed 50.0kiB")
suite.Nil(emoji)
}
@ -238,7 +238,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLargeNoSizeGiven() {
// do a blocking call to fetch the emoji
emoji, err := processingEmoji.LoadEmoji(ctx)
suite.EqualError(err, "calculated emoji size 630kiB greater than max allowed 50.0kiB")
suite.EqualError(err, "store: calculated emoji size 630kiB greater than max allowed 50.0kiB")
suite.Nil(emoji)
}
@ -626,7 +626,7 @@ func (suite *ManagerTestSuite) TestNotAnMp4ProcessBlocking() {
// we should get an error while loading
attachment, err := processingMedia.LoadAttachment(ctx)
suite.EqualError(err, "error decoding video: error determining video metadata: [width height framerate]")
suite.EqualError(err, "finish: error decoding video: error determining video metadata: [width height framerate]")
suite.Nil(attachment)
}

View file

@ -28,6 +28,7 @@ import (
"codeberg.org/gruf/go-runners"
"github.com/h2non/filetype"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/uris"
@ -137,7 +138,7 @@ func (p *ProcessingEmoji) load(ctx context.Context) (*gtsmodel.Emoji, bool, erro
}
// Existing emoji we're refreshing, so only need to update.
_, err = p.mgr.state.DB.UpdateEmoji(ctx, p.emoji, columns...)
err = p.mgr.state.DB.UpdateEmoji(ctx, p.emoji, columns...)
return err
}
@ -160,7 +161,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {
// Load media from provided data fn.
rc, sz, err := p.dataFn(ctx)
if err != nil {
return fmt.Errorf("error executing data function: %w", err)
return gtserror.Newf("error executing data function: %w", err)
}
defer func() {
@ -177,13 +178,13 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {
// Read the first 261 header bytes into buffer.
if _, err := io.ReadFull(rc, hdrBuf); err != nil {
return fmt.Errorf("error reading incoming media: %w", err)
return gtserror.Newf("error reading incoming media: %w", err)
}
// Parse file type info from header buffer.
info, err := filetype.Match(hdrBuf)
if err != nil {
return fmt.Errorf("error parsing file type: %w", err)
return gtserror.Newf("error parsing file type: %w", err)
}
switch info.Extension {
@ -192,7 +193,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {
// unhandled
default:
return fmt.Errorf("unsupported emoji filetype: %s", info.Extension)
return gtserror.Newf("unsupported emoji filetype: %s", info.Extension)
}
// Recombine header bytes with remaining stream
@ -211,7 +212,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {
// Check that provided size isn't beyond max. We check beforehand
// so that we don't attempt to stream the emoji into storage if not needed.
if size := bytesize.Size(sz); sz > 0 && size > maxSize {
return fmt.Errorf("given emoji size %s greater than max allowed %s", size, maxSize)
return gtserror.Newf("given emoji size %s greater than max allowed %s", size, maxSize)
}
var pathID string
@ -241,14 +242,14 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {
// Attempt to remove existing emoji at storage path (might be broken / out-of-date)
if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImagePath); err != nil {
return fmt.Errorf("error removing emoji from storage: %v", err)
return gtserror.Newf("error removing emoji from storage: %v", err)
}
}
// Write the final image reader stream to our storage.
sz, err = p.mgr.state.Storage.PutStream(ctx, p.emoji.ImagePath, r)
if err != nil {
return fmt.Errorf("error writing emoji to storage: %w", err)
return gtserror.Newf("error writing emoji to storage: %w", err)
}
// Once again check size in case none was provided previously.
@ -257,7 +258,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {
if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImagePath); err != nil {
log.Errorf(ctx, "error removing too-large-emoji from storage: %v", err)
}
return fmt.Errorf("calculated emoji size %s greater than max allowed %s", size, maxSize)
return gtserror.Newf("calculated emoji size %s greater than max allowed %s", size, maxSize)
}
// Fill in remaining attachment data now it's stored.
@ -278,19 +279,19 @@ func (p *ProcessingEmoji) finish(ctx context.Context) error {
// Fetch a stream to the original file in storage.
rc, err := p.mgr.state.Storage.GetStream(ctx, p.emoji.ImagePath)
if err != nil {
return fmt.Errorf("error loading file from storage: %w", err)
return gtserror.Newf("error loading file from storage: %w", err)
}
defer rc.Close()
// Decode the image from storage.
staticImg, err := decodeImage(rc)
if err != nil {
return fmt.Errorf("error decoding image: %w", err)
return gtserror.Newf("error decoding image: %w", err)
}
// The image should be in-memory by now.
if err := rc.Close(); err != nil {
return fmt.Errorf("error closing file: %w", err)
return gtserror.Newf("error closing file: %w", err)
}
// This shouldn't already exist, but we do a check as it's worth logging.
@ -298,7 +299,7 @@ func (p *ProcessingEmoji) finish(ctx context.Context) error {
log.Warnf(ctx, "static emoji already exists at storage path: %s", p.emoji.ImagePath)
// Attempt to remove static existing emoji at storage path (might be broken / out-of-date)
if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImageStaticPath); err != nil {
return fmt.Errorf("error removing static emoji from storage: %v", err)
return gtserror.Newf("error removing static emoji from storage: %v", err)
}
}
@ -308,7 +309,7 @@ func (p *ProcessingEmoji) finish(ctx context.Context) error {
// Stream-encode the PNG static image into storage.
sz, err := p.mgr.state.Storage.PutStream(ctx, p.emoji.ImageStaticPath, enc)
if err != nil {
return fmt.Errorf("error stream-encoding static emoji to storage: %w", err)
return gtserror.Newf("error stream-encoding static emoji to storage: %w", err)
}
// Set written image size.

View file

@ -30,6 +30,7 @@ import (
"github.com/disintegration/imaging"
"github.com/h2non/filetype"
terminator "github.com/superseriousbusiness/exif-terminator"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/uris"
@ -145,7 +146,7 @@ func (p *ProcessingMedia) store(ctx context.Context) error {
// Load media from provided data fun
rc, sz, err := p.dataFn(ctx)
if err != nil {
return fmt.Errorf("error executing data function: %w", err)
return gtserror.Newf("error executing data function: %w", err)
}
defer func() {
@ -162,13 +163,13 @@ func (p *ProcessingMedia) store(ctx context.Context) error {
// Read the first 261 header bytes into buffer.
if _, err := io.ReadFull(rc, hdrBuf); err != nil {
return fmt.Errorf("error reading incoming media: %w", err)
return gtserror.Newf("error reading incoming media: %w", err)
}
// Parse file type info from header buffer.
info, err := filetype.Match(hdrBuf)
if err != nil {
return fmt.Errorf("error parsing file type: %w", err)
return gtserror.Newf("error parsing file type: %w", err)
}
// Recombine header bytes with remaining stream
@ -187,12 +188,12 @@ func (p *ProcessingMedia) store(ctx context.Context) error {
// A file size was provided so we can clean exif data from image.
r, err = terminator.Terminate(r, int(sz), info.Extension)
if err != nil {
return fmt.Errorf("error cleaning exif data: %w", err)
return gtserror.Newf("error cleaning exif data: %w", err)
}
}
default:
return fmt.Errorf("unsupported file type: %s", info.Extension)
return gtserror.Newf("unsupported file type: %s", info.Extension)
}
// Calculate attachment file path.
@ -211,14 +212,14 @@ func (p *ProcessingMedia) store(ctx context.Context) error {
// Attempt to remove existing media at storage path (might be broken / out-of-date)
if err := p.mgr.state.Storage.Delete(ctx, p.media.File.Path); err != nil {
return fmt.Errorf("error removing media from storage: %v", err)
return gtserror.Newf("error removing media from storage: %v", err)
}
}
// Write the final image reader stream to our storage.
sz, err = p.mgr.state.Storage.PutStream(ctx, p.media.File.Path, r)
if err != nil {
return fmt.Errorf("error writing media to storage: %w", err)
return gtserror.Newf("error writing media to storage: %w", err)
}
// Set written image size.
@ -245,7 +246,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error {
// Fetch a stream to the original file in storage.
rc, err := p.mgr.state.Storage.GetStream(ctx, p.media.File.Path)
if err != nil {
return fmt.Errorf("error loading file from storage: %w", err)
return gtserror.Newf("error loading file from storage: %w", err)
}
defer rc.Close()
@ -256,7 +257,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error {
case mimeImageJpeg, mimeImageGif, mimeImageWebp:
fullImg, err = decodeImage(rc, imaging.AutoOrientation(true))
if err != nil {
return fmt.Errorf("error decoding image: %w", err)
return gtserror.Newf("error decoding image: %w", err)
}
// .png image (requires ancillary chunk stripping)
@ -265,14 +266,14 @@ func (p *ProcessingMedia) finish(ctx context.Context) error {
Reader: rc,
}, imaging.AutoOrientation(true))
if err != nil {
return fmt.Errorf("error decoding image: %w", err)
return gtserror.Newf("error decoding image: %w", err)
}
// .mp4 video type
case mimeVideoMp4:
video, err := decodeVideoFrame(rc)
if err != nil {
return fmt.Errorf("error decoding video: %w", err)
return gtserror.Newf("error decoding video: %w", err)
}
// Set video frame as image.
@ -286,7 +287,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error {
// The image should be in-memory by now.
if err := rc.Close(); err != nil {
return fmt.Errorf("error closing file: %w", err)
return gtserror.Newf("error closing file: %w", err)
}
// Set full-size dimensions in attachment info.
@ -314,7 +315,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error {
// Blurhash needs generating from thumb.
hash, err := thumbImg.Blurhash()
if err != nil {
return fmt.Errorf("error generating blurhash: %w", err)
return gtserror.Newf("error generating blurhash: %w", err)
}
// Set the attachment blurhash.
@ -326,7 +327,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error {
// Attempt to remove existing thumbnail at storage path (might be broken / out-of-date)
if err := p.mgr.state.Storage.Delete(ctx, p.media.Thumbnail.Path); err != nil {
return fmt.Errorf("error removing thumbnail from storage: %v", err)
return gtserror.Newf("error removing thumbnail from storage: %v", err)
}
}
@ -338,7 +339,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error {
// Stream-encode the JPEG thumbnail image into storage.
sz, err := p.mgr.state.Storage.PutStream(ctx, p.media.Thumbnail.Path, enc)
if err != nil {
return fmt.Errorf("error stream-encoding thumbnail to storage: %w", err)
return gtserror.Newf("error stream-encoding thumbnail to storage: %w", err)
}
// Fill in remaining thumbnail now it's stored

View file

@ -1,373 +0,0 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package media
import (
"context"
"errors"
"fmt"
"time"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
const (
selectPruneLimit = 50 // Amount of media entries to select at a time from the db when pruning.
unusedLocalAttachmentDays = 3 // Number of days to keep local media in storage if not attached to a status.
)
// PruneAll runs all of the below pruning/uncacheing functions, and then cleans up any resulting
// empty directories from the storage driver. It can be called as a shortcut for calling the below
// pruning functions one by one.
//
// If blocking is true, then any errors encountered during the prune will be combined + returned to
// the caller. If blocking is false, the prune is run in the background and errors are just logged
// instead.
func (m *Manager) PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error {
const dry = false
f := func(innerCtx context.Context) error {
errs := gtserror.MultiError{}
pruned, err := m.PruneUnusedLocal(innerCtx, dry)
if err != nil {
errs = append(errs, fmt.Sprintf("error pruning unused local media (%s)", err))
} else {
log.Infof(ctx, "pruned %d unused local media", pruned)
}
pruned, err = m.PruneUnusedRemote(innerCtx, dry)
if err != nil {
errs = append(errs, fmt.Sprintf("error pruning unused remote media: (%s)", err))
} else {
log.Infof(ctx, "pruned %d unused remote media", pruned)
}
pruned, err = m.UncacheRemote(innerCtx, mediaCacheRemoteDays, dry)
if err != nil {
errs = append(errs, fmt.Sprintf("error uncacheing remote media older than %d day(s): (%s)", mediaCacheRemoteDays, err))
} else {
log.Infof(ctx, "uncached %d remote media older than %d day(s)", pruned, mediaCacheRemoteDays)
}
pruned, err = m.PruneOrphaned(innerCtx, dry)
if err != nil {
errs = append(errs, fmt.Sprintf("error pruning orphaned media: (%s)", err))
} else {
log.Infof(ctx, "pruned %d orphaned media", pruned)
}
if err := m.state.Storage.Storage.Clean(innerCtx); err != nil {
errs = append(errs, fmt.Sprintf("error cleaning storage: (%s)", err))
} else {
log.Info(ctx, "cleaned storage")
}
return errs.Combine()
}
if blocking {
return f(ctx)
}
go func() {
if err := f(context.Background()); err != nil {
log.Error(ctx, err)
}
}()
return nil
}
// PruneUnusedRemote prunes unused/out of date headers and avatars cached on this instance.
//
// The returned int is the amount of media that was pruned by this function.
func (m *Manager) PruneUnusedRemote(ctx context.Context, dry bool) (int, error) {
var (
totalPruned int
maxID string
attachments []*gtsmodel.MediaAttachment
err error
)
// We don't know in advance how many remote attachments will meet
// our criteria for being 'unused'. So a dry run in this case just
// means we iterate through as normal, but do nothing with each entry
// instead of removing it. Define this here so we don't do the 'if dry'
// check inside the loop a million times.
var f func(ctx context.Context, attachment *gtsmodel.MediaAttachment) error
if !dry {
f = m.deleteAttachment
} else {
f = func(_ context.Context, _ *gtsmodel.MediaAttachment) error {
return nil // noop
}
}
for attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) {
maxID = attachments[len(attachments)-1].ID // use the id of the last attachment in the slice as the next 'maxID' value
for _, attachment := range attachments {
// Retrieve owning account if possible.
var account *gtsmodel.Account
if accountID := attachment.AccountID; accountID != "" {
account, err = m.state.DB.GetAccountByID(ctx, attachment.AccountID)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
// Only return on a real error.
return 0, fmt.Errorf("PruneUnusedRemote: error fetching account with id %s: %w", accountID, err)
}
}
// Prune each attachment that meets one of the following criteria:
// - Has no owning account in the database.
// - Is a header but isn't the owning account's current header.
// - Is an avatar but isn't the owning account's current avatar.
if account == nil ||
(*attachment.Header && attachment.ID != account.HeaderMediaAttachmentID) ||
(*attachment.Avatar && attachment.ID != account.AvatarMediaAttachmentID) {
if err := f(ctx, attachment); err != nil {
return totalPruned, err
}
totalPruned++
}
}
}
// Make sure we don't have a real error when we leave the loop.
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return totalPruned, err
}
return totalPruned, nil
}
// PruneOrphaned prunes files that exist in storage but which do not have a corresponding
// entry in the database.
//
// If dry is true, then nothing will be changed, only the amount that *would* be removed
// is returned to the caller.
func (m *Manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) {
// Emojis are stored under the instance account, so we
// need the ID of the instance account for the next part.
instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "")
if err != nil {
return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err)
}
instanceAccountID := instanceAccount.ID
var orphanedKeys []string
// Keys in storage will look like the following format:
// `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]`
// We can filter out keys we're not interested in by matching through a regex.
if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, key string) error {
if !regexes.FilePath.MatchString(key) {
// This is not our expected key format.
return nil
}
// Check whether this storage entry is orphaned.
orphaned, err := m.orphaned(ctx, key, instanceAccountID)
if err != nil {
return fmt.Errorf("error checking orphaned status: %w", err)
}
if orphaned {
// Add this orphaned entry to list of keys.
orphanedKeys = append(orphanedKeys, key)
}
return nil
}); err != nil {
return 0, fmt.Errorf("PruneOrphaned: error walking keys: %w", err)
}
totalPruned := len(orphanedKeys)
if dry {
// Dry run: don't remove anything.
return totalPruned, nil
}
// This is not a drill! We have to delete stuff!
return m.removeFiles(ctx, orphanedKeys...)
}
func (m *Manager) orphaned(ctx context.Context, key string, instanceAccountID string) (bool, error) {
pathParts := regexes.FilePath.FindStringSubmatch(key)
if len(pathParts) != 6 {
// This doesn't match our expectations so
// it wasn't created by gts; ignore it.
return false, nil
}
var (
mediaType = pathParts[2]
mediaID = pathParts[4]
orphaned = false
)
// Look for keys in storage that we don't have an attachment for.
switch Type(mediaType) {
case TypeAttachment, TypeHeader, TypeAvatar:
if _, err := m.state.DB.GetAttachmentByID(ctx, mediaID); err != nil {
if !errors.Is(err, db.ErrNoEntries) {
return false, fmt.Errorf("error calling GetAttachmentByID: %w", err)
}
orphaned = true
}
case TypeEmoji:
// Look using the static URL for the emoji. Emoji images can change, so
// the MEDIA_ID part of the key for emojis will not necessarily correspond
// to the file that's currently being used as the emoji image.
staticURL := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng)
if _, err := m.state.DB.GetEmojiByStaticURL(ctx, staticURL); err != nil {
if !errors.Is(err, db.ErrNoEntries) {
return false, fmt.Errorf("error calling GetEmojiByStaticURL: %w", err)
}
orphaned = true
}
}
return orphaned, nil
}
// UncacheRemote uncaches all remote media attachments older than the given amount of days.
//
// In this context, uncacheing means deleting media files from storage and marking the attachment
// as cached=false in the database.
//
// If 'dry' is true, then only a dry run will be performed: nothing will actually be changed.
//
// The returned int is the amount of media that was/would be uncached by this function.
func (m *Manager) UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error) {
if olderThanDays < 0 {
return 0, nil
}
olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays))
if dry {
// Dry run, just count eligible entries without removing them.
return m.state.DB.CountRemoteOlderThan(ctx, olderThan)
}
var (
totalPruned int
attachments []*gtsmodel.MediaAttachment
err error
)
for attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) {
olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value
for _, attachment := range attachments {
if err := m.uncacheAttachment(ctx, attachment); err != nil {
return totalPruned, err
}
totalPruned++
}
}
// Make sure we don't have a real error when we leave the loop.
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return totalPruned, err
}
return totalPruned, nil
}
// PruneUnusedLocal prunes unused media attachments that were uploaded by
// a user on this instance, but never actually attached to a status, or attached but
// later detached.
//
// The returned int is the amount of media that was pruned by this function.
func (m *Manager) PruneUnusedLocal(ctx context.Context, dry bool) (int, error) {
olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(unusedLocalAttachmentDays))
if dry {
// Dry run, just count eligible entries without removing them.
return m.state.DB.CountLocalUnattachedOlderThan(ctx, olderThan)
}
var (
totalPruned int
attachments []*gtsmodel.MediaAttachment
err error
)
for attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit) {
olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value
for _, attachment := range attachments {
if err := m.deleteAttachment(ctx, attachment); err != nil {
return totalPruned, err
}
totalPruned++
}
}
// Make sure we don't have a real error when we leave the loop.
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return totalPruned, err
}
return totalPruned, nil
}
/*
Handy little helpers
*/
func (m *Manager) deleteAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil {
return err
}
// Delete attachment completely.
return m.state.DB.DeleteAttachment(ctx, attachment.ID)
}
func (m *Manager) uncacheAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil {
return err
}
// Update attachment to reflect that we no longer have it cached.
attachment.Cached = func() *bool { i := false; return &i }()
return m.state.DB.UpdateAttachment(ctx, attachment, "cached")
}
func (m *Manager) removeFiles(ctx context.Context, keys ...string) (int, error) {
errs := make(gtserror.MultiError, 0, len(keys))
for _, key := range keys {
if err := m.state.Storage.Delete(ctx, key); err != nil && !errors.Is(err, storage.ErrNotFound) {
errs = append(errs, "storage error removing "+key+": "+err.Error())
}
}
return len(keys) - len(errs), errs.Combine()
}

View file

@ -1,357 +0,0 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package media_test
import (
"bytes"
"context"
"io"
"os"
"testing"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/db"
gtsmodel "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
type PruneTestSuite struct {
MediaStandardTestSuite
}
func (suite *PruneTestSuite) TestPruneOrphanedDry() {
// add a big orphan panda to store
b, err := os.ReadFile("./test/big-panda.gif")
if err != nil {
suite.FailNow(err.Error())
}
pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil {
suite.FailNow(err.Error())
}
// dry run should show up 1 orphaned panda
totalPruned, err := suite.manager.PruneOrphaned(context.Background(), true)
suite.NoError(err)
suite.Equal(1, totalPruned)
// panda should still be in storage
hasKey, err := suite.storage.Has(context.Background(), pandaPath)
suite.NoError(err)
suite.True(hasKey)
}
func (suite *PruneTestSuite) TestPruneOrphanedMoist() {
// add a big orphan panda to store
b, err := os.ReadFile("./test/big-panda.gif")
if err != nil {
suite.FailNow(err.Error())
}
pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil {
suite.FailNow(err.Error())
}
// should show up 1 orphaned panda
totalPruned, err := suite.manager.PruneOrphaned(context.Background(), false)
suite.NoError(err)
suite.Equal(1, totalPruned)
// panda should no longer be in storage
hasKey, err := suite.storage.Has(context.Background(), pandaPath)
suite.NoError(err)
suite.False(hasKey)
}
func (suite *PruneTestSuite) TestPruneUnusedLocal() {
testAttachment := suite.testAttachments["local_account_1_unattached_1"]
suite.True(*testAttachment.Cached)
totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false)
suite.NoError(err)
suite.Equal(1, totalPruned)
_, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID)
suite.ErrorIs(err, db.ErrNoEntries)
}
func (suite *PruneTestSuite) TestPruneUnusedLocalDry() {
testAttachment := suite.testAttachments["local_account_1_unattached_1"]
suite.True(*testAttachment.Cached)
totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), true)
suite.NoError(err)
suite.Equal(1, totalPruned)
_, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID)
suite.NoError(err)
}
func (suite *PruneTestSuite) TestPruneRemoteTwice() {
totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false)
suite.NoError(err)
suite.Equal(1, totalPruned)
// final prune should prune nothing, since the first prune already happened
totalPrunedAgain, err := suite.manager.PruneUnusedLocal(context.Background(), false)
suite.NoError(err)
suite.Equal(0, totalPrunedAgain)
}
func (suite *PruneTestSuite) TestPruneOneNonExistent() {
ctx := context.Background()
testAttachment := suite.testAttachments["local_account_1_unattached_1"]
// Delete this attachment cached on disk
media, err := suite.db.GetAttachmentByID(ctx, testAttachment.ID)
suite.NoError(err)
suite.True(*media.Cached)
err = suite.storage.Delete(ctx, media.File.Path)
suite.NoError(err)
// Now attempt to prune for item with db entry no file
totalPruned, err := suite.manager.PruneUnusedLocal(ctx, false)
suite.NoError(err)
suite.Equal(1, totalPruned)
}
func (suite *PruneTestSuite) TestPruneUnusedRemote() {
ctx := context.Background()
// start by clearing zork's avatar + header
zorkOldAvatar := suite.testAttachments["local_account_1_avatar"]
zorkOldHeader := suite.testAttachments["local_account_1_avatar"]
zork := suite.testAccounts["local_account_1"]
zork.AvatarMediaAttachmentID = ""
zork.HeaderMediaAttachmentID = ""
if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
panic(err)
}
totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false)
suite.NoError(err)
suite.Equal(2, totalPruned)
// media should no longer be stored
_, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
// attachments should no longer be in the db
_, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID)
suite.ErrorIs(err, db.ErrNoEntries)
_, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID)
suite.ErrorIs(err, db.ErrNoEntries)
}
func (suite *PruneTestSuite) TestPruneUnusedRemoteTwice() {
ctx := context.Background()
// start by clearing zork's avatar + header
zork := suite.testAccounts["local_account_1"]
zork.AvatarMediaAttachmentID = ""
zork.HeaderMediaAttachmentID = ""
if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
panic(err)
}
totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false)
suite.NoError(err)
suite.Equal(2, totalPruned)
// final prune should prune nothing, since the first prune already happened
totalPruned, err = suite.manager.PruneUnusedRemote(ctx, false)
suite.NoError(err)
suite.Equal(0, totalPruned)
}
func (suite *PruneTestSuite) TestPruneUnusedRemoteMultipleAccounts() {
ctx := context.Background()
// start by clearing zork's avatar + header
zorkOldAvatar := suite.testAttachments["local_account_1_avatar"]
zorkOldHeader := suite.testAttachments["local_account_1_avatar"]
zork := suite.testAccounts["local_account_1"]
zork.AvatarMediaAttachmentID = ""
zork.HeaderMediaAttachmentID = ""
if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
panic(err)
}
// set zork's unused header as belonging to turtle
turtle := suite.testAccounts["local_account_1"]
zorkOldHeader.AccountID = turtle.ID
if err := suite.db.UpdateByID(ctx, zorkOldHeader, zorkOldHeader.ID, "account_id"); err != nil {
panic(err)
}
totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false)
suite.NoError(err)
suite.Equal(2, totalPruned)
// media should no longer be stored
_, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
// attachments should no longer be in the db
_, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID)
suite.ErrorIs(err, db.ErrNoEntries)
_, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID)
suite.ErrorIs(err, db.ErrNoEntries)
}
func (suite *PruneTestSuite) TestUncacheRemote() {
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
suite.True(*testStatusAttachment.Cached)
testHeader := suite.testAttachments["remote_account_3_header"]
suite.True(*testHeader.Cached)
totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, false)
suite.NoError(err)
suite.Equal(2, totalUncached)
uncachedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testStatusAttachment.ID)
suite.NoError(err)
suite.False(*uncachedAttachment.Cached)
uncachedAttachment, err = suite.db.GetAttachmentByID(context.Background(), testHeader.ID)
suite.NoError(err)
suite.False(*uncachedAttachment.Cached)
}
func (suite *PruneTestSuite) TestUncacheRemoteDry() {
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
suite.True(*testStatusAttachment.Cached)
testHeader := suite.testAttachments["remote_account_3_header"]
suite.True(*testHeader.Cached)
totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, true)
suite.NoError(err)
suite.Equal(2, totalUncached)
uncachedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testStatusAttachment.ID)
suite.NoError(err)
suite.True(*uncachedAttachment.Cached)
uncachedAttachment, err = suite.db.GetAttachmentByID(context.Background(), testHeader.ID)
suite.NoError(err)
suite.True(*uncachedAttachment.Cached)
}
func (suite *PruneTestSuite) TestUncacheRemoteTwice() {
totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, false)
suite.NoError(err)
suite.Equal(2, totalUncached)
// final uncache should uncache nothing, since the first uncache already happened
totalUncachedAgain, err := suite.manager.UncacheRemote(context.Background(), 1, false)
suite.NoError(err)
suite.Equal(0, totalUncachedAgain)
}
func (suite *PruneTestSuite) TestUncacheAndRecache() {
ctx := context.Background()
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
testHeader := suite.testAttachments["remote_account_3_header"]
totalUncached, err := suite.manager.UncacheRemote(ctx, 1, false)
suite.NoError(err)
suite.Equal(2, totalUncached)
// media should no longer be stored
_, err = suite.storage.Get(ctx, testStatusAttachment.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testStatusAttachment.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
// now recache the image....
data := func(_ context.Context) (io.ReadCloser, int64, error) {
// load bytes from a test image
b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpg")
if err != nil {
panic(err)
}
return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil
}
for _, original := range []*gtsmodel.MediaAttachment{
testStatusAttachment,
testHeader,
} {
processingRecache, err := suite.manager.PreProcessMediaRecache(ctx, data, original.ID)
suite.NoError(err)
// synchronously load the recached attachment
recachedAttachment, err := processingRecache.LoadAttachment(ctx)
suite.NoError(err)
suite.NotNil(recachedAttachment)
// recachedAttachment should be basically the same as the old attachment
suite.True(*recachedAttachment.Cached)
suite.Equal(original.ID, recachedAttachment.ID)
suite.Equal(original.File.Path, recachedAttachment.File.Path) // file should be stored in the same place
suite.Equal(original.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail
suite.EqualValues(original.FileMeta, recachedAttachment.FileMeta) // and the filemeta should be the same
// recached files should be back in storage
_, err = suite.storage.Get(ctx, recachedAttachment.File.Path)
suite.NoError(err)
_, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path)
suite.NoError(err)
}
}
func (suite *PruneTestSuite) TestUncacheOneNonExistent() {
ctx := context.Background()
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
// Delete this attachment cached on disk
media, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID)
suite.NoError(err)
suite.True(*media.Cached)
err = suite.storage.Delete(ctx, media.File.Path)
suite.NoError(err)
// Now attempt to uncache remote for item with db entry no file
totalUncached, err := suite.manager.UncacheRemote(ctx, 1, false)
suite.NoError(err)
suite.Equal(2, totalUncached)
}
func TestPruneOrphanedTestSuite(t *testing.T) {
suite.Run(t, &PruneTestSuite{})
}

View file

@ -52,7 +52,7 @@ func (m *Manager) RefetchEmojis(ctx context.Context, domain string, dereferenceM
// page through emojis 20 at a time, looking for those with missing images
for {
// Fetch next block of emojis from database
emojis, err := m.state.DB.GetEmojis(ctx, domain, false, true, "", maxShortcodeDomain, "", 20)
emojis, err := m.state.DB.GetEmojisBy(ctx, domain, false, true, "", maxShortcodeDomain, "", 20)
if err != nil {
if !errors.Is(err, db.ErrNoEntries) {
// an actual error has occurred