[bugfix/chore] oauth entropy fix + media cleanup tasks rewrite (#1853)

kim 2023-06-22 20:46:36 +01:00, committed by GitHub
commit 9a22102fa8 (GPG key ID: 4AEE18F83AFDEB23)
38 changed files with 2076 additions and 1090 deletions
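For orientation before the file-by-file diff: the commit adds a new internal/cleaner package. The sketch below (a hypothetical wrapper, based only on the code shown in this commit) shows how the cleaner is constructed from application state and how its grouped media / emoji utilities are driven; in the server itself the equivalent calls are made by the daily job registered inside cleaner.New() (see scheduleJobs in cleaner.go below).

package example

import (
	"context"

	"github.com/superseriousbusiness/gotosocial/internal/cleaner"
	"github.com/superseriousbusiness/gotosocial/internal/config"
	"github.com/superseriousbusiness/gotosocial/internal/state"
)

// runCleanup is a hypothetical helper showing the new API surface:
// construct the cleaner from application state, then run the grouped
// media and emoji utilities synchronously.
func runCleanup(ctx context.Context, st *state.State) {
	c := cleaner.New(st) // also schedules the daily midnight cleaning job
	c.Media().All(ctx, config.GetMediaRemoteCacheDays())
	c.Emoji().All(ctx)
}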

internal/cleaner/cleaner.go (new file)
@@ -0,0 +1,135 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cleaner
import (
"context"
"errors"
"time"
"codeberg.org/gruf/go-runners"
"codeberg.org/gruf/go-sched"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/state"
)
const (
selectLimit = 50
)
type Cleaner struct {
state *state.State
emoji Emoji
media Media
}
func New(state *state.State) *Cleaner {
c := new(Cleaner)
c.state = state
c.emoji.Cleaner = c
c.media.Cleaner = c
scheduleJobs(c)
return c
}
// Emoji returns the emoji set of cleaner utilities.
func (c *Cleaner) Emoji() *Emoji {
return &c.emoji
}
// Media returns the media set of cleaner utilities.
func (c *Cleaner) Media() *Media {
return &c.media
}
// checkFiles checks whether each of the provided files exists in storage, calling onMissing() and returning true if any are missing.
func (c *Cleaner) checkFiles(ctx context.Context, onMissing func() error, files ...string) (bool, error) {
for _, file := range files {
// Check whether each file exists in storage.
have, err := c.state.Storage.Has(ctx, file)
if err != nil {
return false, gtserror.Newf("error checking storage for %s: %w", file, err)
} else if !have {
// Missing files, perform hook.
return true, onMissing()
}
}
return false, nil
}
// removeFiles removes the provided files, returning the number of files removed.
func (c *Cleaner) removeFiles(ctx context.Context, files ...string) (int, error) {
if gtscontext.DryRun(ctx) {
// Dry run, do nothing.
return len(files), nil
}
var errs gtserror.MultiError
for _, path := range files {
// Remove each provided storage path.
log.Debugf(ctx, "removing file: %s", path)
err := c.state.Storage.Delete(ctx, path)
if err != nil && !errors.Is(err, storage.ErrNotFound) {
errs.Appendf("error removing %s: %v", path, err)
}
}
// Calculate no. files removed.
diff := len(files) - len(errs)
// Wrap the combined error slice.
if err := errs.Combine(); err != nil {
return diff, gtserror.Newf("error(s) removing files: %w", err)
}
return diff, nil
}
func scheduleJobs(c *Cleaner) {
const day = time.Hour * 24
// Calculate closest midnight.
now := time.Now()
midnight := now.Round(day)
if midnight.Before(now) {
// since <= 11:59am rounds down.
midnight = midnight.Add(day)
}
// Get ctx associated with scheduler run state.
done := c.state.Workers.Scheduler.Done()
doneCtx := runners.CancelCtx(done)
// TODO: we'll need to do some thinking to make these
// jobs restartable if we want to implement reloads in
// the future that call Workers.Stop() -> Workers.Start().
// Schedule the cleaning tasks to execute every day at midnight.
c.state.Workers.Scheduler.Schedule(sched.NewJob(func(start time.Time) {
log.Info(nil, "starting media clean")
c.Media().All(doneCtx, config.GetMediaRemoteCacheDays())
c.Emoji().All(doneCtx)
log.Infof(nil, "finished media clean after %s", time.Since(start))
}).EveryAt(midnight, day))
}
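Every destructive path above (and in the files below) checks gtscontext.DryRun(ctx); the tests further down opt in with gtscontext.SetDryRun. A minimal sketch of driving a prune as a dry run (the dryRunPrune helper is hypothetical):

package example

import (
	"context"

	"github.com/superseriousbusiness/gotosocial/internal/cleaner"
	"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
)

// dryRunPrune runs an orphaned-media prune with the dry-run flag set
// on the context: counts are still calculated and returned, but
// removeFiles above and the database deletes become no-ops.
func dryRunPrune(c *cleaner.Cleaner) (int, error) {
	ctx := gtscontext.SetDryRun(context.Background())
	return c.Media().PruneOrphaned(ctx)
}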

internal/cleaner/emoji.go (new file)
@@ -0,0 +1,238 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cleaner
import (
"context"
"errors"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
// Emoji encompasses a set of
// emoji cleanup / admin utils.
type Emoji struct {
*Cleaner
}
// All will execute all cleaner.Emoji utilities synchronously, including output logging.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (e *Emoji) All(ctx context.Context) {
e.LogPruneMissing(ctx)
e.LogFixBroken(ctx)
}
// LogPruneMissing performs emoji.PruneMissing(...), logging the start and outcome.
func (e *Emoji) LogPruneMissing(ctx context.Context) {
log.Info(ctx, "start")
if n, err := e.PruneMissing(ctx); err != nil {
log.Error(ctx, err)
} else {
log.Infof(ctx, "pruned: %d", n)
}
}
// LogFixBroken performs emoji.FixBroken(...), logging the start and outcome.
func (e *Emoji) LogFixBroken(ctx context.Context) {
log.Info(ctx, "start")
if n, err := e.FixBroken(ctx); err != nil {
log.Error(ctx, err)
} else {
log.Infof(ctx, "fixed: %d", n)
}
}
// PruneMissing will delete emoji with missing files from the database and storage driver.
// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function
// should be updated to match media.FixCacheStat() if we ever support emoji uncaching.
func (e *Emoji) PruneMissing(ctx context.Context) (int, error) {
var (
total int
maxID string
)
for {
// Fetch the next batch of emoji media up to next ID.
emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting emojis: %w", err)
}
if len(emojis) == 0 {
// reached end.
break
}
// Use last as the next 'maxID' value.
maxID = emojis[len(emojis)-1].ID
for _, emoji := range emojis {
// Check / fix missing emoji media.
fixed, err := e.pruneMissing(ctx, emoji)
if err != nil {
return total, err
}
if fixed {
// Update
// count.
total++
}
}
}
return total, nil
}
// FixBroken will check all emojis for valid related models (e.g. category).
// Broken emojis will be automatically updated to remove now-missing models.
// Context will be checked for `gtscontext.DryRun()` to perform the action.
func (e *Emoji) FixBroken(ctx context.Context) (int, error) {
var (
total int
maxID string
)
for {
// Fetch the next batch of emoji media up to next ID.
emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting emojis: %w", err)
}
if len(emojis) == 0 {
// reached end.
break
}
// Use last as the next 'maxID' value.
maxID = emojis[len(emojis)-1].ID
for _, emoji := range emojis {
// Check / fix broken emoji.
fixed, err := e.fixBroken(ctx, emoji)
if err != nil {
return total, err
}
if fixed {
// Update
// count.
total++
}
}
}
return total, nil
}
func (e *Emoji) pruneMissing(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
return e.checkFiles(ctx, func() error {
// Emoji missing files, delete it.
// NOTE: if we ever support uncaching
// of emojis, change to e.uncache().
// In that case we should also rename
// this function to match the media
// equivalent -> fixCacheState().
log.WithContext(ctx).
WithField("emoji", emoji.ID).
Debug("deleting due to missing emoji")
return e.delete(ctx, emoji)
},
emoji.ImageStaticPath,
emoji.ImagePath,
)
}
func (e *Emoji) fixBroken(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
// Check we have the required category for emoji.
_, missing, err := e.getRelatedCategory(ctx, emoji)
if err != nil {
return false, err
}
if missing {
if gtscontext.DryRun(ctx) {
// Dry run, do nothing.
return true, nil
}
// Remove related category.
emoji.CategoryID = ""
// Update emoji model in the database to remove category ID.
log.Debugf(ctx, "fixing missing emoji category: %s", emoji.ID)
if err := e.state.DB.UpdateEmoji(ctx, emoji, "category_id"); err != nil {
return true, gtserror.Newf("error updating emoji: %w", err)
}
return true, nil
}
return false, nil
}
func (e *Emoji) getRelatedCategory(ctx context.Context, emoji *gtsmodel.Emoji) (*gtsmodel.EmojiCategory, bool, error) {
if emoji.CategoryID == "" {
// no related category.
return nil, false, nil
}
// Load the category related to this emoji.
category, err := e.state.DB.GetEmojiCategory(
gtscontext.SetBarebones(ctx),
emoji.CategoryID,
)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return nil, false, gtserror.Newf("error fetching category by id %s: %w", emoji.CategoryID, err)
}
if category == nil {
// Category is missing.
return nil, true, nil
}
return category, false, nil
}
func (e *Emoji) delete(ctx context.Context, emoji *gtsmodel.Emoji) error {
if gtscontext.DryRun(ctx) {
// Dry run, do nothing.
return nil
}
// Remove emoji and static files.
_, err := e.removeFiles(ctx,
emoji.ImageStaticPath,
emoji.ImagePath,
)
if err != nil {
return gtserror.Newf("error removing emoji files: %w", err)
}
// Delete emoji entirely from the database by its ID.
if err := e.state.DB.DeleteEmojiByID(ctx, emoji.ID); err != nil {
return gtserror.Newf("error deleting emoji: %w", err)
}
return nil
}
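PruneMissing and FixBroken above (and the media utilities in the next file) share the same batched paging shape: fetch up to selectLimit rows relative to a maxID cursor, stop on an empty page, and advance the cursor to the last returned ID. A generic stand-alone sketch of that loop, purely illustrative (forEachBatch and its callbacks are hypothetical):

package example

import "context"

// forEachBatch illustrates the maxID paging loop used by the cleaner
// utilities: fetch a batch, stop on an empty page, move the cursor to
// the last item's ID, then process each item in the batch.
func forEachBatch[T any](
	ctx context.Context,
	limit int,
	fetch func(ctx context.Context, maxID string, limit int) ([]T, error),
	id func(T) string,
	process func(ctx context.Context, item T) error,
) error {
	var maxID string
	for {
		batch, err := fetch(ctx, maxID, limit)
		if err != nil {
			return err
		}
		if len(batch) == 0 {
			// Reached end.
			return nil
		}
		// Use last ID as the next cursor value.
		maxID = id(batch[len(batch)-1])
		for _, item := range batch {
			if err := process(ctx, item); err != nil {
				return err
			}
		}
	}
}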

internal/cleaner/media.go (new file)
@@ -0,0 +1,547 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cleaner
import (
"context"
"errors"
"time"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
// Media encompasses a set of
// media cleanup / admin utils.
type Media struct {
*Cleaner
}
// All will execute all cleaner.Media utilities synchronously, including output logging.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (m *Media) All(ctx context.Context, maxRemoteDays int) {
t := time.Now().Add(-24 * time.Hour * time.Duration(maxRemoteDays))
m.LogUncacheRemote(ctx, t)
m.LogPruneOrphaned(ctx)
m.LogPruneUnused(ctx)
m.LogFixCacheStates(ctx)
_ = m.state.Storage.Storage.Clean(ctx)
}
// LogUncacheRemote performs Media.UncacheRemote(...), logging the start and outcome.
func (m *Media) LogUncacheRemote(ctx context.Context, olderThan time.Time) {
log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp))
if n, err := m.UncacheRemote(ctx, olderThan); err != nil {
log.Error(ctx, err)
} else {
log.Infof(ctx, "uncached: %d", n)
}
}
// LogPruneOrphaned performs Media.PruneOrphaned(...), logging the start and outcome.
func (m *Media) LogPruneOrphaned(ctx context.Context) {
log.Info(ctx, "start")
if n, err := m.PruneOrphaned(ctx); err != nil {
log.Error(ctx, err)
} else {
log.Infof(ctx, "pruned: %d", n)
}
}
// LogPruneUnused performs Media.PruneUnused(...), logging the start and outcome.
func (m *Media) LogPruneUnused(ctx context.Context) {
log.Info(ctx, "start")
if n, err := m.PruneUnused(ctx); err != nil {
log.Error(ctx, err)
} else {
log.Infof(ctx, "pruned: %d", n)
}
}
// LogFixCacheStates performs Media.FixCacheStates(...), logging the start and outcome.
func (m *Media) LogFixCacheStates(ctx context.Context) {
log.Info(ctx, "start")
if n, err := m.FixCacheStates(ctx); err != nil {
log.Error(ctx, err)
} else {
log.Infof(ctx, "fixed: %d", n)
}
}
// PruneOrphaned will delete orphaned files from storage (i.e. media missing a database entry).
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (m *Media) PruneOrphaned(ctx context.Context) (int, error) {
var files []string
// All media files in storage will have path fitting: {$account}/{$type}/{$size}/{$id}.{$ext}
if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, path string) error {
if !regexes.FilePath.MatchString(path) {
// This is not our expected media
// path format, skip this one.
return nil
}
// Check whether this entry is orphaned.
orphaned, err := m.isOrphaned(ctx, path)
if err != nil {
return gtserror.Newf("error checking orphaned status: %w", err)
}
if orphaned {
// Add this orphaned entry.
files = append(files, path)
}
return nil
}); err != nil {
return 0, gtserror.Newf("error walking storage: %w", err)
}
// Delete all orphaned files from storage.
return m.removeFiles(ctx, files...)
}
// PruneUnused will delete all unused media attachments from the database and storage driver.
// Media is marked as unused if it is not attached to any status or account, or if the owning account is suspended.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (m *Media) PruneUnused(ctx context.Context) (int, error) {
var (
total int
maxID string
)
for {
// Fetch the next batch of media attachments up to next max ID.
attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting attachments: %w", err)
}
if len(attachments) == 0 {
// reached end.
break
}
// Use last ID as the next 'maxID' value.
maxID = attachments[len(attachments)-1].ID
for _, media := range attachments {
// Check / prune unused media attachment.
fixed, err := m.pruneUnused(ctx, media)
if err != nil {
return total, err
}
if fixed {
// Update
// count.
total++
}
}
}
return total, nil
}
// UncacheRemote will uncache all remote media attachments older than the given time.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, error) {
var total int
// Drop time by a minute to make the
// olderThan search effectively inclusive.
olderThan = olderThan.Add(-time.Minute)
// Store recent time.
mostRecent := olderThan
for {
// Fetch the next batch of attachments older than last-set time.
attachments, err := m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting remote media: %w", err)
}
if len(attachments) == 0 {
// reached end.
break
}
// Use last created-at as the next 'olderThan' value.
olderThan = attachments[len(attachments)-1].CreatedAt
for _, media := range attachments {
// Check / uncache each remote media attachment.
uncached, err := m.uncacheRemote(ctx, mostRecent, media)
if err != nil {
return total, err
}
if uncached {
// Update
// count.
total++
}
}
}
return total, nil
}
// FixCacheStates will check all media for up-to-date cache state (i.e. in storage driver).
// Media marked as cached, with any required files missing, will be automatically uncached.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (m *Media) FixCacheStates(ctx context.Context) (int, error) {
var (
total int
maxID string
)
for {
// Fetch the next batch of media attachments up to next max ID.
attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting avatars / headers: %w", err)
}
if len(attachments) == 0 {
// reached end.
break
}
// Use last ID as the next 'maxID' value.
maxID = attachments[len(attachments)-1].ID
for _, media := range attachments {
// Check / fix required media cache states.
fixed, err := m.fixCacheState(ctx, media)
if err != nil {
return total, err
}
if fixed {
// Update
// count.
total++
}
}
}
return total, nil
}
func (m *Media) isOrphaned(ctx context.Context, path string) (bool, error) {
pathParts := regexes.FilePath.FindStringSubmatch(path)
if len(pathParts) != 6 {
// This doesn't match our expectations so
// it wasn't created by gts; ignore it.
return false, nil
}
var (
// 0th -> whole match
// 1st -> account ID
mediaType = pathParts[2]
// 3rd -> media sub-type (e.g. small, static)
mediaID = pathParts[4]
// 5th -> file extension
)
// Start a log entry for media.
l := log.WithContext(ctx).
WithField("media", mediaID)
switch media.Type(mediaType) {
case media.TypeAttachment:
// Look for media in database stored by ID.
media, err := m.state.DB.GetAttachmentByID(
gtscontext.SetBarebones(ctx),
mediaID,
)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return false, gtserror.Newf("error fetching media by id %s: %w", mediaID, err)
}
if media == nil {
l.Debug("missing db entry for media")
return true, nil
}
case media.TypeEmoji:
// Generate static URL for this emoji to lookup.
staticURL := uris.GenerateURIForAttachment(
pathParts[1], // instance account ID
string(media.TypeEmoji),
string(media.SizeStatic),
mediaID,
"png",
)
// Look for emoji in database stored by static URL.
// The media ID part of the storage key for emojis can
// change for refreshed items, so search by generated URL.
emoji, err := m.state.DB.GetEmojiByStaticURL(
gtscontext.SetBarebones(ctx),
staticURL,
)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return false, gtserror.Newf("error fetching emoji by url %s: %w", staticURL, err)
}
if emoji == nil {
l.Debug("missing db entry for emoji")
return true, nil
}
}
return false, nil
}
func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) {
// Start a log entry for media.
l := log.WithContext(ctx).
WithField("media", media.ID)
// Check whether we have the required account for media.
account, missing, err := m.getRelatedAccount(ctx, media)
if err != nil {
return false, err
} else if missing {
l.Debug("deleting due to missing account")
return true, m.delete(ctx, media)
}
if account != nil {
// Related account exists for this media, check whether it is being used.
headerInUse := (*media.Header && media.ID == account.HeaderMediaAttachmentID)
avatarInUse := (*media.Avatar && media.ID == account.AvatarMediaAttachmentID)
if (headerInUse || avatarInUse) && account.SuspendedAt.IsZero() {
l.Debug("skipping as account media in use")
return false, nil
}
}
// Check whether we have the required status for media.
status, missing, err := m.getRelatedStatus(ctx, media)
if err != nil {
return false, err
} else if missing {
l.Debug("deleting due to missing status")
return true, m.delete(ctx, media)
}
if status != nil {
// Check whether still attached to status.
for _, id := range status.AttachmentIDs {
if id == media.ID {
l.Debug("skippping as attached to status")
return false, nil
}
}
}
// Media totally unused, delete it.
l.Debug("deleting unused media")
return true, m.delete(ctx, media)
}
func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) {
if !*media.Cached {
// We ignore uncached media, a
// false negative is a much better
// situation than a false positive,
// re-cache will just overwrite it.
return false, nil
}
// Start a log entry for media.
l := log.WithContext(ctx).
WithField("media", media.ID)
// Check whether we have the required account for media.
_, missingAccount, err := m.getRelatedAccount(ctx, media)
if err != nil {
return false, err
} else if missingAccount {
l.Debug("skipping due to missing account")
return false, nil
}
// Check whether we have the required status for media.
_, missingStatus, err := m.getRelatedStatus(ctx, media)
if err != nil {
return false, err
} else if missingStatus {
l.Debug("skipping due to missing status")
return false, nil
}
// So we know this is a valid cached media entry.
// Check that we have the required files on disk.
return m.checkFiles(ctx, func() error {
l.Debug("uncaching due to missing media")
return m.uncache(ctx, media)
},
media.Thumbnail.Path,
media.File.Path,
)
}
func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmodel.MediaAttachment) (bool, error) {
if !*media.Cached {
// Already uncached.
return false, nil
}
// Start a log entry for media.
l := log.WithContext(ctx).
WithField("media", media.ID)
// Check whether we have the required account for media.
account, missing, err := m.getRelatedAccount(ctx, media)
if err != nil {
return false, err
} else if missing {
l.Debug("skipping due to missing account")
return false, nil
}
if account != nil && account.FetchedAt.After(after) {
l.Debug("skipping due to recently fetched account")
return false, nil
}
// Check whether we have the required status for media.
status, missing, err := m.getRelatedStatus(ctx, media)
if err != nil {
return false, err
} else if missing {
l.Debug("skipping due to missing status")
return false, nil
}
if status != nil && status.FetchedAt.After(after) {
l.Debug("skipping due to recently fetched status")
return false, nil
}
// This media is too old, uncache it.
l.Debug("uncaching old remote media")
return true, m.uncache(ctx, media)
}
func (m *Media) getRelatedAccount(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Account, bool, error) {
if media.AccountID == "" {
// no related account.
return nil, false, nil
}
// Load the account related to this media.
account, err := m.state.DB.GetAccountByID(
gtscontext.SetBarebones(ctx),
media.AccountID,
)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return nil, false, gtserror.Newf("error fetching account by id %s: %w", media.AccountID, err)
}
if account == nil {
// account is missing.
return nil, true, nil
}
return account, false, nil
}
func (m *Media) getRelatedStatus(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Status, bool, error) {
if media.StatusID == "" {
// no related status.
return nil, false, nil
}
// Load the status related to this media.
status, err := m.state.DB.GetStatusByID(
gtscontext.SetBarebones(ctx),
media.StatusID,
)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return nil, false, gtserror.Newf("error fetching status by id %s: %w", media.StatusID, err)
}
if status == nil {
// status is missing.
return nil, true, nil
}
return status, false, nil
}
func (m *Media) uncache(ctx context.Context, media *gtsmodel.MediaAttachment) error {
if gtscontext.DryRun(ctx) {
// Dry run, do nothing.
return nil
}
// Remove media and thumbnail.
_, err := m.removeFiles(ctx,
media.File.Path,
media.Thumbnail.Path,
)
if err != nil {
return gtserror.Newf("error removing media files: %w", err)
}
// Update attachment to reflect that we no longer have it cached.
log.Debugf(ctx, "marking media attachment as uncached: %s", media.ID)
media.Cached = func() *bool { i := false; return &i }()
if err := m.state.DB.UpdateAttachment(ctx, media, "cached"); err != nil {
return gtserror.Newf("error updating media: %w", err)
}
return nil
}
func (m *Media) delete(ctx context.Context, media *gtsmodel.MediaAttachment) error {
if gtscontext.DryRun(ctx) {
// Dry run, do nothing.
return nil
}
// Remove media and thumbnail.
_, err := m.removeFiles(ctx,
media.File.Path,
media.Thumbnail.Path,
)
if err != nil {
return gtserror.Newf("error removing media files: %w", err)
}
// Delete media attachment entirely from the database.
log.Debugf(ctx, "deleting media attachment: %s", media.ID)
if err := m.state.DB.DeleteAttachment(ctx, media.ID); err != nil {
return gtserror.Newf("error deleting media: %w", err)
}
return nil
}
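PruneOrphaned and isOrphaned above depend on storage keys following the layout {$account}/{$type}/{$size}/{$id}.{$ext}, matched by regexes.FilePath into five capture groups. A rough stand-alone illustration of that matching, using a hypothetical pattern that mirrors the documented layout and the orphaned-panda key from the commented-out tests below:

package main

import (
	"fmt"
	"regexp"
)

// filePath is a hypothetical stand-in for regexes.FilePath, mirroring
// the documented key layout {$account}/{$type}/{$size}/{$id}.{$ext}.
var filePath = regexp.MustCompile(`^([A-Z0-9]+)/([a-z]+)/([a-z]+)/([A-Z0-9]+)\.([a-z0-9]+)$`)

func main() {
	key := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
	parts := filePath.FindStringSubmatch(key)
	if len(parts) != 6 {
		// Not shaped like a gts media key; isOrphaned skips these.
		fmt.Println("not a media key")
		return
	}
	// parts[1] account ID, parts[2] media type, parts[3] size,
	// parts[4] media ID, parts[5] file extension.
	fmt.Printf("account=%s type=%s size=%s id=%s ext=%s\n",
		parts[1], parts[2], parts[3], parts[4], parts[5])
}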

internal/cleaner/media_test.go (new file)
@@ -0,0 +1,427 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cleaner_test
import (
"bytes"
"context"
"io"
"os"
"testing"
"time"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/cleaner"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/state"
"github.com/superseriousbusiness/gotosocial/internal/storage"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/internal/visibility"
"github.com/superseriousbusiness/gotosocial/testrig"
)
type MediaTestSuite struct {
suite.Suite
db db.DB
storage *storage.Driver
state state.State
manager *media.Manager
cleaner *cleaner.Cleaner
transportController transport.Controller
testAttachments map[string]*gtsmodel.MediaAttachment
testAccounts map[string]*gtsmodel.Account
testEmojis map[string]*gtsmodel.Emoji
}
func TestMediaTestSuite(t *testing.T) {
suite.Run(t, &MediaTestSuite{})
}
func (suite *MediaTestSuite) SetupTest() {
testrig.InitTestConfig()
testrig.InitTestLog()
suite.state.Caches.Init()
testrig.StartWorkers(&suite.state)
suite.db = testrig.NewTestDB(&suite.state)
suite.storage = testrig.NewInMemoryStorage()
suite.state.DB = suite.db
suite.state.Storage = suite.storage
testrig.StandardStorageSetup(suite.storage, "../../testrig/media")
testrig.StandardDBSetup(suite.db, nil)
testrig.StartTimelines(
&suite.state,
visibility.NewFilter(&suite.state),
testrig.NewTestTypeConverter(suite.db),
)
suite.testAttachments = testrig.NewTestAttachments()
suite.testAccounts = testrig.NewTestAccounts()
suite.testEmojis = testrig.NewTestEmojis()
suite.manager = testrig.NewTestMediaManager(&suite.state)
suite.cleaner = cleaner.New(&suite.state)
suite.transportController = testrig.NewTestTransportController(&suite.state, testrig.NewMockHTTPClient(nil, "../../testrig/media"))
}
func (suite *MediaTestSuite) TearDownTest() {
testrig.StandardDBTeardown(suite.db)
testrig.StandardStorageTeardown(suite.storage)
testrig.StopWorkers(&suite.state)
}
// func (suite *MediaTestSuite) TestPruneOrphanedDry() {
// // add a big orphan panda to store
// b, err := os.ReadFile("../media/test/big-panda.gif")
// if err != nil {
// suite.FailNow(err.Error())
// }
// pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
// if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil {
// suite.FailNow(err.Error())
// }
// ctx := context.Background()
// // dry run should show up 1 orphaned panda
// totalPruned, err := suite.cleaner.Media().PruneOrphaned(gtscontext.SetDryRun(ctx))
// suite.NoError(err)
// suite.Equal(1, totalPruned)
// // panda should still be in storage
// hasKey, err := suite.storage.Has(ctx, pandaPath)
// suite.NoError(err)
// suite.True(hasKey)
// }
// func (suite *MediaTestSuite) TestPruneOrphanedMoist() {
// // i am not complicit in the moistness of this codebase :|
// // add a big orphan panda to store
// b, err := os.ReadFile("../media/test/big-panda.gif")
// if err != nil {
// suite.FailNow(err.Error())
// }
// pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
// if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil {
// suite.FailNow(err.Error())
// }
// ctx := context.Background()
// // should show up 1 orphaned panda
// totalPruned, err := suite.cleaner.Media().PruneOrphaned(ctx)
// suite.NoError(err)
// suite.Equal(1, totalPruned)
// // panda should no longer be in storage
// hasKey, err := suite.storage.Has(ctx, pandaPath)
// suite.NoError(err)
// suite.False(hasKey)
// }
// func (suite *MediaTestSuite) TestPruneUnusedLocal() {
// testAttachment := suite.testAttachments["local_account_1_unattached_1"]
// suite.True(*testAttachment.Cached)
// totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false)
// suite.NoError(err)
// suite.Equal(1, totalPruned)
// _, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID)
// suite.ErrorIs(err, db.ErrNoEntries)
// }
// func (suite *MediaTestSuite) TestPruneUnusedLocalDry() {
// testAttachment := suite.testAttachments["local_account_1_unattached_1"]
// suite.True(*testAttachment.Cached)
// totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), true)
// suite.NoError(err)
// suite.Equal(1, totalPruned)
// _, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID)
// suite.NoError(err)
// }
// func (suite *MediaTestSuite) TestPruneRemoteTwice() {
// totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false)
// suite.NoError(err)
// suite.Equal(1, totalPruned)
// // final prune should prune nothing, since the first prune already happened
// totalPrunedAgain, err := suite.manager.PruneUnusedLocal(context.Background(), false)
// suite.NoError(err)
// suite.Equal(0, totalPrunedAgain)
// }
// func (suite *MediaTestSuite) TestPruneOneNonExistent() {
// ctx := context.Background()
// testAttachment := suite.testAttachments["local_account_1_unattached_1"]
// // Delete this attachment cached on disk
// media, err := suite.db.GetAttachmentByID(ctx, testAttachment.ID)
// suite.NoError(err)
// suite.True(*media.Cached)
// err = suite.storage.Delete(ctx, media.File.Path)
// suite.NoError(err)
// // Now attempt to prune for item with db entry no file
// totalPruned, err := suite.manager.PruneUnusedLocal(ctx, false)
// suite.NoError(err)
// suite.Equal(1, totalPruned)
// }
// func (suite *MediaTestSuite) TestPruneUnusedRemote() {
// ctx := context.Background()
// // start by clearing zork's avatar + header
// zorkOldAvatar := suite.testAttachments["local_account_1_avatar"]
// zorkOldHeader := suite.testAttachments["local_account_1_avatar"]
// zork := suite.testAccounts["local_account_1"]
// zork.AvatarMediaAttachmentID = ""
// zork.HeaderMediaAttachmentID = ""
// if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
// panic(err)
// }
// totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false)
// suite.NoError(err)
// suite.Equal(2, totalPruned)
// // media should no longer be stored
// _, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path)
// suite.ErrorIs(err, storage.ErrNotFound)
// _, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path)
// suite.ErrorIs(err, storage.ErrNotFound)
// _, err = suite.storage.Get(ctx, zorkOldHeader.File.Path)
// suite.ErrorIs(err, storage.ErrNotFound)
// _, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path)
// suite.ErrorIs(err, storage.ErrNotFound)
// // attachments should no longer be in the db
// _, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID)
// suite.ErrorIs(err, db.ErrNoEntries)
// _, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID)
// suite.ErrorIs(err, db.ErrNoEntries)
// }
// func (suite *MediaTestSuite) TestPruneUnusedRemoteTwice() {
// ctx := context.Background()
// // start by clearing zork's avatar + header
// zork := suite.testAccounts["local_account_1"]
// zork.AvatarMediaAttachmentID = ""
// zork.HeaderMediaAttachmentID = ""
// if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
// panic(err)
// }
// totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false)
// suite.NoError(err)
// suite.Equal(2, totalPruned)
// // final prune should prune nothing, since the first prune already happened
// totalPruned, err = suite.manager.PruneUnusedRemote(ctx, false)
// suite.NoError(err)
// suite.Equal(0, totalPruned)
// }
// func (suite *MediaTestSuite) TestPruneUnusedRemoteMultipleAccounts() {
// ctx := context.Background()
// // start by clearing zork's avatar + header
// zorkOldAvatar := suite.testAttachments["local_account_1_avatar"]
// zorkOldHeader := suite.testAttachments["local_account_1_avatar"]
// zork := suite.testAccounts["local_account_1"]
// zork.AvatarMediaAttachmentID = ""
// zork.HeaderMediaAttachmentID = ""
// if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
// panic(err)
// }
// // set zork's unused header as belonging to turtle
// turtle := suite.testAccounts["local_account_1"]
// zorkOldHeader.AccountID = turtle.ID
// if err := suite.db.UpdateByID(ctx, zorkOldHeader, zorkOldHeader.ID, "account_id"); err != nil {
// panic(err)
// }
// totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false)
// suite.NoError(err)
// suite.Equal(2, totalPruned)
// // media should no longer be stored
// _, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path)
// suite.ErrorIs(err, storage.ErrNotFound)
// _, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path)
// suite.ErrorIs(err, storage.ErrNotFound)
// _, err = suite.storage.Get(ctx, zorkOldHeader.File.Path)
// suite.ErrorIs(err, storage.ErrNotFound)
// _, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path)
// suite.ErrorIs(err, storage.ErrNotFound)
// // attachments should no longer be in the db
// _, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID)
// suite.ErrorIs(err, db.ErrNoEntries)
// _, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID)
// suite.ErrorIs(err, db.ErrNoEntries)
// }
func (suite *MediaTestSuite) TestUncacheRemote() {
ctx := context.Background()
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
suite.True(*testStatusAttachment.Cached)
testHeader := suite.testAttachments["remote_account_3_header"]
suite.True(*testHeader.Cached)
after := time.Now().Add(-24 * time.Hour)
totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after)
suite.NoError(err)
suite.Equal(2, totalUncached)
uncachedAttachment, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID)
suite.NoError(err)
suite.False(*uncachedAttachment.Cached)
uncachedAttachment, err = suite.db.GetAttachmentByID(ctx, testHeader.ID)
suite.NoError(err)
suite.False(*uncachedAttachment.Cached)
}
func (suite *MediaTestSuite) TestUncacheRemoteDry() {
ctx := context.Background()
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
suite.True(*testStatusAttachment.Cached)
testHeader := suite.testAttachments["remote_account_3_header"]
suite.True(*testHeader.Cached)
after := time.Now().Add(-24 * time.Hour)
totalUncached, err := suite.cleaner.Media().UncacheRemote(gtscontext.SetDryRun(ctx), after)
suite.NoError(err)
suite.Equal(2, totalUncached)
uncachedAttachment, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID)
suite.NoError(err)
suite.True(*uncachedAttachment.Cached)
uncachedAttachment, err = suite.db.GetAttachmentByID(ctx, testHeader.ID)
suite.NoError(err)
suite.True(*uncachedAttachment.Cached)
}
func (suite *MediaTestSuite) TestUncacheRemoteTwice() {
ctx := context.Background()
after := time.Now().Add(-24 * time.Hour)
totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after)
suite.NoError(err)
suite.Equal(2, totalUncached)
// final uncache should uncache nothing, since the first uncache already happened
totalUncachedAgain, err := suite.cleaner.Media().UncacheRemote(ctx, after)
suite.NoError(err)
suite.Equal(0, totalUncachedAgain)
}
func (suite *MediaTestSuite) TestUncacheAndRecache() {
ctx := context.Background()
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
testHeader := suite.testAttachments["remote_account_3_header"]
after := time.Now().Add(-24 * time.Hour)
totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after)
suite.NoError(err)
suite.Equal(2, totalUncached)
// media should no longer be stored
_, err = suite.storage.Get(ctx, testStatusAttachment.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testStatusAttachment.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testHeader.File.Path)
suite.ErrorIs(err, storage.ErrNotFound)
_, err = suite.storage.Get(ctx, testHeader.Thumbnail.Path)
suite.ErrorIs(err, storage.ErrNotFound)
// now recache the image....
data := func(_ context.Context) (io.ReadCloser, int64, error) {
// load bytes from a test image
b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpg")
if err != nil {
panic(err)
}
return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil
}
for _, original := range []*gtsmodel.MediaAttachment{
testStatusAttachment,
testHeader,
} {
processingRecache, err := suite.manager.PreProcessMediaRecache(ctx, data, original.ID)
suite.NoError(err)
// synchronously load the recached attachment
recachedAttachment, err := processingRecache.LoadAttachment(ctx)
suite.NoError(err)
suite.NotNil(recachedAttachment)
// recachedAttachment should be basically the same as the old attachment
suite.True(*recachedAttachment.Cached)
suite.Equal(original.ID, recachedAttachment.ID)
suite.Equal(original.File.Path, recachedAttachment.File.Path) // file should be stored in the same place
suite.Equal(original.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail
suite.EqualValues(original.FileMeta, recachedAttachment.FileMeta) // and the filemeta should be the same
// recached files should be back in storage
_, err = suite.storage.Get(ctx, recachedAttachment.File.Path)
suite.NoError(err)
_, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path)
suite.NoError(err)
}
}
func (suite *MediaTestSuite) TestUncacheOneNonExistent() {
ctx := context.Background()
testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
// Delete this attachment cached on disk
media, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID)
suite.NoError(err)
suite.True(*media.Cached)
err = suite.storage.Delete(ctx, media.File.Path)
suite.NoError(err)
// Now attempt to uncache remote for item with db entry no file
after := time.Now().Add(-24 * time.Hour)
totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after)
suite.NoError(err)
suite.Equal(2, totalUncached)
}