[feature/performance] support uncaching remote emoji + scheduled cleanup functions (#1987)

This commit is contained in:
kim 2023-07-24 13:14:13 +01:00 committed by GitHub
commit 9eff0d46e4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 1287 additions and 219 deletions

View file

@ -20,6 +20,7 @@ package cleaner
import (
"context"
"errors"
"time"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
@ -36,22 +37,26 @@ type Emoji struct {
// All will execute all cleaner.Emoji utilities synchronously, including output logging.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (e *Emoji) All(ctx context.Context) {
e.LogPruneMissing(ctx)
func (e *Emoji) All(ctx context.Context, maxRemoteDays int) {
t := time.Now().Add(-24 * time.Hour * time.Duration(maxRemoteDays))
e.LogUncacheRemote(ctx, t)
e.LogFixBroken(ctx)
e.LogPruneUnused(ctx)
e.LogFixCacheStates(ctx)
_ = e.state.Storage.Storage.Clean(ctx)
}
// LogPruneMissing performs emoji.PruneMissing(...), logging the start and outcome.
func (e *Emoji) LogPruneMissing(ctx context.Context) {
log.Info(ctx, "start")
if n, err := e.PruneMissing(ctx); err != nil {
// LogUncacheRemote performs Emoji.UncacheRemote(...), logging the start and outcome.
func (e *Emoji) LogUncacheRemote(ctx context.Context, olderThan time.Time) {
log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp))
if n, err := e.UncacheRemote(ctx, olderThan); err != nil {
log.Error(ctx, err)
} else {
log.Infof(ctx, "pruned: %d", n)
log.Infof(ctx, "uncached: %d", n)
}
}
// LogFixBroken performs emoji.FixBroken(...), logging the start and outcome.
// LogFixBroken performs Emoji.FixBroken(...), logging the start and outcome.
func (e *Emoji) LogFixBroken(ctx context.Context) {
log.Info(ctx, "start")
if n, err := e.FixBroken(ctx); err != nil {
@ -61,20 +66,43 @@ func (e *Emoji) LogFixBroken(ctx context.Context) {
}
}
// PruneMissing will delete emoji with missing files from the database and storage driver.
// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function
// should be updated to match media.FixCacheStat() if we ever support emoji uncaching.
func (e *Emoji) PruneMissing(ctx context.Context) (int, error) {
var (
total int
maxID string
)
// LogPruneUnused performs Emoji.PruneUnused(...), logging the start and outcome.
func (e *Emoji) LogPruneUnused(ctx context.Context) {
log.Info(ctx, "start")
if n, err := e.PruneUnused(ctx); err != nil {
log.Error(ctx, err)
} else {
log.Infof(ctx, "pruned: %d", n)
}
}
// LogFixCacheStates performs Emoji.FixCacheStates(...), logging the start and outcome.
func (e *Emoji) LogFixCacheStates(ctx context.Context) {
log.Info(ctx, "start")
if n, err := e.FixCacheStates(ctx); err != nil {
log.Error(ctx, err)
} else {
log.Infof(ctx, "fixed: %d", n)
}
}
// UncacheRemote will uncache all remote emoji older than given input time. Context
// will be checked for `gtscontext.DryRun()` in order to actually perform the action.
func (e *Emoji) UncacheRemote(ctx context.Context, olderThan time.Time) (int, error) {
var total int
// Drop time by a minute to improve search,
// (i.e. make it olderThan inclusive search).
olderThan = olderThan.Add(-time.Minute)
// Store recent time.
mostRecent := olderThan
for {
// Fetch the next batch of emoji media up to next ID.
emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit)
// Fetch the next batch of cached emojis older than last-set time.
emojis, err := e.state.DB.GetCachedEmojisOlderThan(ctx, olderThan, selectLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting emojis: %w", err)
return total, gtserror.Newf("error getting remote emoji: %w", err)
}
if len(emojis) == 0 {
@ -82,17 +110,20 @@ func (e *Emoji) PruneMissing(ctx context.Context) (int, error) {
break
}
// Use last as the next 'maxID' value.
maxID = emojis[len(emojis)-1].ID
// Use last created-at as the next 'olderThan' value.
olderThan = emojis[len(emojis)-1].CreatedAt
for _, emoji := range emojis {
// Check / fix missing emoji media.
fixed, err := e.pruneMissing(ctx, emoji)
// Check / uncache each remote emoji.
uncached, err := e.uncacheRemote(ctx,
mostRecent,
emoji,
)
if err != nil {
return total, err
}
if fixed {
if uncached {
// Update
// count.
total++
@ -145,22 +176,197 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {
return total, nil
}
func (e *Emoji) pruneMissing(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
return e.checkFiles(ctx, func() error {
// Emoji missing files, delete it.
// NOTE: if we ever support uncaching
// of emojis, change to e.uncache().
// In that case we should also rename
// this function to match the media
// equivalent -> fixCacheState().
log.WithContext(ctx).
WithField("emoji", emoji.ID).
Debug("deleting due to missing emoji")
return e.delete(ctx, emoji)
},
// PruneUnused will delete all unused emoji media from the database and storage driver.
// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function
// should be updated to match media.FixCacheStat() if we ever support emoji uncaching.
func (e *Emoji) PruneUnused(ctx context.Context) (int, error) {
var (
total int
maxID string
)
for {
// Fetch the next batch of emoji media up to next ID.
emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting remote emojis: %w", err)
}
if len(emojis) == 0 {
// reached end.
break
}
// Use last as the next 'maxID' value.
maxID = emojis[len(emojis)-1].ID
for _, emoji := range emojis {
// Check / prune unused emoji media.
fixed, err := e.pruneUnused(ctx, emoji)
if err != nil {
return total, err
}
if fixed {
// Update
// count.
total++
}
}
}
return total, nil
}
// FixCacheStatus will check all emoji for up-to-date cache status (i.e. in storage driver).
// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function
// should be updated to match media.FixCacheStat() if we ever support emoji uncaching.
func (e *Emoji) FixCacheStates(ctx context.Context) (int, error) {
var (
total int
maxID string
)
for {
// Fetch the next batch of emoji media up to next ID.
emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return total, gtserror.Newf("error getting remote emojis: %w", err)
}
if len(emojis) == 0 {
// reached end.
break
}
// Use last as the next 'maxID' value.
maxID = emojis[len(emojis)-1].ID
for _, emoji := range emojis {
// Check / fix required emoji cache states.
fixed, err := e.fixCacheState(ctx, emoji)
if err != nil {
return total, err
}
if fixed {
// Update
// count.
total++
}
}
}
return total, nil
}
func (e *Emoji) pruneUnused(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
// Start a log entry for emoji.
l := log.WithContext(ctx).
WithField("emoji", emoji.ID)
// Load any related accounts using this emoji.
accounts, err := e.getRelatedAccounts(ctx, emoji)
if err != nil {
return false, err
} else if len(accounts) > 0 {
l.Debug("skipping as account emoji in use")
return false, nil
}
// Load any related statuses using this emoji.
statuses, err := e.getRelatedStatuses(ctx, emoji)
if err != nil {
return false, err
} else if len(statuses) > 0 {
l.Debug("skipping as status emoji in use")
return false, nil
}
// Check not recently created, give it some time to be "used" again.
if time.Now().Add(-24 * time.Hour * 7).Before(emoji.CreatedAt) {
l.Debug("skipping due to recently created")
return false, nil
}
// Emoji totally unused, delete it.
l.Debug("deleting unused emoji")
return true, e.delete(ctx, emoji)
}
func (e *Emoji) fixCacheState(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
// Start a log entry for emoji.
l := log.WithContext(ctx).
WithField("emoji", emoji.ID)
// Check whether files exist.
exist, err := e.haveFiles(ctx,
emoji.ImageStaticPath,
emoji.ImagePath,
)
if err != nil {
return false, err
}
switch {
case *emoji.Cached && !exist:
// Mark as uncached if expected files don't exist.
l.Debug("cached=true exists=false => marking uncached")
return true, e.uncache(ctx, emoji)
case !*emoji.Cached && exist:
// Remove files if we don't expect them to exist.
l.Debug("cached=false exists=true => removing files")
_, err := e.removeFiles(ctx,
emoji.ImageStaticPath,
emoji.ImagePath,
)
return true, err
default:
return false, nil
}
}
func (e *Emoji) uncacheRemote(ctx context.Context, after time.Time, emoji *gtsmodel.Emoji) (bool, error) {
if !*emoji.Cached {
// Already uncached.
return false, nil
}
// Start a log entry for emoji.
l := log.WithContext(ctx).
WithField("emoji", emoji.ID)
// Load any related accounts using this emoji.
accounts, err := e.getRelatedAccounts(ctx, emoji)
if err != nil {
return false, err
}
for _, account := range accounts {
if account.FetchedAt.After(after) {
l.Debug("skipping due to recently fetched account")
return false, nil
}
}
// Load any related statuses using this emoji.
statuses, err := e.getRelatedStatuses(ctx, emoji)
if err != nil {
return false, err
}
for _, status := range statuses {
if status.FetchedAt.After(after) {
l.Debug("skipping due to recently fetched status")
return false, nil
}
}
// This emoji is too old, uncache it.
l.Debug("uncaching old remote emoji")
return true, e.uncache(ctx, emoji)
}
func (e *Emoji) fixBroken(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
@ -214,6 +420,47 @@ func (e *Emoji) getRelatedCategory(ctx context.Context, emoji *gtsmodel.Emoji) (
return category, false, nil
}
func (e *Emoji) getRelatedAccounts(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Account, error) {
accounts, err := e.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID)
if err != nil {
return nil, gtserror.Newf("error fetching accounts using emoji %s: %w", emoji.ID, err)
}
return accounts, nil
}
func (e *Emoji) getRelatedStatuses(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Status, error) {
statuses, err := e.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID)
if err != nil {
return nil, gtserror.Newf("error fetching statuses using emoji %s: %w", emoji.ID, err)
}
return statuses, nil
}
func (e *Emoji) uncache(ctx context.Context, emoji *gtsmodel.Emoji) error {
if gtscontext.DryRun(ctx) {
// Dry run, do nothing.
return nil
}
// Remove emoji and static.
_, err := e.removeFiles(ctx,
emoji.ImagePath,
emoji.ImageStaticPath,
)
if err != nil {
return gtserror.Newf("error removing emoji files: %w", err)
}
// Update emoji to reflect that we no longer have it cached.
log.Debugf(ctx, "marking emoji as uncached: %s", emoji.ID)
emoji.Cached = func() *bool { i := false; return &i }()
if err := e.state.DB.UpdateEmoji(ctx, emoji, "cached"); err != nil {
return gtserror.Newf("error updating emoji: %w", err)
}
return nil
}
func (e *Emoji) delete(ctx context.Context, emoji *gtsmodel.Emoji) error {
if gtscontext.DryRun(ctx) {
// Dry run, do nothing.