mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-11-21 16:57:28 -06:00
[feature] Add admin media prune orphaned CLI command (#1146)
* add FilePath regex
* add `admin media prune orphaned` command
* add prune orphaned function to media manager
* don't mark flag as required
* document admin media prune orphaned cmd
* oh envparsing.sh you coy minx
This commit is contained in:
parent
9e18c7f996
commit
13e9abd02a
11 changed files with 441 additions and 45 deletions
|
|
@ -125,14 +125,15 @@ type Configuration struct {
|
|||
SyslogProtocol string `name:"syslog-protocol" usage:"Protocol to use when directing logs to syslog. Leave empty to connect to local syslog."`
|
||||
SyslogAddress string `name:"syslog-address" usage:"Address:port to send syslog logs to. Leave empty to connect to local syslog."`
|
||||
|
||||
// TODO: move these elsewhere, these are more ephemeral vs long-running flags like above
|
||||
AdminAccountUsername string `name:"username" usage:"the username to create/delete/etc"`
|
||||
AdminAccountEmail string `name:"email" usage:"the email address of this account"`
|
||||
AdminAccountPassword string `name:"password" usage:"the password to set for this account"`
|
||||
AdminTransPath string `name:"path" usage:"the path of the file to import from/export to"`
|
||||
|
||||
AdvancedCookiesSamesite string `name:"advanced-cookies-samesite" usage:"'strict' or 'lax', see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite"`
|
||||
AdvancedRateLimitRequests int `name:"advanced-rate-limit-requests" usage:"Amount of HTTP requests to permit within a 5 minute window. 0 or less turns rate limiting off."`
|
||||
|
||||
// TODO: move these elsewhere, these are more ephemeral vs long-running flags like above
|
||||
AdminAccountUsername string `name:"username" usage:"the username to create/delete/etc"`
|
||||
AdminAccountEmail string `name:"email" usage:"the email address of this account"`
|
||||
AdminAccountPassword string `name:"password" usage:"the password to set for this account"`
|
||||
AdminTransPath string `name:"path" usage:"the path of the file to import from/export to"`
|
||||
AdminMediaPruneDryRun bool `name:"dry-run" usage:"perform a dry run and only log number of items eligible for pruning"`
|
||||
}
|
||||
|
||||
// MarshalMap will marshal current Configuration into a map structure (useful for JSON).
|
||||
|
|
|
|||
|
|
@ -178,3 +178,10 @@ func AddAdminTrans(cmd *cobra.Command) {
|
|||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
// AddAdminMediaPrune attaches flags pertaining to media storage prune commands.
|
||||
func AddAdminMediaPrune(cmd *cobra.Command) {
|
||||
name := AdminMediaPruneDryRunFlag()
|
||||
usage := fieldtag("AdminMediaPruneDryRun", "usage")
|
||||
cmd.Flags().Bool(name, true, usage)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1745,6 +1745,56 @@ func GetSyslogAddress() string { return global.GetSyslogAddress() }
|
|||
// SetSyslogAddress safely sets the value for global configuration 'SyslogAddress' field
|
||||
func SetSyslogAddress(v string) { global.SetSyslogAddress(v) }
|
||||
|
||||
// GetAdvancedCookiesSamesite safely fetches the Configuration value for state's 'AdvancedCookiesSamesite' field
|
||||
func (st *ConfigState) GetAdvancedCookiesSamesite() (v string) {
|
||||
st.mutex.Lock()
|
||||
v = st.config.AdvancedCookiesSamesite
|
||||
st.mutex.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// SetAdvancedCookiesSamesite safely sets the Configuration value for state's 'AdvancedCookiesSamesite' field
func (st *ConfigState) SetAdvancedCookiesSamesite(v string) {
	// Hold the state mutex for the whole update; the deferred unlock only
	// runs once reloadToViper has returned.
	st.mutex.Lock()
	defer st.mutex.Unlock()
	st.config.AdvancedCookiesSamesite = v
	// NOTE(review): reloadToViper is defined elsewhere; presumably it syncs
	// the changed config back into viper -- confirm against its definition.
	st.reloadToViper()
}
|
||||
|
||||
// AdvancedCookiesSamesiteFlag reports the flag name associated with the
// 'AdvancedCookiesSamesite' configuration field.
func AdvancedCookiesSamesiteFlag() string {
	const flagName = "advanced-cookies-samesite"
	return flagName
}
|
||||
|
||||
// GetAdvancedCookiesSamesite safely fetches the value for global configuration 'AdvancedCookiesSamesite' field
|
||||
func GetAdvancedCookiesSamesite() string { return global.GetAdvancedCookiesSamesite() }
|
||||
|
||||
// SetAdvancedCookiesSamesite safely sets the value for global configuration 'AdvancedCookiesSamesite' field
|
||||
func SetAdvancedCookiesSamesite(v string) { global.SetAdvancedCookiesSamesite(v) }
|
||||
|
||||
// GetAdvancedRateLimitRequests safely fetches the Configuration value for state's 'AdvancedRateLimitRequests' field
|
||||
func (st *ConfigState) GetAdvancedRateLimitRequests() (v int) {
|
||||
st.mutex.Lock()
|
||||
v = st.config.AdvancedRateLimitRequests
|
||||
st.mutex.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// SetAdvancedRateLimitRequests safely sets the Configuration value for state's 'AdvancedRateLimitRequests' field
func (st *ConfigState) SetAdvancedRateLimitRequests(v int) {
	// Hold the state mutex for the whole update; the deferred unlock only
	// runs once reloadToViper has returned.
	st.mutex.Lock()
	defer st.mutex.Unlock()
	st.config.AdvancedRateLimitRequests = v
	// NOTE(review): reloadToViper is defined elsewhere; presumably it syncs
	// the changed config back into viper -- confirm against its definition.
	st.reloadToViper()
}
|
||||
|
||||
// AdvancedRateLimitRequestsFlag reports the flag name associated with the
// 'AdvancedRateLimitRequests' configuration field.
func AdvancedRateLimitRequestsFlag() string {
	const flagName = "advanced-rate-limit-requests"
	return flagName
}
|
||||
|
||||
// GetAdvancedRateLimitRequests safely fetches the value for global configuration 'AdvancedRateLimitRequests' field
|
||||
func GetAdvancedRateLimitRequests() int { return global.GetAdvancedRateLimitRequests() }
|
||||
|
||||
// SetAdvancedRateLimitRequests safely sets the value for global configuration 'AdvancedRateLimitRequests' field
|
||||
func SetAdvancedRateLimitRequests(v int) { global.SetAdvancedRateLimitRequests(v) }
|
||||
|
||||
// GetAdminAccountUsername safely fetches the Configuration value for state's 'AdminAccountUsername' field
|
||||
func (st *ConfigState) GetAdminAccountUsername() (v string) {
|
||||
st.mutex.Lock()
|
||||
|
|
@ -1845,52 +1895,27 @@ func GetAdminTransPath() string { return global.GetAdminTransPath() }
|
|||
// SetAdminTransPath safely sets the value for global configuration 'AdminTransPath' field
|
||||
func SetAdminTransPath(v string) { global.SetAdminTransPath(v) }
|
||||
|
||||
// GetAdvancedCookiesSamesite safely fetches the Configuration value for state's 'AdvancedCookiesSamesite' field
|
||||
func (st *ConfigState) GetAdvancedCookiesSamesite() (v string) {
|
||||
// GetAdminMediaPruneDryRun safely fetches the Configuration value for state's 'AdminMediaPruneDryRun' field
|
||||
func (st *ConfigState) GetAdminMediaPruneDryRun() (v bool) {
|
||||
st.mutex.Lock()
|
||||
v = st.config.AdvancedCookiesSamesite
|
||||
v = st.config.AdminMediaPruneDryRun
|
||||
st.mutex.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// SetAdvancedCookiesSamesite safely sets the Configuration value for state's 'AdvancedCookiesSamesite' field
|
||||
func (st *ConfigState) SetAdvancedCookiesSamesite(v string) {
|
||||
// SetAdminMediaPruneDryRun safely sets the Configuration value for state's 'AdminMediaPruneDryRun' field
|
||||
func (st *ConfigState) SetAdminMediaPruneDryRun(v bool) {
|
||||
st.mutex.Lock()
|
||||
defer st.mutex.Unlock()
|
||||
st.config.AdvancedCookiesSamesite = v
|
||||
st.config.AdminMediaPruneDryRun = v
|
||||
st.reloadToViper()
|
||||
}
|
||||
|
||||
// AdvancedCookiesSamesiteFlag returns the flag name for the 'AdvancedCookiesSamesite' field
|
||||
func AdvancedCookiesSamesiteFlag() string { return "advanced-cookies-samesite" }
|
||||
// AdminMediaPruneDryRunFlag returns the flag name for the 'AdminMediaPruneDryRun' field
|
||||
func AdminMediaPruneDryRunFlag() string { return "dry-run" }
|
||||
|
||||
// GetAdvancedCookiesSamesite safely fetches the value for global configuration 'AdvancedCookiesSamesite' field
|
||||
func GetAdvancedCookiesSamesite() string { return global.GetAdvancedCookiesSamesite() }
|
||||
// GetAdminMediaPruneDryRun safely fetches the value for global configuration 'AdminMediaPruneDryRun' field
|
||||
func GetAdminMediaPruneDryRun() bool { return global.GetAdminMediaPruneDryRun() }
|
||||
|
||||
// SetAdvancedCookiesSamesite safely sets the value for global configuration 'AdvancedCookiesSamesite' field
|
||||
func SetAdvancedCookiesSamesite(v string) { global.SetAdvancedCookiesSamesite(v) }
|
||||
|
||||
// GetAdvancedRateLimitRequests safely fetches the Configuration value for state's 'AdvancedRateLimitRequests' field
|
||||
func (st *ConfigState) GetAdvancedRateLimitRequests() (v int) {
|
||||
st.mutex.Lock()
|
||||
v = st.config.AdvancedRateLimitRequests
|
||||
st.mutex.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// SetAdvancedRateLimitRequests safely sets the Configuration value for state's 'AdvancedRateLimitRequests' field
|
||||
func (st *ConfigState) SetAdvancedRateLimitRequests(v int) {
|
||||
st.mutex.Lock()
|
||||
defer st.mutex.Unlock()
|
||||
st.config.AdvancedRateLimitRequests = v
|
||||
st.reloadToViper()
|
||||
}
|
||||
|
||||
// AdvancedRateLimitRequestsFlag returns the flag name for the 'AdvancedRateLimitRequests' field
|
||||
func AdvancedRateLimitRequestsFlag() string { return "advanced-rate-limit-requests" }
|
||||
|
||||
// GetAdvancedRateLimitRequests safely fetches the value for global configuration 'AdvancedRateLimitRequests' field
|
||||
func GetAdvancedRateLimitRequests() int { return global.GetAdvancedRateLimitRequests() }
|
||||
|
||||
// SetAdvancedRateLimitRequests safely sets the value for global configuration 'AdvancedRateLimitRequests' field
|
||||
func SetAdvancedRateLimitRequests(v int) { global.SetAdvancedRateLimitRequests(v) }
|
||||
// SetAdminMediaPruneDryRun safely sets the value for global configuration 'AdminMediaPruneDryRun' field
|
||||
func SetAdminMediaPruneDryRun(v bool) { global.SetAdminMediaPruneDryRun(v) }
|
||||
|
|
|
|||
|
|
@ -91,6 +91,12 @@ type Manager interface {
|
|||
//
|
||||
// The returned int is the amount of media that was pruned by this function.
|
||||
PruneUnusedLocalAttachments(ctx context.Context) (int, error)
|
||||
// PruneOrphaned prunes files that exist in storage but which do not have a corresponding
|
||||
// entry in the database.
|
||||
//
|
||||
// If dry is true, then nothing will be changed, only the amount that *would* be removed
|
||||
// is returned to the caller.
|
||||
PruneOrphaned(ctx context.Context, dry bool) (int, error)
|
||||
|
||||
// Stop stops the underlying worker pool of the manager. It should be called
|
||||
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
|
||||
|
|
|
|||
127
internal/media/pruneorphaned.go
Normal file
127
internal/media/pruneorphaned.go
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
GoToSocial
|
||||
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package media
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/superseriousbusiness/gotosocial/internal/db"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/regexes"
|
||||
)
|
||||
|
||||
func (m *manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) {
|
||||
var totalPruned int
|
||||
|
||||
// keys in storage will look like the following:
|
||||
// `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[FILE_NAME]`
|
||||
// we can filter out keys we're not interested in by
|
||||
// matching through a regex
|
||||
var matchCount int
|
||||
match := func(storageKey string) bool {
|
||||
if regexes.FilePath.MatchString(storageKey) {
|
||||
matchCount++
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
log.Info("checking storage keys for orphaned pruning candidates...")
|
||||
iterator, err := m.storage.Iterator(ctx, match)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("PruneOrphaned: error getting storage iterator: %s", err)
|
||||
}
|
||||
|
||||
// make sure we have some keys, and also advance
|
||||
// the iterator to the first non-empty key
|
||||
if !iterator.Next() {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// for each key in the iterator, check if entry is orphaned
|
||||
log.Info("got %d orphaned pruning candidates, checking for orphaned status, please wait...")
|
||||
var checkedKeys int
|
||||
orphanedKeys := make([]string, 0, matchCount)
|
||||
for key := iterator.Key(); iterator.Next(); key = iterator.Key() {
|
||||
if m.orphaned(ctx, key) {
|
||||
orphanedKeys = append(orphanedKeys, key)
|
||||
}
|
||||
checkedKeys++
|
||||
if checkedKeys%50 == 0 {
|
||||
log.Infof("checked %d of %d orphaned pruning candidates...", checkedKeys, matchCount)
|
||||
}
|
||||
}
|
||||
iterator.Release()
|
||||
|
||||
if !dry {
|
||||
// the real deal, we have to delete stuff
|
||||
for _, key := range orphanedKeys {
|
||||
log.Infof("key %s corresponds to orphaned media, will remove it now", key)
|
||||
if err := m.storage.Delete(ctx, key); err != nil {
|
||||
log.Errorf("error deleting item with key %s from storage: %s", key, err)
|
||||
continue
|
||||
}
|
||||
totalPruned++
|
||||
}
|
||||
} else {
|
||||
// just a dry run, don't delete anything
|
||||
for _, key := range orphanedKeys {
|
||||
log.Infof("DRY RUN: key %s corresponds to orphaned media which would be deleted", key)
|
||||
totalPruned++
|
||||
}
|
||||
}
|
||||
|
||||
return totalPruned, nil
|
||||
}
|
||||
|
||||
func (m *manager) orphaned(ctx context.Context, key string) bool {
|
||||
pathParts := regexes.FilePath.FindStringSubmatch(key)
|
||||
if len(pathParts) != 6 {
|
||||
return false
|
||||
}
|
||||
|
||||
mediaType := pathParts[2]
|
||||
mediaID := pathParts[4]
|
||||
|
||||
var orphaned bool
|
||||
switch Type(mediaType) {
|
||||
case TypeAttachment, TypeHeader, TypeAvatar:
|
||||
if _, err := m.db.GetAttachmentByID(ctx, mediaID); err != nil {
|
||||
if errors.Is(err, db.ErrNoEntries) {
|
||||
orphaned = true
|
||||
} else {
|
||||
log.Errorf("orphaned: error calling GetAttachmentByID: %s", err)
|
||||
}
|
||||
}
|
||||
case TypeEmoji:
|
||||
if _, err := m.db.GetEmojiByID(ctx, mediaID); err != nil {
|
||||
if errors.Is(err, db.ErrNoEntries) {
|
||||
orphaned = true
|
||||
} else {
|
||||
log.Errorf("orphaned: error calling GetEmojiByID: %s", err)
|
||||
}
|
||||
}
|
||||
default:
|
||||
orphaned = true
|
||||
}
|
||||
|
||||
return orphaned
|
||||
}
|
||||
82
internal/media/pruneorphaned_test.go
Normal file
82
internal/media/pruneorphaned_test.go
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
GoToSocial
|
||||
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package media_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/suite"
|
||||
)
|
||||
|
||||
// PruneOrphanedTestSuite groups the tests for PruneOrphaned. It declares no
// fields of its own; the storage and manager used by its tests are promoted
// from the embedded MediaStandardTestSuite.
type PruneOrphanedTestSuite struct {
	MediaStandardTestSuite
}
|
||||
|
||||
func (suite *PruneOrphanedTestSuite) TestPruneOrphanedDry() {
|
||||
// add a big orphan panda to store
|
||||
b, err := os.ReadFile("./test/big-panda.gif")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
|
||||
if err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// dry run should show up 1 orphaned panda
|
||||
totalPruned, err := suite.manager.PruneOrphaned(context.Background(), true)
|
||||
suite.NoError(err)
|
||||
suite.Equal(1, totalPruned)
|
||||
|
||||
// panda should still be in storage
|
||||
hasKey, err := suite.storage.Has(context.Background(), pandaPath)
|
||||
suite.NoError(err)
|
||||
suite.True(hasKey)
|
||||
}
|
||||
|
||||
func (suite *PruneOrphanedTestSuite) TestPruneOrphanedMoist() {
|
||||
// add a big orphan panda to store
|
||||
b, err := os.ReadFile("./test/big-panda.gif")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
|
||||
if err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// should show up 1 orphaned panda
|
||||
totalPruned, err := suite.manager.PruneOrphaned(context.Background(), false)
|
||||
suite.NoError(err)
|
||||
suite.Equal(1, totalPruned)
|
||||
|
||||
// panda should no longer be in storage
|
||||
hasKey, err := suite.storage.Has(context.Background(), pandaPath)
|
||||
suite.NoError(err)
|
||||
suite.False(hasKey)
|
||||
}
|
||||
|
||||
func TestPruneOrphanedTestSuite(t *testing.T) {
|
||||
suite.Run(t, &PruneOrphanedTestSuite{})
|
||||
}
|
||||
|
|
@ -140,6 +140,13 @@ var (
|
|||
// BlockPath parses a path that validates and captures the username part and the ulid part
|
||||
// from eg /users/example_username/blocks/01F7XT5JZW1WMVSW1KADS8PVDH
|
||||
BlockPath = regexp.MustCompile(blockPath)
|
||||
|
||||
filePath = fmt.Sprintf(`^(%s)/([a-z]+)/([a-z]+)/(%s)\.([a-z]+)$`, ulid, ulid)
|
||||
// FilePath parses a file storage path of the form [ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[FILE_NAME]
|
||||
// eg 01F8MH1H7YV1Z7D2C8K2730QBF/attachment/small/01F8MH8RMYQ6MSNY3JM2XT1CQ5.jpeg
|
||||
// It captures the account id, media type, media size, file name, and file extension, eg
|
||||
// `01F8MH1H7YV1Z7D2C8K2730QBF`, `attachment`, `small`, `01F8MH8RMYQ6MSNY3JM2XT1CQ5`, `jpeg`.
|
||||
FilePath = regexp.MustCompile(filePath)
|
||||
)
|
||||
|
||||
// bufpool is a memory pool of byte buffers for use in our regex utility functions.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue