From b13a6437ff2da5c802a218e8938ab2eb805f52ef Mon Sep 17 00:00:00 2001 From: kim Date: Fri, 6 Jun 2025 16:35:39 +0200 Subject: [PATCH] [chore] move s3 storage key prefixing into the storage library itself (#4246) This is just a useful feature that it seemed more semantically correct to have in the storage library itself! Still, thank you to @vdyotte for the original change :) Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4246 Co-authored-by: kim Co-committed-by: kim --- go.mod | 2 +- go.sum | 4 +- internal/cleaner/media.go | 7 +- internal/storage/storage.go | 13 +-- vendor/codeberg.org/gruf/go-storage/s3/s3.go | 85 ++++++++++++++++---- vendor/modules.txt | 2 +- 6 files changed, 79 insertions(+), 34 deletions(-) diff --git a/go.mod b/go.mod index 7a9211f68..ac9206afe 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( codeberg.org/gruf/go-runners v1.6.3 codeberg.org/gruf/go-sched v1.2.4 codeberg.org/gruf/go-split v1.2.0 - codeberg.org/gruf/go-storage v0.2.1 + codeberg.org/gruf/go-storage v0.3.1 codeberg.org/gruf/go-structr v0.9.7 github.com/DmitriyVTitov/size v1.5.0 github.com/KimMachineGun/automemlimit v0.7.2 diff --git a/go.sum b/go.sum index b3866bb66..0db72cc56 100644 --- a/go.sum +++ b/go.sum @@ -50,8 +50,8 @@ codeberg.org/gruf/go-sched v1.2.4 h1:ddBB9o0D/2oU8NbQ0ldN5aWxogpXPRBATWi58+p++Hw codeberg.org/gruf/go-sched v1.2.4/go.mod h1:wad6l+OcYGWMA2TzNLMmLObsrbBDxdJfEy5WvTgBjNk= codeberg.org/gruf/go-split v1.2.0 h1:PmzL23nVEVHm8VxjsJmv4m4wGQz2bGgQw52dgSSj65c= codeberg.org/gruf/go-split v1.2.0/go.mod h1:0rejWJpqvOoFAd7nwm5tIXYKaAqjtFGOXmTqQV+VO38= -codeberg.org/gruf/go-storage v0.2.1 h1:AHVU+7ZKpaL6fTzcH7GU+JwY3HQVYOZ84U4HV//K1GA= -codeberg.org/gruf/go-storage v0.2.1/go.mod h1:zJ5Nd2rKv0R5vF1rYbH+IEggUx8cIv72Vj2d8e//IAw= +codeberg.org/gruf/go-storage v0.3.1 h1:g66UIM/xXnEk9ejT+W0T9s/PODBZhXa/8ajzeY/MELI= +codeberg.org/gruf/go-storage v0.3.1/go.mod h1:r43n/zi7YGOCl2iSl7AMI27D1zcWS65Bi2+5xDzypeo= codeberg.org/gruf/go-structr v0.9.7 h1:yQeIxTjYb6reNdgESk915twyjolydYBqat/mlZrP7bg= codeberg.org/gruf/go-structr v0.9.7/go.mod h1:9k5hYztZ4PsBS+m1v5hUTeFiVUBTLF5VA7d9cd1OEMs= codeberg.org/superseriousbusiness/go-swagger v0.31.0-gts-go1.23-fix h1:+JvBZqsQfdT+ROnk2DkvXsKQ9QBorKKKBk5fBqw62I8= diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go index 043d7cf5f..84473bc22 100644 --- a/internal/cleaner/media.go +++ b/internal/cleaner/media.go @@ -20,7 +20,6 @@ package cleaner import ( "context" "errors" - "strings" "time" "code.superseriousbusiness.org/gotosocial/internal/db" @@ -94,10 +93,10 @@ func (m *Media) LogFixCacheStates(ctx context.Context) { func (m *Media) PruneOrphaned(ctx context.Context) (int, error) { var files []string - // All media files in storage will have path fitting: {$account}/{$type}/{$size}/{$id}.{$ext} + // All media in storage will have path: {$account}/{$type}/{$size}/{$id}.{$ext} if err := m.state.Storage.WalkKeys(ctx, func(path string) error { - // Check for our expected fileserver path format. - path = strings.TrimPrefix(path, m.state.Storage.KeyPrefix) + + // Check for expected fileserver path format. if !regexes.FilePath.MatchString(path) { log.Warnf(ctx, "unexpected storage item: %s", path) return nil diff --git a/internal/storage/storage.go b/internal/storage/storage.go index 32e9ccd27..6c645f0ff 100644 --- a/internal/storage/storage.go +++ b/internal/storage/storage.go @@ -78,32 +78,28 @@ type Driver struct { // S3-only parameters Proxy bool Bucket string - KeyPrefix string PresignedCache *ttl.Cache[string, PresignedURL] RedirectURL string } // Get returns the byte value for key in storage. func (d *Driver) Get(ctx context.Context, key string) ([]byte, error) { - key = d.KeyPrefix + key return d.Storage.ReadBytes(ctx, key) } // GetStream returns an io.ReadCloser for the value bytes at key in the storage. func (d *Driver) GetStream(ctx context.Context, key string) (io.ReadCloser, error) { - key = d.KeyPrefix + key return d.Storage.ReadStream(ctx, key) } // Put writes the supplied value bytes at key in the storage func (d *Driver) Put(ctx context.Context, key string, value []byte) (int, error) { - key = d.KeyPrefix + key return d.Storage.WriteBytes(ctx, key, value) } // PutFile moves the contents of file at path, to storage.Driver{} under given key (with content-type if supported). func (d *Driver) PutFile(ctx context.Context, key, filepath, contentType string) (int64, error) { - key = d.KeyPrefix + key + // Open file at path for reading. file, err := os.Open(filepath) if err != nil { @@ -149,13 +145,11 @@ func (d *Driver) PutFile(ctx context.Context, key, filepath, contentType string) // Delete attempts to remove the supplied key (and corresponding value) from storage. func (d *Driver) Delete(ctx context.Context, key string) error { - key = d.KeyPrefix + key return d.Storage.Remove(ctx, key) } // Has checks if the supplied key is in the storage. func (d *Driver) Has(ctx context.Context, key string) (bool, error) { - key = d.KeyPrefix + key stat, err := d.Storage.Stat(ctx, key) return (stat != nil), err } @@ -163,7 +157,6 @@ func (d *Driver) Has(ctx context.Context, key string) (bool, error) { // WalkKeys walks the keys in the storage. func (d *Driver) WalkKeys(ctx context.Context, walk func(string) error) error { return d.Storage.WalkKeys(ctx, storage.WalkKeysOpts{ - Prefix: d.KeyPrefix, Step: func(entry storage.Entry) error { return walk(entry.Key) }, @@ -172,7 +165,7 @@ func (d *Driver) WalkKeys(ctx context.Context, walk func(string) error) error { // URL will return a presigned GET object URL, but only if running on S3 storage with proxying disabled. func (d *Driver) URL(ctx context.Context, key string) *PresignedURL { - key = d.KeyPrefix + key + // Check whether S3 *without* proxying is enabled s3, ok := d.Storage.(*s3.S3Storage) if !ok || d.Proxy { @@ -339,6 +332,7 @@ func NewS3Storage() (*Driver, error) { // Open the s3 storage implementation s3, err := s3.Open(endpoint, bucket, &s3.Config{ + KeyPrefix: config.GetStorageS3KeyPrefix(), CoreOpts: minio.Options{ Creds: credentials.NewStaticV4(access, secret, ""), Secure: secure, @@ -358,7 +352,6 @@ func NewS3Storage() (*Driver, error) { return &Driver{ Proxy: config.GetStorageS3Proxy(), Bucket: config.GetStorageS3BucketName(), - KeyPrefix: config.GetStorageS3KeyPrefix(), Storage: s3, PresignedCache: presignedCache, RedirectURL: redirectURL, diff --git a/vendor/codeberg.org/gruf/go-storage/s3/s3.go b/vendor/codeberg.org/gruf/go-storage/s3/s3.go index ad7686737..c53560161 100644 --- a/vendor/codeberg.org/gruf/go-storage/s3/s3.go +++ b/vendor/codeberg.org/gruf/go-storage/s3/s3.go @@ -6,10 +6,12 @@ import ( "errors" "io" "net/http" + "strings" "codeberg.org/gruf/go-storage" "codeberg.org/gruf/go-storage/internal" "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/s3utils" ) // ensure S3Storage conforms to storage.Storage. @@ -42,6 +44,13 @@ var defaultConfig = Config{ // Config defines options to be used when opening an S3Storage, // mostly options for underlying S3 client library. type Config struct { + + // KeyPrefix allows specifying a + // prefix applied to all S3 object + // requests, e.g. allowing you to + // partition a bucket by key prefix. + KeyPrefix string + // CoreOpts are S3 client options // passed during initialization. CoreOpts minio.Options @@ -78,6 +87,7 @@ func getS3Config(cfg *Config) Config { } return Config{ + KeyPrefix: cfg.KeyPrefix, CoreOpts: cfg.CoreOpts, PutChunkSize: cfg.PutChunkSize, ListSize: cfg.ListSize, @@ -94,17 +104,22 @@ type S3Storage struct { // Open opens a new S3Storage instance with given S3 endpoint URL, bucket name and configuration. func Open(endpoint string, bucket string, cfg *Config) (*S3Storage, error) { - // Check + set config defaults. + ctx := context.Background() + + // Check/set config defaults. config := getS3Config(cfg) + // Validate configured key prefix (if any), handles case of an empty string. + if err := s3utils.CheckValidObjectNamePrefix(config.KeyPrefix); err != nil { + return nil, err + } + // Create new S3 client connection to given endpoint. client, err := minio.NewCore(endpoint, &config.CoreOpts) if err != nil { return nil, err } - ctx := context.Background() - // Check that provided bucket actually exists. exists, err := client.BucketExists(ctx, bucket) if err != nil { @@ -132,7 +147,8 @@ func (st *S3Storage) Clean(ctx context.Context) error { // ReadBytes: implements Storage.ReadBytes(). func (st *S3Storage) ReadBytes(ctx context.Context, key string) ([]byte, error) { - // Get stream reader for key + + // Get stream reader for key. rc, err := st.ReadStream(ctx, key) if err != nil { return nil, err @@ -162,6 +178,9 @@ func (st *S3Storage) ReadStream(ctx context.Context, key string) (io.ReadCloser, // GetObject wraps minio.Core{}.GetObject() to handle wrapping with our own storage library error types. func (st *S3Storage) GetObject(ctx context.Context, key string, opts minio.GetObjectOptions) (io.ReadCloser, minio.ObjectInfo, http.Header, error) { + // Update given key with prefix. + key = st.config.KeyPrefix + key + // Query bucket for object data and info. rc, info, hdr, err := st.client.GetObject( ctx, @@ -199,6 +218,10 @@ func (st *S3Storage) WriteStream(ctx context.Context, key string, r io.Reader) ( // PutObject wraps minio.Core{}.PutObject() to handle wrapping with our own storage library error types, and in the case of an io.Reader // that does not implement ReaderSize{}, it will instead handle upload by using minio.Core{}.NewMultipartUpload() in chunks of PutChunkSize. func (st *S3Storage) PutObject(ctx context.Context, key string, r io.Reader, opts minio.PutObjectOptions) (minio.UploadInfo, error) { + + // Update given key with prefix. + key = st.config.KeyPrefix + key + if rs, ok := r.(ReaderSize); ok { // This reader supports providing us the size of // the encompassed data, allowing us to perform @@ -358,6 +381,9 @@ func (st *S3Storage) Stat(ctx context.Context, key string) (*storage.Entry, erro // StatObject wraps minio.Core{}.StatObject() to handle wrapping with our own storage library error types. func (st *S3Storage) StatObject(ctx context.Context, key string, opts minio.StatObjectOptions) (minio.ObjectInfo, error) { + // Update given key with prefix. + key = st.config.KeyPrefix + key + // Query bucket for object info. info, err := st.client.StatObject( ctx, @@ -392,6 +418,9 @@ func (st *S3Storage) Remove(ctx context.Context, key string) error { // RemoveObject wraps minio.Core{}.RemoveObject() to handle wrapping with our own storage library error types. func (st *S3Storage) RemoveObject(ctx context.Context, key string, opts minio.RemoveObjectOptions) error { + // Update given key with prefix. + key = st.config.KeyPrefix + key + // Remove object from S3 bucket err := st.client.RemoveObject( ctx, @@ -426,6 +455,9 @@ func (st *S3Storage) WalkKeys(ctx context.Context, opts storage.WalkKeysOpts) er token string ) + // Update options prefix to include global prefix. + opts.Prefix = st.config.KeyPrefix + opts.Prefix + for { // List objects in bucket starting at marker. result, err := st.client.ListObjectsV2( @@ -440,21 +472,42 @@ func (st *S3Storage) WalkKeys(ctx context.Context, opts storage.WalkKeysOpts) er return err } - // Iterate through list result contents. - for _, obj := range result.Contents { + // Iterate through list contents. + // + // Use different loops depending + // on if filter func was provided, + // to reduce loop operations. + if opts.Filter != nil { + for _, obj := range result.Contents { + // Trim any global prefix from returned object key. + key := strings.TrimPrefix(obj.Key, st.config.KeyPrefix) - // Skip filtered obj keys. - if opts.Filter != nil && - opts.Filter(obj.Key) { - continue + // Skip filtered obj keys. + if opts.Filter != nil && + opts.Filter(key) { + continue + } + + // Pass each obj through step func. + if err := opts.Step(storage.Entry{ + Key: key, + Size: obj.Size, + }); err != nil { + return err + } } + } else { + for _, obj := range result.Contents { + // Trim any global prefix from returned object key. + key := strings.TrimPrefix(obj.Key, st.config.KeyPrefix) - // Pass each obj through step func. - if err := opts.Step(storage.Entry{ - Key: obj.Key, - Size: obj.Size, - }); err != nil { - return err + // Pass each obj through step func. + if err := opts.Step(storage.Entry{ + Key: key, + Size: obj.Size, + }); err != nil { + return err + } } } diff --git a/vendor/modules.txt b/vendor/modules.txt index 38a368696..37d6ae13c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -276,7 +276,7 @@ codeberg.org/gruf/go-sched # codeberg.org/gruf/go-split v1.2.0 ## explicit; go 1.20 codeberg.org/gruf/go-split -# codeberg.org/gruf/go-storage v0.2.1 +# codeberg.org/gruf/go-storage v0.3.1 ## explicit; go 1.23.0 codeberg.org/gruf/go-storage codeberg.org/gruf/go-storage/disk