mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-11-02 07:52:25 -06:00
[performance] media processing improvements (#1288)
* media processor consolidation and reformatting, reduce amount of required syscalls
Signed-off-by: kim <grufwub@gmail.com>
* update go-store library, stream jpeg/png encoding + use buffer pools, improved media processing AlreadyExists error handling
Signed-off-by: kim <grufwub@gmail.com>
* fix duration not being set, fix mp4 test expecting error
Signed-off-by: kim <grufwub@gmail.com>
* fix test expecting media files with different extension
Signed-off-by: kim <grufwub@gmail.com>
* remove unused code
Signed-off-by: kim <grufwub@gmail.com>
* fix expected storage paths in tests, update expected test thumbnails
Signed-off-by: kim <grufwub@gmail.com>
* remove dead code
Signed-off-by: kim <grufwub@gmail.com>
* fix cached presigned s3 url fetching
Signed-off-by: kim <grufwub@gmail.com>
* fix tests
Signed-off-by: kim <grufwub@gmail.com>
* fix test models
Signed-off-by: kim <grufwub@gmail.com>
* update media processing to use sync.Once{} for concurrency protection
Signed-off-by: kim <grufwub@gmail.com>
* shutup linter
Signed-off-by: kim <grufwub@gmail.com>
* fix passing in KVStore GetStream() as stream to PutStream()
Signed-off-by: kim <grufwub@gmail.com>
* fix unlocks of storage keys
Signed-off-by: kim <grufwub@gmail.com>
* whoops, return the error...
Signed-off-by: kim <grufwub@gmail.com>
* pour one out for tobi's code <3
Signed-off-by: kim <grufwub@gmail.com>
* add back the byte slurping code
Signed-off-by: kim <grufwub@gmail.com>
* check for both ErrUnexpectedEOF and EOF
Signed-off-by: kim <grufwub@gmail.com>
* add back links to file format header information
Signed-off-by: kim <grufwub@gmail.com>
Signed-off-by: kim <grufwub@gmail.com>
This commit is contained in:
parent
3512325e46
commit
5318054808
64 changed files with 1279 additions and 1405 deletions
|
|
@ -24,84 +24,74 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"codeberg.org/gruf/go-bytesize"
|
||||
gostore "codeberg.org/gruf/go-store/v2/storage"
|
||||
"github.com/h2non/filetype"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/db"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/id"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/storage"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/uris"
|
||||
)
|
||||
|
||||
// ProcessingEmoji represents an emoji currently processing. It exposes
|
||||
// various functions for retrieving data from the process.
|
||||
type ProcessingEmoji struct {
|
||||
mu sync.Mutex
|
||||
|
||||
// id of this instance's account -- pinned for convenience here so we only need to fetch it once
|
||||
instanceAccountID string
|
||||
|
||||
/*
|
||||
below fields should be set on newly created media;
|
||||
emoji will be updated incrementally as media goes through processing
|
||||
*/
|
||||
|
||||
emoji *gtsmodel.Emoji
|
||||
data DataFunc
|
||||
postData PostDataCallbackFunc
|
||||
read bool // bool indicating that data function has been triggered already
|
||||
|
||||
/*
|
||||
below fields represent the processing state of the static of the emoji
|
||||
*/
|
||||
staticState int32
|
||||
|
||||
/*
|
||||
below pointers to database and storage are maintained so that
|
||||
the media can store and update itself during processing steps
|
||||
*/
|
||||
|
||||
database db.DB
|
||||
storage *storage.Driver
|
||||
|
||||
err error // error created during processing, if any
|
||||
|
||||
// track whether this emoji has already been put in the databse
|
||||
insertedInDB bool
|
||||
|
||||
// is this a refresh of an existing emoji?
|
||||
refresh bool
|
||||
// if it is a refresh, which alternate ID should we use in the storage and URL paths?
|
||||
newPathID string
|
||||
instAccID string // instance account ID
|
||||
emoji *gtsmodel.Emoji // processing emoji details
|
||||
refresh bool // whether this is an existing emoji being refreshed
|
||||
newPathID string // new emoji path ID to use if refreshed
|
||||
dataFn DataFunc // load-data function, returns media stream
|
||||
postFn PostDataCallbackFunc // post data callback function
|
||||
err error // error encountered during processing
|
||||
manager *manager // manager instance (access to db / storage)
|
||||
once sync.Once // once ensures processing only occurs once
|
||||
}
|
||||
|
||||
// EmojiID returns the ID of the underlying emoji without blocking processing.
|
||||
func (p *ProcessingEmoji) EmojiID() string {
|
||||
return p.emoji.ID
|
||||
return p.emoji.ID // immutable, safe outside mutex.
|
||||
}
|
||||
|
||||
// LoadEmoji blocks until the static and fullsize image
|
||||
// has been processed, and then returns the completed emoji.
|
||||
func (p *ProcessingEmoji) LoadEmoji(ctx context.Context) (*gtsmodel.Emoji, error) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
// only process once.
|
||||
p.once.Do(func() {
|
||||
var err error
|
||||
|
||||
if err := p.store(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
if err != nil {
|
||||
rOld := r // wrap the panic so we don't lose existing returned error
|
||||
r = fmt.Errorf("panic occured after error %q: %v", err.Error(), rOld)
|
||||
}
|
||||
|
||||
if err := p.loadStatic(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Catch any panics and wrap as error.
|
||||
err = fmt.Errorf("caught panic: %v", r)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
// Store error.
|
||||
p.err = err
|
||||
}
|
||||
}()
|
||||
|
||||
// Attempt to store media and calculate
|
||||
// full-size media attachment details.
|
||||
if err = p.store(ctx); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Finish processing by reloading media into
|
||||
// memory to get dimension and generate a thumb.
|
||||
if err = p.finish(ctx); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// store the result in the database before returning it
|
||||
if !p.insertedInDB {
|
||||
if p.refresh {
|
||||
columns := []string{
|
||||
"updated_at",
|
||||
|
|
@ -118,176 +108,195 @@ func (p *ProcessingEmoji) LoadEmoji(ctx context.Context) (*gtsmodel.Emoji, error
|
|||
"shortcode",
|
||||
"uri",
|
||||
}
|
||||
if _, err := p.database.UpdateEmoji(ctx, p.emoji, columns...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
if err := p.database.PutEmoji(ctx, p.emoji); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Existing emoji we're refreshing, so only need to update.
|
||||
_, err = p.manager.db.UpdateEmoji(ctx, p.emoji, columns...)
|
||||
return
|
||||
}
|
||||
p.insertedInDB = true
|
||||
|
||||
// New emoji media, first time caching.
|
||||
err = p.manager.db.PutEmoji(ctx, p.emoji)
|
||||
return //nolint shutup linter i like this here
|
||||
})
|
||||
|
||||
if p.err != nil {
|
||||
return nil, p.err
|
||||
}
|
||||
|
||||
return p.emoji, nil
|
||||
}
|
||||
|
||||
// Finished returns true if processing has finished for both the thumbnail
|
||||
// and full fized version of this piece of media.
|
||||
func (p *ProcessingEmoji) Finished() bool {
|
||||
return atomic.LoadInt32(&p.staticState) == int32(complete)
|
||||
}
|
||||
|
||||
func (p *ProcessingEmoji) loadStatic(ctx context.Context) error {
|
||||
staticState := atomic.LoadInt32(&p.staticState)
|
||||
switch processState(staticState) {
|
||||
case received:
|
||||
// stream the original file out of storage...
|
||||
stored, err := p.storage.GetStream(ctx, p.emoji.ImagePath)
|
||||
if err != nil {
|
||||
p.err = fmt.Errorf("loadStatic: error fetching file from storage: %s", err)
|
||||
atomic.StoreInt32(&p.staticState, int32(errored))
|
||||
return p.err
|
||||
}
|
||||
defer stored.Close()
|
||||
|
||||
// we haven't processed a static version of this emoji yet so do it now
|
||||
static, err := deriveStaticEmoji(stored, p.emoji.ImageContentType)
|
||||
if err != nil {
|
||||
p.err = fmt.Errorf("loadStatic: error deriving static: %s", err)
|
||||
atomic.StoreInt32(&p.staticState, int32(errored))
|
||||
return p.err
|
||||
}
|
||||
|
||||
// Close stored emoji now we're done
|
||||
if err := stored.Close(); err != nil {
|
||||
log.Errorf("loadStatic: error closing stored full size: %s", err)
|
||||
}
|
||||
|
||||
// put the static image in storage
|
||||
if err := p.storage.Put(ctx, p.emoji.ImageStaticPath, static.small); err != nil && err != storage.ErrAlreadyExists {
|
||||
p.err = fmt.Errorf("loadStatic: error storing static: %s", err)
|
||||
atomic.StoreInt32(&p.staticState, int32(errored))
|
||||
return p.err
|
||||
}
|
||||
|
||||
p.emoji.ImageStaticFileSize = len(static.small)
|
||||
|
||||
// we're done processing the static version of the emoji!
|
||||
atomic.StoreInt32(&p.staticState, int32(complete))
|
||||
fallthrough
|
||||
case complete:
|
||||
return nil
|
||||
case errored:
|
||||
return p.err
|
||||
}
|
||||
|
||||
return fmt.Errorf("static processing status %d unknown", p.staticState)
|
||||
}
|
||||
|
||||
// store calls the data function attached to p if it hasn't been called yet,
|
||||
// and updates the underlying attachment fields as necessary. It will then stream
|
||||
// bytes from p's reader directly into storage so that it can be retrieved later.
|
||||
func (p *ProcessingEmoji) store(ctx context.Context) error {
|
||||
// check if we've already done this and bail early if we have
|
||||
if p.read {
|
||||
return nil
|
||||
}
|
||||
|
||||
// execute the data function to get the readcloser out of it
|
||||
rc, fileSize, err := p.data(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("store: error executing data function: %s", err)
|
||||
}
|
||||
|
||||
// defer closing the reader when we're done with it
|
||||
defer func() {
|
||||
if p.postFn == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Ensure post callback gets called.
|
||||
if err := p.postFn(ctx); err != nil {
|
||||
log.Errorf("error executing postdata function: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Load media from provided data fn.
|
||||
rc, sz, err := p.dataFn(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error executing data function: %w", err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
// Ensure data reader gets closed on return.
|
||||
if err := rc.Close(); err != nil {
|
||||
log.Errorf("store: error closing readcloser: %s", err)
|
||||
log.Errorf("error closing data reader: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// execute the postData function no matter what happens
|
||||
defer func() {
|
||||
if p.postData != nil {
|
||||
if err := p.postData(ctx); err != nil {
|
||||
log.Errorf("store: error executing postData: %s", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
// Byte buffer to read file header into.
|
||||
// See: https://en.wikipedia.org/wiki/File_format#File_header
|
||||
// and https://github.com/h2non/filetype
|
||||
hdrBuf := make([]byte, 261)
|
||||
|
||||
// extract no more than 261 bytes from the beginning of the file -- this is the header
|
||||
firstBytes := make([]byte, maxFileHeaderBytes)
|
||||
if _, err := rc.Read(firstBytes); err != nil {
|
||||
return fmt.Errorf("store: error reading initial %d bytes: %s", maxFileHeaderBytes, err)
|
||||
// Read the first 261 header bytes into buffer.
|
||||
if _, err := io.ReadFull(rc, hdrBuf); err != nil {
|
||||
return fmt.Errorf("error reading incoming media: %w", err)
|
||||
}
|
||||
|
||||
// now we have the file header we can work out the content type from it
|
||||
contentType, err := parseContentType(firstBytes)
|
||||
// Parse file type info from header buffer.
|
||||
info, err := filetype.Match(hdrBuf)
|
||||
if err != nil {
|
||||
return fmt.Errorf("store: error parsing content type: %s", err)
|
||||
return fmt.Errorf("error parsing file type: %w", err)
|
||||
}
|
||||
|
||||
// bail if this is a type we can't process
|
||||
if !supportedEmoji(contentType) {
|
||||
return fmt.Errorf("store: content type %s was not valid for an emoji", contentType)
|
||||
switch info.Extension {
|
||||
// only supported emoji types
|
||||
case "gif", "png":
|
||||
|
||||
// unhandled
|
||||
default:
|
||||
return fmt.Errorf("unsupported emoji filetype: %s", info.Extension)
|
||||
}
|
||||
|
||||
// extract the file extension
|
||||
split := strings.Split(contentType, "/")
|
||||
extension := split[1] // something like 'gif'
|
||||
// Recombine header bytes with remaining stream
|
||||
r := io.MultiReader(bytes.NewReader(hdrBuf), rc)
|
||||
|
||||
var maxSize bytesize.Size
|
||||
|
||||
if p.emoji.Domain == "" {
|
||||
// this is a local emoji upload
|
||||
maxSize = config.GetMediaEmojiLocalMaxSize()
|
||||
} else {
|
||||
// this is a remote incoming emoji
|
||||
maxSize = config.GetMediaEmojiRemoteMaxSize()
|
||||
}
|
||||
|
||||
// Check that provided size isn't beyond max. We check beforehand
|
||||
// so that we don't attempt to stream the emoji into storage if not needed.
|
||||
if size := bytesize.Size(sz); sz > 0 && size > maxSize {
|
||||
return fmt.Errorf("given emoji size %s greater than max allowed %s", size, maxSize)
|
||||
}
|
||||
|
||||
// set some additional fields on the emoji now that
|
||||
// we know more about what the underlying image actually is
|
||||
var pathID string
|
||||
|
||||
if p.refresh {
|
||||
// This is a refreshed emoji with a new
|
||||
// path ID that this will be stored under.
|
||||
pathID = p.newPathID
|
||||
} else {
|
||||
// This is a new emoji, simply use provided ID.
|
||||
pathID = p.emoji.ID
|
||||
}
|
||||
p.emoji.ImageURL = uris.GenerateURIForAttachment(p.instanceAccountID, string(TypeEmoji), string(SizeOriginal), pathID, extension)
|
||||
p.emoji.ImagePath = fmt.Sprintf("%s/%s/%s/%s.%s", p.instanceAccountID, TypeEmoji, SizeOriginal, pathID, extension)
|
||||
p.emoji.ImageContentType = contentType
|
||||
|
||||
// concatenate the first bytes with the existing bytes still in the reader (thanks Mara)
|
||||
readerToStore := io.MultiReader(bytes.NewBuffer(firstBytes), rc)
|
||||
// Calculate emoji file path.
|
||||
p.emoji.ImagePath = fmt.Sprintf(
|
||||
"%s/%s/%s/%s.%s",
|
||||
p.instAccID,
|
||||
TypeEmoji,
|
||||
SizeOriginal,
|
||||
pathID,
|
||||
info.Extension,
|
||||
)
|
||||
|
||||
var maxEmojiSize int64
|
||||
if p.emoji.Domain == "" {
|
||||
maxEmojiSize = int64(config.GetMediaEmojiLocalMaxSize())
|
||||
} else {
|
||||
maxEmojiSize = int64(config.GetMediaEmojiRemoteMaxSize())
|
||||
}
|
||||
// This shouldn't already exist, but we do a check as it's worth logging.
|
||||
if have, _ := p.manager.storage.Has(ctx, p.emoji.ImagePath); have {
|
||||
log.Warnf("emoji already exists at storage path: %s", p.emoji.ImagePath)
|
||||
|
||||
// if we know the fileSize already, make sure it's not bigger than our limit
|
||||
var checkedSize bool
|
||||
if fileSize > 0 {
|
||||
checkedSize = true
|
||||
if fileSize > maxEmojiSize {
|
||||
return fmt.Errorf("store: given emoji fileSize (%db) is larger than allowed size (%db)", fileSize, maxEmojiSize)
|
||||
// Attempt to remove existing emoji at storage path (might be broken / out-of-date)
|
||||
if err := p.manager.storage.Delete(ctx, p.emoji.ImagePath); err != nil {
|
||||
return fmt.Errorf("error removing emoji from storage: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// store this for now -- other processes can pull it out of storage as they please
|
||||
if fileSize, err = putStream(ctx, p.storage, p.emoji.ImagePath, readerToStore, fileSize); err != nil {
|
||||
if !errors.Is(err, storage.ErrAlreadyExists) {
|
||||
return fmt.Errorf("store: error storing stream: %s", err)
|
||||
}
|
||||
log.Warnf("emoji %s already exists at storage path: %s", p.emoji.ID, p.emoji.ImagePath)
|
||||
// Write the final image reader stream to our storage.
|
||||
sz, err = p.manager.storage.PutStream(ctx, p.emoji.ImagePath, r)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error writing emoji to storage: %w", err)
|
||||
}
|
||||
|
||||
// if we didn't know the fileSize yet, we do now, so check if we need to
|
||||
if !checkedSize && fileSize > maxEmojiSize {
|
||||
err = fmt.Errorf("store: discovered emoji fileSize (%db) is larger than allowed emojiRemoteMaxSize (%db), will delete from the store now", fileSize, maxEmojiSize)
|
||||
log.Warn(err)
|
||||
if deleteErr := p.storage.Delete(ctx, p.emoji.ImagePath); deleteErr != nil {
|
||||
log.Errorf("store: error removing too-large emoji from the store: %s", deleteErr)
|
||||
// Once again check size in case none was provided previously.
|
||||
if size := bytesize.Size(sz); size > maxSize {
|
||||
if err := p.manager.storage.Delete(ctx, p.emoji.ImagePath); err != nil {
|
||||
log.Errorf("error removing too-large-emoji from storage: %v", err)
|
||||
}
|
||||
return err
|
||||
return fmt.Errorf("calculated emoji size %s greater than max allowed %s", size, maxSize)
|
||||
}
|
||||
|
||||
p.emoji.ImageFileSize = int(fileSize)
|
||||
p.read = true
|
||||
// Fill in remaining attachment data now it's stored.
|
||||
p.emoji.ImageURL = uris.GenerateURIForAttachment(
|
||||
p.instAccID,
|
||||
string(TypeEmoji),
|
||||
string(SizeOriginal),
|
||||
pathID,
|
||||
info.Extension,
|
||||
)
|
||||
p.emoji.ImageContentType = info.MIME.Value
|
||||
p.emoji.ImageFileSize = int(sz)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *ProcessingEmoji) finish(ctx context.Context) error {
|
||||
// Fetch a stream to the original file in storage.
|
||||
rc, err := p.manager.storage.GetStream(ctx, p.emoji.ImagePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading file from storage: %w", err)
|
||||
}
|
||||
defer rc.Close()
|
||||
|
||||
// Decode the image from storage.
|
||||
staticImg, err := decodeImage(rc)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error decoding image: %w", err)
|
||||
}
|
||||
|
||||
// The image should be in-memory by now.
|
||||
if err := rc.Close(); err != nil {
|
||||
return fmt.Errorf("error closing file: %w", err)
|
||||
}
|
||||
|
||||
// This shouldn't already exist, but we do a check as it's worth logging.
|
||||
if have, _ := p.manager.storage.Has(ctx, p.emoji.ImageStaticPath); have {
|
||||
log.Warnf("static emoji already exists at storage path: %s", p.emoji.ImagePath)
|
||||
|
||||
// Attempt to remove static existing emoji at storage path (might be broken / out-of-date)
|
||||
if err := p.manager.storage.Delete(ctx, p.emoji.ImageStaticPath); err != nil {
|
||||
return fmt.Errorf("error removing static emoji from storage: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create an emoji PNG encoder stream.
|
||||
enc := staticImg.ToPNG()
|
||||
|
||||
// Stream-encode the PNG static image into storage.
|
||||
sz, err := p.manager.storage.PutStream(ctx, p.emoji.ImageStaticPath, enc)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error stream-encoding static emoji to storage: %w", err)
|
||||
}
|
||||
|
||||
// Set written image size.
|
||||
p.emoji.ImageStaticFileSize = int(sz)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
@ -406,15 +415,13 @@ func (m *manager) preProcessEmoji(ctx context.Context, data DataFunc, postData P
|
|||
}
|
||||
|
||||
processingEmoji := &ProcessingEmoji{
|
||||
instanceAccountID: instanceAccount.ID,
|
||||
emoji: emoji,
|
||||
data: data,
|
||||
postData: postData,
|
||||
staticState: int32(received),
|
||||
database: m.db,
|
||||
storage: m.storage,
|
||||
refresh: refresh,
|
||||
newPathID: newPathID,
|
||||
instAccID: instanceAccount.ID,
|
||||
emoji: emoji,
|
||||
refresh: refresh,
|
||||
newPathID: newPathID,
|
||||
dataFn: data,
|
||||
postFn: postData,
|
||||
manager: m,
|
||||
}
|
||||
|
||||
return processingEmoji, nil
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue