[performance] media processing improvements (#1288)

* media processor consolidation and reformatting, reduce amount of required syscalls

Signed-off-by: kim <grufwub@gmail.com>

* update go-store library, stream jpeg/png encoding + use buffer pools, improved media processing AlreadyExists error handling

Signed-off-by: kim <grufwub@gmail.com>

* fix duration not being set, fix mp4 test expecting error

Signed-off-by: kim <grufwub@gmail.com>

* fix test expecting media files with different extension

Signed-off-by: kim <grufwub@gmail.com>

* remove unused code

Signed-off-by: kim <grufwub@gmail.com>

* fix expected storage paths in tests, update expected test thumbnails

Signed-off-by: kim <grufwub@gmail.com>

* remove dead code

Signed-off-by: kim <grufwub@gmail.com>

* fix cached presigned s3 url fetching

Signed-off-by: kim <grufwub@gmail.com>

* fix tests

Signed-off-by: kim <grufwub@gmail.com>

* fix test models

Signed-off-by: kim <grufwub@gmail.com>

* update media processing to use sync.Once{} for concurrency protection

Signed-off-by: kim <grufwub@gmail.com>

* shutup linter

Signed-off-by: kim <grufwub@gmail.com>

* fix passing in KVStore GetStream() as stream to PutStream()

Signed-off-by: kim <grufwub@gmail.com>

* fix unlocks of storage keys

Signed-off-by: kim <grufwub@gmail.com>

* whoops, return the error...

Signed-off-by: kim <grufwub@gmail.com>

* pour one out for tobi's code <3

Signed-off-by: kim <grufwub@gmail.com>

* add back the byte slurping code

Signed-off-by: kim <grufwub@gmail.com>

* check for both ErrUnexpectedEOF and EOF

Signed-off-by: kim <grufwub@gmail.com>

* add back links to file format header information

Signed-off-by: kim <grufwub@gmail.com>

Signed-off-by: kim <grufwub@gmail.com>
This commit is contained in:
kim 2023-01-11 11:13:13 +00:00 committed by GitHub
commit 5318054808
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
64 changed files with 1279 additions and 1405 deletions

View file

@ -19,182 +19,167 @@
package media
import (
"bytes"
"errors"
"fmt"
"bufio"
"image"
"image/gif"
"image/color"
"image/draw"
"image/jpeg"
"image/png"
"io"
"sync"
"github.com/buckket/go-blurhash"
"github.com/disintegration/imaging"
_ "golang.org/x/image/webp" // blank import to support WebP decoding
"github.com/superseriousbusiness/gotosocial/internal/iotools"
// import to init webp encode/decoding.
_ "golang.org/x/image/webp"
)
const (
thumbnailMaxWidth = 512
thumbnailMaxHeight = 512
var (
	// jpegBufferPool recycles bufio.Writer instances used while
	// encoding JPEGs. Fresh writers are created with no underlying
	// destination; callers must Reset() them onto a real writer.
	jpegBufferPool = sync.Pool{
		New: func() any { return bufio.NewWriter(nil) },
	}

	// pngEncoder is the package-wide PNG encoder. It uses the
	// default compression level and draws its scratch buffers
	// from a shared pngEncoderBufferPool.
	pngEncoder = png.Encoder{
		BufferPool:       new(pngEncoderBufferPool),
		CompressionLevel: png.DefaultCompression,
	}
)
func decodeGif(r io.Reader) (*mediaMeta, error) {
gif, err := gif.DecodeAll(r)
// gtsImage wraps a standard library image.Image so that helper
// methods (dimension / aspect ratio calculations, thumbnailing,
// streamed encoding) can be attached to it.
type gtsImage struct{ image image.Image }

// blankImage returns a new RGBA image of the requested width and
// height in which every pixel is set to the package's fixed fill
// color (R=42, G=43, B=47, A=0).
func blankImage(width int, height int) *gtsImage {
	// Allocate the canvas at the requested dimensions.
	canvas := image.NewRGBA(image.Rect(0, 0, width, height))

	// Paint the whole canvas with the uniform fill color,
	// overwriting (draw.Src) whatever is already there.
	fill := image.NewUniform(color.RGBA{42, 43, 47, 0})
	draw.Draw(canvas, canvas.Bounds(), fill, image.Point{}, draw.Src)

	return &gtsImage{image: canvas}
}
// decodeImage will decode image from reader stream and return image wrapped in our own gtsImage{} type.
func decodeImage(r io.Reader, opts ...imaging.DecodeOption) (*gtsImage, error) {
img, err := imaging.Decode(r, opts...)
if err != nil {
return nil, err
}
// use the first frame to get the static characteristics
width := gif.Config.Width
height := gif.Config.Height
size := width * height
aspect := float32(width) / float32(height)
return &mediaMeta{
width: width,
height: height,
size: size,
aspect: aspect,
}, nil
return &gtsImage{image: img}, nil
}
func decodeImage(r io.Reader, contentType string) (*mediaMeta, error) {
var i image.Image
var err error
switch contentType {
case mimeImageJpeg, mimeImageWebp:
i, err = imaging.Decode(r, imaging.AutoOrientation(true))
case mimeImagePng:
strippedPngReader := io.Reader(&PNGAncillaryChunkStripper{
Reader: r,
})
i, err = imaging.Decode(strippedPngReader, imaging.AutoOrientation(true))
default:
err = fmt.Errorf("content type %s not recognised", contentType)
}
if err != nil {
return nil, err
}
if i == nil {
return nil, errors.New("processed image was nil")
}
width := i.Bounds().Size().X
height := i.Bounds().Size().Y
size := width * height
aspect := float32(width) / float32(height)
return &mediaMeta{
width: width,
height: height,
size: size,
aspect: aspect,
}, nil
// Width reports the horizontal extent of the
// wrapped image's bounds, in pixels.
func (m *gtsImage) Width() uint32 {
	bounds := m.image.Bounds()
	return uint32(bounds.Dx())
}
// deriveStaticEmoji takes a given gif or png of an emoji, decodes it, and re-encodes it as a static png.
func deriveStaticEmoji(r io.Reader, contentType string) (*mediaMeta, error) {
var i image.Image
var err error
switch contentType {
case mimeImagePng:
i, err = StrippedPngDecode(r)
if err != nil {
return nil, err
}
case mimeImageGif:
i, err = gif.Decode(r)
if err != nil {
return nil, err
}
default:
return nil, fmt.Errorf("content type %s not allowed for emoji", contentType)
}
out := &bytes.Buffer{}
if err := png.Encode(out, i); err != nil {
return nil, err
}
return &mediaMeta{
small: out.Bytes(),
}, nil
// Height reports the vertical extent of the
// wrapped image's bounds, in pixels.
func (m *gtsImage) Height() uint32 {
	bounds := m.image.Bounds()
	return uint32(bounds.Dy())
}
// deriveThumbnailFromImage returns a byte slice and metadata for a thumbnail
// of a given piece of media, or an error if something goes wrong.
//
// If createBlurhash is true, then a blurhash will also be generated from a tiny
// version of the image. This costs precious CPU cycles, so only use it if you
// really need a blurhash and don't have one already.
//
// If createBlurhash is false, then the blurhash field on the returned ImageAndMeta
// will be an empty string.
func deriveThumbnailFromImage(r io.Reader, contentType string, createBlurhash bool) (*mediaMeta, error) {
var i image.Image
var err error
switch contentType {
case mimeImageJpeg, mimeImageGif, mimeImageWebp:
i, err = imaging.Decode(r, imaging.AutoOrientation(true))
case mimeImagePng:
strippedPngReader := io.Reader(&PNGAncillaryChunkStripper{
Reader: r,
})
i, err = imaging.Decode(strippedPngReader, imaging.AutoOrientation(true))
default:
err = fmt.Errorf("content type %s can't be thumbnailed as an image", contentType)
}
if err != nil {
return nil, fmt.Errorf("error decoding %s: %s", contentType, err)
}
originalX := i.Bounds().Size().X
originalY := i.Bounds().Size().Y
var thumb image.Image
if originalX <= thumbnailMaxWidth && originalY <= thumbnailMaxHeight {
// it's already small, no need to resize
thumb = i
} else {
thumb = imaging.Fit(i, thumbnailMaxWidth, thumbnailMaxHeight, imaging.Linear)
}
thumbX := thumb.Bounds().Size().X
thumbY := thumb.Bounds().Size().Y
size := thumbX * thumbY
aspect := float32(thumbX) / float32(thumbY)
im := &mediaMeta{
width: thumbX,
height: thumbY,
size: size,
aspect: aspect,
}
if createBlurhash {
// for generating blurhashes, it's more cost effective to lose detail rather than
// pass a big image into the blurhash algorithm, so make a teeny tiny version
tiny := imaging.Resize(thumb, 32, 0, imaging.NearestNeighbor)
bh, err := blurhash.Encode(4, 3, tiny)
if err != nil {
return nil, fmt.Errorf("error creating blurhash: %s", err)
}
im.blurhash = bh
}
out := &bytes.Buffer{}
if err := jpeg.Encode(out, thumb, &jpeg.Options{
// Quality isn't extremely important for thumbnails, so 75 is "good enough"
Quality: 75,
}); err != nil {
return nil, fmt.Errorf("error encoding thumbnail: %s", err)
}
im.small = out.Bytes()
return im, nil
// Size reports the pixel count of the wrapped image, i.e.
// width multiplied by height, computed in 64 bits.
func (m *gtsImage) Size() uint64 {
	dims := m.image.Bounds().Size()
	return uint64(dims.X) * uint64(dims.Y)
}
// AspectRatio reports width divided by height for the wrapped image.
// The division is performed in float32 with no guard against a zero
// height, so an image with no rows yields a non-finite result.
func (m *gtsImage) AspectRatio() float32 {
	dims := m.image.Bounds().Size()
	return float32(dims.X) / float32(dims.Y)
}
// Thumbnail produces a new gtsImage{} no larger than 512x512 pixels.
// Images exceeding either limit are scaled down with imaging.Fit();
// images already within the limits are returned as a clone.
func (m *gtsImage) Thumbnail() *gtsImage {
	// Maximum thumbnail dimensions.
	const maxWidth, maxHeight = 512, 512

	// Too large in at least one dimension: shrink to fit the thumbnail maximum.
	if m.Width() > maxWidth || m.Height() > maxHeight {
		fitted := imaging.Fit(m.image, maxWidth, maxHeight, imaging.Linear)
		return &gtsImage{image: fitted}
	}

	// Already within thumbnail bounds, a copy is enough.
	return &gtsImage{image: imaging.Clone(m.image)}
}
// Blurhash computes a blurhash string for the wrapped image,
// using 4 horizontal and 3 vertical components.
func (m *gtsImage) Blurhash() (string, error) {
	const (
		tinyWidth   = 32 // width of the reduced image fed to the encoder
		xComponents = 4
		yComponents = 3
	)

	// A blurhash is blurry by nature, so fine detail is wasted on it:
	// shrink the image first to keep the encoding cheap. A height of 0
	// asks imaging.Resize() to keep the aspect ratio.
	reduced := imaging.Resize(m.image, tinyWidth, 0, imaging.NearestNeighbor)

	return blurhash.Encode(xComponents, yComponents, reduced)
}
// ToJPEG returns a reader backed by a streaming JPEG encode of the
// receiving image. The encode function is handed to
// iotools.StreamWriteFunc(), which supplies the destination writer;
// opts is passed through to jpeg.Encode() unchanged.
func (m *gtsImage) ToJPEG(opts *jpeg.Options) io.Reader {
	encode := func(w io.Writer) error {
		// Borrow a pooled buffered writer targeting w.
		buffered := getJPEGBuffer(w)

		// Encode through the buffered writer.
		encodeErr := jpeg.Encode(buffered, m.image, opts)

		// Hand the writer back to the pool.
		//
		// NOTE: no explicit Flush() is needed here,
		// jpeg.Encode() already flushes a writer
		// that provides a Flush() method.
		putJPEGBuffer(buffered)

		return encodeErr
	}
	return iotools.StreamWriteFunc(encode)
}
// ToPNG returns a reader backed by a streaming PNG encode of the
// receiving image, performed with the package-level pngEncoder. The
// encode function is handed to iotools.StreamWriteFunc(), which
// supplies the destination writer.
func (m *gtsImage) ToPNG() io.Reader {
	encode := func(w io.Writer) error {
		return pngEncoder.Encode(w, m.image)
	}
	return iotools.StreamWriteFunc(encode)
}
// getJPEGBuffer takes a buffered writer from the global
// JPEG buffer pool and resets it to write into w.
func getJPEGBuffer(w io.Writer) *bufio.Writer {
	pooled := jpegBufferPool.Get()
	bw, _ := pooled.(*bufio.Writer)
	bw.Reset(w)
	return bw
}
// putJPEGBuffer returns buf to the global JPEG buffer pool. The
// writer is first reset onto a nil destination so that the pooled
// value does not keep a reference to the writer it last targeted.
func putJPEGBuffer(buf *bufio.Writer) {
	var none io.Writer
	buf.Reset(none)
	jpegBufferPool.Put(buf)
}
// pngEncoderBufferPool adapts a sync.Pool to the
// png.EncoderBufferPool interface used by png.Encoder.
type pngEncoderBufferPool sync.Pool

// Get takes a buffer from the underlying pool. It returns nil when
// the pool has nothing to offer (no New function is configured).
func (p *pngEncoderBufferPool) Get() *png.EncoderBuffer {
	pool := (*sync.Pool)(p)
	if buf, ok := pool.Get().(*png.EncoderBuffer); ok {
		return buf
	}
	return nil
}

// Put hands buf back to the underlying pool for later reuse.
func (p *pngEncoderBufferPool) Put(buf *png.EncoderBuffer) {
	pool := (*sync.Pool)(p)
	pool.Put(buf)
}

View file

@ -148,9 +148,6 @@ func NewManager(database db.DB, storage *storage.Driver) (Manager, error) {
// Prepare the media worker pool
m.mediaWorker = concurrency.NewWorkerPool[*ProcessingMedia](-1, 10)
m.mediaWorker.SetProcessor(func(ctx context.Context, media *ProcessingMedia) error {
if err := ctx.Err(); err != nil {
return err
}
if _, err := media.LoadAttachment(ctx); err != nil {
return fmt.Errorf("error loading media %s: %v", media.AttachmentID(), err)
}
@ -160,9 +157,6 @@ func NewManager(database db.DB, storage *storage.Driver) (Manager, error) {
// Prepare the emoji worker pool
m.emojiWorker = concurrency.NewWorkerPool[*ProcessingEmoji](-1, 10)
m.emojiWorker.SetProcessor(func(ctx context.Context, emoji *ProcessingEmoji) error {
if err := ctx.Err(); err != nil {
return err
}
if _, err := emoji.LoadEmoji(ctx); err != nil {
return fmt.Errorf("error loading emoji %s: %v", emoji.EmojiID(), err)
}

View file

@ -26,6 +26,7 @@ import (
"os"
"path"
"testing"
"time"
"codeberg.org/gruf/go-store/v2/kv"
"codeberg.org/gruf/go-store/v2/storage"
@ -33,7 +34,6 @@ import (
gtsmodel "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media"
gtsstorage "github.com/superseriousbusiness/gotosocial/internal/storage"
"github.com/superseriousbusiness/gotosocial/testrig"
)
type ManagerTestSuite struct {
@ -214,7 +214,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLarge() {
// do a blocking call to fetch the emoji
emoji, err := processingEmoji.LoadEmoji(ctx)
suite.EqualError(err, "store: given emoji fileSize (645688b) is larger than allowed size (51200b)")
suite.EqualError(err, "given emoji size 630kiB greater than max allowed 50.0kiB")
suite.Nil(emoji)
}
@ -227,7 +227,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLargeNoSizeGiven() {
if err != nil {
panic(err)
}
return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil
return io.NopCloser(bytes.NewBuffer(b)), -1, nil
}
emojiID := "01GDQ9G782X42BAMFASKP64343"
@ -238,7 +238,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLargeNoSizeGiven() {
// do a blocking call to fetch the emoji
emoji, err := processingEmoji.LoadEmoji(ctx)
suite.EqualError(err, "store: given emoji fileSize (645688b) is larger than allowed size (51200b)")
suite.EqualError(err, "calculated emoji size 630kiB greater than max allowed 50.0kiB")
suite.Nil(emoji)
}
@ -396,6 +396,9 @@ func (suite *ManagerTestSuite) TestSlothVineProcessBlocking() {
// fetch the attachment id from the processing media
attachmentID := processingMedia.AttachmentID()
// Give time for processing
time.Sleep(time.Second * 3)
// do a blocking call to fetch the attachment
attachment, err := processingMedia.LoadAttachment(ctx)
suite.NoError(err)
@ -420,7 +423,7 @@ func (suite *ManagerTestSuite) TestSlothVineProcessBlocking() {
suite.Equal("video/mp4", attachment.File.ContentType)
suite.Equal("image/jpeg", attachment.Thumbnail.ContentType)
suite.Equal(312413, attachment.File.FileSize)
suite.Equal("", attachment.Blurhash)
suite.Equal("L00000fQfQfQfQfQfQfQfQfQfQfQ", attachment.Blurhash)
// now make sure the attachment is in the database
dbAttachment, err := suite.db.GetAttachmentByID(ctx, attachmentID)
@ -491,12 +494,12 @@ func (suite *ManagerTestSuite) TestLongerMp4ProcessBlocking() {
suite.EqualValues(10, *attachment.FileMeta.Original.Framerate)
suite.EqualValues(0xc8fb, *attachment.FileMeta.Original.Bitrate)
suite.EqualValues(gtsmodel.Small{
Width: 600, Height: 330, Size: 198000, Aspect: 1.8181819,
Width: 512, Height: 281, Size: 143872, Aspect: 1.822064,
}, attachment.FileMeta.Small)
suite.Equal("video/mp4", attachment.File.ContentType)
suite.Equal("image/jpeg", attachment.Thumbnail.ContentType)
suite.Equal(109549, attachment.File.FileSize)
suite.Equal("", attachment.Blurhash)
suite.Equal("L00000fQfQfQfQfQfQfQfQfQfQfQ", attachment.Blurhash)
// now make sure the attachment is in the database
dbAttachment, err := suite.db.GetAttachmentByID(ctx, attachmentID)
@ -550,7 +553,7 @@ func (suite *ManagerTestSuite) TestNotAnMp4ProcessBlocking() {
// we should get an error while loading
attachment, err := processingMedia.LoadAttachment(ctx)
suite.EqualError(err, "\"video width could not be discovered\",\"video height could not be discovered\",\"video duration could not be discovered\",\"video framerate could not be discovered\",\"video bitrate could not be discovered\"")
suite.EqualError(err, "error decoding video: error determining video metadata: [width height duration framerate bitrate]")
suite.Nil(attachment)
}
@ -928,7 +931,8 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlockingWithCallback() {
}
func (suite *ManagerTestSuite) TestSimpleJpegProcessAsync() {
ctx := context.Background()
ctx, cncl := context.WithTimeout(context.Background(), time.Second*30)
defer cncl()
data := func(_ context.Context) (io.ReadCloser, int64, error) {
// load bytes from a test image
@ -944,15 +948,12 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessAsync() {
// process the media with no additional info provided
processingMedia, err := suite.manager.ProcessMedia(ctx, data, nil, accountID, nil)
suite.NoError(err)
// fetch the attachment id from the processing media
attachmentID := processingMedia.AttachmentID()
// wait for the media to finish processing
if !testrig.WaitFor(func() bool {
return processingMedia.Finished()
}) {
suite.FailNow("timed out waiting for media to be processed")
}
// Give time for processing to happen.
time.Sleep(time.Second * 3)
// fetch the attachment from the database
attachment, err := suite.db.GetAttachmentByID(ctx, attachmentID)

View file

@ -75,8 +75,6 @@ package media
import (
"encoding/binary"
"image"
"image/png"
"io"
)
@ -192,13 +190,3 @@ func (r *PNGAncillaryChunkStripper) Read(p []byte) (int, error) {
}
}
}
// StrippedPngDecode decodes a PNG from r after passing the stream
// through a PNGAncillaryChunkStripper, which allows more lenient
// decoding of PNG files.
//
// see: https://github.com/golang/go/issues/43382
// and: https://github.com/google/wuffs/blob/414a011491ff513b86d8694c5d71800f3cb5a715/script/strip-png-ancillary-chunks.go
func StrippedPngDecode(r io.Reader) (image.Image, error) {
	stripper := &PNGAncillaryChunkStripper{Reader: r}
	return png.Decode(stripper)
}

View file

@ -24,84 +24,74 @@ import (
"errors"
"fmt"
"io"
"strings"
"sync"
"sync/atomic"
"time"
"codeberg.org/gruf/go-bytesize"
gostore "codeberg.org/gruf/go-store/v2/storage"
"github.com/h2non/filetype"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/storage"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
// ProcessingEmoji represents an emoji currently processing. It exposes
// various functions for retrieving data from the process.
type ProcessingEmoji struct {
mu sync.Mutex
// id of this instance's account -- pinned for convenience here so we only need to fetch it once
instanceAccountID string
/*
below fields should be set on newly created media;
emoji will be updated incrementally as media goes through processing
*/
emoji *gtsmodel.Emoji
data DataFunc
postData PostDataCallbackFunc
read bool // bool indicating that data function has been triggered already
/*
below fields represent the processing state of the static of the emoji
*/
staticState int32
/*
below pointers to database and storage are maintained so that
the media can store and update itself during processing steps
*/
database db.DB
storage *storage.Driver
err error // error created during processing, if any
// track whether this emoji has already been put in the databse
insertedInDB bool
// is this a refresh of an existing emoji?
refresh bool
// if it is a refresh, which alternate ID should we use in the storage and URL paths?
newPathID string
instAccID string // instance account ID
emoji *gtsmodel.Emoji // processing emoji details
refresh bool // whether this is an existing emoji being refreshed
newPathID string // new emoji path ID to use if refreshed
dataFn DataFunc // load-data function, returns media stream
postFn PostDataCallbackFunc // post data callback function
err error // error encountered during processing
manager *manager // manager instance (access to db / storage)
once sync.Once // once ensures processing only occurs once
}
// EmojiID returns the ID of the underlying emoji without blocking processing.
func (p *ProcessingEmoji) EmojiID() string {
return p.emoji.ID
return p.emoji.ID // immutable, safe outside mutex.
}
// LoadEmoji blocks until the static and fullsize image
// has been processed, and then returns the completed emoji.
func (p *ProcessingEmoji) LoadEmoji(ctx context.Context) (*gtsmodel.Emoji, error) {
p.mu.Lock()
defer p.mu.Unlock()
// only process once.
p.once.Do(func() {
var err error
if err := p.store(ctx); err != nil {
return nil, err
}
defer func() {
if r := recover(); r != nil {
if err != nil {
rOld := r // wrap the panic so we don't lose existing returned error
r = fmt.Errorf("panic occured after error %q: %v", err.Error(), rOld)
}
if err := p.loadStatic(ctx); err != nil {
return nil, err
}
// Catch any panics and wrap as error.
err = fmt.Errorf("caught panic: %v", r)
}
if err != nil {
// Store error.
p.err = err
}
}()
// Attempt to store media and calculate
// full-size media attachment details.
if err = p.store(ctx); err != nil {
return
}
// Finish processing by reloading media into
// memory to get dimension and generate a thumb.
if err = p.finish(ctx); err != nil {
return
}
// store the result in the database before returning it
if !p.insertedInDB {
if p.refresh {
columns := []string{
"updated_at",
@ -118,176 +108,195 @@ func (p *ProcessingEmoji) LoadEmoji(ctx context.Context) (*gtsmodel.Emoji, error
"shortcode",
"uri",
}
if _, err := p.database.UpdateEmoji(ctx, p.emoji, columns...); err != nil {
return nil, err
}
} else {
if err := p.database.PutEmoji(ctx, p.emoji); err != nil {
return nil, err
}
// Existing emoji we're refreshing, so only need to update.
_, err = p.manager.db.UpdateEmoji(ctx, p.emoji, columns...)
return
}
p.insertedInDB = true
// New emoji media, first time caching.
err = p.manager.db.PutEmoji(ctx, p.emoji)
return //nolint shutup linter i like this here
})
if p.err != nil {
return nil, p.err
}
return p.emoji, nil
}
// Finished reports whether processing of the static version of
// this emoji has completed. The state is read atomically.
func (p *ProcessingEmoji) Finished() bool {
	current := atomic.LoadInt32(&p.staticState)
	return current == int32(complete)
}
// loadStatic derives the static version of the emoji from the stored
// original and writes it to storage, unless that has already been done.
//
// It returns nil when the static state is (or becomes) complete, the
// previously recorded error when the state is errored, and an error
// for any unrecognised state. Failures during processing are recorded
// in p.err and flip the static state to errored.
func (p *ProcessingEmoji) loadStatic(ctx context.Context) error {
	// fail records err on the receiver, marks the static
	// state as errored, and hands the error back to return.
	fail := func(err error) error {
		p.err = err
		atomic.StoreInt32(&p.staticState, int32(errored))
		return p.err
	}

	switch processState(atomic.LoadInt32(&p.staticState)) {
	case complete:
		return nil
	case errored:
		return p.err
	case received:
		// No static version processed yet, do so below.
	default:
		return fmt.Errorf("static processing status %d unknown", p.staticState)
	}

	// Stream the original file out of storage.
	original, err := p.storage.GetStream(ctx, p.emoji.ImagePath)
	if err != nil {
		return fail(fmt.Errorf("loadStatic: error fetching file from storage: %s", err))
	}
	defer original.Close()

	// Derive the static version from the original.
	derived, err := deriveStaticEmoji(original, p.emoji.ImageContentType)
	if err != nil {
		return fail(fmt.Errorf("loadStatic: error deriving static: %s", err))
	}

	// Done with the original stream; a close failure is only logged.
	if closeErr := original.Close(); closeErr != nil {
		log.Errorf("loadStatic: error closing stored full size: %s", closeErr)
	}

	// Store the static image, tolerating an already-existing entry.
	putErr := p.storage.Put(ctx, p.emoji.ImageStaticPath, derived.small)
	if putErr != nil && putErr != storage.ErrAlreadyExists {
		return fail(fmt.Errorf("loadStatic: error storing static: %s", putErr))
	}
	p.emoji.ImageStaticFileSize = len(derived.small)

	// Static version of the emoji is now processed.
	atomic.StoreInt32(&p.staticState, int32(complete))
	return nil
}
// store calls the data function attached to p if it hasn't been called yet,
// and updates the underlying attachment fields as necessary. It will then stream
// bytes from p's reader directly into storage so that it can be retrieved later.
func (p *ProcessingEmoji) store(ctx context.Context) error {
// check if we've already done this and bail early if we have
if p.read {
return nil
}
// execute the data function to get the readcloser out of it
rc, fileSize, err := p.data(ctx)
if err != nil {
return fmt.Errorf("store: error executing data function: %s", err)
}
// defer closing the reader when we're done with it
defer func() {
if p.postFn == nil {
return
}
// Ensure post callback gets called.
if err := p.postFn(ctx); err != nil {
log.Errorf("error executing postdata function: %v", err)
}
}()
// Load media from provided data fn.
rc, sz, err := p.dataFn(ctx)
if err != nil {
return fmt.Errorf("error executing data function: %w", err)
}
defer func() {
// Ensure data reader gets closed on return.
if err := rc.Close(); err != nil {
log.Errorf("store: error closing readcloser: %s", err)
log.Errorf("error closing data reader: %v", err)
}
}()
// execute the postData function no matter what happens
defer func() {
if p.postData != nil {
if err := p.postData(ctx); err != nil {
log.Errorf("store: error executing postData: %s", err)
}
}
}()
// Byte buffer to read file header into.
// See: https://en.wikipedia.org/wiki/File_format#File_header
// and https://github.com/h2non/filetype
hdrBuf := make([]byte, 261)
// extract no more than 261 bytes from the beginning of the file -- this is the header
firstBytes := make([]byte, maxFileHeaderBytes)
if _, err := rc.Read(firstBytes); err != nil {
return fmt.Errorf("store: error reading initial %d bytes: %s", maxFileHeaderBytes, err)
// Read the first 261 header bytes into buffer.
if _, err := io.ReadFull(rc, hdrBuf); err != nil {
return fmt.Errorf("error reading incoming media: %w", err)
}
// now we have the file header we can work out the content type from it
contentType, err := parseContentType(firstBytes)
// Parse file type info from header buffer.
info, err := filetype.Match(hdrBuf)
if err != nil {
return fmt.Errorf("store: error parsing content type: %s", err)
return fmt.Errorf("error parsing file type: %w", err)
}
// bail if this is a type we can't process
if !supportedEmoji(contentType) {
return fmt.Errorf("store: content type %s was not valid for an emoji", contentType)
switch info.Extension {
// only supported emoji types
case "gif", "png":
// unhandled
default:
return fmt.Errorf("unsupported emoji filetype: %s", info.Extension)
}
// extract the file extension
split := strings.Split(contentType, "/")
extension := split[1] // something like 'gif'
// Recombine header bytes with remaining stream
r := io.MultiReader(bytes.NewReader(hdrBuf), rc)
var maxSize bytesize.Size
if p.emoji.Domain == "" {
// this is a local emoji upload
maxSize = config.GetMediaEmojiLocalMaxSize()
} else {
// this is a remote incoming emoji
maxSize = config.GetMediaEmojiRemoteMaxSize()
}
// Check that provided size isn't beyond max. We check beforehand
// so that we don't attempt to stream the emoji into storage if not needed.
if size := bytesize.Size(sz); sz > 0 && size > maxSize {
return fmt.Errorf("given emoji size %s greater than max allowed %s", size, maxSize)
}
// set some additional fields on the emoji now that
// we know more about what the underlying image actually is
var pathID string
if p.refresh {
// This is a refreshed emoji with a new
// path ID that this will be stored under.
pathID = p.newPathID
} else {
// This is a new emoji, simply use provided ID.
pathID = p.emoji.ID
}
p.emoji.ImageURL = uris.GenerateURIForAttachment(p.instanceAccountID, string(TypeEmoji), string(SizeOriginal), pathID, extension)
p.emoji.ImagePath = fmt.Sprintf("%s/%s/%s/%s.%s", p.instanceAccountID, TypeEmoji, SizeOriginal, pathID, extension)
p.emoji.ImageContentType = contentType
// concatenate the first bytes with the existing bytes still in the reader (thanks Mara)
readerToStore := io.MultiReader(bytes.NewBuffer(firstBytes), rc)
// Calculate emoji file path.
p.emoji.ImagePath = fmt.Sprintf(
"%s/%s/%s/%s.%s",
p.instAccID,
TypeEmoji,
SizeOriginal,
pathID,
info.Extension,
)
var maxEmojiSize int64
if p.emoji.Domain == "" {
maxEmojiSize = int64(config.GetMediaEmojiLocalMaxSize())
} else {
maxEmojiSize = int64(config.GetMediaEmojiRemoteMaxSize())
}
// This shouldn't already exist, but we do a check as it's worth logging.
if have, _ := p.manager.storage.Has(ctx, p.emoji.ImagePath); have {
log.Warnf("emoji already exists at storage path: %s", p.emoji.ImagePath)
// if we know the fileSize already, make sure it's not bigger than our limit
var checkedSize bool
if fileSize > 0 {
checkedSize = true
if fileSize > maxEmojiSize {
return fmt.Errorf("store: given emoji fileSize (%db) is larger than allowed size (%db)", fileSize, maxEmojiSize)
// Attempt to remove existing emoji at storage path (might be broken / out-of-date)
if err := p.manager.storage.Delete(ctx, p.emoji.ImagePath); err != nil {
return fmt.Errorf("error removing emoji from storage: %v", err)
}
}
// store this for now -- other processes can pull it out of storage as they please
if fileSize, err = putStream(ctx, p.storage, p.emoji.ImagePath, readerToStore, fileSize); err != nil {
if !errors.Is(err, storage.ErrAlreadyExists) {
return fmt.Errorf("store: error storing stream: %s", err)
}
log.Warnf("emoji %s already exists at storage path: %s", p.emoji.ID, p.emoji.ImagePath)
// Write the final image reader stream to our storage.
sz, err = p.manager.storage.PutStream(ctx, p.emoji.ImagePath, r)
if err != nil {
return fmt.Errorf("error writing emoji to storage: %w", err)
}
// if we didn't know the fileSize yet, we do now, so check if we need to
if !checkedSize && fileSize > maxEmojiSize {
err = fmt.Errorf("store: discovered emoji fileSize (%db) is larger than allowed emojiRemoteMaxSize (%db), will delete from the store now", fileSize, maxEmojiSize)
log.Warn(err)
if deleteErr := p.storage.Delete(ctx, p.emoji.ImagePath); deleteErr != nil {
log.Errorf("store: error removing too-large emoji from the store: %s", deleteErr)
// Once again check size in case none was provided previously.
if size := bytesize.Size(sz); size > maxSize {
if err := p.manager.storage.Delete(ctx, p.emoji.ImagePath); err != nil {
log.Errorf("error removing too-large-emoji from storage: %v", err)
}
return err
return fmt.Errorf("calculated emoji size %s greater than max allowed %s", size, maxSize)
}
p.emoji.ImageFileSize = int(fileSize)
p.read = true
// Fill in remaining attachment data now it's stored.
p.emoji.ImageURL = uris.GenerateURIForAttachment(
p.instAccID,
string(TypeEmoji),
string(SizeOriginal),
pathID,
info.Extension,
)
p.emoji.ImageContentType = info.MIME.Value
p.emoji.ImageFileSize = int(sz)
return nil
}
// finish completes emoji processing: it loads the stored original
// emoji back out of storage, decodes it, and stream-encodes a PNG
// static version into storage at p.emoji.ImageStaticPath, recording
// the number of bytes written in p.emoji.ImageStaticFileSize.
//
// Any entry already present at the static path is removed first.
// An error is returned if loading, decoding, closing, removing an
// existing entry, or writing the static image fails.
func (p *ProcessingEmoji) finish(ctx context.Context) error {
	// Fetch a stream to the original file in storage.
	rc, err := p.manager.storage.GetStream(ctx, p.emoji.ImagePath)
	if err != nil {
		return fmt.Errorf("error loading file from storage: %w", err)
	}
	// Safety net for the early-return paths below; the
	// success path closes explicitly to check the error.
	defer rc.Close()

	// Decode the image from storage.
	staticImg, err := decodeImage(rc)
	if err != nil {
		return fmt.Errorf("error decoding image: %w", err)
	}

	// The image should be in-memory by now.
	if err := rc.Close(); err != nil {
		return fmt.Errorf("error closing file: %w", err)
	}

	// This shouldn't already exist, but we do a check as it's worth logging.
	// The error from Has() is deliberately ignored (best-effort check).
	if have, _ := p.manager.storage.Has(ctx, p.emoji.ImageStaticPath); have {
		// Log the static path, as that is the path that was checked.
		log.Warnf("static emoji already exists at storage path: %s", p.emoji.ImageStaticPath)

		// Attempt to remove static existing emoji at storage path (might be broken / out-of-date)
		if err := p.manager.storage.Delete(ctx, p.emoji.ImageStaticPath); err != nil {
			return fmt.Errorf("error removing static emoji from storage: %w", err)
		}
	}

	// Create an emoji PNG encoder stream.
	enc := staticImg.ToPNG()

	// Stream-encode the PNG static image into storage.
	sz, err := p.manager.storage.PutStream(ctx, p.emoji.ImageStaticPath, enc)
	if err != nil {
		return fmt.Errorf("error stream-encoding static emoji to storage: %w", err)
	}

	// Set written image size.
	p.emoji.ImageStaticFileSize = int(sz)

	return nil
}
@ -406,15 +415,13 @@ func (m *manager) preProcessEmoji(ctx context.Context, data DataFunc, postData P
}
processingEmoji := &ProcessingEmoji{
instanceAccountID: instanceAccount.ID,
emoji: emoji,
data: data,
postData: postData,
staticState: int32(received),
database: m.db,
storage: m.storage,
refresh: refresh,
newPathID: newPathID,
instAccID: instanceAccount.ID,
emoji: emoji,
refresh: refresh,
newPathID: newPathID,
dataFn: data,
postFn: postData,
manager: m,
}
return processingEmoji, nil

View file

@ -21,387 +21,329 @@ package media
import (
"bytes"
"context"
"errors"
"fmt"
"image/jpeg"
"io"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/disintegration/imaging"
"github.com/h2non/filetype"
terminator "github.com/superseriousbusiness/exif-terminator"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/storage"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
// ProcessingMedia represents a piece of media that is currently being processed. It exposes
// various functions for retrieving data from the process.
type ProcessingMedia struct {
mu sync.Mutex
/*
below fields should be set on newly created media;
attachment will be updated incrementally as media goes through processing
*/
attachment *gtsmodel.MediaAttachment
data DataFunc
postData PostDataCallbackFunc
read bool // bool indicating that data function has been triggered already
thumbState int32 // the processing state of the media thumbnail
fullSizeState int32 // the processing state of the full-sized media
/*
below pointers to database and storage are maintained so that
the media can store and update itself during processing steps
*/
database db.DB
storage *storage.Driver
err error // error created during processing, if any
// track whether this media has already been put in the database
insertedInDB bool
// true if this is a recache, false if it's brand new media
recache bool
media *gtsmodel.MediaAttachment // processing media attachment details
recache bool // recaching existing (uncached) media
dataFn DataFunc // load-data function, returns media stream
postFn PostDataCallbackFunc // post data callback function
err error // error encountered during processing
manager *manager // manager instance (access to db / storage)
once sync.Once // once ensures processing only occurs once
}
// AttachmentID returns the ID of the underlying media attachment without blocking processing.
func (p *ProcessingMedia) AttachmentID() string {
return p.attachment.ID
return p.media.ID // immutable, safe outside mutex.
}
// LoadAttachment blocks until the thumbnail and fullsize content
// has been processed, and then returns the completed attachment.
func (p *ProcessingMedia) LoadAttachment(ctx context.Context) (*gtsmodel.MediaAttachment, error) {
p.mu.Lock()
defer p.mu.Unlock()
if err := p.store(ctx); err != nil {
return nil, err
}
if err := p.loadFullSize(ctx); err != nil {
return nil, err
}
if err := p.loadThumb(ctx); err != nil {
return nil, err
}
if !p.insertedInDB {
if p.recache {
// This is an existing media attachment we're recaching, so only need to update it
if err := p.database.UpdateByID(ctx, p.attachment, p.attachment.ID); err != nil {
return nil, err
}
} else {
// This is a new media attachment we're caching for first time
if err := p.database.Put(ctx, p.attachment); err != nil {
return nil, err
}
}
// Mark this as stored in DB
p.insertedInDB = true
}
log.Tracef("finished loading attachment %s", p.attachment.URL)
return p.attachment, nil
}
// Finished returns true if processing has finished for both the thumbnail
// and full-sized version of this piece of media.
func (p *ProcessingMedia) Finished() bool {
return atomic.LoadInt32(&p.thumbState) == int32(complete) && atomic.LoadInt32(&p.fullSizeState) == int32(complete)
}
func (p *ProcessingMedia) loadThumb(ctx context.Context) error {
thumbState := atomic.LoadInt32(&p.thumbState)
switch processState(thumbState) {
case received:
// we haven't processed a thumbnail for this media yet so do it now
// check if we need to create a blurhash or if there's already one set
var createBlurhash bool
if p.attachment.Blurhash == "" {
// no blurhash created yet
createBlurhash = true
}
var (
thumb *mediaMeta
err error
)
switch ct := p.attachment.File.ContentType; ct {
case mimeImageJpeg, mimeImagePng, mimeImageWebp, mimeImageGif:
// thumbnail the image from the original stored full size version
stored, err := p.storage.GetStream(ctx, p.attachment.File.Path)
if err != nil {
p.err = fmt.Errorf("loadThumb: error fetching file from storage: %s", err)
atomic.StoreInt32(&p.thumbState, int32(errored))
return p.err
}
thumb, err = deriveThumbnailFromImage(stored, ct, createBlurhash)
// try to close the stored stream we had open, no matter what
if closeErr := stored.Close(); closeErr != nil {
log.Errorf("error closing stream: %s", closeErr)
}
// now check if we managed to get a thumbnail
if err != nil {
p.err = fmt.Errorf("loadThumb: error deriving thumbnail: %s", err)
atomic.StoreInt32(&p.thumbState, int32(errored))
return p.err
}
case mimeVideoMp4:
// create a generic thumbnail based on video height + width
thumb, err = deriveThumbnailFromVideo(p.attachment.FileMeta.Original.Height, p.attachment.FileMeta.Original.Width)
if err != nil {
p.err = fmt.Errorf("loadThumb: error deriving thumbnail: %s", err)
atomic.StoreInt32(&p.thumbState, int32(errored))
return p.err
}
default:
p.err = fmt.Errorf("loadThumb: content type %s not a processible image type", ct)
atomic.StoreInt32(&p.thumbState, int32(errored))
return p.err
}
// put the thumbnail in storage
if err := p.storage.Put(ctx, p.attachment.Thumbnail.Path, thumb.small); err != nil && err != storage.ErrAlreadyExists {
p.err = fmt.Errorf("loadThumb: error storing thumbnail: %s", err)
atomic.StoreInt32(&p.thumbState, int32(errored))
return p.err
}
// set appropriate fields on the attachment based on the thumbnail we derived
if createBlurhash {
p.attachment.Blurhash = thumb.blurhash
}
p.attachment.FileMeta.Small = gtsmodel.Small{
Width: thumb.width,
Height: thumb.height,
Size: thumb.size,
Aspect: thumb.aspect,
}
p.attachment.Thumbnail.FileSize = len(thumb.small)
// we're done processing the thumbnail!
atomic.StoreInt32(&p.thumbState, int32(complete))
log.Tracef("finished processing thumbnail for attachment %s", p.attachment.URL)
fallthrough
case complete:
return nil
case errored:
return p.err
}
return fmt.Errorf("loadThumb: thumbnail processing status %d unknown", p.thumbState)
}
func (p *ProcessingMedia) loadFullSize(ctx context.Context) error {
fullSizeState := atomic.LoadInt32(&p.fullSizeState)
switch processState(fullSizeState) {
case received:
// only process once.
p.once.Do(func() {
var err error
var decoded *mediaMeta
// stream the original file out of storage...
stored, err := p.storage.GetStream(ctx, p.attachment.File.Path)
if err != nil {
p.err = fmt.Errorf("loadFullSize: error fetching file from storage: %s", err)
atomic.StoreInt32(&p.fullSizeState, int32(errored))
return p.err
}
defer func() {
if err := stored.Close(); err != nil {
log.Errorf("loadFullSize: error closing stored full size: %s", err)
if r := recover(); r != nil {
if err != nil {
rOld := r // wrap the panic so we don't lose existing returned error
r = fmt.Errorf("panic occured after error %q: %v", err.Error(), rOld)
}
// Catch any panics and wrap as error.
err = fmt.Errorf("caught panic: %v", r)
}
if err != nil {
// Store error.
p.err = err
}
}()
// decode the image
ct := p.attachment.File.ContentType
switch ct {
case mimeImageJpeg, mimeImagePng, mimeImageWebp:
decoded, err = decodeImage(stored, ct)
case mimeImageGif:
decoded, err = decodeGif(stored)
case mimeVideoMp4:
decoded, err = decodeVideo(stored, ct)
default:
err = fmt.Errorf("loadFullSize: content type %s not a processible image type", ct)
// Attempt to store media and calculate
// full-size media attachment details.
if err = p.store(ctx); err != nil {
return
}
if err != nil {
p.err = err
atomic.StoreInt32(&p.fullSizeState, int32(errored))
return p.err
// Finish processing by reloading media into
// memory to get dimension and generate a thumb.
if err = p.finish(ctx); err != nil {
return
}
// set appropriate fields on the attachment based on the image we derived
// generic fields
p.attachment.File.UpdatedAt = time.Now()
p.attachment.FileMeta.Original = gtsmodel.Original{
Width: decoded.width,
Height: decoded.height,
Size: decoded.size,
Aspect: decoded.aspect,
if p.recache {
// Existing attachment we're recaching, so only need to update.
err = p.manager.db.UpdateByID(ctx, p.media, p.media.ID)
return
}
// nullable fields
if decoded.duration != 0 {
i := decoded.duration
p.attachment.FileMeta.Original.Duration = &i
}
if decoded.framerate != 0 {
i := decoded.framerate
p.attachment.FileMeta.Original.Framerate = &i
}
if decoded.bitrate != 0 {
i := decoded.bitrate
p.attachment.FileMeta.Original.Bitrate = &i
}
// New attachment, first time caching.
err = p.manager.db.Put(ctx, p.media)
return //nolint shutup linter i like this here
})
// we're done processing the full-size image
p.attachment.Processing = gtsmodel.ProcessingStatusProcessed
atomic.StoreInt32(&p.fullSizeState, int32(complete))
log.Tracef("finished processing full size image for attachment %s", p.attachment.URL)
fallthrough
case complete:
return nil
case errored:
return p.err
if p.err != nil {
return nil, p.err
}
return fmt.Errorf("loadFullSize: full size processing status %d unknown", p.fullSizeState)
return p.media, nil
}
// store calls the data function attached to p if it hasn't been called yet,
// and updates the underlying attachment fields as necessary. It will then stream
// bytes from p's reader directly into storage so that it can be retrieved later.
func (p *ProcessingMedia) store(ctx context.Context) error {
// check if we've already done this and bail early if we have
if p.read {
return nil
}
// execute the data function to get the readcloser out of it
rc, fileSize, err := p.data(ctx)
if err != nil {
return fmt.Errorf("store: error executing data function: %s", err)
}
// defer closing the reader when we're done with it
defer func() {
if p.postFn == nil {
return
}
// ensure post callback gets called.
if err := p.postFn(ctx); err != nil {
log.Errorf("error executing postdata function: %v", err)
}
}()
// Load media from provided data function.
rc, sz, err := p.dataFn(ctx)
if err != nil {
return fmt.Errorf("error executing data function: %w", err)
}
defer func() {
// Ensure data reader gets closed on return.
if err := rc.Close(); err != nil {
log.Errorf("store: error closing readcloser: %s", err)
log.Errorf("error closing data reader: %v", err)
}
}()
// execute the postData function no matter what happens
defer func() {
if p.postData != nil {
if err := p.postData(ctx); err != nil {
log.Errorf("store: error executing postData: %s", err)
}
}
}()
// Byte buffer to read file header into.
// See: https://en.wikipedia.org/wiki/File_format#File_header
// and https://github.com/h2non/filetype
hdrBuf := make([]byte, 261)
// extract no more than 261 bytes from the beginning of the file -- this is the header
firstBytes := make([]byte, maxFileHeaderBytes)
if _, err := rc.Read(firstBytes); err != nil {
return fmt.Errorf("store: error reading initial %d bytes: %s", maxFileHeaderBytes, err)
// Read the first 261 header bytes into buffer.
if _, err := io.ReadFull(rc, hdrBuf); err != nil {
return fmt.Errorf("error reading incoming media: %w", err)
}
// now we have the file header we can work out the content type from it
contentType, err := parseContentType(firstBytes)
// Parse file type info from header buffer.
info, err := filetype.Match(hdrBuf)
if err != nil {
return fmt.Errorf("store: error parsing content type: %s", err)
return fmt.Errorf("error parsing file type: %w", err)
}
// bail if this is a type we can't process
if !supportedAttachment(contentType) {
return fmt.Errorf("store: media type %s not (yet) supported", contentType)
}
// Recombine header bytes with remaining stream
r := io.MultiReader(bytes.NewReader(hdrBuf), rc)
// extract the file extension
split := strings.Split(contentType, "/")
if len(split) != 2 {
return fmt.Errorf("store: content type %s was not valid", contentType)
}
extension := split[1] // something like 'jpeg'
switch info.Extension {
case "mp4":
p.media.Type = gtsmodel.FileTypeVideo
// concatenate the cleaned up first bytes with the existing bytes still in the reader (thanks Mara)
multiReader := io.MultiReader(bytes.NewBuffer(firstBytes), rc)
case "gif":
p.media.Type = gtsmodel.FileTypeImage
// use the extension to derive the attachment type
// and, while we're in here, clean up exif data from
// the image if we already know the fileSize
var readerToStore io.Reader
switch extension {
case mimeGif:
p.attachment.Type = gtsmodel.FileTypeImage
// nothing to terminate, we can just store the multireader
readerToStore = multiReader
case mimeJpeg, mimePng, mimeWebp:
p.attachment.Type = gtsmodel.FileTypeImage
if fileSize > 0 {
terminated, err := terminator.Terminate(multiReader, int(fileSize), extension)
case "jpg", "jpeg", "png", "webp":
p.media.Type = gtsmodel.FileTypeImage
if sz > 0 {
// A file size was provided so we can clean exif data from image.
r, err = terminator.Terminate(r, int(sz), info.Extension)
if err != nil {
return fmt.Errorf("store: exif error: %s", err)
return fmt.Errorf("error cleaning exif data: %w", err)
}
defer func() {
if closer, ok := terminated.(io.Closer); ok {
if err := closer.Close(); err != nil {
log.Errorf("store: error closing terminator reader: %s", err)
}
}
}()
// store the exif-terminated version of what was in the multireader
readerToStore = terminated
} else {
// can't terminate if we don't know the file size, so just store the multiReader
readerToStore = multiReader
}
case mimeMp4:
p.attachment.Type = gtsmodel.FileTypeVideo
// nothing to terminate, we can just store the multireader
readerToStore = multiReader
default:
return fmt.Errorf("store: couldn't process %s", extension)
return fmt.Errorf("unsupported file type: %s", info.Extension)
}
// now set some additional fields on the attachment since
// we know more about what the underlying media actually is
p.attachment.URL = uris.GenerateURIForAttachment(p.attachment.AccountID, string(TypeAttachment), string(SizeOriginal), p.attachment.ID, extension)
p.attachment.File.ContentType = contentType
p.attachment.File.Path = fmt.Sprintf("%s/%s/%s/%s.%s", p.attachment.AccountID, TypeAttachment, SizeOriginal, p.attachment.ID, extension)
// Calculate attachment file path.
p.media.File.Path = fmt.Sprintf(
"%s/%s/%s/%s.%s",
p.media.AccountID,
TypeAttachment,
SizeOriginal,
p.media.ID,
info.Extension,
)
// store this for now -- other processes can pull it out of storage as they please
if fileSize, err = putStream(ctx, p.storage, p.attachment.File.Path, readerToStore, fileSize); err != nil {
if !errors.Is(err, storage.ErrAlreadyExists) {
return fmt.Errorf("store: error storing stream: %s", err)
// This shouldn't already exist, but we do a check as it's worth logging.
if have, _ := p.manager.storage.Has(ctx, p.media.File.Path); have {
log.Warnf("media already exists at storage path: %s", p.media.File.Path)
// Attempt to remove existing media at storage path (might be broken / out-of-date)
if err := p.manager.storage.Delete(ctx, p.media.File.Path); err != nil {
return fmt.Errorf("error removing media from storage: %v", err)
}
log.Warnf("attachment %s already exists at storage path: %s", p.attachment.ID, p.attachment.File.Path)
}
cached := true
p.attachment.Cached = &cached
p.attachment.File.FileSize = int(fileSize)
p.read = true
// Write the final image reader stream to our storage.
sz, err = p.manager.storage.PutStream(ctx, p.media.File.Path, r)
if err != nil {
return fmt.Errorf("error writing media to storage: %w", err)
}
// Set written image size.
p.media.File.FileSize = int(sz)
// Fill in remaining attachment data now it's stored.
p.media.URL = uris.GenerateURIForAttachment(
p.media.AccountID,
string(TypeAttachment),
string(SizeOriginal),
p.media.ID,
info.Extension,
)
p.media.File.ContentType = info.MIME.Value
cached := true
p.media.Cached = &cached
return nil
}
// finish completes processing of media whose original file is already in
// storage at p.media.File.Path with p.media.File.ContentType set (store
// fills both in). It streams the stored original back out, decodes it to
// obtain the full-size dimensions (plus duration, framerate and bitrate for
// video), derives a thumbnail and blurhash from the decoded image,
// stream-encodes the thumbnail as JPEG into storage, and fills in the
// remaining file-meta, thumbnail and processing-status fields on p.media.
//
// A non-nil error is returned if the stored file cannot be loaded, decoded
// or closed, if its content type is not one handled here, or if generating
// or storing the thumbnail fails.
func (p *ProcessingMedia) finish(ctx context.Context) error {
	// Fetch a stream to the original file in storage.
	rc, err := p.manager.storage.GetStream(ctx, p.media.File.Path)
	if err != nil {
		return fmt.Errorf("error loading file from storage: %w", err)
	}

	// Covers the early-return paths below; the explicit Close()
	// further down is the one whose error actually gets reported.
	defer rc.Close()

	var fullImg *gtsImage

	switch p.media.File.ContentType {
	// .jpeg, .gif, .webp image type
	case mimeImageJpeg, mimeImageGif, mimeImageWebp:
		fullImg, err = decodeImage(rc, imaging.AutoOrientation(true))
		if err != nil {
			return fmt.Errorf("error decoding image: %w", err)
		}

	// .png image (requires ancillary chunk stripping)
	case mimeImagePng:
		fullImg, err = decodeImage(&PNGAncillaryChunkStripper{
			Reader: rc,
		}, imaging.AutoOrientation(true))
		if err != nil {
			return fmt.Errorf("error decoding image: %w", err)
		}

	// .mp4 video type
	case mimeVideoMp4:
		video, err := decodeVideoFrame(rc)
		if err != nil {
			return fmt.Errorf("error decoding video: %w", err)
		}

		// Set video frame as image.
		fullImg = video.frame

		// Copy the metadata into locals before taking addresses. Pointers
		// into the video struct itself would keep that whole struct, and
		// the full-size frame it references, reachable for as long as the
		// attachment holds them.
		duration, framerate, bitrate := video.duration, video.framerate, video.bitrate

		// Set video metadata in attachment info.
		p.media.FileMeta.Original.Duration = &duration
		p.media.FileMeta.Original.Framerate = &framerate
		p.media.FileMeta.Original.Bitrate = &bitrate

	// Anything else was never decoded, so fullImg would still be
	// nil when its dimensions are read below. Fail cleanly instead.
	default:
		return fmt.Errorf("unsupported content type: %s", p.media.File.ContentType)
	}

	// The image should be in-memory by now.
	if err := rc.Close(); err != nil {
		return fmt.Errorf("error closing file: %w", err)
	}

	// Set full-size dimensions in attachment info.
	p.media.FileMeta.Original.Width = int(fullImg.Width())
	p.media.FileMeta.Original.Height = int(fullImg.Height())
	p.media.FileMeta.Original.Size = int(fullImg.Size())
	p.media.FileMeta.Original.Aspect = fullImg.AspectRatio()

	// Calculate attachment thumbnail file path
	p.media.Thumbnail.Path = fmt.Sprintf(
		"%s/%s/%s/%s.jpg",
		p.media.AccountID,
		TypeAttachment,
		SizeSmall,
		p.media.ID,
	)

	// Get smaller thumbnail image
	thumbImg := fullImg.Thumbnail()

	// Garbage collector, you may
	// now take our large son.
	fullImg = nil

	// Blurhash needs generating from thumb.
	hash, err := thumbImg.Blurhash()
	if err != nil {
		return fmt.Errorf("error generating blurhash: %w", err)
	}

	// Set the attachment blurhash.
	p.media.Blurhash = hash

	// This shouldn't already exist, but we do a check as it's worth logging.
	// The error from Has() is deliberately ignored: the check is best-effort.
	if have, _ := p.manager.storage.Has(ctx, p.media.Thumbnail.Path); have {
		log.Warnf("thumbnail already exists at storage path: %s", p.media.Thumbnail.Path)

		// Attempt to remove existing thumbnail at storage path (might be broken / out-of-date)
		if err := p.manager.storage.Delete(ctx, p.media.Thumbnail.Path); err != nil {
			return fmt.Errorf("error removing thumbnail from storage: %w", err)
		}
	}

	// Create a thumbnail JPEG encoder stream.
	enc := thumbImg.ToJPEG(&jpeg.Options{
		Quality: 70, // enough for a thumbnail.
	})

	// Stream-encode the JPEG thumbnail image into storage.
	sz, err := p.manager.storage.PutStream(ctx, p.media.Thumbnail.Path, enc)
	if err != nil {
		return fmt.Errorf("error stream-encoding thumbnail to storage: %w", err)
	}

	// Fill in remaining thumbnail now it's stored
	p.media.Thumbnail.ContentType = mimeImageJpeg
	p.media.Thumbnail.URL = uris.GenerateURIForAttachment(
		p.media.AccountID,
		string(TypeAttachment),
		string(SizeSmall),
		p.media.ID,
		"jpg", // always jpeg
	)

	// Set thumbnail dimensions in attachment info.
	p.media.FileMeta.Small = gtsmodel.Small{
		Width:  int(thumbImg.Width()),
		Height: int(thumbImg.Height()),
		Size:   int(thumbImg.Size()),
		Aspect: thumbImg.AspectRatio(),
	}

	// Set written image size.
	p.media.Thumbnail.FileSize = int(sz)

	// Finally set the attachment as processed and update time.
	p.media.Processing = gtsmodel.ProcessingStatusProcessed
	p.media.File.UpdatedAt = time.Now()

	// Read the URL from p.media, like every other access in this function.
	log.Tracef("finished storing initial data for attachment %s", p.media.URL)

	return nil
}
@ -411,19 +353,6 @@ func (m *manager) preProcessMedia(ctx context.Context, data DataFunc, postData P
return nil, err
}
file := gtsmodel.File{
Path: "", // we don't know yet because it depends on the uncalled DataFunc
ContentType: "", // we don't know yet because it depends on the uncalled DataFunc
UpdatedAt: time.Now(),
}
thumbnail := gtsmodel.Thumbnail{
URL: uris.GenerateURIForAttachment(accountID, string(TypeAttachment), string(SizeSmall), id, mimeJpeg), // all thumbnails are encoded as jpeg,
Path: fmt.Sprintf("%s/%s/%s/%s.%s", accountID, TypeAttachment, SizeSmall, id, mimeJpeg), // all thumbnails are encoded as jpeg,
ContentType: mimeImageJpeg,
UpdatedAt: time.Now(),
}
avatar := false
header := false
cached := false
@ -443,8 +372,8 @@ func (m *manager) preProcessMedia(ctx context.Context, data DataFunc, postData P
ScheduledStatusID: "",
Blurhash: "",
Processing: gtsmodel.ProcessingStatusReceived,
File: file,
Thumbnail: thumbnail,
File: gtsmodel.File{UpdatedAt: time.Now()},
Thumbnail: gtsmodel.Thumbnail{UpdatedAt: time.Now()},
Avatar: &avatar,
Header: &header,
Cached: &cached,
@ -495,34 +424,28 @@ func (m *manager) preProcessMedia(ctx context.Context, data DataFunc, postData P
}
processingMedia := &ProcessingMedia{
attachment: attachment,
data: data,
postData: postData,
thumbState: int32(received),
fullSizeState: int32(received),
database: m.db,
storage: m.storage,
media: attachment,
dataFn: data,
postFn: postData,
manager: m,
}
return processingMedia, nil
}
func (m *manager) preProcessRecache(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, attachmentID string) (*ProcessingMedia, error) {
// get the existing attachment
attachment, err := m.db.GetAttachmentByID(ctx, attachmentID)
func (m *manager) preProcessRecache(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, id string) (*ProcessingMedia, error) {
// get the existing attachment from database.
attachment, err := m.db.GetAttachmentByID(ctx, id)
if err != nil {
return nil, err
}
processingMedia := &ProcessingMedia{
attachment: attachment,
data: data,
postData: postData,
thumbState: int32(received),
fullSizeState: int32(received),
database: m.db,
storage: m.storage,
recache: true, // indicate it's a recache
media: attachment,
dataFn: data,
postFn: postData,
manager: m,
recache: true, // indicate it's a recache
}
return processingMedia, nil

View file

@ -39,7 +39,7 @@ func (suite *PruneOrphanedTestSuite) TestPruneOrphanedDry() {
}
pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
if err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil {
if _, err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil {
panic(err)
}
@ -62,7 +62,7 @@ func (suite *PruneOrphanedTestSuite) TestPruneOrphanedMoist() {
}
pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif"
if err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil {
if _, err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil {
panic(err)
}

View file

@ -87,7 +87,7 @@ func (suite *PruneRemoteTestSuite) TestPruneAndRecache() {
// now recache the image....
data := func(_ context.Context) (io.ReadCloser, int64, error) {
// load bytes from a test image
b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpeg")
b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpg")
if err != nil {
panic(err)
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.7 KiB

After

Width:  |  Height:  |  Size: 2.8 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

After

Width:  |  Height:  |  Size: 20 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.3 KiB

After

Width:  |  Height:  |  Size: 5.8 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.3 KiB

After

Width:  |  Height:  |  Size: 5.8 KiB

Before After
Before After

View file

@ -24,13 +24,6 @@ import (
"time"
)
// maxFileHeaderBytes represents the maximum amount of bytes we want
// to examine from the beginning of a file to determine its type.
//
// See: https://en.wikipedia.org/wiki/File_format#File_header
// and https://github.com/h2non/filetype
const maxFileHeaderBytes = 261
// mime consts
const (
mimeImage = "image"
@ -52,14 +45,6 @@ const (
mimeVideoMp4 = mimeVideo + "/" + mimeMp4
)
// processState is the lifecycle state of a single processing order
// (e.g. a thumbnail or a full-size file). It is int32-based so that
// state fields can be loaded and stored with the sync/atomic int32 helpers.
type processState int32
const (
received processState = iota // processing order has been received but not done yet
complete // processing order has been completed successfully
errored // processing order has been completed with an error
)
// EmojiMaxBytes is the maximum permitted bytes of an emoji upload (50kb)
// const EmojiMaxBytes = 51200
@ -132,17 +117,3 @@ type DataFunc func(ctx context.Context) (reader io.ReadCloser, fileSize int64, e
//
// This can be set to nil, and will then not be executed.
type PostDataCallbackFunc func(ctx context.Context) error
// mediaMeta carries the metadata derived while decoding a piece of
// media or while producing its thumbnail.
type mediaMeta struct {
// width of the media, in pixels
width int
// height of the media, in pixels
height int
// pixel count (width * height)
size int
// aspect ratio (width / height)
aspect float32
// blurhash of the thumbnail, when one was generated
blurhash string
// encoded thumbnail bytes, as written to storage
small []byte
// video-specific properties
// track duration divided by track timescale
duration float32
// number of samples divided by duration
framerate float32
// highest bitrate found among the probed tracks
bitrate uint64
}

View file

@ -19,72 +19,22 @@
package media
import (
"context"
"errors"
"fmt"
"io"
"github.com/h2non/filetype"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/storage"
)
// AllSupportedMIMETypes just returns all media
// MIME types supported by this instance.
func AllSupportedMIMETypes() []string {
return []string{
mimeImageJpeg,
mimeImageGif,
mimeImagePng,
mimeImageWebp,
mimeVideoMp4,
}
var SupportedMIMETypes = []string{
mimeImageJpeg,
mimeImageGif,
mimeImagePng,
mimeImageWebp,
mimeVideoMp4,
}
// parseContentType sniffs the MIME content type (eg., "image/jpeg") from the
// leading bytes of a file.
//
// fileHeader must be no longer than maxFileHeaderBytes; a longer slice is
// rejected with an error rather than being inspected. An error is also
// returned when matching fails or when the file type is not recognised.
func parseContentType(fileHeader []byte) (string, error) {
	n := len(fileHeader)
	if n > maxFileHeaderBytes {
		return "", fmt.Errorf("parseContentType requires %d bytes max, we got %d", maxFileHeaderBytes, n)
	}

	matched, matchErr := filetype.Match(fileHeader)
	switch {
	case matchErr != nil:
		return "", matchErr
	case matched == filetype.Unknown:
		return "", errors.New("filetype unknown")
	}

	return matched.MIME.Value, nil
}
// supportedAttachment reports whether mimeType is one of the attachment
// MIME types returned by AllSupportedMIMETypes.
func supportedAttachment(mimeType string) bool {
	supported := AllSupportedMIMETypes()
	for i := range supported {
		if supported[i] == mimeType {
			return true
		}
	}
	return false
}
// supportedEmoji checks that the content type is image/png or image/gif -- the only types supported for emoji.
func supportedEmoji(mimeType string) bool {
acceptedEmojiTypes := []string{
mimeImageGif,
mimeImagePng,
}
for _, accepted := range acceptedEmojiTypes {
if mimeType == accepted {
return true
}
}
return false
var SupportedEmojiMIMETypes = []string{
mimeImageGif,
mimeImagePng,
}
// ParseMediaType converts s to a recognized MediaType, or returns an error if unrecognized
@ -127,31 +77,3 @@ func (l *logrusWrapper) Info(msg string, keysAndValues ...interface{}) {
// Error forwards err, msg and keysAndValues to log.Error, prefixed with
// "media manager cron logger: ". Note that keysAndValues is handed over as a
// single slice argument rather than being expanded.
func (l *logrusWrapper) Error(err error, msg string, keysAndValues ...interface{}) {
log.Error("media manager cron logger: ", err, msg, keysAndValues)
}
// lengthReader decorates an io.Reader, keeping a running total of
// the number of bytes that have been read through it so far.
type lengthReader struct {
	source io.Reader // wrapped reader that the data is pulled from
	length int64     // total bytes read from source so far
}

// Read implements io.Reader. It delegates to the wrapped source, adds the
// number of bytes that call produced to the running total, and passes the
// source's result through unchanged.
func (r *lengthReader) Read(b []byte) (int, error) {
	read, readErr := r.source.Read(b)
	r.length = r.length + int64(read)
	return read, readErr
}
// putStream streams r into storage under key and reports the file size.
//
// When fileSize is already known (greater than zero) the reader is stored
// as-is and fileSize is returned unchanged. Otherwise the reader is wrapped
// in a lengthReader so that the number of bytes read can be counted while
// streaming, and that discovered count is returned instead. In both cases
// the error from storage.PutStream is returned alongside the size.
func putStream(ctx context.Context, storage *storage.Driver, key string, r io.Reader, fileSize int64) (int64, error) {
	// Size known up-front: nothing to measure.
	if fileSize > 0 {
		err := storage.PutStream(ctx, key, r)
		return fileSize, err
	}

	// Size unknown: count bytes as they pass through.
	counter := &lengthReader{source: r}
	putErr := storage.PutStream(ctx, key, counter)
	return counter.length, putErr
}

View file

@ -19,63 +19,55 @@
package media
import (
"bytes"
"fmt"
"image"
"image/color"
"image/draw"
"image/jpeg"
"io"
"os"
"github.com/abema/go-mp4"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
var thumbFill = color.RGBA{42, 43, 47, 0} // the color to fill video thumbnails with
// gtsVideo bundles what decodeVideoFrame extracts from a video
// stream: an image standing in for a video frame, plus the
// duration, bitrate and framerate discovered while probing the tracks.
type gtsVideo struct {
// image used as the video's frame (currently a blank image
// sized to the video dimensions)
frame *gtsImage
duration float32 // in seconds
// highest bitrate found among the probed tracks
bitrate uint64
// number of samples divided by duration
framerate float32
}
func decodeVideo(r io.Reader, contentType string) (*mediaMeta, error) {
// decodeVideoFrame decodes and returns an image from a single frame in the given video stream.
// (note: currently this only returns a blank image resized to fit video dimensions).
func decodeVideoFrame(r io.Reader) (*gtsVideo, error) {
// We'll need a readseeker to decode the video. We can get a readseeker
// without burning too much mem by first copying the reader into a temp file.
// First create the file in the temporary directory...
tempFile, err := os.CreateTemp(os.TempDir(), "gotosocial-")
tmp, err := os.CreateTemp(os.TempDir(), "gotosocial-")
if err != nil {
return nil, fmt.Errorf("could not create temporary file while decoding video: %w", err)
return nil, err
}
tempFileName := tempFile.Name()
// Make sure to clean up the temporary file when we're done with it
defer func() {
if err := tempFile.Close(); err != nil {
log.Errorf("could not close file %s: %s", tempFileName, err)
}
if err := os.Remove(tempFileName); err != nil {
log.Errorf("could not remove file %s: %s", tempFileName, err)
}
tmp.Close()
os.Remove(tmp.Name())
}()
// Now copy the entire reader we've been provided into the
// temporary file; we won't use the reader again after this.
if _, err := io.Copy(tempFile, r); err != nil {
return nil, fmt.Errorf("could not copy video reader into temporary file %s: %w", tempFileName, err)
if _, err := io.Copy(tmp, r); err != nil {
return nil, err
}
var (
width int
height int
duration float32
framerate float32
bitrate uint64
)
// probe the video file to extract useful metadata from it; for methodology, see:
// https://github.com/abema/go-mp4/blob/7d8e5a7c5e644e0394261b0cf72fef79ce246d31/mp4tool/probe/probe.go#L85-L154
info, err := mp4.Probe(tempFile)
info, err := mp4.Probe(tmp)
if err != nil {
return nil, fmt.Errorf("could not probe temporary video file %s: %w", tempFileName, err)
return nil, fmt.Errorf("error probing tmp file %s: %w", tmp.Name(), err)
}
var (
width int
height int
video gtsVideo
)
for _, tr := range info.Tracks {
if tr.AVC == nil {
continue
@ -89,72 +81,42 @@ func decodeVideo(r io.Reader, contentType string) (*mediaMeta, error) {
height = h
}
if br := tr.Samples.GetBitrate(tr.Timescale); br > bitrate {
bitrate = br
} else if br := info.Segments.GetBitrate(tr.TrackID, tr.Timescale); br > bitrate {
bitrate = br
if br := tr.Samples.GetBitrate(tr.Timescale); br > video.bitrate {
video.bitrate = br
} else if br := info.Segments.GetBitrate(tr.TrackID, tr.Timescale); br > video.bitrate {
video.bitrate = br
}
if d := float32(tr.Duration) / float32(tr.Timescale); d > duration {
duration = d
framerate = float32(len(tr.Samples)) / duration
if d := float64(tr.Duration) / float64(tr.Timescale); d > float64(video.duration) {
video.framerate = float32(len(tr.Samples)) / float32(d)
video.duration = float32(d)
}
}
var errs gtserror.MultiError
// Check for empty video metadata.
var empty []string
if width == 0 {
errs = append(errs, "video width could not be discovered")
empty = append(empty, "width")
}
if height == 0 {
errs = append(errs, "video height could not be discovered")
empty = append(empty, "height")
}
if video.duration == 0 {
empty = append(empty, "duration")
}
if video.framerate == 0 {
empty = append(empty, "framerate")
}
if video.bitrate == 0 {
empty = append(empty, "bitrate")
}
if len(empty) > 0 {
return nil, fmt.Errorf("error determining video metadata: %v", empty)
}
if duration == 0 {
errs = append(errs, "video duration could not be discovered")
}
// Create new empty "frame" image.
// TODO: decode frame from video file.
video.frame = blankImage(width, height)
if framerate == 0 {
errs = append(errs, "video framerate could not be discovered")
}
if bitrate == 0 {
errs = append(errs, "video bitrate could not be discovered")
}
if errs != nil {
return nil, errs.Combine()
}
return &mediaMeta{
width: width,
height: height,
duration: duration,
framerate: framerate,
bitrate: bitrate,
size: height * width,
aspect: float32(width) / float32(height),
}, nil
}
func deriveThumbnailFromVideo(height int, width int) (*mediaMeta, error) {
// create a rectangle with the same dimensions as the video
img := image.NewRGBA(image.Rect(0, 0, width, height))
// fill the rectangle with our desired fill color
draw.Draw(img, img.Bounds(), &image.Uniform{thumbFill}, image.Point{}, draw.Src)
// we can get away with using extremely poor quality for this monocolor thumbnail
out := &bytes.Buffer{}
if err := jpeg.Encode(out, img, &jpeg.Options{Quality: 1}); err != nil {
return nil, fmt.Errorf("error encoding video thumbnail: %w", err)
}
return &mediaMeta{
width: width,
height: height,
size: width * height,
aspect: float32(width) / float32(height),
small: out.Bytes(),
}, nil
return &video, nil
}