[chore] media and emoji refactoring (#3000)

* start updating media manager interface ready for storing attachments / emoji right away

* store emoji and media as uncached immediately, then (re-)cache on Processing{}.Load()

* remove now unused media workers

* fix tests and issues

* fix another test!

* fix emoji activitypub uri setting behaviour, fix remainder of test compilation issues

* fix more tests

* fix (most of) remaining tests, add debouncing to repeatedly failing media / emojis

* whoops, rebase issue

* remove kim's whacky experiments

* do some reshuffling, ensure emoji uri gets set

* ensure marked as not cached on cleanup

* tweaks to media / emoji processing to handle context canceled better

* ensure newly fetched emojis actually get set in returned slice

* use different varnames to be a bit more obvious

* move emoji refresh rate limiting to dereferencer

* add exported dereferencer functions for remote media, use these for recaching in processor

* add check for nil attachment in updateAttachment()

* remove unused emoji and media fields + columns

* see previous commit

* fix old migrations expecting image_updated_at to exists (from copies of old models)

* remove freshness checking code (seems to be broken...)

* fix error arg causing nil ptr exception

* finish documentating functions with comments, slight tweaks to media / emoji deref error logic

* remove some extra unneeded boolean checking

* finish writing documentation (code comments) for exported media manager methods

* undo changes to migration snapshot gtsmodels, updated failing migration to have its own snapshot

* move doesColumnExist() to util.go in migrations package
This commit is contained in:
kim 2024-06-26 15:01:16 +00:00 committed by GitHub
commit 21bb324156
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
48 changed files with 2578 additions and 1926 deletions

View file

@ -56,176 +56,172 @@ func NewManager(state *state.State) *Manager {
return &Manager{state: state}
}
// PreProcessMedia begins the process of decoding
// and storing the given data as an attachment.
// It will return a pointer to a ProcessingMedia
// struct upon which further actions can be performed,
// such as getting the finished media, thumbnail,
// attachment, etc.
//
// - data: a function that the media manager can call
// to return a reader containing the media data.
// - accountID: the account that the media belongs to.
// - ai: optional and can be nil. Any additional information
// about the attachment provided will be put in the database.
//
// Note: unlike ProcessMedia, this will NOT
// queue the media to be asynchronously processed.
func (m *Manager) PreProcessMedia(
data DataFunc,
// CreateMedia creates a new media attachment entry
// in the database for given owning account ID and
// extra information, and prepares a new processing
// media entry to dereference it using the given
// data function, decode the media and finish filling
// out remaining media fields (e.g. type, path, etc).
func (m *Manager) CreateMedia(
ctx context.Context,
accountID string,
ai *AdditionalMediaInfo,
) *ProcessingMedia {
data DataFunc,
info AdditionalMediaInfo,
) (
*ProcessingMedia,
error,
) {
now := time.Now()
// Generate new ID.
id := id.NewULID()
// Placeholder URL for attachment.
url := uris.URIForAttachment(
accountID,
string(TypeAttachment),
string(SizeOriginal),
id,
"unknown",
)
// Placeholder storage path for attachment.
path := uris.StoragePathForAttachment(
accountID,
string(TypeAttachment),
string(SizeOriginal),
id,
"unknown",
)
// Calculate attachment thumbnail file path
thumbPath := uris.StoragePathForAttachment(
accountID,
string(TypeAttachment),
string(SizeSmall),
id,
// Always encode attachment
// thumbnails as jpg.
"jpg",
)
// Calculate attachment thumbnail URL.
thumbURL := uris.URIForAttachment(
accountID,
string(TypeAttachment),
string(SizeSmall),
id,
// Always encode attachment
// thumbnails as jpg.
"jpg",
)
// Populate initial fields on the new media,
// leaving out fields with values we don't know
// yet. These will be overwritten as we go.
now := time.Now()
attachment := &gtsmodel.MediaAttachment{
ID: id.NewULID(),
ID: id,
CreatedAt: now,
UpdatedAt: now,
URL: url,
Type: gtsmodel.FileTypeUnknown,
FileMeta: gtsmodel.FileMeta{},
AccountID: accountID,
Processing: gtsmodel.ProcessingStatusReceived,
File: gtsmodel.File{
UpdatedAt: now,
ContentType: "application/octet-stream",
Path: path,
},
Thumbnail: gtsmodel.Thumbnail{UpdatedAt: now},
Avatar: util.Ptr(false),
Header: util.Ptr(false),
Cached: util.Ptr(false),
Thumbnail: gtsmodel.Thumbnail{
ContentType: mimeImageJpeg, // thumbs always jpg.
Path: thumbPath,
URL: thumbURL,
},
Avatar: util.Ptr(false),
Header: util.Ptr(false),
Cached: util.Ptr(false),
}
attachment.URL = uris.URIForAttachment(
accountID,
string(TypeAttachment),
string(SizeOriginal),
attachment.ID,
"unknown",
)
attachment.File.Path = uris.StoragePathForAttachment(
accountID,
string(TypeAttachment),
string(SizeOriginal),
attachment.ID,
"unknown",
)
// Check if we were provided additional info
// to add to the attachment, and overwrite
// some of the attachment fields if so.
if ai != nil {
if ai.CreatedAt != nil {
attachment.CreatedAt = *ai.CreatedAt
}
if ai.StatusID != nil {
attachment.StatusID = *ai.StatusID
}
if ai.RemoteURL != nil {
attachment.RemoteURL = *ai.RemoteURL
}
if ai.Description != nil {
attachment.Description = *ai.Description
}
if ai.ScheduledStatusID != nil {
attachment.ScheduledStatusID = *ai.ScheduledStatusID
}
if ai.Blurhash != nil {
attachment.Blurhash = *ai.Blurhash
}
if ai.Avatar != nil {
attachment.Avatar = ai.Avatar
}
if ai.Header != nil {
attachment.Header = ai.Header
}
if ai.FocusX != nil {
attachment.FileMeta.Focus.X = *ai.FocusX
}
if ai.FocusY != nil {
attachment.FileMeta.Focus.Y = *ai.FocusY
}
if info.CreatedAt != nil {
attachment.CreatedAt = *info.CreatedAt
}
if info.StatusID != nil {
attachment.StatusID = *info.StatusID
}
if info.RemoteURL != nil {
attachment.RemoteURL = *info.RemoteURL
}
if info.Description != nil {
attachment.Description = *info.Description
}
if info.ScheduledStatusID != nil {
attachment.ScheduledStatusID = *info.ScheduledStatusID
}
if info.Blurhash != nil {
attachment.Blurhash = *info.Blurhash
}
if info.Avatar != nil {
attachment.Avatar = info.Avatar
}
if info.Header != nil {
attachment.Header = info.Header
}
if info.FocusX != nil {
attachment.FileMeta.Focus.X = *info.FocusX
}
if info.FocusY != nil {
attachment.FileMeta.Focus.Y = *info.FocusY
}
processingMedia := &ProcessingMedia{
media: attachment,
dataFn: data,
mgr: m,
}
return processingMedia
}
// PreProcessMediaRecache refetches, reprocesses,
// and recaches an existing attachment that has
// been uncached via cleaner pruning.
//
// Note: unlike ProcessMedia, this will NOT queue
// the media to be asychronously processed.
func (m *Manager) PreProcessMediaRecache(
ctx context.Context,
data DataFunc,
attachmentID string,
) (*ProcessingMedia, error) {
// Get the existing attachment from database.
attachment, err := m.state.DB.GetAttachmentByID(ctx, attachmentID)
// Store attachment in database in initial form.
err := m.state.DB.PutAttachment(ctx, attachment)
if err != nil {
return nil, err
}
processingMedia := &ProcessingMedia{
media: attachment,
dataFn: data,
recache: true, // Indicate it's a recache.
mgr: m,
}
return processingMedia, nil
// Pass prepared media as ready to be cached.
return m.RecacheMedia(attachment, data), nil
}
// PreProcessEmoji begins the process of decoding and storing
// the given data as an emoji. It will return a pointer to a
// ProcessingEmoji struct upon which further actions can be
// performed, such as getting the finished media, thumbnail,
// attachment, etc.
//
// - data: function that the media manager can call
// to return a reader containing the emoji data.
// - shortcode: the emoji shortcode without the ':'s around it.
// - emojiID: database ID that should be used to store the emoji.
// - uri: ActivityPub URI/ID of the emoji.
// - ai: optional and can be nil. Any additional information
// about the emoji provided will be put in the database.
// - refresh: refetch/refresh the emoji.
//
// Note: unlike ProcessEmoji, this will NOT queue
// the emoji to be asynchronously processed.
func (m *Manager) PreProcessEmoji(
ctx context.Context,
// RecacheMedia wraps a media model (assumed already
// inserted in the database!) with given data function
// to perform a blocking dereference / decode operation
// from the data stream returned.
func (m *Manager) RecacheMedia(
media *gtsmodel.MediaAttachment,
data DataFunc,
) *ProcessingMedia {
return &ProcessingMedia{
media: media,
dataFn: data,
mgr: m,
}
}
// CreateEmoji creates a new emoji entry in the
// database for given shortcode, domain and extra
// information, and prepares a new processing emoji
// entry to dereference it using the given data
// function, decode the media and finish filling
// out remaining fields (e.g. type, path, etc).
func (m *Manager) CreateEmoji(
ctx context.Context,
shortcode string,
emojiID string,
uri string,
ai *AdditionalEmojiInfo,
refresh bool,
) (*ProcessingEmoji, error) {
var (
newPathID string
emoji *gtsmodel.Emoji
now = time.Now()
)
domain string,
data DataFunc,
info AdditionalEmojiInfo,
) (
*ProcessingEmoji,
error,
) {
now := time.Now()
// Generate new ID.
id := id.NewULID()
// Fetch the local instance account for emoji path generation.
instanceAcc, err := m.state.DB.GetInstanceAccount(ctx, "")
@ -233,206 +229,240 @@ func (m *Manager) PreProcessEmoji(
return nil, gtserror.Newf("error fetching instance account: %w", err)
}
if refresh {
// Existing emoji!
if domain == "" && info.URI == nil {
// Generate URI for local emoji.
uri := uris.URIForEmoji(id)
info.URI = &uri
}
emoji, err = m.state.DB.GetEmojiByID(ctx, emojiID)
// Generate static URL for attachment.
staticURL := uris.URIForAttachment(
instanceAcc.ID,
string(TypeEmoji),
string(SizeStatic),
id,
// All static emojis
// are encoded as png.
mimePng,
)
// Generate static image path for attachment.
staticPath := uris.StoragePathForAttachment(
instanceAcc.ID,
string(TypeEmoji),
string(SizeStatic),
id,
// All static emojis
// are encoded as png.
mimePng,
)
// Populate initial fields on the new emoji,
// leaving out fields with values we don't know
// yet. These will be overwritten as we go.
emoji := &gtsmodel.Emoji{
ID: id,
Shortcode: shortcode,
Domain: domain,
ImageStaticURL: staticURL,
ImageStaticPath: staticPath,
ImageStaticContentType: mimeImagePng,
Disabled: util.Ptr(false),
VisibleInPicker: util.Ptr(true),
CreatedAt: now,
UpdatedAt: now,
}
// Finally, create new emoji.
return m.createEmoji(ctx,
m.state.DB.PutEmoji,
data,
emoji,
info,
)
}
// RefreshEmoji will prepare a recache operation
// for the given emoji, updating it with extra
// information, and in particular using new storage
// paths for the dereferenced media files to skirt
// around browser caching of the old files.
func (m *Manager) RefreshEmoji(
ctx context.Context,
emoji *gtsmodel.Emoji,
data DataFunc,
info AdditionalEmojiInfo,
) (
*ProcessingEmoji,
error,
) {
// Fetch the local instance account for emoji path generation.
instanceAcc, err := m.state.DB.GetInstanceAccount(ctx, "")
if err != nil {
return nil, gtserror.Newf("error fetching instance account: %w", err)
}
// Create references to old emoji image
// paths before they get updated with new
// path ID. These are required for later
// deleting the old image files on refresh.
shortcodeDomain := util.ShortcodeDomain(emoji)
oldStaticPath := emoji.ImageStaticPath
oldPath := emoji.ImagePath
// Since this is a refresh we will end up storing new images at new
// paths, so we should wrap closer to delete old paths at completion.
wrapped := func(ctx context.Context) (io.ReadCloser, int64, error) {
// Call original data func.
rc, sz, err := data(ctx)
if err != nil {
err = gtserror.Newf("error fetching emoji to refresh from the db: %w", err)
return nil, err
return nil, 0, err
}
// Since this is a refresh, we will end up with
// new images stored for this emoji, so we should
// use an io.Closer callback to perform clean up
// of the original images from storage.
originalData := data
originalImagePath := emoji.ImagePath
originalImageStaticPath := emoji.ImageStaticPath
// Wrap closer to cleanup old data.
c := iotools.CloserFunc(func() error {
data = func(ctx context.Context) (io.ReadCloser, int64, error) {
// Call original data func.
rc, sz, err := originalData(ctx)
if err != nil {
return nil, 0, err
// First try close original.
if rc.Close(); err != nil {
return err
}
// Wrap closer to cleanup old data.
c := iotools.CloserCallback(rc, func() {
if err := m.state.Storage.Delete(ctx, originalImagePath); err != nil && !storage.IsNotFound(err) {
log.Errorf(ctx, "error removing old emoji %s@%s from storage: %v", emoji.Shortcode, emoji.Domain, err)
}
// Remove any *old* emoji image file path now stream is closed.
if err := m.state.Storage.Delete(ctx, oldPath); err != nil &&
!storage.IsNotFound(err) {
log.Errorf(ctx, "error deleting old emoji %s from storage: %v", shortcodeDomain, err)
}
if err := m.state.Storage.Delete(ctx, originalImageStaticPath); err != nil && !storage.IsNotFound(err) {
log.Errorf(ctx, "error removing old static emoji %s@%s from storage: %v", emoji.Shortcode, emoji.Domain, err)
}
})
// Remove any *old* emoji static image file path now stream is closed.
if err := m.state.Storage.Delete(ctx, oldStaticPath); err != nil &&
!storage.IsNotFound(err) {
log.Errorf(ctx, "error deleting old static emoji %s from storage: %v", shortcodeDomain, err)
}
// Return newly wrapped readcloser and size.
return iotools.ReadCloser(rc, c), sz, nil
}
return nil
})
// Reuse existing shortcode and URI -
// these don't change when we refresh.
emoji.Shortcode = shortcode
emoji.URI = uri
// Use a new ID to create a new path
// for the new images, to get around
// needing to do cache invalidation.
newPathID, err = id.NewRandomULID()
if err != nil {
return nil, gtserror.Newf("error generating alternateID for emoji refresh: %s", err)
}
emoji.ImageStaticURL = uris.URIForAttachment(
instanceAcc.ID,
string(TypeEmoji),
string(SizeStatic),
newPathID,
// All static emojis
// are encoded as png.
mimePng,
)
emoji.ImageStaticPath = uris.StoragePathForAttachment(
instanceAcc.ID,
string(TypeEmoji),
string(SizeStatic),
newPathID,
// All static emojis
// are encoded as png.
mimePng,
)
} else {
// New emoji!
imageStaticURL := uris.URIForAttachment(
instanceAcc.ID,
string(TypeEmoji),
string(SizeStatic),
emojiID,
// All static emojis
// are encoded as png.
mimePng,
)
imageStaticPath := uris.StoragePathForAttachment(
instanceAcc.ID,
string(TypeEmoji),
string(SizeStatic),
emojiID,
// All static emojis
// are encoded as png.
mimePng,
)
// Populate initial fields on the new emoji,
// leaving out fields with values we don't know
// yet. These will be overwritten as we go.
emoji = &gtsmodel.Emoji{
ID: emojiID,
CreatedAt: now,
UpdatedAt: now,
Shortcode: shortcode,
ImageStaticURL: imageStaticURL,
ImageStaticPath: imageStaticPath,
ImageStaticContentType: mimeImagePng,
ImageUpdatedAt: now,
Disabled: util.Ptr(false),
URI: uri,
VisibleInPicker: util.Ptr(true),
}
// Return newly wrapped readcloser and size.
return iotools.ReadCloser(rc, c), sz, nil
}
// Use a new ID to create a new path
// for the new images, to get around
// needing to do cache invalidation.
newPathID, err := id.NewRandomULID()
if err != nil {
return nil, gtserror.Newf("error generating newPathID for emoji refresh: %s", err)
}
// Generate new static URL for emoji.
emoji.ImageStaticURL = uris.URIForAttachment(
instanceAcc.ID,
string(TypeEmoji),
string(SizeStatic),
newPathID,
// All static emojis
// are encoded as png.
mimePng,
)
// Generate new static image storage path for emoji.
emoji.ImageStaticPath = uris.StoragePathForAttachment(
instanceAcc.ID,
string(TypeEmoji),
string(SizeStatic),
newPathID,
// All static emojis
// are encoded as png.
mimePng,
)
// Finally, create new emoji in database.
processingEmoji, err := m.createEmoji(ctx,
func(ctx context.Context, emoji *gtsmodel.Emoji) error {
return m.state.DB.UpdateEmoji(ctx, emoji)
},
wrapped,
emoji,
info,
)
if err != nil {
return nil, err
}
// Set the refreshed path ID used.
processingEmoji.newPathID = newPathID
return processingEmoji, nil
}
func (m *Manager) createEmoji(
ctx context.Context,
putDB func(context.Context, *gtsmodel.Emoji) error,
data DataFunc,
emoji *gtsmodel.Emoji,
info AdditionalEmojiInfo,
) (
*ProcessingEmoji,
error,
) {
// Check if we have additional info to add to the emoji,
// and overwrite some of the emoji fields if so.
if ai != nil {
if ai.CreatedAt != nil {
emoji.CreatedAt = *ai.CreatedAt
}
if ai.Domain != nil {
emoji.Domain = *ai.Domain
}
if ai.ImageRemoteURL != nil {
emoji.ImageRemoteURL = *ai.ImageRemoteURL
}
if ai.ImageStaticRemoteURL != nil {
emoji.ImageStaticRemoteURL = *ai.ImageStaticRemoteURL
}
if ai.Disabled != nil {
emoji.Disabled = ai.Disabled
}
if ai.VisibleInPicker != nil {
emoji.VisibleInPicker = ai.VisibleInPicker
}
if ai.CategoryID != nil {
emoji.CategoryID = *ai.CategoryID
}
if info.URI != nil {
emoji.URI = *info.URI
}
if info.CreatedAt != nil {
emoji.CreatedAt = *info.CreatedAt
}
if info.Domain != nil {
emoji.Domain = *info.Domain
}
if info.ImageRemoteURL != nil {
emoji.ImageRemoteURL = *info.ImageRemoteURL
}
if info.ImageStaticRemoteURL != nil {
emoji.ImageStaticRemoteURL = *info.ImageStaticRemoteURL
}
if info.Disabled != nil {
emoji.Disabled = info.Disabled
}
if info.VisibleInPicker != nil {
emoji.VisibleInPicker = info.VisibleInPicker
}
if info.CategoryID != nil {
emoji.CategoryID = *info.CategoryID
}
// Store emoji in database in initial form.
if err := putDB(ctx, emoji); err != nil {
return nil, err
}
// Return wrapped emoji for later processing.
processingEmoji := &ProcessingEmoji{
emoji: emoji,
existing: refresh,
newPathID: newPathID,
dataFn: data,
mgr: m,
emoji: emoji,
dataFn: data,
mgr: m,
}
return processingEmoji, nil
}
// PreProcessEmojiRecache refetches, reprocesses, and recaches
// an existing emoji that has been uncached via cleaner pruning.
//
// Note: unlike ProcessEmoji, this will NOT queue the emoji to
// be asychronously processed.
func (m *Manager) PreProcessEmojiRecache(
ctx context.Context,
// RecacheEmoji wraps an emoji model (assumed already
// inserted in the database!) with given data function
// to perform a blocking dereference / decode operation
// from the data stream returned.
func (m *Manager) RecacheEmoji(
emoji *gtsmodel.Emoji,
data DataFunc,
emojiID string,
) (*ProcessingEmoji, error) {
// Get the existing emoji from the database.
emoji, err := m.state.DB.GetEmojiByID(ctx, emojiID)
if err != nil {
return nil, err
) *ProcessingEmoji {
return &ProcessingEmoji{
emoji: emoji,
dataFn: data,
mgr: m,
}
processingEmoji := &ProcessingEmoji{
emoji: emoji,
dataFn: data,
existing: true, // Indicate recache.
mgr: m,
}
return processingEmoji, nil
}
// ProcessEmoji will call PreProcessEmoji, followed
// by queuing the emoji in the emoji worker queue.
func (m *Manager) ProcessEmoji(
ctx context.Context,
data DataFunc,
shortcode string,
id string,
uri string,
ai *AdditionalEmojiInfo,
refresh bool,
) (*ProcessingEmoji, error) {
// Create a new processing emoji object for this emoji request.
emoji, err := m.PreProcessEmoji(ctx, data, shortcode, id, uri, ai, refresh)
if err != nil {
return nil, err
}
// Attempt to add emoji item to the worker queue.
m.state.Workers.Media.Queue.Push(emoji.Process)
return emoji, nil
}