mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-12-09 21:08:07 -06:00
[feature] Clean up/uncache remote media (#407)
* Add whereNotEmptyAndNotNull * Add GetRemoteOlderThanDays * Add GetRemoteOlderThanDays * Add PruneRemote to Manager interface * Start implementing PruneRemote * add new attachment + status to tests * fix up and test GetRemoteOlderThan * fix bad import * PruneRemote: return number pruned * add Cached column to mediaattachment * update + test pruneRemote * update mediaTest * use Cached column * upstep bun to latest version * embed structs in mediaAttachment * migrate mediaAttachment to new format * don't default cached to true * select only remote media * update db dependencies * step bun back to last working version * update pruneRemote to use Cached field * fix storage path of test attachments * add recache logic to manager * fix trimmed aspect ratio * test prune and recache * return errwithcode * tidy up different paths for emoji vs attachment * fix incorrect thumbnail type being stored * expose TransportController to media processor * implement tee-ing recached content * add thoughts of dog to test fedi attachments * test get remote files * add comment on PruneRemote * add postData cleanup to recache * test thumbnail fetching * add incredible diagram * go mod tidy * buffer pipes for recache streaming * test for client stops reading after 1kb * add media-remote-cache-days to config * add cron package * wrap logrus so it's available to cron * start and stop cron jobs gracefully
This commit is contained in:
parent
100f1280a6
commit
07727753b9
424 changed files with 637100 additions and 176498 deletions
172
internal/db/bundb/migrations/20220214175650_media_cleanup.go
Normal file
172
internal/db/bundb/migrations/20220214175650_media_cleanup.go
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
GoToSocial
|
||||
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package migrations
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"time"
|
||||
|
||||
previousgtsmodel "github.com/superseriousbusiness/gotosocial/internal/db/bundb/migrations/20211113114307_init"
|
||||
newgtsmodel "github.com/superseriousbusiness/gotosocial/internal/db/bundb/migrations/20220214175650_media_cleanup"
|
||||
"github.com/uptrace/bun"
|
||||
)
|
||||
|
||||
func init() {
|
||||
const batchSize = 100
|
||||
up := func(ctx context.Context, db *bun.DB) error {
|
||||
// we need to migrate media attachments into a new table
|
||||
// see section 6 here: https://www.sqlite.org/lang_altertable.html
|
||||
|
||||
return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
|
||||
// create the new media attachments table
|
||||
if _, err := tx.
|
||||
NewCreateTable().
|
||||
ModelTableExpr("new_media_attachments").
|
||||
Model(&newgtsmodel.MediaAttachment{}).
|
||||
IfNotExists().
|
||||
Exec(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
offset := time.Now()
|
||||
// migrate existing media attachments into new table
|
||||
migrateLoop:
|
||||
for {
|
||||
oldAttachments := []*previousgtsmodel.MediaAttachment{}
|
||||
err := tx.
|
||||
NewSelect().
|
||||
Model(&oldAttachments).
|
||||
// subtract a millisecond from the offset just to make sure we're not getting double entries (this happens sometimes)
|
||||
Where("media_attachment.created_at < ?", offset.Add(-1*time.Millisecond)).
|
||||
Order("media_attachment.created_at DESC").
|
||||
Limit(batchSize).
|
||||
Scan(ctx)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
// there's been a real error
|
||||
return err
|
||||
}
|
||||
|
||||
if err == sql.ErrNoRows || len(oldAttachments) == 0 {
|
||||
// we're finished migrating
|
||||
break migrateLoop
|
||||
}
|
||||
|
||||
// update the offset to the createdAt time of the oldest media attachment in the slice
|
||||
offset = oldAttachments[len(oldAttachments)-1].CreatedAt
|
||||
|
||||
// for every old attachment, we need to make a new attachment out of it by taking the same values
|
||||
newAttachments := []*newgtsmodel.MediaAttachment{}
|
||||
for _, old := range oldAttachments {
|
||||
new := &newgtsmodel.MediaAttachment{
|
||||
ID: old.ID,
|
||||
CreatedAt: old.CreatedAt,
|
||||
UpdatedAt: old.UpdatedAt,
|
||||
StatusID: old.StatusID,
|
||||
URL: old.URL,
|
||||
RemoteURL: old.RemoteURL,
|
||||
Type: newgtsmodel.FileType(old.Type),
|
||||
FileMeta: newgtsmodel.FileMeta{
|
||||
Original: newgtsmodel.Original{
|
||||
Width: old.FileMeta.Original.Width,
|
||||
Height: old.FileMeta.Original.Height,
|
||||
Size: old.FileMeta.Original.Size,
|
||||
Aspect: old.FileMeta.Original.Aspect,
|
||||
},
|
||||
Small: newgtsmodel.Small{
|
||||
Width: old.FileMeta.Small.Width,
|
||||
Height: old.FileMeta.Small.Height,
|
||||
Size: old.FileMeta.Small.Size,
|
||||
Aspect: old.FileMeta.Small.Aspect,
|
||||
},
|
||||
Focus: newgtsmodel.Focus{
|
||||
X: old.FileMeta.Focus.X,
|
||||
Y: old.FileMeta.Focus.Y,
|
||||
},
|
||||
},
|
||||
AccountID: old.AccountID,
|
||||
Description: old.Description,
|
||||
ScheduledStatusID: old.ScheduledStatusID,
|
||||
Blurhash: old.Blurhash,
|
||||
Processing: newgtsmodel.ProcessingStatus(old.Processing),
|
||||
File: newgtsmodel.File{
|
||||
Path: old.File.Path,
|
||||
ContentType: old.File.ContentType,
|
||||
FileSize: old.File.FileSize,
|
||||
UpdatedAt: old.File.UpdatedAt,
|
||||
},
|
||||
Thumbnail: newgtsmodel.Thumbnail{
|
||||
Path: old.Thumbnail.Path,
|
||||
ContentType: old.Thumbnail.ContentType,
|
||||
FileSize: old.Thumbnail.FileSize,
|
||||
UpdatedAt: old.Thumbnail.UpdatedAt,
|
||||
URL: old.Thumbnail.URL,
|
||||
RemoteURL: old.Thumbnail.RemoteURL,
|
||||
},
|
||||
Avatar: old.Avatar,
|
||||
Header: old.Header,
|
||||
Cached: true,
|
||||
}
|
||||
newAttachments = append(newAttachments, new)
|
||||
}
|
||||
|
||||
// insert this batch of new attachments, and then continue the loop
|
||||
if _, err := tx.
|
||||
NewInsert().
|
||||
Model(&newAttachments).
|
||||
ModelTableExpr("new_media_attachments").
|
||||
Exec(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// we have all the data we need from the old table, so we can safely drop it now
|
||||
if _, err := tx.NewDropTable().Model(&previousgtsmodel.MediaAttachment{}).Exec(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// rename the new table to the same name as the old table was
|
||||
if _, err := tx.QueryContext(ctx, "ALTER TABLE new_media_attachments RENAME TO media_attachments;"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// add an index to the new table
|
||||
if _, err := tx.
|
||||
NewCreateIndex().
|
||||
Model(&newgtsmodel.MediaAttachment{}).
|
||||
Index("media_attachments_id_idx").
|
||||
Column("id").
|
||||
Exec(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
down := func(ctx context.Context, db *bun.DB) error {
|
||||
return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
if err := Migrations.Register(up, down); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue