Compare commits

1 commit

Author | SHA1 | Message | Date
tobi | e58f939278 | try my darnedest to optimize status threading migration | 2025-09-25 16:06:42 +02:00
2 changed files with 41 additions and 35 deletions

View file

@@ -32,7 +32,6 @@ These contribution guidelines were adapted from / inspired by those of Gitea (ht
- [CLI Tests](#cli-tests)
- [Federation](#federation)
- [Updating Swagger docs](#updating-swagger-docs)
- [CI/CD configuration](#ci-cd-configuration)
- [Other Useful Stuff](#other-useful-stuff)
- [Running migrations on a Postgres DB backup locally](#running-migrations-on-a-postgres-db-backup-locally)

View file

@@ -24,7 +24,6 @@ import (
"reflect"
"slices"
"strings"
"time"
"code.superseriousbusiness.org/gotosocial/internal/db"
newmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250415111056_thread_all_statuses/new"
@@ -62,9 +61,21 @@ func init() {
return gtserror.Newf("error adding statuses column thread_id_new: %w", err)
}
// Create an index on thread_id_new so
// we can keep track of it as we update.
//
// We'll remove this at the end of the migration.
log.Info(ctx, "creating temporary thread_id_new index")
if _, err := db.NewCreateIndex().
Table("statuses").
Index("statuses_thread_id_new_idx").
Column("thread_id_new").
Exec(ctx); err != nil {
return gtserror.Newf("error creating temporary thread_id_new index: %w", err)
}
var sr statusRethreader
var updatedRowsTotal int64
var maxID string
var updatedRows int64
var statuses []*oldmodel.Status
// Get a total count of all statuses before migration.
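&#32;
Note on the hunk above (and the matching drop further down in the diff): the new code brackets the whole backfill between creating and dropping a throwaway index on thread_id_new, so the "not yet migrated" lookups stay cheap while the migration runs. A minimal sketch of that pattern, assuming a plain *bun.DB handle; withTempIndex is a hypothetical helper, not part of the migration, which inlines these steps:

```go
package main

import (
	"context"
	"fmt"

	"github.com/uptrace/bun"
)

// withTempIndex creates a temporary index on statuses(thread_id_new),
// runs the given backfill, then drops the index again.
func withTempIndex(ctx context.Context, db *bun.DB, backfill func(context.Context) error) error {
	if _, err := db.NewCreateIndex().
		Table("statuses").
		Index("statuses_thread_id_new_idx").
		Column("thread_id_new").
		Exec(ctx); err != nil {
		return fmt.Errorf("creating temporary index: %w", err)
	}

	if err := backfill(ctx); err != nil {
		return err
	}

	// The index only exists to speed up lookups of rows that still
	// need migrating, so drop it once the backfill is finished.
	if _, err := db.NewDropIndex().
		Index("statuses_thread_id_new_idx").
		Exec(ctx); err != nil {
		return fmt.Errorf("dropping temporary index: %w", err)
	}
	return nil
}
```
&#32;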
@@ -73,50 +84,41 @@ func init() {
return gtserror.Newf("error getting status table count: %w", err)
}
// Start at largest
// possible ULID value.
maxID = id.Highest
log.Warnf(ctx, "migrating %d statuses, this may take a *long* time", total)
log.Warnf(ctx, "migrating %d statuses, this may take a *long* time, and the first few queries will likely be slower than the rest", total)
for {
start := time.Now()
// Reset slice.
clear(statuses)
statuses = statuses[:0]
// Select IDs of next
// batch, paging down.
// Select IDs of next batch, choosing
// only statuses we haven't migrated yet.
//
// Let the database give us these in whatever order
// it likes, as it's faster than doing an ORDER BY.
if err := db.NewSelect().
Model(&statuses).
Column("id").
Where("? < ?", bun.Ident("id"), maxID).
OrderExpr("? DESC", bun.Ident("id")).
Where("? = ?", bun.Ident("thread_id_new"), id.Lowest).
Limit(250).
Scan(ctx); err != nil && !errors.Is(err, sql.ErrNoRows) {
return gtserror.Newf("error selecting unthreaded statuses: %w", err)
}
// No more statuses!
l := len(statuses)
if l == 0 {
if len(statuses) == 0 {
log.Info(ctx, "done migrating statuses!")
break
}
// Set next maxID value from statuses.
maxID = statuses[l-1].ID
// Rethread each selected status in a transaction.
var updatedRowsThisBatch int64
if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
for _, status := range statuses {
n, err := sr.rethreadStatus(ctx, tx, status)
if err != nil {
return gtserror.Newf("error rethreading status %s: %w", status.URI, err)
}
updatedRowsThisBatch += n
updatedRowsTotal += n
updatedRows += n
}
return nil
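&#32;
The core change in the hunk above: rather than paging down from id.Highest with an ORDER BY, each iteration now asks for any 250 statuses whose thread_id_new still holds the lowest-possible-ULID sentinel (i.e. not yet migrated), and the loop ends when nothing matches. A rough, self-contained sketch of that loop under those assumptions; sentinel and rethread stand in for id.Lowest and statusRethreader.rethreadStatus, and are not the real API:

```go
package main

import (
	"context"
	"database/sql"
	"errors"

	"github.com/uptrace/bun"
)

const batchSize = 250

// migrateAll is a hypothetical condensed version of the selection loop:
// grab a batch of unmigrated rows, rethread them in one transaction,
// repeat until the sentinel value no longer matches anything.
func migrateAll(
	ctx context.Context,
	db *bun.DB,
	sentinel string, // stands in for id.Lowest
	rethread func(ctx context.Context, tx bun.Tx, statusID string) error,
) error {
	for {
		var ids []string

		// No ORDER BY: any unmigrated rows will do, and letting the
		// database return them in whatever order it likes is cheaper
		// than sorting.
		if err := db.NewSelect().
			Table("statuses").
			Column("id").
			Where("? = ?", bun.Ident("thread_id_new"), sentinel).
			Limit(batchSize).
			Scan(ctx, &ids); err != nil && !errors.Is(err, sql.ErrNoRows) {
			return err
		}

		// Nothing left carrying the sentinel: migration is done.
		if len(ids) == 0 {
			return nil
		}

		// Rethread the whole batch inside a single transaction.
		if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
			for _, id := range ids {
				if err := rethread(ctx, tx, id); err != nil {
					return err
				}
			}
			return nil
		}); err != nil {
			return err
		}
	}
}
```
&#32;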
@@ -124,26 +126,22 @@ func init() {
return err
}
// Show current speed + percent migrated.
// Show percent migrated.
//
// Percent may end up wonky due to approximations
// and batching, so show a generic message at 100%.
timeTaken := time.Since(start).Milliseconds()
msPerRow := float64(timeTaken) / float64(updatedRowsThisBatch)
rowsPerMs := float64(1) / float64(msPerRow)
rowsPerSecond := 1000 * rowsPerMs
percentDone := (float64(updatedRowsTotal) / float64(total)) * 100
if percentDone <= 100 {
// Will maybe end up wonky due to approximations
// and batching, so stop showing it after 99%.
percentDone := (float64(updatedRows) / float64(total)) * 100
if percentDone <= 99 {
log.Infof(
ctx,
"[updated %d total rows, now @ ~%.0f rows/s] done ~%.2f%% of statuses",
updatedRowsTotal, rowsPerSecond, percentDone,
"[updated %d rows] migrated approx. %.2f%% of statuses",
updatedRows, percentDone,
)
} else {
log.Infof(
ctx,
"[updated %d total rows, now @ ~%.0f rows/s] almost done... ",
updatedRowsTotal, rowsPerSecond,
"[updated %d rows] almost done migrating... ",
updatedRows,
)
}
}
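&#32;
Progress logging in the hunk above is pared down to match: the per-batch timing and rows/s estimate are gone, leaving only a running row count and an approximate percentage, with the generic "almost done" message now kicking in past 99% rather than 100% since the estimate can drift. A sketch of that report as a standalone helper; logProgress is hypothetical, and it assumes the same ctx and log package the migration file already uses:

```go
// logProgress is a hypothetical extraction of the new progress report:
// a running row count plus an approximate percentage, switching to a
// generic message once the estimate passes 99%.
func logProgress(ctx context.Context, updatedRows, total int64) {
	percentDone := (float64(updatedRows) / float64(total)) * 100
	if percentDone <= 99 {
		log.Infof(ctx, "[updated %d rows] migrated approx. %.2f%% of statuses", updatedRows, percentDone)
		return
	}
	log.Infof(ctx, "[updated %d rows] almost done migrating...", updatedRows)
}
```
&#32;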
@@ -153,6 +151,13 @@ func init() {
return err
}
log.Info(ctx, "dropping temporary thread_id_new index")
if _, err := db.NewDropIndex().
Index("statuses_thread_id_new_idx").
Exec(ctx); err != nil {
return gtserror.Newf("error dropping temporary thread_id_new index: %w", err)
}
log.Info(ctx, "dropping old thread_to_statuses table")
if _, err := db.NewDropTable().
Table("thread_to_statuses").
@@ -299,6 +304,8 @@ func (sr *statusRethreader) rethreadStatus(ctx context.Context, tx bun.Tx, statu
return 0, gtserror.Newf("error getting children: %w", err)
}
// Dedupe thread IDs.
// Check for newly picked-up threads
// to find stragglers for below. Else
// we've reached end of what we can do.