mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-10-29 10:52:25 -05:00
[chore] Use bulk updates + fewer loops in status rethreading migration (#4459)
This pull request tries to optimize our status rethreading migration by using bulk updates and avoiding unnecessary writes, and by doing the migration in one top-level loop and one stragglers loop, without the extra loop to copy thread_id over.

On my machine it now runs at about 2400 rows per second on Postgres, and about 9000 rows per second on SQLite.

Tried *many* different ways of doing this, with and without temporary indexes, with different batch and transaction sizes, etc., and this seems to be just about the most performant way of getting stuff done.

With the changes, a few minutes have been shaved off migration time when testing on my development machine. *Hopefully* this will translate to more time shaved off when running on a VPS with slower read/write speed and less processor power.

SQLite before:

```
real 20m58,446s
user 16m26,635s
sys 5m53,648s
```

SQLite after:

```
real 14m25,435s
user 12m47,449s
sys 2m27,898s
```

Postgres before:

```
real 28m25,307s
user 3m40,005s
sys 4m45,018s
```

Postgres after:

```
real 22m31,999s
user 3m46,674s
sys 4m39,592s
```

Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4459
Co-authored-by: tobi <tobi.smethurst@protonmail.com>
Co-committed-by: tobi <tobi.smethurst@protonmail.com>
This commit is contained in:
parent
bd1c43d55e
commit
e7cd8bb43e
7 changed files with 429 additions and 271 deletions
|
|
@ -66,98 +66,6 @@ func doWALCheckpoint(ctx context.Context, db *bun.DB) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// batchUpdateByID performs the given updateQuery with updateArgs
|
||||
// over the entire given table, batching by the ID of batchByCol.
|
||||
func batchUpdateByID(
|
||||
ctx context.Context,
|
||||
tx bun.Tx,
|
||||
table string,
|
||||
batchByCol string,
|
||||
updateQuery string,
|
||||
updateArgs []any,
|
||||
) error {
|
||||
// Get a count of all in table.
|
||||
total, err := tx.NewSelect().
|
||||
Table(table).
|
||||
Count(ctx)
|
||||
if err != nil {
|
||||
return gtserror.Newf("error selecting total count: %w", err)
|
||||
}
|
||||
|
||||
// Query batch size
|
||||
// in number of rows.
|
||||
const batchsz = 5000
|
||||
|
||||
// Stores highest batch value
|
||||
// used in iterate queries,
|
||||
// starting at highest possible.
|
||||
highest := id.Highest
|
||||
|
||||
// Total updated rows.
|
||||
var updated int
|
||||
|
||||
for {
|
||||
// Limit to batchsz
|
||||
// items at once.
|
||||
batchQ := tx.
|
||||
NewSelect().
|
||||
Table(table).
|
||||
Column(batchByCol).
|
||||
Where("? < ?", bun.Ident(batchByCol), highest).
|
||||
OrderExpr("? DESC", bun.Ident(batchByCol)).
|
||||
Limit(batchsz)
|
||||
|
||||
// Finalize UPDATE to act only on batch.
|
||||
qStr := updateQuery + " WHERE ? IN (?)"
|
||||
args := append(slices.Clone(updateArgs),
|
||||
bun.Ident(batchByCol),
|
||||
batchQ,
|
||||
)
|
||||
|
||||
// Execute the prepared raw query with arguments.
|
||||
res, err := tx.NewRaw(qStr, args...).Exec(ctx)
|
||||
if err != nil {
|
||||
return gtserror.Newf("error updating old column values: %w", err)
|
||||
}
|
||||
|
||||
// Check how many items we updated.
|
||||
thisUpdated, err := res.RowsAffected()
|
||||
if err != nil {
|
||||
return gtserror.Newf("error counting affected rows: %w", err)
|
||||
}
|
||||
|
||||
if thisUpdated == 0 {
|
||||
// Nothing updated
|
||||
// means we're done.
|
||||
break
|
||||
}
|
||||
|
||||
// Update the overall count.
|
||||
updated += int(thisUpdated)
|
||||
|
||||
// Log helpful message to admin.
|
||||
log.Infof(ctx, "migrated %d of %d %s (up to %s)",
|
||||
updated, total, table, highest)
|
||||
|
||||
// Get next highest
|
||||
// id for next batch.
|
||||
if err := tx.
|
||||
NewSelect().
|
||||
With("batch_query", batchQ).
|
||||
ColumnExpr("min(?) FROM ?", bun.Ident(batchByCol), bun.Ident("batch_query")).
|
||||
Scan(ctx, &highest); err != nil {
|
||||
return gtserror.Newf("error selecting next highest: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if total != int(updated) {
|
||||
// Return error here in order to rollback the whole transaction.
|
||||
return fmt.Errorf("total=%d does not match updated=%d", total, updated)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// convertEnums performs a transaction that converts
|
||||
// a table's column of our old-style enums (strings) to
|
||||
// more performant and space-saving integer types.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue