Compare commits

..

9 commits

Author SHA1 Message Date
tobi
2563568ccc that'll do 2025-09-30 14:54:11 +02:00
tobi
9c544e732c boobs 2025-09-30 14:54:11 +02:00
tobi
1725769733 i'm adjusting the PR, pray i don't adjust it further 2025-09-30 14:54:11 +02:00
tobi
4fd0bdcf2f should be done poking now 2025-09-30 14:54:11 +02:00
tobi
5c000620e2 whoops 2025-09-30 14:54:11 +02:00
tobi
dd3a32acdb few more little tweaks 2025-09-30 14:54:11 +02:00
tobi
365b9efb12 whew 2025-09-30 14:54:11 +02:00
tobi
487292e6f3 remove errant comment 2025-09-30 14:54:11 +02:00
tobi
ae8ac4dd6c finalize indexes etc 2025-09-30 14:54:11 +02:00
2 changed files with 47 additions and 58 deletions

View file

@ -26,7 +26,6 @@ import (
"strings"
"time"
"code.superseriousbusiness.org/gotosocial/internal/db"
newmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250415111056_thread_all_statuses/new"
oldmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250415111056_thread_all_statuses/old"
"code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250415111056_thread_all_statuses/util"
@ -47,15 +46,11 @@ func init() {
return gtserror.Newf("error getting bun column def: %w", err)
}
// Update column def to use '${name}_new'.
// Update column def to use temporary
// '${name}_new' while we migrate.
newColDef = strings.Replace(newColDef,
"thread_id", "thread_id_new", 1)
var sr statusRethreader
var updatedTotal int64
var maxID string
var statuses []*oldmodel.Status
// Create thread_id_new already
// so we can populate it as we go.
log.Info(ctx, "creating statuses column thread_id_new")
@ -66,23 +61,28 @@ func init() {
return gtserror.Newf("error adding statuses column thread_id_new: %w", err)
}
// Try to merge the wal so we're
// not working on the wal file.
if err := doWALCheckpoint(ctx, db); err != nil {
return err
}
// Get a total count of all statuses before migration.
total, err := db.NewSelect().Table("statuses").Count(ctx)
// Get a total count of all
// statuses before migration.
totalStatuses, err := db.
NewSelect().
Table("statuses").
Count(ctx)
if err != nil {
return gtserror.Newf("error getting status table count: %w", err)
}
log.Warnf(ctx, "migrating %d statuses total, this may take a *long* time", totalStatuses)
// Start at largest
var sr statusRethreader
var updatedRowsTotal int64
var statuses []*oldmodel.Status
// Page starting at largest
// possible ULID value.
maxID = id.Highest
log.Warnf(ctx, "rethreading %d statuses, this will take a *long* time", total)
var maxID = id.Highest
// Open initial transaction.
tx, err := db.BeginTx(ctx, nil)
@ -98,19 +98,17 @@ func init() {
batchStart := time.Now()
// Select top-level statuses.
// Select IDs of next
// batch, paging down.
if err := tx.NewSelect().
Model(&statuses).
Column("id").
// We specifically use in_reply_to_account_id instead of in_reply_to_id as
// they should both be set / unset in unison, but we specifically have an
// index on in_reply_to_account_id with ID ordering, unlike in_reply_to_id.
Where("? IS NULL", bun.Ident("in_reply_to_account_id")).
Where("? IS NULL", bun.Ident("in_reply_to_id")).
Where("? < ?", bun.Ident("id"), maxID).
OrderExpr("? DESC", bun.Ident("id")).
Limit(500).
Scan(ctx); err != nil && !errors.Is(err, sql.ErrNoRows) {
return gtserror.Newf("error selecting top level statuses: %w", err)
return gtserror.Newf("error selecting statuses: %w", err)
}
l := len(statuses)
@ -121,9 +119,9 @@ func init() {
// after leaving the loop.
break
} else if i%200 == 0 {
} else if i%100 == 0 {
// Begin a new transaction every
// 200 batches (~100,000 statuses),
// 100 batches (~50000 statuses),
// to avoid massive commits.
// Close existing transaction.
@ -144,29 +142,30 @@ func init() {
}
}
// Set next maxID value from statuses.
maxID = statuses[len(statuses)-1].ID
// Set next maxID
// value from statuses.
maxID = statuses[l-1].ID
// Rethread using the
// open transaction.
var updatedInBatch int64
var updatedRowsThisBatch int64
for _, status := range statuses {
n, err := sr.rethreadStatus(ctx, tx, status, false)
if err != nil {
return gtserror.Newf("error rethreading status %s: %w", status.URI, err)
}
updatedInBatch += n
updatedTotal += n
updatedRowsThisBatch += n
updatedRowsTotal += n
}
// Show speed for this batch.
timeTaken := time.Since(batchStart).Milliseconds()
msPerRow := float64(timeTaken) / float64(updatedInBatch)
msPerRow := float64(timeTaken) / float64(updatedRowsThisBatch)
rowsPerMs := float64(1) / float64(msPerRow)
rowsPerSecond := 1000 * rowsPerMs
// Show percent migrated overall.
totalDone := (float64(updatedTotal) / float64(total)) * 100
totalDone := (float64(updatedRowsTotal) / float64(totalStatuses)) * 100
log.Infof(
ctx,
@ -200,38 +199,35 @@ func init() {
batchStart := time.Now()
// Select straggler statuses.
// Get stragglers for which
// we haven't set thread ID yet.
if err := db.NewSelect().
Model(&statuses).
Column("id").
Where("? = ?", bun.Ident("thread_id_new"), id.Lowest).
// We select in smaller batches for this part
// of the migration as there is a chance that
// we may be fetching statuses that might be
// part of the same thread, i.e. one call to
// rethreadStatus() may affect other statuses
// later in the slice.
Limit(250).
Scan(ctx); err != nil && !errors.Is(err, sql.ErrNoRows) {
return gtserror.Newf("error selecting straggler statuses: %w", err)
return gtserror.Newf("error selecting straggler: %w", err)
}
// Reached end of block.
if len(statuses) == 0 {
// No more
// statuses!
break
}
// Rethread each selected batch of straggler statuses in a transaction.
var updatedInBatch int64
// Update this batch
// inside a transaction.
var updatedRowsThisBatch int64
if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
for _, status := range statuses {
n, err := sr.rethreadStatus(ctx, tx, status, true)
if err != nil {
return gtserror.Newf("error rethreading status %s: %w", status.URI, err)
}
updatedInBatch += n
updatedTotal += n
updatedRowsThisBatch += n
updatedRowsTotal += n
}
return nil
}); err != nil {
@ -240,12 +236,12 @@ func init() {
// Show speed for this batch.
timeTaken := time.Since(batchStart).Milliseconds()
msPerRow := float64(timeTaken) / float64(updatedInBatch)
msPerRow := float64(timeTaken) / float64(updatedRowsThisBatch)
rowsPerMs := float64(1) / float64(msPerRow)
rowsPerSecond := 1000 * rowsPerMs
// Show percent migrated overall.
totalDone := (float64(updatedTotal) / float64(total)) * 100
totalDone := (float64(updatedRowsTotal) / float64(totalStatuses)) * 100
log.Infof(
ctx,
@ -254,7 +250,7 @@ func init() {
)
}
// Attempt to merge any sqlite write-ahead-log.
// Try to merge everything we've done so far.
if err := doWALCheckpoint(ctx, db); err != nil {
return err
}
@ -378,13 +374,6 @@ func (sr *statusRethreader) rethreadStatus(ctx context.Context, tx bun.Tx, statu
// Ensure the passed status
// has up-to-date information.
// This may have changed from
// the initial batch selection
// to the rethreadStatus() call.
//
// Note: Use a map for this so we
// can also select thread_id_new,
// which is not part of *oldmodel.Status.
upToDateValues := make(map[string]any, 3)
if err := tx.NewSelect().
TableExpr("? AS ?", bun.Ident("statuses"), bun.Ident("status")).
@ -619,7 +608,7 @@ func (sr *statusRethreader) getParents(ctx context.Context, tx bun.Tx) error {
Model(&parent).
Column("id", "in_reply_to_id", "thread_id").
Where("? = ?", bun.Ident("id"), id).
Scan(ctx); err != nil && err != db.ErrNoEntries {
Scan(ctx); err != nil && err != sql.ErrNoRows {
return err
}
@ -658,7 +647,7 @@ func (sr *statusRethreader) getChildren(ctx context.Context, tx bun.Tx, idx int)
Model(&sr.statuses).
Column("id", "thread_id").
Where("? = ?", bun.Ident("in_reply_to_id"), id).
Scan(ctx); err != nil && err != db.ErrNoEntries {
Scan(ctx); err != nil && err != sql.ErrNoRows {
return err
}
@ -697,7 +686,7 @@ func (sr *statusRethreader) getStragglers(ctx context.Context, tx bun.Tx, idx in
bun.Ident("id"),
bun.In(sr.statusIDs),
).
Scan(ctx); err != nil && err != db.ErrNoEntries {
Scan(ctx); err != nil && err != sql.ErrNoRows {
return err
}

View file

@ -269,7 +269,7 @@ ol {
blockquote {
padding: 0.5rem;
border-left: 0.2rem solid $border-accent;
margin-inline: 0;
margin: 0;
font-style: normal;
/*