Compare commits

...

1 commit

Author SHA1 Message Date
tobi
e58f939278 try my darnedest to optimize status threading migration 2025-09-25 16:06:42 +02:00
6 changed files with 270 additions and 306 deletions

View file

@ -32,7 +32,8 @@ These contribution guidelines were adapted from / inspired by those of Gitea (ht
- [CLI Tests](#cli-tests) - [CLI Tests](#cli-tests)
- [Federation](#federation) - [Federation](#federation)
- [Updating Swagger docs](#updating-swagger-docs) - [Updating Swagger docs](#updating-swagger-docs)
- [CI/CD configuration](#ci-cd-configuration) - [Other Useful Stuff](#other-useful-stuff)
- [Running migrations on a Postgres DB backup locally](#running-migrations-on-a-postgres-db-backup-locally)
## Introduction ## Introduction
@ -525,3 +526,40 @@ The `woodpecker` pipeline files are in the `.woodpecker` directory of this repos
The Woodpecker instance for GoToSocial is [here](https://woodpecker.superseriousbusiness.org/repos/2). The Woodpecker instance for GoToSocial is [here](https://woodpecker.superseriousbusiness.org/repos/2).
Documentation for Woodpecker is [here](https://woodpecker-ci.org/docs/intro). Documentation for Woodpecker is [here](https://woodpecker-ci.org/docs/intro).
## Other Useful Stuff
Various bits and bobs.
### Running migrations on a Postgres DB backup locally
It may be useful when testing or debugging migrations to be able to run them against a copy of a real instance's Postgres database locally.
Basic steps for this:
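1. Dump the Postgres database on the remote machine as a plain-text SQL dump (step 4 restores it with `psql`, which cannot read custom-format archives), and copy the dump over to your development machine.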
2. Create a local Postgres container and mount the dump into it with, for example:
```bash
docker run -it --name postgres --network host -e POSTGRES_PASSWORD=postgres -v /path/to/db_dump:/db_dump postgres
```
3. Get a terminal inside the running container:
```bash
docker exec -it --user postgres postgres bash
```
4. Using that terminal, restore the dump (this will probably take a little while depending on the dump size and the specs of your machine):
```bash
psql -X postgres < /db_dump
```
5. With the Postgres container still running, run GoToSocial and point it towards the container. Use the appropriate `GTS_HOST` (and `GTS_ACCOUNT_DOMAIN`) values for the instance you dumped:
```bash
GTS_HOST=example.org \
GTS_DB_TYPE=postgres \
GTS_DB_POSTGRES_CONNECTION_STRING=postgres://postgres:postgres@localhost:5432/postgres \
./gotosocial migrations run
```
When you're done messing around, don't forget to remove any containers that you started up, and remove any lingering volumes with `docker volume prune`; otherwise you might end up filling your disk with unused temporary volumes.

View file

@ -28,9 +28,11 @@ import (
"code.superseriousbusiness.org/gotosocial/internal/db" "code.superseriousbusiness.org/gotosocial/internal/db"
newmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250415111056_thread_all_statuses/new" newmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250415111056_thread_all_statuses/new"
oldmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250415111056_thread_all_statuses/old" oldmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250415111056_thread_all_statuses/old"
"code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250415111056_thread_all_statuses/util"
"code.superseriousbusiness.org/gotosocial/internal/gtserror" "code.superseriousbusiness.org/gotosocial/internal/gtserror"
"code.superseriousbusiness.org/gotosocial/internal/id" "code.superseriousbusiness.org/gotosocial/internal/id"
"code.superseriousbusiness.org/gotosocial/internal/log" "code.superseriousbusiness.org/gotosocial/internal/log"
"code.superseriousbusiness.org/gotosocial/internal/util/xslices"
"github.com/uptrace/bun" "github.com/uptrace/bun"
) )
@ -44,13 +46,36 @@ func init() {
return gtserror.Newf("error getting bun column def: %w", err) return gtserror.Newf("error getting bun column def: %w", err)
} }
// Update column def to use '${name}_new'. // Update column def to use temporary
// '${name}_new' while we migrate.
newColDef = strings.Replace(newColDef, newColDef = strings.Replace(newColDef,
"thread_id", "thread_id_new", 1) "thread_id", "thread_id_new", 1)
// Create thread_id_new already
// so we can populate it as we go.
log.Info(ctx, "creating statuses column thread_id_new")
if _, err := db.NewAddColumn().
Table("statuses").
ColumnExpr(newColDef).
Exec(ctx); err != nil {
return gtserror.Newf("error adding statuses column thread_id_new: %w", err)
}
// Create an index on thread_id_new so
// we can keep track of it as we update.
//
// We'll remove this at the end of the migration.
log.Info(ctx, "creating temporary thread_id_new index")
if _, err := db.NewCreateIndex().
Table("statuses").
Index("statuses_thread_id_new_idx").
Column("thread_id_new").
Exec(ctx); err != nil {
return gtserror.Newf("error creating temporary thread_id_new index: %w", err)
}
var sr statusRethreader var sr statusRethreader
var count int var updatedRows int64
var maxID string
var statuses []*oldmodel.Status var statuses []*oldmodel.Status
// Get a total count of all statuses before migration. // Get a total count of all statuses before migration.
@ -59,51 +84,41 @@ func init() {
return gtserror.Newf("error getting status table count: %w", err) return gtserror.Newf("error getting status table count: %w", err)
} }
// Start at largest log.Warnf(ctx, "migrating %d statuses, this may take a *long* time, and the first few queries will likely be slower than the rest", total)
// possible ULID value. for {
maxID = id.Highest
log.Warn(ctx, "rethreading top-level statuses, this will take a *long* time")
for /* TOP LEVEL STATUS LOOP */ {
// Reset slice. // Reset slice.
clear(statuses) clear(statuses)
statuses = statuses[:0] statuses = statuses[:0]
// Select top-level statuses. // Select IDs of next batch, choosing
// only statuses we haven't migrated yet.
//
// Let the database give us these in whatever order
// it likes, as it's faster than doing an ORDER BY.
if err := db.NewSelect(). if err := db.NewSelect().
Model(&statuses). Model(&statuses).
Column("id", "thread_id"). Column("id").
Where("? = ?", bun.Ident("thread_id_new"), id.Lowest).
// We specifically use in_reply_to_account_id instead of in_reply_to_id as Limit(250).
// they should both be set / unset in unison, but we specifically have an
// index on in_reply_to_account_id with ID ordering, unlike in_reply_to_id.
Where("? IS NULL", bun.Ident("in_reply_to_account_id")).
Where("? < ?", bun.Ident("id"), maxID).
OrderExpr("? DESC", bun.Ident("id")).
Limit(5000).
Scan(ctx); err != nil && !errors.Is(err, sql.ErrNoRows) { Scan(ctx); err != nil && !errors.Is(err, sql.ErrNoRows) {
return gtserror.Newf("error selecting top level statuses: %w", err) return gtserror.Newf("error selecting unthreaded statuses: %w", err)
} }
// Reached end of block. // No more statuses!
if len(statuses) == 0 { if len(statuses) == 0 {
log.Info(ctx, "done migrating statuses!")
break break
} }
// Set next maxID value from statuses. // Rethread each selected status in a transaction.
maxID = statuses[len(statuses)-1].ID
// Rethread each selected batch of top-level statuses in a transaction.
if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
// Rethread each top-level status.
for _, status := range statuses { for _, status := range statuses {
n, err := sr.rethreadStatus(ctx, tx, status) n, err := sr.rethreadStatus(ctx, tx, status)
if err != nil { if err != nil {
return gtserror.Newf("error rethreading status %s: %w", status.URI, err) return gtserror.Newf("error rethreading status %s: %w", status.URI, err)
} }
count += n updatedRows += n
} }
return nil return nil
@ -111,7 +126,24 @@ func init() {
return err return err
} }
log.Infof(ctx, "[approx %d of %d] rethreading statuses (top-level)", count, total) // Show percent migrated.
//
// Will maybe end up wonky due to approximations
// and batching, so stop showing it after 99%.
percentDone := (float64(updatedRows) / float64(total)) * 100
if percentDone <= 99 {
log.Infof(
ctx,
"[updated %d rows] migrated approx. %.2f%% of statuses",
updatedRows, percentDone,
)
} else {
log.Infof(
ctx,
"[updated %d rows] almost done migrating... ",
updatedRows,
)
}
} }
// Attempt to merge any sqlite write-ahead-log. // Attempt to merge any sqlite write-ahead-log.
@ -119,58 +151,11 @@ func init() {
return err return err
} }
log.Warn(ctx, "rethreading straggler statuses, this will take a *long* time") log.Info(ctx, "dropping temporary thread_id_new index")
for /* STRAGGLER STATUS LOOP */ { if _, err := db.NewDropIndex().
Index("statuses_thread_id_new_idx").
// Reset slice. Exec(ctx); err != nil {
clear(statuses) return gtserror.Newf("error dropping temporary thread_id_new index: %w", err)
statuses = statuses[:0]
// Select straggler statuses.
if err := db.NewSelect().
Model(&statuses).
Column("id", "in_reply_to_id", "thread_id").
Where("? IS NULL", bun.Ident("thread_id")).
// We select in smaller batches for this part
// of the migration as there is a chance that
// we may be fetching statuses that might be
// part of the same thread, i.e. one call to
// rethreadStatus() may affect other statuses
// later in the slice.
Limit(1000).
Scan(ctx); err != nil && !errors.Is(err, sql.ErrNoRows) {
return gtserror.Newf("error selecting straggler statuses: %w", err)
}
// Reached end of block.
if len(statuses) == 0 {
break
}
// Rethread each selected batch of straggler statuses in a transaction.
if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
// Rethread each top-level status.
for _, status := range statuses {
n, err := sr.rethreadStatus(ctx, tx, status)
if err != nil {
return gtserror.Newf("error rethreading status %s: %w", status.URI, err)
}
count += n
}
return nil
}); err != nil {
return err
}
log.Infof(ctx, "[approx %d of %d] rethreading statuses (stragglers)", count, total)
}
// Attempt to merge any sqlite write-ahead-log.
if err := doWALCheckpoint(ctx, db); err != nil {
return err
} }
log.Info(ctx, "dropping old thread_to_statuses table") log.Info(ctx, "dropping old thread_to_statuses table")
@ -180,33 +165,6 @@ func init() {
return gtserror.Newf("error dropping old thread_to_statuses table: %w", err) return gtserror.Newf("error dropping old thread_to_statuses table: %w", err)
} }
log.Info(ctx, "creating new statuses thread_id column")
if _, err := db.NewAddColumn().
Table("statuses").
ColumnExpr(newColDef).
Exec(ctx); err != nil {
return gtserror.Newf("error adding new thread_id column: %w", err)
}
log.Info(ctx, "setting thread_id_new = thread_id (this may take a while...)")
if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
return batchUpdateByID(ctx, tx,
"statuses", // table
"id", // batchByCol
"UPDATE ? SET ? = ?", // updateQuery
[]any{bun.Ident("statuses"),
bun.Ident("thread_id_new"),
bun.Ident("thread_id")},
)
}); err != nil {
return err
}
// Attempt to merge any sqlite write-ahead-log.
if err := doWALCheckpoint(ctx, db); err != nil {
return err
}
log.Info(ctx, "dropping old statuses thread_id index") log.Info(ctx, "dropping old statuses thread_id index")
if _, err := db.NewDropIndex(). if _, err := db.NewDropIndex().
Index("statuses_thread_id_idx"). Index("statuses_thread_id_idx").
@ -289,8 +247,8 @@ type statusRethreader struct {
} }
// rethreadStatus is the main logic handler for statusRethreader{}. this is what gets called from the migration // rethreadStatus is the main logic handler for statusRethreader{}. this is what gets called from the migration
// in order to trigger a status rethreading operation for the given status, returning total number rethreaded. // in order to trigger a status rethreading operation for the given status, returning total number of rows changed.
func (sr *statusRethreader) rethreadStatus(ctx context.Context, tx bun.Tx, status *oldmodel.Status) (int, error) { func (sr *statusRethreader) rethreadStatus(ctx context.Context, tx bun.Tx, status *oldmodel.Status) (int64, error) {
// Zero slice and // Zero slice and
// map ptr values. // map ptr values.
@ -346,6 +304,8 @@ func (sr *statusRethreader) rethreadStatus(ctx context.Context, tx bun.Tx, statu
return 0, gtserror.Newf("error getting children: %w", err) return 0, gtserror.Newf("error getting children: %w", err)
} }
// Dedupe thread IDs.
// Check for newly picked-up threads // Check for newly picked-up threads
// to find stragglers for below. Else // to find stragglers for below. Else
// we've reached end of what we can do. // we've reached end of what we can do.
@ -371,10 +331,6 @@ func (sr *statusRethreader) rethreadStatus(ctx context.Context, tx bun.Tx, statu
threadIdx = len(sr.threadIDs) threadIdx = len(sr.threadIDs)
} }
// Total number of
// statuses threaded.
total := len(sr.statusIDs)
// Check for the case where the entire // Check for the case where the entire
// batch of statuses is already correctly // batch of statuses is already correctly
// threaded. Then we have nothing to do! // threaded. Then we have nothing to do!
@ -417,29 +373,61 @@ func (sr *statusRethreader) rethreadStatus(ctx context.Context, tx bun.Tx, statu
} }
} }
// Update all the statuses to // Use a bulk update to update all the
// use determined thread_id. // statuses to use determined thread_id.
if _, err := tx.NewUpdate(). //
Table("statuses"). // https://bun.uptrace.dev/guide/query-update.html#bulk-update
Where("? IN (?)", bun.Ident("id"), bun.In(sr.statusIDs)). values := make([]*util.Status, 0, len(sr.statusIDs))
Set("? = ?", bun.Ident("thread_id"), threadID). for _, statusID := range sr.statusIDs {
Exec(ctx); err != nil { values = append(values, &util.Status{
ID: statusID,
ThreadIDNew: threadID,
})
}
res, err := tx.NewUpdate().
With("_data", tx.NewValues(&values)).
Model((*util.Status)(nil)).
TableExpr("_data").
// Set the new thread ID, which we can use as
// an indication that we've migrated this batch.
Set("? = ?", bun.Ident("thread_id_new"), bun.Ident("_data.thread_id_new")).
// While we're here, also set old thread_id, as
// we'll use it for further rethreading purposes.
Set("? = ?", bun.Ident("thread_id"), bun.Ident("_data.thread_id_new")).
// "Join" on status ID.
Where("? = ?", bun.Ident("status.id"), bun.Ident("_data.id")).
// To avoid spurious writes,
// only update unmigrated statuses.
Where("? = ?", bun.Ident("status.thread_id_new"), id.Lowest).
Exec(ctx)
if err != nil {
return 0, gtserror.Newf("error updating status thread ids: %w", err) return 0, gtserror.Newf("error updating status thread ids: %w", err)
} }
rowsAffected, err := res.RowsAffected()
if err != nil {
return 0, gtserror.Newf("error counting rows affected: %w", err)
}
if len(sr.threadIDs) > 0 { if len(sr.threadIDs) > 0 {
// Update any existing thread // Update any existing thread
// mutes to use latest thread_id. // mutes to use latest thread_id.
// Dedupe thread IDs before query
// to avoid ludicrous "IN" clause.
threadIDs := sr.threadIDs
threadIDs = xslices.Deduplicate(threadIDs)
if _, err := tx.NewUpdate(). if _, err := tx.NewUpdate().
Table("thread_mutes"). Table("thread_mutes").
Where("? IN (?)", bun.Ident("thread_id"), bun.In(sr.threadIDs)). Where("? IN (?)", bun.Ident("thread_id"), bun.In(threadIDs)).
Set("? = ?", bun.Ident("thread_id"), threadID). Set("? = ?", bun.Ident("thread_id"), threadID).
Exec(ctx); err != nil { Exec(ctx); err != nil {
return 0, gtserror.Newf("error updating mute thread ids: %w", err) return 0, gtserror.Newf("error updating mute thread ids: %w", err)
} }
} }
return total, nil return rowsAffected, nil
} }
// append will append the given status to the internal tracking of statusRethreader{} for // append will append the given status to the internal tracking of statusRethreader{} for
@ -560,6 +548,11 @@ func (sr *statusRethreader) getStragglers(ctx context.Context, tx bun.Tx, idx in
clear(sr.statuses) clear(sr.statuses)
sr.statuses = sr.statuses[:0] sr.statuses = sr.statuses[:0]
// Dedupe thread IDs before query
// to avoid ludicrous "IN" clause.
threadIDs := sr.threadIDs[idx:]
threadIDs = xslices.Deduplicate(threadIDs)
// Select stragglers that // Select stragglers that
// also have thread IDs. // also have thread IDs.
if err := tx.NewSelect(). if err := tx.NewSelect().
@ -567,7 +560,7 @@ func (sr *statusRethreader) getStragglers(ctx context.Context, tx bun.Tx, idx in
Column("id", "thread_id", "in_reply_to_id"). Column("id", "thread_id", "in_reply_to_id").
Where("? IN (?) AND ? NOT IN (?)", Where("? IN (?) AND ? NOT IN (?)",
bun.Ident("thread_id"), bun.Ident("thread_id"),
bun.In(sr.threadIDs[idx:]), bun.In(threadIDs),
bun.Ident("id"), bun.Ident("id"),
bun.In(sr.statusIDs), bun.In(sr.statusIDs),
). ).

View file

@ -23,45 +23,45 @@ import (
// Status represents a user-created 'post' or 'status' in the database, either remote or local // Status represents a user-created 'post' or 'status' in the database, either remote or local
type Status struct { type Status struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
EditedAt time.Time `bun:"type:timestamptz,nullzero"` // when this status was last edited (if set) EditedAt time.Time `bun:"type:timestamptz,nullzero"` // when this status was last edited (if set)
FetchedAt time.Time `bun:"type:timestamptz,nullzero"` // when was item (remote) last fetched. FetchedAt time.Time `bun:"type:timestamptz,nullzero"` // when was item (remote) last fetched.
PinnedAt time.Time `bun:"type:timestamptz,nullzero"` // Status was pinned by owning account at this time. PinnedAt time.Time `bun:"type:timestamptz,nullzero"` // Status was pinned by owning account at this time.
URI string `bun:",unique,nullzero,notnull"` // activitypub URI of this status URI string `bun:",unique,nullzero,notnull"` // activitypub URI of this status
URL string `bun:",nullzero"` // web url for viewing this status URL string `bun:",nullzero"` // web url for viewing this status
Content string `bun:""` // Content HTML for this status. Content string `bun:""` // Content HTML for this status.
AttachmentIDs []string `bun:"attachments,array"` // Database IDs of any media attachments associated with this status AttachmentIDs []string `bun:"attachments,array"` // Database IDs of any media attachments associated with this status
TagIDs []string `bun:"tags,array"` // Database IDs of any tags used in this status TagIDs []string `bun:"tags,array"` // Database IDs of any tags used in this status
MentionIDs []string `bun:"mentions,array"` // Database IDs of any mentions in this status MentionIDs []string `bun:"mentions,array"` // Database IDs of any mentions in this status
EmojiIDs []string `bun:"emojis,array"` // Database IDs of any emojis used in this status EmojiIDs []string `bun:"emojis,array"` // Database IDs of any emojis used in this status
Local *bool `bun:",nullzero,notnull,default:false"` // is this status from a local account? Local *bool `bun:",nullzero,notnull,default:false"` // is this status from a local account?
AccountID string `bun:"type:CHAR(26),nullzero,notnull"` // which account posted this status? AccountID string `bun:"type:CHAR(26),nullzero,notnull"` // which account posted this status?
AccountURI string `bun:",nullzero,notnull"` // activitypub uri of the owner of this status AccountURI string `bun:",nullzero,notnull"` // activitypub uri of the owner of this status
InReplyToID string `bun:"type:CHAR(26),nullzero"` // id of the status this status replies to InReplyToID string `bun:"type:CHAR(26),nullzero"` // id of the status this status replies to
InReplyToURI string `bun:",nullzero"` // activitypub uri of the status this status is a reply to InReplyToURI string `bun:",nullzero"` // activitypub uri of the status this status is a reply to
InReplyToAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that this status replies to InReplyToAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that this status replies to
InReplyTo *Status `bun:"-"` // status corresponding to inReplyToID InReplyTo *Status `bun:"-"` // status corresponding to inReplyToID
BoostOfID string `bun:"type:CHAR(26),nullzero"` // id of the status this status is a boost of BoostOfID string `bun:"type:CHAR(26),nullzero"` // id of the status this status is a boost of
BoostOfURI string `bun:"-"` // URI of the status this status is a boost of; field not inserted in the db, just for dereferencing purposes. BoostOfURI string `bun:"-"` // URI of the status this status is a boost of; field not inserted in the db, just for dereferencing purposes.
BoostOfAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that owns the boosted status BoostOfAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that owns the boosted status
BoostOf *Status `bun:"-"` // status that corresponds to boostOfID BoostOf *Status `bun:"-"` // status that corresponds to boostOfID
ThreadID string `bun:"type:CHAR(26),nullzero,notnull,default:00000000000000000000000000"` // id of the thread to which this status belongs ThreadID string `bun:"type:CHAR(26),nullzero,notnull,default:'00000000000000000000000000'"` // id of the thread to which this status belongs
EditIDs []string `bun:"edits,array"` // EditIDs []string `bun:"edits,array"` //
PollID string `bun:"type:CHAR(26),nullzero"` // PollID string `bun:"type:CHAR(26),nullzero"` //
ContentWarning string `bun:",nullzero"` // Content warning HTML for this status. ContentWarning string `bun:",nullzero"` // Content warning HTML for this status.
ContentWarningText string `bun:""` // Original text of the content warning without formatting ContentWarningText string `bun:""` // Original text of the content warning without formatting
Visibility Visibility `bun:",nullzero,notnull"` // visibility entry for this status Visibility Visibility `bun:",nullzero,notnull"` // visibility entry for this status
Sensitive *bool `bun:",nullzero,notnull,default:false"` // mark the status as sensitive? Sensitive *bool `bun:",nullzero,notnull,default:false"` // mark the status as sensitive?
Language string `bun:",nullzero"` // what language is this status written in? Language string `bun:",nullzero"` // what language is this status written in?
CreatedWithApplicationID string `bun:"type:CHAR(26),nullzero"` // Which application was used to create this status? CreatedWithApplicationID string `bun:"type:CHAR(26),nullzero"` // Which application was used to create this status?
ActivityStreamsType string `bun:",nullzero,notnull"` // What is the activitystreams type of this status? See: https://www.w3.org/TR/activitystreams-vocabulary/#object-types. Will probably almost always be Note but who knows!. ActivityStreamsType string `bun:",nullzero,notnull"` // What is the activitystreams type of this status? See: https://www.w3.org/TR/activitystreams-vocabulary/#object-types. Will probably almost always be Note but who knows!.
Text string `bun:""` // Original text of the status without formatting Text string `bun:""` // Original text of the status without formatting
ContentType StatusContentType `bun:",nullzero"` // Content type used to process the original text of the status ContentType StatusContentType `bun:",nullzero"` // Content type used to process the original text of the status
Federated *bool `bun:",notnull"` // This status will be federated beyond the local timeline(s) Federated *bool `bun:",notnull"` // This status will be federated beyond the local timeline(s)
PendingApproval *bool `bun:",nullzero,notnull,default:false"` // If true then status is a reply or boost wrapper that must be Approved by the reply-ee or boost-ee before being fully distributed. PendingApproval *bool `bun:",nullzero,notnull,default:false"` // If true then status is a reply or boost wrapper that must be Approved by the reply-ee or boost-ee before being fully distributed.
PreApproved bool `bun:"-"` // If true, then status is a reply to or boost wrapper of a status on our instance, has permission to do the interaction, and an Accept should be sent out for it immediately. Field not stored in the DB. PreApproved bool `bun:"-"` // If true, then status is a reply to or boost wrapper of a status on our instance, has permission to do the interaction, and an Accept should be sent out for it immediately. Field not stored in the DB.
ApprovedByURI string `bun:",nullzero"` // URI of an Accept Activity that approves the Announce or Create Activity that this status was/will be attached to. ApprovedByURI string `bun:",nullzero"` // URI of an Accept Activity that approves the Announce or Create Activity that this status was/will be attached to.
} }
// enumType is the type we (at least, should) use // enumType is the type we (at least, should) use

View file

@ -0,0 +1,25 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package util
// Status is a helper type specifically
// for updating the thread ID of a status.
//
// It carries only the two values needed by the bulk
// update in rethreadStatus(): a slice of these is passed
// to bun's NewValues() to build the "_data" CTE, which is
// then joined against status rows by ID.
type Status struct {
// ID of the status row to update;
// matched against as "_data.id".
ID string `bun:"type:CHAR(26)"`
// Thread ID to assign to that status; read back as
// "_data.thread_id_new" and written by the bulk update
// into both the thread_id_new and thread_id columns.
ThreadIDNew string `bun:"type:CHAR(26)"`
}

View file

@ -66,98 +66,6 @@ func doWALCheckpoint(ctx context.Context, db *bun.DB) error {
return nil return nil
} }
// batchUpdateByID performs the given updateQuery with updateArgs
// over the entire given table, batching by the ID of batchByCol.
//
// Parameters:
//   - table: name of the table to count and to select batches from.
//   - batchByCol: column used as the batching cursor; batches are
//     selected in descending order of this column, starting below
//     id.Highest.
//   - updateQuery: raw UPDATE statement *without* a WHERE clause;
//     " WHERE ? IN (?)" is appended to restrict it to each batch.
//   - updateArgs: arguments for the placeholders in updateQuery. The
//     slice is cloned before the batch arguments are appended, so the
//     caller's slice is not modified.
//
// Returns an error if any query fails, or if the number of rows
// updated across all batches does not equal the row count taken
// at the start (so that the enclosing transaction gets rolled back).
func batchUpdateByID(
ctx context.Context,
tx bun.Tx,
table string,
batchByCol string,
updateQuery string,
updateArgs []any,
) error {
// Get a count of all in table.
total, err := tx.NewSelect().
Table(table).
Count(ctx)
if err != nil {
return gtserror.Newf("error selecting total count: %w", err)
}
// Query batch size
// in number of rows.
const batchsz = 5000
// Stores highest batch value
// used in iterate queries,
// starting at highest possible.
highest := id.Highest
// Total updated rows.
var updated int
for {
// Limit to batchsz
// items at once.
//
// This sub-query selects the next (up to) batchsz values
// of batchByCol strictly below the current cursor. It is
// used twice per iteration: once inside the UPDATE, and
// once more below to work out the next cursor value.
batchQ := tx.
NewSelect().
Table(table).
Column(batchByCol).
Where("? < ?", bun.Ident(batchByCol), highest).
OrderExpr("? DESC", bun.Ident(batchByCol)).
Limit(batchsz)
// Finalize UPDATE to act only on batch.
qStr := updateQuery + " WHERE ? IN (?)"
args := append(slices.Clone(updateArgs),
bun.Ident(batchByCol),
batchQ,
)
// Execute the prepared raw query with arguments.
res, err := tx.NewRaw(qStr, args...).Exec(ctx)
if err != nil {
return gtserror.Newf("error updating old column values: %w", err)
}
// Check how many items we updated.
thisUpdated, err := res.RowsAffected()
if err != nil {
return gtserror.Newf("error counting affected rows: %w", err)
}
if thisUpdated == 0 {
// Nothing updated
// means we're done.
break
}
// Update the overall count.
updated += int(thisUpdated)
// Log helpful message to admin.
log.Infof(ctx, "migrated %d of %d %s (up to %s)",
updated, total, table, highest)
// Get next highest
// id for next batch.
//
// batchQ was built with the current cursor value, so the
// minimum of its result is the lowest value in the batch
// just updated; that becomes the exclusive upper bound
// for the next iteration.
if err := tx.
NewSelect().
With("batch_query", batchQ).
ColumnExpr("min(?) FROM ?", bun.Ident(batchByCol), bun.Ident("batch_query")).
Scan(ctx, &highest); err != nil {
return gtserror.Newf("error selecting next highest: %w", err)
}
}
// Sanity check: every row counted at
// the start must have been updated.
if total != int(updated) {
// Return error here in order to rollback the whole transaction.
return fmt.Errorf("total=%d does not match updated=%d", total, updated)
}
return nil
}
// convertEnums performs a transaction that converts // convertEnums performs a transaction that converts
// a table's column of our old-style enums (strings) to // a table's column of our old-style enums (strings) to
// more performant and space-saving integer types. // more performant and space-saving integer types.

View file

@ -27,56 +27,56 @@ import (
// Status represents a user-created 'post' or 'status' in the database, either remote or local // Status represents a user-created 'post' or 'status' in the database, either remote or local
type Status struct { type Status struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
EditedAt time.Time `bun:"type:timestamptz,nullzero"` // when this status was last edited (if set) EditedAt time.Time `bun:"type:timestamptz,nullzero"` // when this status was last edited (if set)
FetchedAt time.Time `bun:"type:timestamptz,nullzero"` // when was item (remote) last fetched. FetchedAt time.Time `bun:"type:timestamptz,nullzero"` // when was item (remote) last fetched.
PinnedAt time.Time `bun:"type:timestamptz,nullzero"` // Status was pinned by owning account at this time. PinnedAt time.Time `bun:"type:timestamptz,nullzero"` // Status was pinned by owning account at this time.
URI string `bun:",unique,nullzero,notnull"` // activitypub URI of this status URI string `bun:",unique,nullzero,notnull"` // activitypub URI of this status
URL string `bun:",nullzero"` // web url for viewing this status URL string `bun:",nullzero"` // web url for viewing this status
Content string `bun:""` // Content HTML for this status. Content string `bun:""` // Content HTML for this status.
AttachmentIDs []string `bun:"attachments,array"` // Database IDs of any media attachments associated with this status AttachmentIDs []string `bun:"attachments,array"` // Database IDs of any media attachments associated with this status
Attachments []*MediaAttachment `bun:"attached_media,rel:has-many"` // Attachments corresponding to attachmentIDs Attachments []*MediaAttachment `bun:"attached_media,rel:has-many"` // Attachments corresponding to attachmentIDs
TagIDs []string `bun:"tags,array"` // Database IDs of any tags used in this status TagIDs []string `bun:"tags,array"` // Database IDs of any tags used in this status
Tags []*Tag `bun:"attached_tags,m2m:status_to_tags"` // Tags corresponding to tagIDs. https://bun.uptrace.dev/guide/relations.html#many-to-many-relation Tags []*Tag `bun:"attached_tags,m2m:status_to_tags"` // Tags corresponding to tagIDs. https://bun.uptrace.dev/guide/relations.html#many-to-many-relation
MentionIDs []string `bun:"mentions,array"` // Database IDs of any mentions in this status MentionIDs []string `bun:"mentions,array"` // Database IDs of any mentions in this status
Mentions []*Mention `bun:"attached_mentions,rel:has-many"` // Mentions corresponding to mentionIDs Mentions []*Mention `bun:"attached_mentions,rel:has-many"` // Mentions corresponding to mentionIDs
EmojiIDs []string `bun:"emojis,array"` // Database IDs of any emojis used in this status EmojiIDs []string `bun:"emojis,array"` // Database IDs of any emojis used in this status
Emojis []*Emoji `bun:"attached_emojis,m2m:status_to_emojis"` // Emojis corresponding to emojiIDs. https://bun.uptrace.dev/guide/relations.html#many-to-many-relation Emojis []*Emoji `bun:"attached_emojis,m2m:status_to_emojis"` // Emojis corresponding to emojiIDs. https://bun.uptrace.dev/guide/relations.html#many-to-many-relation
Local *bool `bun:",nullzero,notnull,default:false"` // is this status from a local account? Local *bool `bun:",nullzero,notnull,default:false"` // is this status from a local account?
AccountID string `bun:"type:CHAR(26),nullzero,notnull"` // which account posted this status? AccountID string `bun:"type:CHAR(26),nullzero,notnull"` // which account posted this status?
Account *Account `bun:"rel:belongs-to"` // account corresponding to accountID Account *Account `bun:"rel:belongs-to"` // account corresponding to accountID
AccountURI string `bun:",nullzero,notnull"` // activitypub uri of the owner of this status AccountURI string `bun:",nullzero,notnull"` // activitypub uri of the owner of this status
InReplyToID string `bun:"type:CHAR(26),nullzero"` // id of the status this status replies to InReplyToID string `bun:"type:CHAR(26),nullzero"` // id of the status this status replies to
InReplyToURI string `bun:",nullzero"` // activitypub uri of the status this status is a reply to InReplyToURI string `bun:",nullzero"` // activitypub uri of the status this status is a reply to
InReplyToAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that this status replies to InReplyToAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that this status replies to
InReplyTo *Status `bun:"-"` // status corresponding to inReplyToID InReplyTo *Status `bun:"-"` // status corresponding to inReplyToID
InReplyToAccount *Account `bun:"rel:belongs-to"` // account corresponding to inReplyToAccountID InReplyToAccount *Account `bun:"rel:belongs-to"` // account corresponding to inReplyToAccountID
BoostOfID string `bun:"type:CHAR(26),nullzero"` // id of the status this status is a boost of BoostOfID string `bun:"type:CHAR(26),nullzero"` // id of the status this status is a boost of
BoostOfURI string `bun:"-"` // URI of the status this status is a boost of; field not inserted in the db, just for dereferencing purposes. BoostOfURI string `bun:"-"` // URI of the status this status is a boost of; field not inserted in the db, just for dereferencing purposes.
BoostOfAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that owns the boosted status BoostOfAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that owns the boosted status
BoostOf *Status `bun:"-"` // status that corresponds to boostOfID BoostOf *Status `bun:"-"` // status that corresponds to boostOfID
BoostOfAccount *Account `bun:"rel:belongs-to"` // account that corresponds to boostOfAccountID BoostOfAccount *Account `bun:"rel:belongs-to"` // account that corresponds to boostOfAccountID
ThreadID string `bun:"type:CHAR(26),nullzero,notnull,default:00000000000000000000000000"` // id of the thread to which this status belongs ThreadID string `bun:"type:CHAR(26),nullzero,notnull,default:'00000000000000000000000000'"` // id of the thread to which this status belongs
EditIDs []string `bun:"edits,array"` // IDs of status edits for this status, ordered from smallest (oldest) -> largest (newest) ID. EditIDs []string `bun:"edits,array"` // IDs of status edits for this status, ordered from smallest (oldest) -> largest (newest) ID.
Edits []*StatusEdit `bun:"-"` // Edits of this status, ordered from oldest -> newest edit. Edits []*StatusEdit `bun:"-"` // Edits of this status, ordered from oldest -> newest edit.
PollID string `bun:"type:CHAR(26),nullzero"` // PollID string `bun:"type:CHAR(26),nullzero"` //
Poll *Poll `bun:"-"` // Poll *Poll `bun:"-"` //
ContentWarning string `bun:",nullzero"` // Content warning HTML for this status. ContentWarning string `bun:",nullzero"` // Content warning HTML for this status.
ContentWarningText string `bun:""` // Original text of the content warning without formatting ContentWarningText string `bun:""` // Original text of the content warning without formatting
Visibility Visibility `bun:",nullzero,notnull"` // visibility entry for this status Visibility Visibility `bun:",nullzero,notnull"` // visibility entry for this status
Sensitive *bool `bun:",nullzero,notnull,default:false"` // mark the status as sensitive? Sensitive *bool `bun:",nullzero,notnull,default:false"` // mark the status as sensitive?
Language string `bun:",nullzero"` // what language is this status written in? Language string `bun:",nullzero"` // what language is this status written in?
CreatedWithApplicationID string `bun:"type:CHAR(26),nullzero"` // Which application was used to create this status? CreatedWithApplicationID string `bun:"type:CHAR(26),nullzero"` // Which application was used to create this status?
CreatedWithApplication *Application `bun:"rel:belongs-to"` // application corresponding to createdWithApplicationID CreatedWithApplication *Application `bun:"rel:belongs-to"` // application corresponding to createdWithApplicationID
ActivityStreamsType string `bun:",nullzero,notnull"` // What is the activitystreams type of this status? See: https://www.w3.org/TR/activitystreams-vocabulary/#object-types. Will probably almost always be Note but who knows!. ActivityStreamsType string `bun:",nullzero,notnull"` // What is the activitystreams type of this status? See: https://www.w3.org/TR/activitystreams-vocabulary/#object-types. Will probably almost always be Note but who knows!.
Text string `bun:""` // Original text of the status without formatting Text string `bun:""` // Original text of the status without formatting
ContentType StatusContentType `bun:",nullzero"` // Content type used to process the original text of the status ContentType StatusContentType `bun:",nullzero"` // Content type used to process the original text of the status
Federated *bool `bun:",notnull"` // This status will be federated beyond the local timeline(s) Federated *bool `bun:",notnull"` // This status will be federated beyond the local timeline(s)
InteractionPolicy *InteractionPolicy `bun:""` // InteractionPolicy for this status. If null then the default InteractionPolicy should be assumed for this status's Visibility. Always null for boost wrappers. InteractionPolicy *InteractionPolicy `bun:""` // InteractionPolicy for this status. If null then the default InteractionPolicy should be assumed for this status's Visibility. Always null for boost wrappers.
PendingApproval *bool `bun:",nullzero,notnull,default:false"` // If true then status is a reply or boost wrapper that must be Approved by the reply-ee or boost-ee before being fully distributed. PendingApproval *bool `bun:",nullzero,notnull,default:false"` // If true then status is a reply or boost wrapper that must be Approved by the reply-ee or boost-ee before being fully distributed.
PreApproved bool `bun:"-"` // If true, then status is a reply to or boost wrapper of a status on our instance, has permission to do the interaction, and an Accept should be sent out for it immediately. Field not stored in the DB. PreApproved bool `bun:"-"` // If true, then status is a reply to or boost wrapper of a status on our instance, has permission to do the interaction, and an Accept should be sent out for it immediately. Field not stored in the DB.
ApprovedByURI string `bun:",nullzero"` // URI of *either* an Accept Activity, or a ReplyAuthorization or AnnounceAuthorization, which approves the Announce, Create or interaction request Activity that this status was/will be attached to. ApprovedByURI string `bun:",nullzero"` // URI of *either* an Accept Activity, or a ReplyAuthorization or AnnounceAuthorization, which approves the Announce, Create or interaction request Activity that this status was/will be attached to.
} }
// GetID implements timeline.Timelineable{}.