Merge branch 'main' into interaction_policies_forward_compat

This commit is contained in:
tobi 2025-05-28 11:37:57 +02:00
commit 82780b1a89
207 changed files with 9302 additions and 2275 deletions

View file

@ -280,6 +280,6 @@ type ThrottlingConfig struct {
}
type ScraperDeterrenceConfig struct {
Enabled bool `name:"enabled" usage:"Enable proof-of-work based scraper deterrence on profile / status pages"`
Difficulty uint8 `name:"difficulty" usage:"The proof-of-work difficulty, which determines how many leading zeros to try solve in hash solutions."`
Enabled bool `name:"enabled" usage:"Enable proof-of-work based scraper deterrence on profile / status pages"`
Difficulty uint32 `name:"difficulty" usage:"The proof-of-work difficulty, which determines roughly how many hash-encode rounds required of each client."`
}

View file

@ -149,7 +149,7 @@ var Defaults = Configuration{
ScraperDeterrence: ScraperDeterrenceConfig{
Enabled: false,
Difficulty: 4,
Difficulty: 100000,
},
},

View file

@ -144,7 +144,7 @@ func (cfg *Configuration) RegisterFlags(flags *pflag.FlagSet) {
flags.Int("advanced-throttling-multiplier", cfg.Advanced.Throttling.Multiplier, "Multiplier to use per cpu for http request throttling. 0 or less turns throttling off.")
flags.Duration("advanced-throttling-retry-after", cfg.Advanced.Throttling.RetryAfter, "Retry-After duration response to send for throttled requests.")
flags.Bool("advanced-scraper-deterrence-enabled", cfg.Advanced.ScraperDeterrence.Enabled, "Enable proof-of-work based scraper deterrence on profile / status pages")
flags.Uint8("advanced-scraper-deterrence-difficulty", cfg.Advanced.ScraperDeterrence.Difficulty, "The proof-of-work difficulty, which determines how many leading zeros to try solve in hash solutions.")
flags.Uint32("advanced-scraper-deterrence-difficulty", cfg.Advanced.ScraperDeterrence.Difficulty, "The proof-of-work difficulty, which determines how many leading zeros to try solve in hash solutions.")
flags.StringSlice("http-client-allow-ips", cfg.HTTPClient.AllowIPs, "")
flags.StringSlice("http-client-block-ips", cfg.HTTPClient.BlockIPs, "")
flags.Duration("http-client-timeout", cfg.HTTPClient.Timeout, "")
@ -1356,9 +1356,9 @@ func (cfg *Configuration) UnmarshalMap(cfgmap map[string]any) error {
if ival, ok := cfgmap["advanced-scraper-deterrence-difficulty"]; ok {
var err error
cfg.Advanced.ScraperDeterrence.Difficulty, err = cast.ToUint8E(ival)
cfg.Advanced.ScraperDeterrence.Difficulty, err = cast.ToUint32E(ival)
if err != nil {
return fmt.Errorf("error casting %#v -> uint8 for 'advanced-scraper-deterrence-difficulty': %w", ival, err)
return fmt.Errorf("error casting %#v -> uint32 for 'advanced-scraper-deterrence-difficulty': %w", ival, err)
}
}
@ -4799,7 +4799,7 @@ func AdvancedScraperDeterrenceDifficultyFlag() string {
}
// GetAdvancedScraperDeterrenceDifficulty safely fetches the Configuration value for state's 'Advanced.ScraperDeterrence.Difficulty' field
func (st *ConfigState) GetAdvancedScraperDeterrenceDifficulty() (v uint8) {
func (st *ConfigState) GetAdvancedScraperDeterrenceDifficulty() (v uint32) {
st.mutex.RLock()
v = st.config.Advanced.ScraperDeterrence.Difficulty
st.mutex.RUnlock()
@ -4807,7 +4807,7 @@ func (st *ConfigState) GetAdvancedScraperDeterrenceDifficulty() (v uint8) {
}
// SetAdvancedScraperDeterrenceDifficulty safely sets the Configuration value for state's 'Advanced.ScraperDeterrence.Difficulty' field
func (st *ConfigState) SetAdvancedScraperDeterrenceDifficulty(v uint8) {
func (st *ConfigState) SetAdvancedScraperDeterrenceDifficulty(v uint32) {
st.mutex.Lock()
defer st.mutex.Unlock()
st.config.Advanced.ScraperDeterrence.Difficulty = v
@ -4815,12 +4815,12 @@ func (st *ConfigState) SetAdvancedScraperDeterrenceDifficulty(v uint8) {
}
// GetAdvancedScraperDeterrenceDifficulty safely fetches the value for global configuration 'Advanced.ScraperDeterrence.Difficulty' field
func GetAdvancedScraperDeterrenceDifficulty() uint8 {
func GetAdvancedScraperDeterrenceDifficulty() uint32 {
return global.GetAdvancedScraperDeterrenceDifficulty()
}
// SetAdvancedScraperDeterrenceDifficulty safely sets the value for global configuration 'Advanced.ScraperDeterrence.Difficulty' field
func SetAdvancedScraperDeterrenceDifficulty(v uint8) {
func SetAdvancedScraperDeterrenceDifficulty(v uint32) {
global.SetAdvancedScraperDeterrenceDifficulty(v)
}

View file

@ -336,7 +336,6 @@ func bunDB(sqldb *sql.DB, dialect func() schema.Dialect) *bun.DB {
&gtsmodel.ConversationToStatus{},
&gtsmodel.StatusToEmoji{},
&gtsmodel.StatusToTag{},
&gtsmodel.ThreadToStatus{},
} {
db.RegisterModel(t)
}

View file

@ -21,7 +21,7 @@ import (
"context"
"strings"
gtsmodel "code.superseriousbusiness.org/gotosocial/internal/gtsmodel"
gtsmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20231016113235_mute_status_thread"
"code.superseriousbusiness.org/gotosocial/internal/log"
"github.com/uptrace/bun"
"github.com/uptrace/bun/dialect"

View file

@ -0,0 +1,32 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package gtsmodel
// Thread represents one thread of statuses.
// Only the ID field maps to a database column; StatusIDs is
// excluded from the table via its `bun:"-"` tag.
// TODO: add more fields here if necessary.
type Thread struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database (primary key)
StatusIDs []string `bun:"-"` // ids of statuses belonging to this thread (order not guaranteed); not stored in the database
}
// ThreadToStatus is an intermediate struct to facilitate the
// many2many relationship between a thread and one or more statuses.
// Both columns share the "statusthread" unique group, so each
// (thread ID, status ID) pairing may only appear once.
type ThreadToStatus struct {
ThreadID string `bun:"type:CHAR(26),unique:statusthread,nullzero,notnull"` // id of the thread side of the link
StatusID string `bun:"type:CHAR(26),unique:statusthread,nullzero,notnull"` // id of the status side of the link
}

View file

@ -0,0 +1,29 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package gtsmodel
import "time"
// ThreadMute represents an account-level mute of a thread of statuses.
// ThreadID and AccountID share the "thread_mute_thread_id_account_id"
// unique group, so an account can hold at most one mute per thread.
type ThreadMute struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
ThreadID string `bun:"type:CHAR(26),nullzero,notnull,unique:thread_mute_thread_id_account_id"` // ID of the muted thread
AccountID string `bun:"type:CHAR(26),nullzero,notnull,unique:thread_mute_thread_id_account_id"` // Account ID of the creator of this mute
}

View file

@ -0,0 +1,584 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package migrations
import (
"context"
"database/sql"
"errors"
"reflect"
"slices"
"strings"
"code.superseriousbusiness.org/gotosocial/internal/db"
newmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250415111056_thread_all_statuses/new"
oldmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250415111056_thread_all_statuses/old"
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
"code.superseriousbusiness.org/gotosocial/internal/id"
"code.superseriousbusiness.org/gotosocial/internal/log"
"github.com/uptrace/bun"
)
// init registers the "thread all statuses" migration.
//
// The up migration performs the following steps, in order:
//
//  1. Pages through all top-level statuses (newest first, by ID)
//     and rethreads each one together with everything reachable
//     from it, one transaction per page.
//  2. Repeatedly selects any statuses still left with a NULL
//     thread_id ("stragglers") and rethreads those too.
//  3. Drops the old thread_to_statuses table.
//  4. Adds a thread_id_new column using the column definition
//     taken from newmodel.Status, copies thread_id into it, then
//     drops the old column + index and renames / reindexes.
//
// The down migration is a no-op.
func init() {
	up := func(ctx context.Context, db *bun.DB) error {
		newType := reflect.TypeOf(&newmodel.Status{})

		// Get the new column definition with not-null thread_id.
		newColDef, err := getBunColumnDef(db, newType, "ThreadID")
		if err != nil {
			return gtserror.Newf("error getting bun column def: %w", err)
		}

		// Update column def to use '${name}_new'.
		newColDef = strings.Replace(newColDef,
			"thread_id", "thread_id_new", 1)

		var sr statusRethreader
		var count int
		var maxID string
		var statuses []*oldmodel.Status

		// Get a total count of all statuses before migration.
		total, err := db.NewSelect().Table("statuses").Count(ctx)
		if err != nil {
			return gtserror.Newf("error getting status table count: %w", err)
		}

		// Start at largest
		// possible ULID value.
		maxID = id.Highest

		log.Warn(ctx, "rethreading top-level statuses, this will take a *long* time")
		for /* TOP LEVEL STATUS LOOP */ {

			// Reset slice.
			clear(statuses)
			statuses = statuses[:0]

			// Select top-level statuses.
			if err := db.NewSelect().
				Model(&statuses).
				Column("id", "thread_id").

				// We specifically use in_reply_to_account_id instead of in_reply_to_id as
				// they should both be set / unset in unison, but we specifically have an
				// index on in_reply_to_account_id with ID ordering, unlike in_reply_to_id.
				Where("? IS NULL", bun.Ident("in_reply_to_account_id")).
				Where("? < ?", bun.Ident("id"), maxID).
				OrderExpr("? DESC", bun.Ident("id")).
				Limit(5000).
				Scan(ctx); err != nil && !errors.Is(err, sql.ErrNoRows) {
				return gtserror.Newf("error selecting top level statuses: %w", err)
			}

			// Reached end of block.
			if len(statuses) == 0 {
				break
			}

			// Set next maxID value from statuses.
			maxID = statuses[len(statuses)-1].ID

			// Rethread each selected batch of top-level statuses in a transaction.
			if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {

				// Rethread each top-level status.
				for _, status := range statuses {
					n, err := sr.rethreadStatus(ctx, tx, status)
					if err != nil {
						// Only the id / thread_id columns were selected
						// above, so the ID is the one identifier that is
						// actually populated on this model.
						return gtserror.Newf("error rethreading status %s: %w", status.ID, err)
					}
					count += n
				}

				return nil
			}); err != nil {
				return err
			}

			log.Infof(ctx, "[approx %d of %d] rethreading statuses (top-level)", count, total)
		}

		// Attempt to merge any sqlite write-ahead-log.
		if err := doWALCheckpoint(ctx, db); err != nil {
			return err
		}

		log.Warn(ctx, "rethreading straggler statuses, this will take a *long* time")
		for /* STRAGGLER STATUS LOOP */ {

			// Reset slice.
			clear(statuses)
			statuses = statuses[:0]

			// Select straggler statuses.
			if err := db.NewSelect().
				Model(&statuses).
				Column("id", "in_reply_to_id", "thread_id").
				Where("? IS NULL", bun.Ident("thread_id")).

				// We select in smaller batches for this part
				// of the migration as there is a chance that
				// we may be fetching statuses that might be
				// part of the same thread, i.e. one call to
				// rethreadStatus() may affect other statuses
				// later in the slice.
				Limit(1000).
				Scan(ctx); err != nil && !errors.Is(err, sql.ErrNoRows) {
				return gtserror.Newf("error selecting straggler statuses: %w", err)
			}

			// Reached end of block.
			if len(statuses) == 0 {
				break
			}

			// Rethread each selected batch of straggler statuses in a transaction.
			if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {

				// Rethread each straggler status.
				for _, status := range statuses {
					n, err := sr.rethreadStatus(ctx, tx, status)
					if err != nil {
						// As above: URI was never selected, so
						// identify the failing status by its ID.
						return gtserror.Newf("error rethreading status %s: %w", status.ID, err)
					}
					count += n
				}

				return nil
			}); err != nil {
				return err
			}

			log.Infof(ctx, "[approx %d of %d] rethreading statuses (stragglers)", count, total)
		}

		// Attempt to merge any sqlite write-ahead-log.
		if err := doWALCheckpoint(ctx, db); err != nil {
			return err
		}

		log.Info(ctx, "dropping old thread_to_statuses table")
		if _, err := db.NewDropTable().
			Table("thread_to_statuses").
			Exec(ctx); err != nil {
			return gtserror.Newf("error dropping old thread_to_statuses table: %w", err)
		}

		log.Info(ctx, "creating new statuses thread_id column")
		if _, err := db.NewAddColumn().
			Table("statuses").
			ColumnExpr(newColDef).
			Exec(ctx); err != nil {
			return gtserror.Newf("error adding new thread_id column: %w", err)
		}

		log.Info(ctx, "setting thread_id_new = thread_id (this may take a while...)")
		if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
			return batchUpdateByID(ctx, tx,
				"statuses",           // table
				"id",                 // batchByCol
				"UPDATE ? SET ? = ?", // updateQuery
				[]any{bun.Ident("statuses"),
					bun.Ident("thread_id_new"),
					bun.Ident("thread_id")},
			)
		}); err != nil {
			return err
		}

		// Attempt to merge any sqlite write-ahead-log.
		if err := doWALCheckpoint(ctx, db); err != nil {
			return err
		}

		log.Info(ctx, "dropping old statuses thread_id index")
		if _, err := db.NewDropIndex().
			Index("statuses_thread_id_idx").
			Exec(ctx); err != nil {
			return gtserror.Newf("error dropping old thread_id index: %w", err)
		}

		log.Info(ctx, "dropping old statuses thread_id column")
		if _, err := db.NewDropColumn().
			Table("statuses").
			Column("thread_id").
			Exec(ctx); err != nil {
			return gtserror.Newf("error dropping old thread_id column: %w", err)
		}

		log.Info(ctx, "renaming thread_id_new to thread_id")
		if _, err := db.NewRaw(
			"ALTER TABLE ? RENAME COLUMN ? TO ?",
			bun.Ident("statuses"),
			bun.Ident("thread_id_new"),
			bun.Ident("thread_id"),
		).Exec(ctx); err != nil {
			return gtserror.Newf("error renaming new column: %w", err)
		}

		log.Info(ctx, "creating new statuses thread_id index")
		if _, err := db.NewCreateIndex().
			Table("statuses").
			Index("statuses_thread_id_idx").
			Column("thread_id").
			Exec(ctx); err != nil {
			return gtserror.Newf("error creating new thread_id index: %w", err)
		}

		return nil
	}

	// Nothing is reverted on the way down.
	down := func(ctx context.Context, db *bun.DB) error {
		return nil
	}

	if err := Migrations.Register(up, down); err != nil {
		panic(err)
	}
}
// statusRethreader holds the reusable working state for
// rethreadStatus(). All fields are reset (but their backing
// storage kept) at the start of each rethreadStatus() call,
// except inReplyToIDs which is reset at the end of getParents().
type statusRethreader struct {
// the unique status and thread IDs
// of all models passed to append().
// these are later used to update all
// statuses to a single thread ID, and
// update all thread related models to
// use the new updated thread ID.
statusIDs []string
threadIDs []string
// stores the unseen IDs of status
// InReplyTos newly tracked in append(),
// which is then used for a SELECT query
// in getParents(), then promptly reset.
inReplyToIDs []string
// statuses simply provides a reusable
// slice of status models for selects.
// its contents are ephemeral.
statuses []*oldmodel.Status
// seenIDs tracks the unique status and
// thread IDs we have seen, ensuring we
// don't append duplicates to statusIDs
// or threadIDs slices. also helps prevent
// adding duplicate parents to inReplyToIDs.
seenIDs map[string]struct{}
// allThreaded tracks whether every status
// passed to append() has a thread ID set.
// together with len(threadIDs) this can
// determine if already threaded correctly.
allThreaded bool
}
// rethreadStatus is the main logic handler for statusRethreader{}. this is what gets called from the migration
// in order to trigger a status rethreading operation for the given status, returning total number rethreaded.
//
// In order, it:
//   - resets the rethreader's tracking slices / map,
//   - re-selects the given status' in_reply_to_id and thread_id within tx,
//   - alternates getParents() / getChildren() / getStragglers() until a
//     pass turns up no new thread IDs, or no new statuses / reply IDs,
//   - returns (0, nil) early if every tracked status already has a thread
//     ID and exactly one thread ID was collected,
//   - otherwise picks the lowest-sorting collected thread ID (or inserts
//     a new thread row if none was collected) and writes it to every
//     tracked status, re-pointing thread_mutes rows of the other
//     collected thread IDs at it.
//
// On success the returned count is len(sr.statusIDs); on any error it is 0.
func (sr *statusRethreader) rethreadStatus(ctx context.Context, tx bun.Tx, status *oldmodel.Status) (int, error) {
// Zero slice and
// map ptr values.
clear(sr.statusIDs)
clear(sr.threadIDs)
clear(sr.statuses)
clear(sr.seenIDs)
// Reset slices and values for use.
// (inReplyToIDs is not reset here,
// getParents() resets it after use).
sr.statusIDs = sr.statusIDs[:0]
sr.threadIDs = sr.threadIDs[:0]
sr.statuses = sr.statuses[:0]
sr.allThreaded = true
if sr.seenIDs == nil {
// Allocate new hash set for status IDs.
sr.seenIDs = make(map[string]struct{})
}
// Ensure the passed status
// has up-to-date information.
// This may have changed from
// the initial batch selection
// to the rethreadStatus() call.
if err := tx.NewSelect().
Model(status).
Column("in_reply_to_id", "thread_id").
Where("? = ?", bun.Ident("id"), status.ID).
Scan(ctx); err != nil {
return 0, gtserror.Newf("error selecting status: %w", err)
}
// status and thread ID cursor
// index values. these are used
// to keep track of newly loaded
// status / thread IDs between
// loop iterations.
var statusIdx int
var threadIdx int
// Append given status as
// first to our ID slices.
sr.append(status)
for {
// Fetch parents for newly seen in_reply_tos since last loop.
if err := sr.getParents(ctx, tx); err != nil {
return 0, gtserror.Newf("error getting parents: %w", err)
}
// Fetch children for newly seen statuses since last loop.
if err := sr.getChildren(ctx, tx, statusIdx); err != nil {
return 0, gtserror.Newf("error getting children: %w", err)
}
// Check for newly picked-up threads
// to find stragglers for below. Else
// we've reached end of what we can do.
if threadIdx >= len(sr.threadIDs) {
break
}
// Update status IDs cursor.
statusIdx = len(sr.statusIDs)
// Fetch any stragglers for newly seen threads since last loop.
if err := sr.getStragglers(ctx, tx, threadIdx); err != nil {
return 0, gtserror.Newf("error getting stragglers: %w", err)
}
// Check for newly picked-up straggling statuses / replies to
// find parents / children for. Else we've done all we can do.
if statusIdx >= len(sr.statusIDs) && len(sr.inReplyToIDs) == 0 {
break
}
// Update thread IDs cursor.
threadIdx = len(sr.threadIDs)
}
// Total number of
// statuses threaded.
total := len(sr.statusIDs)
// Check for the case where the entire
// batch of statuses is already correctly
// threaded. Then we have nothing to do!
if sr.allThreaded && len(sr.threadIDs) == 1 {
return 0, nil
}
// Sort all of the threads and
// status IDs by age; old -> new.
slices.Sort(sr.threadIDs)
slices.Sort(sr.statusIDs)
var threadID string
if len(sr.threadIDs) > 0 {
// Regardless of whether there ended up being
// multiple threads, we take the oldest value
// thread ID to use for entire batch of them.
// The chosen ID is sliced off the front, so
// sr.threadIDs now only holds the *other*
// thread IDs (used for the mutes update below).
threadID = sr.threadIDs[0]
sr.threadIDs = sr.threadIDs[1:]
}
if threadID == "" {
// None of the previous parents were threaded, we instead
// generate new thread with ID based on oldest creation time.
// (sr.statusIDs always holds at least the passed status).
createdAt, err := id.TimeFromULID(sr.statusIDs[0])
if err != nil {
return 0, gtserror.Newf("error parsing status ulid: %w", err)
}
// Generate thread ID from parsed time.
threadID = id.NewULIDFromTime(createdAt)
// We need to create a
// new thread table entry.
if _, err = tx.NewInsert().
Model(&newmodel.Thread{ID: threadID}).
Exec(ctx); err != nil {
return 0, gtserror.Newf("error creating new thread: %w", err)
}
}
// Update all the statuses to
// use determined thread_id.
if _, err := tx.NewUpdate().
Table("statuses").
Where("? IN (?)", bun.Ident("id"), bun.In(sr.statusIDs)).
Set("? = ?", bun.Ident("thread_id"), threadID).
Exec(ctx); err != nil {
return 0, gtserror.Newf("error updating status thread ids: %w", err)
}
if len(sr.threadIDs) > 0 {
// Update any existing thread
// mutes to use latest thread_id.
if _, err := tx.NewUpdate().
Table("thread_mutes").
Where("? IN (?)", bun.Ident("thread_id"), bun.In(sr.threadIDs)).
Set("? = ?", bun.Ident("thread_id"), threadID).
Exec(ctx); err != nil {
return 0, gtserror.Newf("error updating mute thread ids: %w", err)
}
}
return total, nil
}
// append will append the given status to the internal tracking of statusRethreader{} for
// potential future operations, checking for uniqueness. it tracks the inReplyToID value
// for the next call to getParents(), it tracks the status ID for list of statuses that
// need updating, the thread ID for the list of thread links and mutes that need updating,
// and whether all the statuses all have a provided thread ID (i.e. allThreaded).
//
// A status whose ID has already been seen is ignored entirely. Parent IDs are
// only queued when not already seen, and each thread ID is recorded in
// sr.threadIDs at most once.
func (sr *statusRethreader) append(status *oldmodel.Status) {
	if sr.seenIDs == nil {
		// Guard against writes to a nil map should
		// append() ever be reached before the set
		// was allocated by rethreadStatus().
		sr.seenIDs = make(map[string]struct{})
	}

	// Check if status already seen before.
	if _, ok := sr.seenIDs[status.ID]; ok {
		return
	}

	if status.InReplyToID != "" {
		// Status has a parent, add any unique parent ID
		// to list of reply IDs that need to be queried.
		// Only *unseen* parents need fetching: a parent
		// that was already seen is tracked in statusIDs.
		if _, ok := sr.seenIDs[status.InReplyToID]; !ok {
			sr.inReplyToIDs = append(sr.inReplyToIDs, status.InReplyToID)
		}
	}

	// Add status' ID to list of seen status IDs.
	sr.statusIDs = append(sr.statusIDs, status.ID)

	if status.ThreadID != "" {
		// Status was threaded, add any unique thread
		// ID to our list of known status thread IDs.
		if _, ok := sr.seenIDs[status.ThreadID]; !ok {
			sr.threadIDs = append(sr.threadIDs, status.ThreadID)

			// Mark thread ID as seen, else every further
			// status in this same thread appends it again
			// and len(threadIDs) no longer reflects the
			// number of distinct threads encountered.
			sr.seenIDs[status.ThreadID] = struct{}{}
		}
	} else {
		// Status was not threaded,
		// we now know not all statuses
		// found were threaded.
		sr.allThreaded = false
	}

	// Add status ID to map of seen IDs.
	sr.seenIDs[status.ID] = struct{}{}
}
// getParents selects the parent status for each ID currently queued in
// sr.inReplyToIDs and passes every parent found to append(). Since append()
// may itself queue further (grand)parent IDs, the loop re-checks the slice
// length on every pass and so walks up the whole reply chain in one call.
// Parents with no matching row are skipped. The queue is emptied on return.
func (sr *statusRethreader) getParents(ctx context.Context, tx bun.Tx) error {
	// Iteratively query parent for each stored
	// reply ID. Note this is safe to do as slice
	// loop since 'seenIDs' prevents duplicates.
	for i := 0; i < len(sr.inReplyToIDs); i++ {

		// Get next parent status ID
		// from the queued reply IDs.
		parentID := sr.inReplyToIDs[i]

		// Use a fresh model per iteration, so a
		// missing row can't leave behind the fields
		// of a previously selected parent status.
		var parent oldmodel.Status

		// Select next parent status. A missing row
		// is not an error here, whichever of the
		// two "no rows" sentinels gets returned.
		if err := tx.NewSelect().
			Model(&parent).
			Column("id", "in_reply_to_id", "thread_id").
			Where("? = ?", bun.Ident("id"), parentID).
			Scan(ctx); err != nil &&
			!errors.Is(err, sql.ErrNoRows) &&
			!errors.Is(err, db.ErrNoEntries) {
			return err
		}

		// Parent was missing.
		if parent.ID == "" {
			continue
		}

		// Add to slices.
		sr.append(&parent)
	}

	// Reset reply slice.
	clear(sr.inReplyToIDs)
	sr.inReplyToIDs = sr.inReplyToIDs[:0]

	return nil
}
// getChildren selects the direct replies to every status ID stored in
// sr.statusIDs from position idx onwards, handing each reply to append().
// Replies appended along the way extend sr.statusIDs, and as the loop
// bound is re-read on each pass, their own replies get selected too.
func (sr *statusRethreader) getChildren(ctx context.Context, tx bun.Tx, idx int) error {
	// The slice length must be re-evaluated every
	// pass (no range loop!) as append() grows it.
	// 'seenIDs' guarantees it only gains unique IDs.
	for cursor := idx; cursor < len(sr.statusIDs); cursor++ {
		parentID := sr.statusIDs[cursor]

		// Empty the reusable model
		// slice before selecting.
		clear(sr.statuses)
		sr.statuses = sr.statuses[:0]

		// Select all statuses replying to parent.
		err := tx.NewSelect().
			Model(&sr.statuses).
			Column("id", "thread_id").
			Where("? = ?", bun.Ident("in_reply_to_id"), parentID).
			Scan(ctx)
		if err != nil && err != db.ErrNoEntries {
			return err
		}

		// Track each of the selected replies.
		for j := range sr.statuses {
			sr.append(sr.statuses[j])
		}
	}

	return nil
}
// getStragglers selects statuses that carry one of the thread IDs stored
// in sr.threadIDs from position idx onwards, but whose own ID is not yet
// in sr.statusIDs, and hands each one to append(). It does nothing when
// there are no thread IDs at or beyond idx.
func (sr *statusRethreader) getStragglers(ctx context.Context, tx bun.Tx, idx int) error {
	if idx >= len(sr.threadIDs) {
		// No threads to query.
		return nil
	}

	// Only thread IDs from the
	// cursor onwards get queried.
	threadIDs := sr.threadIDs[idx:]

	// Empty the reusable model
	// slice before selecting.
	clear(sr.statuses)
	sr.statuses = sr.statuses[:0]

	// Select statuses in those threads
	// which we are not yet tracking.
	err := tx.NewSelect().
		Model(&sr.statuses).
		Column("id", "thread_id", "in_reply_to_id").
		Where("? IN (?) AND ? NOT IN (?)",
			bun.Ident("thread_id"),
			bun.In(threadIDs),
			bun.Ident("id"),
			bun.In(sr.statusIDs),
		).
		Scan(ctx)
	if err != nil && err != db.ErrNoEntries {
		return err
	}

	// Track each selected straggler.
	for i := range sr.statuses {
		sr.append(sr.statuses[i])
	}

	return nil
}

View file

@ -0,0 +1,133 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package gtsmodel
import (
"time"
)
// Status represents a user-created 'post' or 'status' in the database, either remote or local.
// In this copy of the model the ThreadID column is declared not-null, with the all-zero ID as
// its database default. Fields tagged `bun:"-"` are not stored in the database.
type Status struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
EditedAt time.Time `bun:"type:timestamptz,nullzero"` // when this status was last edited (if set)
FetchedAt time.Time `bun:"type:timestamptz,nullzero"` // when was item (remote) last fetched.
PinnedAt time.Time `bun:"type:timestamptz,nullzero"` // Status was pinned by owning account at this time.
URI string `bun:",unique,nullzero,notnull"` // activitypub URI of this status
URL string `bun:",nullzero"` // web url for viewing this status
Content string `bun:""` // Content HTML for this status.
AttachmentIDs []string `bun:"attachments,array"` // Database IDs of any media attachments associated with this status
TagIDs []string `bun:"tags,array"` // Database IDs of any tags used in this status
MentionIDs []string `bun:"mentions,array"` // Database IDs of any mentions in this status
EmojiIDs []string `bun:"emojis,array"` // Database IDs of any emojis used in this status
Local *bool `bun:",nullzero,notnull,default:false"` // is this status from a local account?
AccountID string `bun:"type:CHAR(26),nullzero,notnull"` // which account posted this status?
AccountURI string `bun:",nullzero,notnull"` // activitypub uri of the owner of this status
InReplyToID string `bun:"type:CHAR(26),nullzero"` // id of the status this status replies to
InReplyToURI string `bun:",nullzero"` // activitypub uri of the status this status is a reply to
InReplyToAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that this status replies to
InReplyTo *Status `bun:"-"` // status corresponding to inReplyToID
BoostOfID string `bun:"type:CHAR(26),nullzero"` // id of the status this status is a boost of
BoostOfURI string `bun:"-"` // URI of the status this status is a boost of; field not inserted in the db, just for dereferencing purposes.
BoostOfAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that owns the boosted status
BoostOf *Status `bun:"-"` // status that corresponds to boostOfID
ThreadID string `bun:"type:CHAR(26),nullzero,notnull,default:00000000000000000000000000"` // id of the thread to which this status belongs
EditIDs []string `bun:"edits,array"` // presumably database IDs of the edits made to this status (TODO confirm)
PollID string `bun:"type:CHAR(26),nullzero"` // presumably database ID of a poll attached to this status (TODO confirm)
ContentWarning string `bun:",nullzero"` // Content warning HTML for this status.
ContentWarningText string `bun:""` // Original text of the content warning without formatting
Visibility Visibility `bun:",nullzero,notnull"` // visibility entry for this status
Sensitive *bool `bun:",nullzero,notnull,default:false"` // mark the status as sensitive?
Language string `bun:",nullzero"` // what language is this status written in?
CreatedWithApplicationID string `bun:"type:CHAR(26),nullzero"` // Which application was used to create this status?
ActivityStreamsType string `bun:",nullzero,notnull"` // What is the activitystreams type of this status? See: https://www.w3.org/TR/activitystreams-vocabulary/#object-types. Will probably almost always be Note but who knows!.
Text string `bun:""` // Original text of the status without formatting
ContentType StatusContentType `bun:",nullzero"` // Content type used to process the original text of the status
Federated *bool `bun:",notnull"` // This status will be federated beyond the local timeline(s)
PendingApproval *bool `bun:",nullzero,notnull,default:false"` // If true then status is a reply or boost wrapper that must be Approved by the reply-ee or boost-ee before being fully distributed.
PreApproved bool `bun:"-"` // If true, then status is a reply to or boost wrapper of a status on our instance, has permission to do the interaction, and an Accept should be sent out for it immediately. Field not stored in the DB.
ApprovedByURI string `bun:",nullzero"` // URI of an Accept Activity that approves the Announce or Create Activity that this status was/will be attached to.
}
// enumType is the type we (at least, should) use
// for database enum types. it is the largest size
// supported by a PostgreSQL SMALLINT, since an
// SQLite SMALLINT is actually variable in size.
type enumType int16
// Visibility represents the
// visibility granularity of a status.
type Visibility enumType
const (
// VisibilityNone means nobody can see this.
// It's only used for web status visibility.
VisibilityNone Visibility = 1
// VisibilityPublic means this status will
// be visible to everyone on all timelines.
VisibilityPublic Visibility = 2
// VisibilityUnlocked means this status will be visible to everyone,
// but will only show on home timeline to followers, and in lists.
VisibilityUnlocked Visibility = 3
// VisibilityFollowersOnly means this status is viewable to followers only.
VisibilityFollowersOnly Visibility = 4
// VisibilityMutualsOnly means this status
// is visible to mutual followers only.
VisibilityMutualsOnly Visibility = 5
// VisibilityDirect means this status is
// visible only to mentioned recipients.
VisibilityDirect Visibility = 6
// VisibilityDefault is used when no other setting can be found.
VisibilityDefault Visibility = VisibilityUnlocked
)
// String returns a stringified, frontend API compatible form of Visibility.
func (v Visibility) String() string {
switch v {
case VisibilityNone:
return "none"
case VisibilityPublic:
return "public"
case VisibilityUnlocked:
return "unlocked"
case VisibilityFollowersOnly:
return "followers_only"
case VisibilityMutualsOnly:
return "mutuals_only"
case VisibilityDirect:
return "direct"
default:
panic("invalid visibility")
}
}
// StatusContentType is the content type with which a status's text is
// parsed. Can be either plain or markdown. Empty will default to plain.
type StatusContentType enumType
const (
// StatusContentTypePlain marks status text as plain.
StatusContentTypePlain StatusContentType = 1
// StatusContentTypeMarkdown marks status text as markdown.
StatusContentTypeMarkdown StatusContentType = 2
// StatusContentTypeDefault is an alias of StatusContentTypePlain.
StatusContentTypeDefault = StatusContentTypePlain
)

View file

@ -0,0 +1,24 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package gtsmodel
// Thread represents one thread of statuses.
// In this copy of the model a thread consists solely of its ID.
// TODO: add more fields here if necessary.
type Thread struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database (primary key)
}

View file

@ -0,0 +1,131 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package gtsmodel
import (
"time"
)
// Status represents a user-created 'post' or 'status' in the database, either remote or local.
// Fields tagged `bun:"-"` (BoostOfURI, PreApproved) are not stored in the database.
type Status struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
EditedAt time.Time `bun:"type:timestamptz,nullzero"` // when this status was last edited (if set)
FetchedAt time.Time `bun:"type:timestamptz,nullzero"` // when was item (remote) last fetched.
PinnedAt time.Time `bun:"type:timestamptz,nullzero"` // Status was pinned by owning account at this time.
URI string `bun:",unique,nullzero,notnull"` // activitypub URI of this status
URL string `bun:",nullzero"` // web url for viewing this status
Content string `bun:""` // Content HTML for this status.
AttachmentIDs []string `bun:"attachments,array"` // Database IDs of any media attachments associated with this status
TagIDs []string `bun:"tags,array"` // Database IDs of any tags used in this status
MentionIDs []string `bun:"mentions,array"` // Database IDs of any mentions in this status
EmojiIDs []string `bun:"emojis,array"` // Database IDs of any emojis used in this status
Local *bool `bun:",nullzero,notnull,default:false"` // is this status from a local account?
AccountID string `bun:"type:CHAR(26),nullzero,notnull"` // which account posted this status?
AccountURI string `bun:",nullzero,notnull"` // activitypub uri of the owner of this status
InReplyToID string `bun:"type:CHAR(26),nullzero"` // id of the status this status replies to
InReplyToURI string `bun:",nullzero"` // activitypub uri of the status this status is a reply to
InReplyToAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that this status replies to
BoostOfID string `bun:"type:CHAR(26),nullzero"` // id of the status this status is a boost of
BoostOfURI string `bun:"-"` // URI of the status this status is a boost of; field not inserted in the db, just for dereferencing purposes.
BoostOfAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that owns the boosted status
ThreadID string `bun:"type:CHAR(26),nullzero"` // id of the thread to which this status belongs; only set for remote statuses if a local account is involved at some point in the thread, otherwise null
EditIDs []string `bun:"edits,array"` // presumably database IDs of the stored edits of this status (inferred from name/tag) -- TODO confirm
PollID string `bun:"type:CHAR(26),nullzero"` // presumably id of the poll attached to this status, if any (inferred from name) -- TODO confirm
ContentWarning string `bun:",nullzero"` // Content warning HTML for this status.
ContentWarningText string `bun:""` // Original text of the content warning without formatting
Visibility Visibility `bun:",nullzero,notnull"` // visibility entry for this status
Sensitive *bool `bun:",nullzero,notnull,default:false"` // mark the status as sensitive?
Language string `bun:",nullzero"` // what language is this status written in?
CreatedWithApplicationID string `bun:"type:CHAR(26),nullzero"` // Which application was used to create this status?
ActivityStreamsType string `bun:",nullzero,notnull"` // What is the activitystreams type of this status? See: https://www.w3.org/TR/activitystreams-vocabulary/#object-types. Will probably almost always be Note but who knows!
Text string `bun:""` // Original text of the status without formatting
ContentType StatusContentType `bun:",nullzero"` // Content type used to process the original text of the status
Federated *bool `bun:",notnull"` // This status will be federated beyond the local timeline(s)
PendingApproval *bool `bun:",nullzero,notnull,default:false"` // If true then status is a reply or boost wrapper that must be Approved by the reply-ee or boost-ee before being fully distributed.
PreApproved bool `bun:"-"` // If true, then status is a reply to or boost wrapper of a status on our instance, has permission to do the interaction, and an Accept should be sent out for it immediately. Field not stored in the DB.
ApprovedByURI string `bun:",nullzero"` // URI of an Accept Activity that approves the Announce or Create Activity that this status was/will be attached to.
}
// enumType is the underlying type we use (or at
// least, should use) for database enum types. It
// is an int16: the largest size supported by a
// PostgreSQL SMALLINT, since an SQLite SMALLINT
// is actually variable in size.
type enumType int16
// Visibility represents the
// visibility granularity of a status.
//
// Only the constants declared below are valid values;
// String() panics when called on anything else.
type Visibility enumType
const (
// VisibilityNone means nobody can see this.
// It's only used for web status visibility.
VisibilityNone Visibility = 1
// VisibilityPublic means this status will
// be visible to everyone on all timelines.
VisibilityPublic Visibility = 2
// VisibilityUnlocked means this status will be visible to everyone,
// but will only show on home timeline to followers, and in lists.
VisibilityUnlocked Visibility = 3
// VisibilityFollowersOnly means this status is viewable to followers only.
VisibilityFollowersOnly Visibility = 4
// VisibilityMutualsOnly means this status
// is visible to mutual followers only.
VisibilityMutualsOnly Visibility = 5
// VisibilityDirect means this status is
// visible only to mentioned recipients.
VisibilityDirect Visibility = 6
// VisibilityDefault is used when no other setting
// can be found. It is equal to VisibilityUnlocked.
VisibilityDefault Visibility = VisibilityUnlocked
)
// String converts the Visibility into the string form
// that is compatible with the frontend API. It panics if
// v is not one of the declared Visibility constants.
func (v Visibility) String() string {
	// Resolve the API-facing name first,
	// then return from a single exit point.
	var name string

	switch v {
	case VisibilityDirect:
		name = "direct"
	case VisibilityMutualsOnly:
		name = "mutuals_only"
	case VisibilityFollowersOnly:
		name = "followers_only"
	case VisibilityUnlocked:
		name = "unlocked"
	case VisibilityPublic:
		name = "public"
	case VisibilityNone:
		name = "none"
	default:
		// Anything outside of the cases above
		// is not a value this method can name.
		panic("invalid visibility")
	}

	return name
}
// StatusContentType is the content type with which a status's text is
// parsed. Can be either plain or markdown. Empty will default to plain.
type StatusContentType enumType
const (
// StatusContentTypePlain marks status text to be parsed as plain text.
StatusContentTypePlain StatusContentType = 1
// StatusContentTypeMarkdown marks status text to be parsed as markdown.
StatusContentTypeMarkdown StatusContentType = 2
// StatusContentTypeDefault is the same value as StatusContentTypePlain.
StatusContentTypeDefault = StatusContentTypePlain
)

View file

@ -26,6 +26,7 @@ import (
"strconv"
"strings"
"code.superseriousbusiness.org/gotosocial/internal/config"
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
"code.superseriousbusiness.org/gotosocial/internal/id"
"code.superseriousbusiness.org/gotosocial/internal/log"
@ -37,6 +38,112 @@ import (
"github.com/uptrace/bun/schema"
)
// doWALCheckpoint attempts to force a WAL file merge on SQLite3,
// which can be useful given how much can build up in the WAL.
//
// It is a no-op (returning nil) unless the database dialect is SQLite
// and the configured SQLite journal mode is "WAL" (case-insensitive).
//
// see: https://www.sqlite.org/pragma.html#pragma_wal_checkpoint
func doWALCheckpoint(ctx context.Context, db *bun.DB) error {
	// Nothing to do for non-SQLite.
	if db.Dialect().Name() != dialect.SQLite {
		return nil
	}

	// Nothing to do when not journaling in WAL mode.
	if !strings.EqualFold(config.GetDbSqliteJournalMode(), "WAL") {
		return nil
	}

	// Request a checkpoint in RESTART mode.
	if _, err := db.ExecContext(ctx, "PRAGMA wal_checkpoint(RESTART);"); err != nil {
		return gtserror.Newf("error performing wal_checkpoint: %w", err)
	}

	return nil
}
// batchUpdateByID runs updateQuery (with updateArgs) over the whole of
// the given table, working through it in batches keyed on batchByCol.
//
// Batches are walked in descending batchByCol order, starting below
// id.Highest. On each pass " WHERE <batchByCol> IN (<batch select>)" is
// appended to updateQuery, so updateQuery is expected not to include a
// WHERE clause of its own.
//
// An error is returned if any query fails, or if the number of updated
// rows differs from the row count taken before the first batch.
func batchUpdateByID(
	ctx context.Context,
	tx bun.Tx,
	table string,
	batchByCol string,
	updateQuery string,
	updateArgs []any,
) error {
	// Maximum number of
	// rows in each batch.
	const batchsz = 5000

	// Count all rows in the table up
	// front, to check against at the end.
	total, err := tx.NewSelect().Table(table).Count(ctx)
	if err != nil {
		return gtserror.Newf("error selecting total count: %w", err)
	}

	var (
		// Identifier of the column batched on.
		col = bun.Ident(batchByCol)

		// The UPDATE restricted to a single batch;
		// this is the same text for every iteration.
		batchUpdate = updateQuery + " WHERE ? IN (?)"

		// Exclusive upper bound of the next batch,
		// starting at the highest possible value.
		highest = id.Highest

		// Running total of updated rows.
		updated int
	)

	for {
		// Select the next (at most) batchsz
		// column values below the current bound.
		batchQ := tx.NewSelect().
			Table(table).
			Column(batchByCol).
			Where("? < ?", col, highest).
			OrderExpr("? DESC", col).
			Limit(batchsz)

		// Extend a copy of the caller's args with the
		// two placeholders for the appended WHERE clause.
		args := append(slices.Clone(updateArgs), col, batchQ)

		// Run the raw UPDATE for this batch.
		res, err := tx.NewRaw(batchUpdate, args...).Exec(ctx)
		if err != nil {
			return gtserror.Newf("error updating old column values: %w", err)
		}

		// See how many rows this batch touched.
		affected, err := res.RowsAffected()
		if err != nil {
			return gtserror.Newf("error counting affected rows: %w", err)
		}

		if affected == 0 {
			// An empty batch means
			// the table is exhausted.
			break
		}

		// Add to the running total.
		updated += int(affected)

		// Log helpful message to admin.
		log.Infof(ctx, "migrated %d of %d %s (up to %s)",
			updated, total, table, highest)

		// The lowest value seen in this batch
		// becomes the bound for the next one.
		err = tx.NewSelect().
			With("batch_query", batchQ).
			ColumnExpr("min(?) FROM ?", col, bun.Ident("batch_query")).
			Scan(ctx, &highest)
		if err != nil {
			return gtserror.Newf("error selecting next highest: %w", err)
		}
	}

	if updated != total {
		// Return error here in order to rollback the whole transaction.
		return fmt.Errorf("total=%d does not match updated=%d", total, updated)
	}

	return nil
}
// convertEnums performs a transaction that converts
// a table's column of our old-style enums (strings) to
// more performant and space-saving integer types.
@ -310,7 +417,7 @@ func getModelField(db bun.IDB, rtype reflect.Type, fieldName string) (*schema.Fi
}
// doesColumnExist safely checks whether given column exists on table, handling both SQLite and PostgreSQL appropriately.
func doesColumnExist(ctx context.Context, tx bun.Tx, table, col string) (bool, error) {
func doesColumnExist(ctx context.Context, tx bun.IDB, table, col string) (bool, error) {
var n int
var err error
switch tx.Dialect().Name() {

View file

@ -21,11 +21,13 @@ import (
"context"
"errors"
"slices"
"strings"
"code.superseriousbusiness.org/gotosocial/internal/db"
"code.superseriousbusiness.org/gotosocial/internal/gtscontext"
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
"code.superseriousbusiness.org/gotosocial/internal/gtsmodel"
"code.superseriousbusiness.org/gotosocial/internal/id"
"code.superseriousbusiness.org/gotosocial/internal/log"
"code.superseriousbusiness.org/gotosocial/internal/state"
"code.superseriousbusiness.org/gotosocial/internal/util/xslices"
@ -335,115 +337,284 @@ func (s *statusDB) PutStatus(ctx context.Context, status *gtsmodel.Status) error
// as the cache does not attempt a mutex lock until AFTER hook.
//
return s.db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
// create links between this status and any emojis it uses
for _, i := range status.EmojiIDs {
if status.BoostOfID != "" {
var threadID string
// Boost wrappers always inherit thread
// of the origin status they're boosting.
if err := tx.
NewSelect().
Table("statuses").
Column("thread_id").
Where("? = ?", bun.Ident("id"), status.BoostOfID).
Scan(ctx, &threadID); err != nil {
return gtserror.Newf("error selecting boosted status: %w", err)
}
// Set the selected thread.
status.ThreadID = threadID
// They also require no further
// checks! Simply insert status here.
return insertStatus(ctx, tx, status)
}
// Gather a list of possible thread IDs
// of all the possible related statuses
// to this one. If one exists we can use
// the end result, and if too many exist
// we can fix the status threading.
var threadIDs []string
if status.InReplyToID != "" {
var threadID string
// A stored parent status exists,
// select its thread ID to ideally
// inherit this for status.
if err := tx.
NewSelect().
Table("statuses").
Column("thread_id").
Where("? = ?", bun.Ident("id"), status.InReplyToID).
Scan(ctx, &threadID); err != nil {
return gtserror.Newf("error selecting status parent: %w", err)
}
// Append possible ID to threads slice.
threadIDs = append(threadIDs, threadID)
} else if status.InReplyToURI != "" {
var ids []string
// A parent status exists but is not
// yet stored. See if any siblings for
// this shared parent exist with their
// own thread IDs.
if err := tx.
NewSelect().
Table("statuses").
Column("thread_id").
Where("? = ?", bun.Ident("in_reply_to_uri"), status.InReplyToURI).
Scan(ctx, &ids); err != nil && !errors.Is(err, db.ErrNoEntries) {
return gtserror.Newf("error selecting status siblings: %w", err)
}
// Append possible IDs to threads slice.
threadIDs = append(threadIDs, ids...)
}
if !*status.Local {
var ids []string
// For remote statuses specifically, check to
// see if any children are stored for this new
// stored parent with their own thread IDs.
if err := tx.
NewSelect().
Table("statuses").
Column("thread_id").
Where("? = ?", bun.Ident("in_reply_to_uri"), status.URI).
Scan(ctx, &ids); err != nil && !errors.Is(err, db.ErrNoEntries) {
return gtserror.Newf("error selecting status children: %w", err)
}
// Append possible IDs to threads slice.
threadIDs = append(threadIDs, ids...)
}
// Ensure only *unique* possible thread IDs.
threadIDs = xslices.Deduplicate(threadIDs)
switch len(threadIDs) {
case 0:
// No related status with thread ID already exists,
// so create new thread ID from status creation time.
threadID := id.NewULIDFromTime(status.CreatedAt)
// Insert new thread.
if _, err := tx.
NewInsert().
Model(&gtsmodel.StatusToEmoji{
StatusID: status.ID,
EmojiID: i,
}).
On("CONFLICT (?, ?) DO NOTHING", bun.Ident("status_id"), bun.Ident("emoji_id")).
Model(&gtsmodel.Thread{ID: threadID}).
Exec(ctx); err != nil {
if !errors.Is(err, db.ErrAlreadyExists) {
return err
}
return gtserror.Newf("error inserting thread: %w", err)
}
// Update status thread ID.
status.ThreadID = threadID
case 1:
// Inherit single known thread.
status.ThreadID = threadIDs[0]
default:
var err error
log.Infof(ctx, "reconciling status threading for %s: [%s]", status.URI, strings.Join(threadIDs, ","))
status.ThreadID, err = s.fixStatusThreading(ctx, tx, threadIDs)
if err != nil {
return err
}
}
// create links between this status and any tags it uses
for _, i := range status.TagIDs {
if _, err := tx.
NewInsert().
Model(&gtsmodel.StatusToTag{
StatusID: status.ID,
TagID: i,
}).
On("CONFLICT (?, ?) DO NOTHING", bun.Ident("status_id"), bun.Ident("tag_id")).
Exec(ctx); err != nil {
if !errors.Is(err, db.ErrAlreadyExists) {
return err
}
}
}
// change the status ID of the media
// attachments to the current status
for _, a := range status.Attachments {
a.StatusID = status.ID
if _, err := tx.
NewUpdate().
Model(a).
Column("status_id").
Where("? = ?", bun.Ident("media_attachment.id"), a.ID).
Exec(ctx); err != nil {
if !errors.Is(err, db.ErrAlreadyExists) {
return err
}
}
}
// If the status is threaded, create
// link between thread and status.
if status.ThreadID != "" {
if _, err := tx.
NewInsert().
Model(&gtsmodel.ThreadToStatus{
ThreadID: status.ThreadID,
StatusID: status.ID,
}).
On("CONFLICT (?, ?) DO NOTHING", bun.Ident("thread_id"), bun.Ident("status_id")).
Exec(ctx); err != nil {
if !errors.Is(err, db.ErrAlreadyExists) {
return err
}
}
}
// Finally, insert the status
_, err := tx.NewInsert().
Model(status).
Exec(ctx)
return err
// And after threading, insert status.
// This will error if ThreadID is unset.
return insertStatus(ctx, tx, status)
})
})
}
// fixStatusThreading reconciles statuses that belong to the same thread
// but are currently spread over the multiple given thread IDs.
//
// The given IDs are sorted in place and the first (i.e. oldest) is kept:
// statuses and thread mutes using any of the other IDs are updated to use
// it, and the other threads are deleted. The kept thread ID is returned.
//
// Panics if fewer than two thread IDs are given.
func (s *statusDB) fixStatusThreading(ctx context.Context, tx bun.Tx, threadIDs []string) (string, error) {
	if len(threadIDs) < 2 {
		panic("invalid call to fixStatusThreading()")
	}

	// Ascending sort puts the
	// oldest thread ID in front.
	slices.Sort(threadIDs)

	// Split into the thread we keep, and
	// the threads to be merged into it.
	keep, merge := threadIDs[0], threadIDs[1:]

	// IDs of models changed by the updates below are
	// collected for cache invalidation at the end,
	// with capacity preallocated for the worst case.
	prealloc := 4 * len(merge)
	statusIDs := make([]string, 0, prealloc)
	muteIDs := make([]string, 0, prealloc)

	// Repoint statuses from the
	// merged threads to the kept one.
	_, err := tx.NewUpdate().
		Table("statuses").
		Where("? IN (?)", bun.Ident("thread_id"), bun.In(merge)).
		Set("? = ?", bun.Ident("thread_id"), keep).
		Returning("?", bun.Ident("id")).
		Exec(ctx, &statusIDs)
	if err != nil && !errors.Is(err, db.ErrNoEntries) {
		return "", gtserror.Newf("error updating statuses: %w", err)
	}

	// Repoint thread mutes from the
	// merged threads to the kept one.
	_, err = tx.NewUpdate().
		Table("thread_mutes").
		Where("? IN (?)", bun.Ident("thread_id"), bun.In(merge)).
		Set("? = ?", bun.Ident("thread_id"), keep).
		Returning("?", bun.Ident("id")).
		Exec(ctx, &muteIDs)
	if err != nil && !errors.Is(err, db.ErrNoEntries) {
		return "", gtserror.Newf("error updating thread mutes: %w", err)
	}

	// The merged threads are no
	// longer in use, remove them.
	_, err = tx.NewDelete().
		Table("threads").
		Where("? IN (?)", bun.Ident("id"), bun.In(merge)).
		Exec(ctx)
	if err != nil {
		return "", gtserror.Newf("error deleting threads: %w", err)
	}

	// Drop cached copies of every changed status and mute.
	s.state.Caches.DB.Status.InvalidateIDs("ID", statusIDs)
	s.state.Caches.DB.ThreadMute.InvalidateIDs("ID", muteIDs)

	return keep, nil
}
// insertStatus performs the base insert logic for a status within the
// given transaction. In order, it: inserts status-to-emoji and
// status-to-tag link rows, points the status' media attachments at the
// status, and lastly inserts the status itself. The first error
// encountered is wrapped and returned.
func insertStatus(ctx context.Context, tx bun.Tx, status *gtsmodel.Status) error {
	// Link status to
	// each emoji it uses.
	for _, emojiID := range status.EmojiIDs {
		link := &gtsmodel.StatusToEmoji{
			StatusID: status.ID,
			EmojiID:  emojiID,
		}
		if _, err := tx.NewInsert().Model(link).Exec(ctx); err != nil {
			return gtserror.Newf("error inserting status_to_emoji: %w", err)
		}
	}

	// Link status to
	// each tag it uses.
	for _, tagID := range status.TagIDs {
		link := &gtsmodel.StatusToTag{
			StatusID: status.ID,
			TagID:    tagID,
		}
		if _, err := tx.NewInsert().Model(link).Exec(ctx); err != nil {
			return gtserror.Newf("error inserting status_to_tag: %w", err)
		}
	}

	// Set this status' ID on each of its media
	// attachments, updating only that one column.
	for _, media := range status.Attachments {
		media.StatusID = status.ID

		_, err := tx.NewUpdate().
			Model(media).
			Column("status_id").
			Where("? = ?", bun.Ident("media_attachment.id"), media.ID).
			Exec(ctx)
		if err != nil {
			return gtserror.Newf("error updating media: %w", err)
		}
	}

	// With everything else in
	// place, insert the status.
	_, err := tx.NewInsert().Model(status).Exec(ctx)
	if err != nil {
		return gtserror.Newf("error inserting status: %w", err)
	}

	return nil
}
func (s *statusDB) UpdateStatus(ctx context.Context, status *gtsmodel.Status, columns ...string) error {
return s.state.Caches.DB.Status.Store(status, func() error {
// It is safe to run this database transaction within cache.Store
// as the cache does not attempt a mutex lock until AFTER hook.
//
return s.db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
// create links between this status and any emojis it uses
for _, i := range status.EmojiIDs {
// create links between this
// status and any emojis it uses
for _, id := range status.EmojiIDs {
if _, err := tx.
NewInsert().
Model(&gtsmodel.StatusToEmoji{
StatusID: status.ID,
EmojiID: i,
EmojiID: id,
}).
On("CONFLICT (?, ?) DO NOTHING", bun.Ident("status_id"), bun.Ident("emoji_id")).
Exec(ctx); err != nil {
if !errors.Is(err, db.ErrAlreadyExists) {
return err
}
return err
}
}
// create links between this status and any tags it uses
for _, i := range status.TagIDs {
// create links between this
// status and any tags it uses
for _, id := range status.TagIDs {
if _, err := tx.
NewInsert().
Model(&gtsmodel.StatusToTag{
StatusID: status.ID,
TagID: i,
TagID: id,
}).
On("CONFLICT (?, ?) DO NOTHING", bun.Ident("status_id"), bun.Ident("tag_id")).
Exec(ctx); err != nil {
if !errors.Is(err, db.ErrAlreadyExists) {
return err
}
return err
}
}
@ -457,26 +628,7 @@ func (s *statusDB) UpdateStatus(ctx context.Context, status *gtsmodel.Status, co
Column("status_id").
Where("? = ?", bun.Ident("media_attachment.id"), a.ID).
Exec(ctx); err != nil {
if !errors.Is(err, db.ErrAlreadyExists) {
return err
}
}
}
// If the status is threaded, create
// link between thread and status.
if status.ThreadID != "" {
if _, err := tx.
NewInsert().
Model(&gtsmodel.ThreadToStatus{
ThreadID: status.ThreadID,
StatusID: status.ID,
}).
On("CONFLICT (?, ?) DO NOTHING", bun.Ident("thread_id"), bun.Ident("status_id")).
Exec(ctx); err != nil {
if !errors.Is(err, db.ErrAlreadyExists) {
return err
}
return err
}
}
@ -499,7 +651,9 @@ func (s *statusDB) DeleteStatusByID(ctx context.Context, id string) error {
// Delete status from database and any related links in a transaction.
if err := s.db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
// delete links between this status and any emojis it uses
// delete links between this
// status and any emojis it uses
if _, err := tx.
NewDelete().
TableExpr("? AS ?", bun.Ident("status_to_emojis"), bun.Ident("status_to_emoji")).
@ -508,7 +662,8 @@ func (s *statusDB) DeleteStatusByID(ctx context.Context, id string) error {
return err
}
// delete links between this status and any tags it uses
// delete links between this
// status and any tags it uses
if _, err := tx.
NewDelete().
TableExpr("? AS ?", bun.Ident("status_to_tags"), bun.Ident("status_to_tag")).
@ -517,16 +672,6 @@ func (s *statusDB) DeleteStatusByID(ctx context.Context, id string) error {
return err
}
// Delete links between this status
// and any threads it was a part of.
if _, err := tx.
NewDelete().
TableExpr("? AS ?", bun.Ident("thread_to_statuses"), bun.Ident("thread_to_status")).
Where("? = ?", bun.Ident("thread_to_status.status_id"), id).
Exec(ctx); err != nil {
return err
}
// delete the status itself
if _, err := tx.
NewDelete().

View file

@ -21,8 +21,12 @@ import (
"testing"
"time"
"code.superseriousbusiness.org/gotosocial/internal/ap"
"code.superseriousbusiness.org/gotosocial/internal/db"
"code.superseriousbusiness.org/gotosocial/internal/gtscontext"
"code.superseriousbusiness.org/gotosocial/internal/gtsmodel"
"code.superseriousbusiness.org/gotosocial/internal/id"
"code.superseriousbusiness.org/gotosocial/internal/util"
"github.com/stretchr/testify/suite"
)
@ -253,6 +257,302 @@ func (suite *StatusTestSuite) TestPutPopulatedStatus() {
)
}
// TestPutStatusThreadingBoostOfIDSet checks that a boost wrapper stored
// with PutStatus ends up with the same thread ID as the status it boosts.
func (suite *StatusTestSuite) TestPutStatusThreadingBoostOfIDSet() {
	ctx := suite.T().Context()

	// Made-up account details.
	accountID := id.NewULID()
	accountURI := "https://example.com/users/" + accountID

	// Build the status that will be boosted.
	targetID := id.NewULID()
	target := &gtsmodel.Status{
		ID:                  targetID,
		URI:                 accountURI + "/statuses/" + targetID,
		AccountID:           accountID,
		AccountURI:          accountURI,
		Local:               util.Ptr(false),
		Federated:           util.Ptr(true),
		ActivityStreamsType: ap.ObjectNote,
	}

	// Storing it should leave it with a thread ID set.
	suite.NoError(suite.db.PutStatus(ctx, target))
	suite.NotEmpty(target.ThreadID)

	// Build the boost wrapper of above status.
	wrapperID := id.NewULID()
	wrapper := &gtsmodel.Status{
		ID:                  wrapperID,
		URI:                 accountURI + "/statuses/" + wrapperID,
		AccountID:           accountID,
		AccountURI:          accountURI,
		BoostOfID:           targetID,
		BoostOfAccountID:    accountID,
		Local:               util.Ptr(false),
		Federated:           util.Ptr(true),
		ActivityStreamsType: ap.ObjectNote,
	}

	// Store the boost wrapper too.
	suite.NoError(suite.db.PutStatus(ctx, wrapper))

	// The wrapper's thread should match the boosted status'.
	suite.Equal(target.ThreadID, wrapper.ThreadID)
}
// TestPutStatusThreadingInReplyToIDSet checks that a reply stored with
// PutStatus, with its InReplyToID pointing at an already stored parent,
// ends up with the same thread ID as that parent.
func (suite *StatusTestSuite) TestPutStatusThreadingInReplyToIDSet() {
	ctx := suite.T().Context()

	// Made-up account details.
	accountID := id.NewULID()
	accountURI := "https://example.com/users/" + accountID

	// Build the parent status.
	parentID := id.NewULID()
	parentURI := accountURI + "/statuses/" + parentID
	parent := &gtsmodel.Status{
		ID:                  parentID,
		URI:                 parentURI,
		AccountID:           accountID,
		AccountURI:          accountURI,
		Local:               util.Ptr(false),
		Federated:           util.Ptr(true),
		ActivityStreamsType: ap.ObjectNote,
	}

	// Storing it should leave it with a thread ID set.
	suite.NoError(suite.db.PutStatus(ctx, parent))
	suite.NotEmpty(parent.ThreadID)

	// Build a reply to the parent, with
	// both parent ID and parent URI set.
	childID := id.NewULID()
	child := &gtsmodel.Status{
		ID:                  childID,
		URI:                 accountURI + "/statuses/" + childID,
		AccountID:           accountID,
		AccountURI:          accountURI,
		InReplyToID:         parentID,
		InReplyToURI:        parentURI,
		InReplyToAccountID:  accountID,
		Local:               util.Ptr(false),
		Federated:           util.Ptr(true),
		ActivityStreamsType: ap.ObjectNote,
	}

	// Store the reply too.
	suite.NoError(suite.db.PutStatus(ctx, child))

	// The reply's thread should match the parent's.
	suite.Equal(parent.ThreadID, child.ThreadID)
}
// TestPutStatusThreadingSiblings stores a number of replies that share a
// parent URI before the parent itself is stored, and checks that all the
// siblings, and then the parent, end up sharing a single thread ID.
func (suite *StatusTestSuite) TestPutStatusThreadingSiblings() {
	const siblingCount = 10

	ctx := suite.T().Context()

	// Made-up account details.
	accountID := id.NewULID()
	accountURI := "https://example.com/users/" + accountID

	// The shared parent status,
	// which is stored last of all.
	parentID := id.NewULID()
	parentURI := accountURI + "/statuses/" + parentID
	parent := &gtsmodel.Status{
		ID:                  parentID,
		URI:                 parentURI,
		AccountID:           accountID,
		AccountURI:          accountURI,
		Local:               util.Ptr(false),
		Federated:           util.Ptr(true),
		ActivityStreamsType: ap.ObjectNote,
	}

	// Build the sibling replies. Only InReplyToURI is set
	// (no InReplyToID), so on insert it is as though children
	// were dereferenced ahead of their parent being stored,
	// which is where out-of-sync threads can occur.
	children := make([]*gtsmodel.Status, siblingCount)
	for i := range children {
		childID := id.NewULID()
		children[i] = &gtsmodel.Status{
			ID:                  childID,
			URI:                 accountURI + "/statuses/" + childID,
			AccountID:           accountID,
			AccountURI:          accountURI,
			InReplyToURI:        parentURI,
			Local:               util.Ptr(false),
			Federated:           util.Ptr(true),
			ActivityStreamsType: ap.ObjectNote,
		}
	}

	// Thread ID taken from first
	// sibling with one set on it.
	var sharedThreadID string

	// Store each sibling, every one of them
	// should land in the very same thread.
	for _, child := range children {
		suite.NoError(suite.db.PutStatus(ctx, child))
		suite.NotEmpty(child.ThreadID)

		if sharedThreadID != "" {
			suite.Equal(sharedThreadID, child.ThreadID)
			continue
		}

		sharedThreadID = child.ThreadID
	}

	// Lastly, store the parent status.
	suite.NoError(suite.db.PutStatus(ctx, parent))

	// The parent should have joined its children's thread.
	suite.Equal(sharedThreadID, parent.ThreadID)
}
// TestPutStatusThreadingReconcile builds one reply chain of threadLength
// statuses, stores it from both ends inward so that head and tail are
// first threaded separately, then stores the remaining middle statuses
// and checks that every status ends up on the smaller of the two thread IDs.
func (suite *StatusTestSuite) TestPutStatusThreadingReconcile() {
ctx := suite.T().Context()
// Fake account details.
accountID := id.NewULID()
accountURI := "https://example.com/users/" + accountID
const threadLength = 10
var statuses []*gtsmodel.Status
var lastURI, lastID string
// Generate front-half of thread, each status
// replying (by both ID and URI) to the previous.
for range threadLength / 2 {
id := id.NewULID()
uri := accountURI + "/statuses/" + id
statuses = append(statuses, &gtsmodel.Status{
ID: id,
URI: uri,
AccountID: accountID,
AccountURI: accountURI,
InReplyToID: lastID,
InReplyToURI: lastURI,
Local: util.Ptr(false),
Federated: util.Ptr(true),
ActivityStreamsType: ap.ObjectNote,
})
lastURI = uri
lastID = id
}
// Generate back-half of thread.
//
// Note here that inReplyToID is not set past
// the first item, so as they get inserted it's
// as if the children are dereferenced ahead of
// the stored parent, i.e. an out-of-sync thread.
for range threadLength / 2 {
id := id.NewULID()
uri := accountURI + "/statuses/" + id
statuses = append(statuses, &gtsmodel.Status{
ID: id,
URI: uri,
AccountID: accountID,
AccountURI: accountURI,
InReplyToID: lastID,
InReplyToURI: lastURI,
Local: util.Ptr(false),
Federated: util.Ptr(true),
ActivityStreamsType: ap.ObjectNote,
})
lastURI = uri
// Clearing lastID means only the first
// back-half status carries an InReplyToID.
lastID = ""
}
var err error
// Thread IDs we expect to see for
// head statuses as we add them, and
// for tail statuses as we add them.
var thread0, threadN string
// Insert status thread from head and tail,
// specifically stopping before the middle.
// These should each get threaded separately.
for i := range (threadLength / 2) - 1 {
i0, iN := i, len(statuses)-1-i
// Insert i'th status from the start.
err = suite.db.PutStatus(ctx, statuses[i0])
suite.NoError(err)
suite.NotEmpty(statuses[i0].ThreadID)
// Check i0 thread.
if thread0 == "" {
thread0 = statuses[i0].ThreadID
} else {
suite.Equal(thread0, statuses[i0].ThreadID)
}
// Insert i'th status from the end.
err = suite.db.PutStatus(ctx, statuses[iN])
suite.NoError(err)
suite.NotEmpty(statuses[iN].ThreadID)
// Check iN thread.
if threadN == "" {
threadN = statuses[iN].ThreadID
} else {
suite.Equal(threadN, statuses[iN].ThreadID)
}
}
// Finally, insert remaining statuses,
// at some point among these it should
// trigger a status thread reconcile.
for _, status := range statuses {
if status.ThreadID != "" {
// already inserted
continue
}
// Insert remaining status into db.
err = suite.db.PutStatus(ctx, status)
suite.NoError(err)
}
// The reconcile should pick the older,
// i.e. smaller of two ULID thread IDs.
finalThreadID := min(thread0, threadN)
for _, status := range statuses {
// Get ID of status.
id := status.ID
// Fetch the latest status from the database
// (shadows the loop variable of the same name).
status, err := suite.db.GetStatusByID(
gtscontext.SetBarebones(ctx),
id,
)
suite.NoError(err)
// Ensure that after reconcile it uses the expected thread.
suite.Equal(finalThreadID, status.ThreadID)
}
}
// TestStatusTestSuite is the 'go test' entry point
// that runs every test method of StatusTestSuite.
func TestStatusTestSuite(t *testing.T) {
	suite.Run(t, &StatusTestSuite{})
}

View file

@ -47,7 +47,7 @@ type Status interface {
// PopulateStatusEdits ensures that status' edits are fully populated.
PopulateStatusEdits(ctx context.Context, status *gtsmodel.Status) error
// PutStatus stores one status in the database.
// PutStatus stores one status in the database, this also handles status threading.
PutStatus(ctx context.Context, status *gtsmodel.Status) error
// UpdateStatus updates one status in the database.

View file

@ -101,7 +101,7 @@ func (d *Dereferencer) EnrichAnnounce(
// Generate an ID for the boost wrapper status.
boost.ID = id.NewULIDFromTime(boost.CreatedAt)
// Store the boost wrapper status in database.
// Store the remote boost wrapper status in database.
switch err = d.state.DB.PutStatus(ctx, boost); {
case err == nil:
// all groovy.

View file

@ -22,7 +22,6 @@ import (
"errors"
"net/http"
"net/url"
"slices"
"time"
"code.superseriousbusiness.org/gotosocial/internal/ap"
@ -571,15 +570,6 @@ func (d *Dereferencer) enrichStatus(
return nil, nil, gtserror.Newf("error populating mentions for status %s: %w", uri, err)
}
// Ensure status in a thread is connected.
threadChanged, err := d.threadStatus(ctx,
status,
latestStatus,
)
if err != nil {
return nil, nil, gtserror.Newf("error handling threading for status %s: %w", uri, err)
}
// Populate tags associated with status, passing
// in existing status to reuse old where possible.
tagsChanged, err := d.fetchStatusTags(ctx,
@ -614,7 +604,7 @@ func (d *Dereferencer) enrichStatus(
}
if isNew {
// Simplest case, insert this new status into the database.
// Simplest case, insert this new remote status into the database.
if err := d.state.DB.PutStatus(ctx, latestStatus); err != nil {
return nil, nil, gtserror.Newf("error inserting new status %s: %w", uri, err)
}
@ -627,7 +617,6 @@ func (d *Dereferencer) enrichStatus(
latestStatus,
pollChanged,
mentionsChanged,
threadChanged,
tagsChanged,
mediaChanged,
emojiChanged,
@ -736,81 +725,6 @@ func (d *Dereferencer) fetchStatusMentions(
return changed, nil
}
// threadStatus makes sure that the given status is threaded where
// necessary, trying each of the following in order:
//
//   - if the existing copy has a thread ID, and either has no parent set
//     or shares that thread ID with its parent, status inherits it;
//   - else if status has a parent with a thread ID, status inherits that;
//   - else if status mentions a local account, a new thread is stored
//     in the database and assigned to status;
//   - else status is left unthreaded.
//
// The returned bool is true when the thread ID was taken from the parent
// or newly created, and false when it was inherited from the existing
// copy or left unset.
func (d *Dereferencer) threadStatus(
	ctx context.Context,
	existing *gtsmodel.Status,
	status *gtsmodel.Status,
) (
	changed bool,
	err error,
) {
	// Is the existing copy
	// already in a thread?
	if existing.ThreadID != "" {
		parent := existing.InReplyTo

		// With no parent, or a parent in the same
		// thread, existing threading is correct.
		if parent == nil || parent.ThreadID == existing.ThreadID {
			status.ThreadID = existing.ThreadID
			return false, nil
		}

		// TODO: delete incorrect thread
	}

	// Is there a threaded parent
	// that we can inherit from?
	if parent := status.InReplyTo; parent != nil && parent.ThreadID != "" {
		status.ThreadID = parent.ThreadID
		return true, nil
	}

	// Parent wasn't threaded. A status only needs
	// threading from here if it mentions a local
	// account, so that account can mute the thread.
	isLocalMention := func(mention *gtsmodel.Mention) bool {
		// A target account that couldn't be
		// deref'd is known to not be local, so
		// only non-nil accounts need checking.
		target := mention.TargetAccount
		return target != nil && target.IsLocal()
	}

	if !slices.ContainsFunc(status.Mentions, isLocalMention) {
		// No local account mentioned,
		// so no threading is needed.
		return false, nil
	}

	// A local account is mentioned: create
	// a new thread model with a fresh ID.
	thread := &gtsmodel.Thread{ID: id.NewULID()}

	// Store the new thread in the database.
	if err := d.state.DB.PutThread(ctx, thread); err != nil {
		return false, gtserror.Newf("error inserting new thread in db: %w", err)
	}

	// Assign new thread to the status.
	status.ThreadID = thread.ID
	return true, nil
}
// fetchStatusTags populates the tags on 'status', fetching existing
// from the database and creating new where needed. 'existing' is used
// to fetch tags that have not changed since previous stored status.
@ -1135,7 +1049,6 @@ func (d *Dereferencer) handleStatusEdit(
status *gtsmodel.Status,
pollChanged bool,
mentionsChanged bool,
threadChanged bool,
tagsChanged bool,
mediaChanged bool,
emojiChanged bool,
@ -1193,14 +1106,6 @@ func (d *Dereferencer) handleStatusEdit(
// been previously populated properly.
}
if threadChanged {
cols = append(cols, "thread_id")
// Thread changed doesn't necessarily
// indicate an edit, it may just now
// actually be included in a thread.
}
if tagsChanged {
cols = append(cols, "tags") // i.e. TagIDs

View file

@ -27,56 +27,56 @@ import (
// Status represents a user-created 'post' or 'status' in the database, either remote or local
type Status struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
EditedAt time.Time `bun:"type:timestamptz,nullzero"` // when this status was last edited (if set)
FetchedAt time.Time `bun:"type:timestamptz,nullzero"` // when was item (remote) last fetched.
PinnedAt time.Time `bun:"type:timestamptz,nullzero"` // Status was pinned by owning account at this time.
URI string `bun:",unique,nullzero,notnull"` // activitypub URI of this status
URL string `bun:",nullzero"` // web url for viewing this status
Content string `bun:""` // Content HTML for this status.
AttachmentIDs []string `bun:"attachments,array"` // Database IDs of any media attachments associated with this status
Attachments []*MediaAttachment `bun:"attached_media,rel:has-many"` // Attachments corresponding to attachmentIDs
TagIDs []string `bun:"tags,array"` // Database IDs of any tags used in this status
Tags []*Tag `bun:"attached_tags,m2m:status_to_tags"` // Tags corresponding to tagIDs. https://bun.uptrace.dev/guide/relations.html#many-to-many-relation
MentionIDs []string `bun:"mentions,array"` // Database IDs of any mentions in this status
Mentions []*Mention `bun:"attached_mentions,rel:has-many"` // Mentions corresponding to mentionIDs
EmojiIDs []string `bun:"emojis,array"` // Database IDs of any emojis used in this status
Emojis []*Emoji `bun:"attached_emojis,m2m:status_to_emojis"` // Emojis corresponding to emojiIDs. https://bun.uptrace.dev/guide/relations.html#many-to-many-relation
Local *bool `bun:",nullzero,notnull,default:false"` // is this status from a local account?
AccountID string `bun:"type:CHAR(26),nullzero,notnull"` // which account posted this status?
Account *Account `bun:"rel:belongs-to"` // account corresponding to accountID
AccountURI string `bun:",nullzero,notnull"` // activitypub uri of the owner of this status
InReplyToID string `bun:"type:CHAR(26),nullzero"` // id of the status this status replies to
InReplyToURI string `bun:",nullzero"` // activitypub uri of the status this status is a reply to
InReplyToAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that this status replies to
InReplyTo *Status `bun:"-"` // status corresponding to inReplyToID
InReplyToAccount *Account `bun:"rel:belongs-to"` // account corresponding to inReplyToAccountID
BoostOfID string `bun:"type:CHAR(26),nullzero"` // id of the status this status is a boost of
BoostOfURI string `bun:"-"` // URI of the status this status is a boost of; field not inserted in the db, just for dereferencing purposes.
BoostOfAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that owns the boosted status
BoostOf *Status `bun:"-"` // status that corresponds to boostOfID
BoostOfAccount *Account `bun:"rel:belongs-to"` // account that corresponds to boostOfAccountID
ThreadID string `bun:"type:CHAR(26),nullzero"` // id of the thread to which this status belongs; only set for remote statuses if a local account is involved at some point in the thread, otherwise null
EditIDs []string `bun:"edits,array"` //
Edits []*StatusEdit `bun:"-"` //
PollID string `bun:"type:CHAR(26),nullzero"` //
Poll *Poll `bun:"-"` //
ContentWarning string `bun:",nullzero"` // Content warning HTML for this status.
ContentWarningText string `bun:""` // Original text of the content warning without formatting
Visibility Visibility `bun:",nullzero,notnull"` // visibility entry for this status
Sensitive *bool `bun:",nullzero,notnull,default:false"` // mark the status as sensitive?
Language string `bun:",nullzero"` // what language is this status written in?
CreatedWithApplicationID string `bun:"type:CHAR(26),nullzero"` // Which application was used to create this status?
CreatedWithApplication *Application `bun:"rel:belongs-to"` // application corresponding to createdWithApplicationID
ActivityStreamsType string `bun:",nullzero,notnull"` // What is the activitystreams type of this status? See: https://www.w3.org/TR/activitystreams-vocabulary/#object-types. Will probably almost always be Note but who knows!.
Text string `bun:""` // Original text of the status without formatting
ContentType StatusContentType `bun:",nullzero"` // Content type used to process the original text of the status
Federated *bool `bun:",notnull"` // This status will be federated beyond the local timeline(s)
InteractionPolicy *InteractionPolicy `bun:""` // InteractionPolicy for this status. If null then the default InteractionPolicy should be assumed for this status's Visibility. Always null for boost wrappers.
PendingApproval *bool `bun:",nullzero,notnull,default:false"` // If true then status is a reply or boost wrapper that must be Approved by the reply-ee or boost-ee before being fully distributed.
PreApproved bool `bun:"-"` // If true, then status is a reply to or boost wrapper of a status on our instance, has permission to do the interaction, and an Accept should be sent out for it immediately. Field not stored in the DB.
ApprovedByURI string `bun:",nullzero"` // URI of an Accept Activity that approves the Announce or Create Activity that this status was/will be attached to.
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
EditedAt time.Time `bun:"type:timestamptz,nullzero"` // when this status was last edited (if set)
FetchedAt time.Time `bun:"type:timestamptz,nullzero"` // when was item (remote) last fetched.
PinnedAt time.Time `bun:"type:timestamptz,nullzero"` // Status was pinned by owning account at this time.
URI string `bun:",unique,nullzero,notnull"` // activitypub URI of this status
URL string `bun:",nullzero"` // web url for viewing this status
Content string `bun:""` // Content HTML for this status.
AttachmentIDs []string `bun:"attachments,array"` // Database IDs of any media attachments associated with this status
Attachments []*MediaAttachment `bun:"attached_media,rel:has-many"` // Attachments corresponding to attachmentIDs
TagIDs []string `bun:"tags,array"` // Database IDs of any tags used in this status
Tags []*Tag `bun:"attached_tags,m2m:status_to_tags"` // Tags corresponding to tagIDs. https://bun.uptrace.dev/guide/relations.html#many-to-many-relation
MentionIDs []string `bun:"mentions,array"` // Database IDs of any mentions in this status
Mentions []*Mention `bun:"attached_mentions,rel:has-many"` // Mentions corresponding to mentionIDs
EmojiIDs []string `bun:"emojis,array"` // Database IDs of any emojis used in this status
Emojis []*Emoji `bun:"attached_emojis,m2m:status_to_emojis"` // Emojis corresponding to emojiIDs. https://bun.uptrace.dev/guide/relations.html#many-to-many-relation
Local *bool `bun:",nullzero,notnull,default:false"` // is this status from a local account?
AccountID string `bun:"type:CHAR(26),nullzero,notnull"` // which account posted this status?
Account *Account `bun:"rel:belongs-to"` // account corresponding to accountID
AccountURI string `bun:",nullzero,notnull"` // activitypub uri of the owner of this status
InReplyToID string `bun:"type:CHAR(26),nullzero"` // id of the status this status replies to
InReplyToURI string `bun:",nullzero"` // activitypub uri of the status this status is a reply to
InReplyToAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that this status replies to
InReplyTo *Status `bun:"-"` // status corresponding to inReplyToID
InReplyToAccount *Account `bun:"rel:belongs-to"` // account corresponding to inReplyToAccountID
BoostOfID string `bun:"type:CHAR(26),nullzero"` // id of the status this status is a boost of
BoostOfURI string `bun:"-"` // URI of the status this status is a boost of; field not inserted in the db, just for dereferencing purposes.
BoostOfAccountID string `bun:"type:CHAR(26),nullzero"` // id of the account that owns the boosted status
BoostOf *Status `bun:"-"` // status that corresponds to boostOfID
BoostOfAccount *Account `bun:"rel:belongs-to"` // account that corresponds to boostOfAccountID
ThreadID string `bun:"type:CHAR(26),nullzero,notnull,default:00000000000000000000000000"` // id of the thread to which this status belongs
EditIDs []string `bun:"edits,array"` //
Edits []*StatusEdit `bun:"-"` //
PollID string `bun:"type:CHAR(26),nullzero"` //
Poll *Poll `bun:"-"` //
ContentWarning string `bun:",nullzero"` // Content warning HTML for this status.
ContentWarningText string `bun:""` // Original text of the content warning without formatting
Visibility Visibility `bun:",nullzero,notnull"` // visibility entry for this status
Sensitive *bool `bun:",nullzero,notnull,default:false"` // mark the status as sensitive?
Language string `bun:",nullzero"` // what language is this status written in?
CreatedWithApplicationID string `bun:"type:CHAR(26),nullzero"` // Which application was used to create this status?
CreatedWithApplication *Application `bun:"rel:belongs-to"` // application corresponding to createdWithApplicationID
ActivityStreamsType string `bun:",nullzero,notnull"` // What is the activitystreams type of this status? See: https://www.w3.org/TR/activitystreams-vocabulary/#object-types. Will probably almost always be Note but who knows!.
Text string `bun:""` // Original text of the status without formatting
ContentType StatusContentType `bun:",nullzero"` // Content type used to process the original text of the status
Federated *bool `bun:",notnull"` // This status will be federated beyond the local timeline(s)
InteractionPolicy *InteractionPolicy `bun:""` // InteractionPolicy for this status. If null then the default InteractionPolicy should be assumed for this status's Visibility. Always null for boost wrappers.
PendingApproval *bool `bun:",nullzero,notnull,default:false"` // If true then status is a reply or boost wrapper that must be Approved by the reply-ee or boost-ee before being fully distributed.
PreApproved bool `bun:"-"` // If true, then status is a reply to or boost wrapper of a status on our instance, has permission to do the interaction, and an Accept should be sent out for it immediately. Field not stored in the DB.
ApprovedByURI string `bun:",nullzero"` // URI of an Accept Activity that approves the Announce or Create Activity that this status was/will be attached to.
}
// GetID implements timeline.Timelineable{}.

View file

@ -23,10 +23,3 @@ type Thread struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
StatusIDs []string `bun:"-"` // ids of statuses belonging to this thread (order not guaranteed)
}
// ThreadToStatus is an intermediate struct to facilitate the
// many2many relationship between a thread and one or more statuses.
// Each value pairs a single thread ID with a single status ID.
//
// NOTE(review): both fields name the same "statusthread" unique group
// in their bun tags, which presumably yields one composite unique
// constraint across the two columns — confirm against the bun docs.
type ThreadToStatus struct {
ThreadID string `bun:"type:CHAR(26),unique:statusthread,nullzero,notnull"` // ID of the thread half of the pairing.
StatusID string `bun:"type:CHAR(26),unique:statusthread,nullzero,notnull"` // ID of the status half of the pairing.
}

View file

@ -26,6 +26,7 @@ import (
"hash"
"io"
"net/http"
"strconv"
"time"
apimodel "code.superseriousbusiness.org/gotosocial/internal/api/model"
@ -35,6 +36,7 @@ import (
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
"code.superseriousbusiness.org/gotosocial/internal/log"
"code.superseriousbusiness.org/gotosocial/internal/oauth"
"codeberg.org/gruf/go-bitutil"
"codeberg.org/gruf/go-byteutil"
"github.com/gin-gonic/gin"
)
@ -60,49 +62,79 @@ func NoLLaMas(
return func(*gin.Context) {}
}
seed := make([]byte, 32)
var seed [32]byte
// Read random data for the token seed.
_, err := io.ReadFull(rand.Reader, seed)
_, err := io.ReadFull(rand.Reader, seed[:])
if err != nil {
panic(err)
}
// Configure nollamas.
var nollamas nollamas
nollamas.seed = seed
nollamas.entropy = seed
nollamas.ttl = time.Hour
nollamas.diff = config.GetAdvancedScraperDeterrenceDifficulty()
nollamas.rounds = config.GetAdvancedScraperDeterrenceDifficulty()
nollamas.getInstanceV1 = getInstanceV1
nollamas.policy = cookiePolicy
return nollamas.Serve
}
// i.e. hash slice length.
const hashLen = sha256.Size
// i.e. hex.EncodedLen(hashLen).
const encodedHashLen = 2 * hashLen
// hashWithBufs encompasses a hash along
// with the necessary buffers to generate
// a hashsum and then encode that sum.
type hashWithBufs struct {
hash hash.Hash
hbuf []byte
ebuf []byte
hbuf [hashLen]byte
ebuf [encodedHashLen]byte
}
// write is a passthrough to hash.Hash{}.Write().
// Both return values are deliberately discarded: the standard
// library hash.Hash contract documents that Write never
// returns an error.
func (h *hashWithBufs) write(b []byte) {
_, _ = h.hash.Write(b)
}
// writeString is a passthrough to hash.Hash{}.Write([]byte(s)).
// The string is converted using byteutil.S2B() instead of a
// plain []byte(s) conversion, and the Write return values are
// discarded (hash.Hash documents that Write never errors).
//
// NOTE(review): S2B presumably reinterprets the string's bytes
// without copying; if so the slice must never be mutated by
// the underlying hash — confirm against go-byteutil.
func (h *hashWithBufs) writeString(s string) {
_, _ = h.hash.Write(byteutil.S2B(s))
}
// EncodedSum finalises the current hash state into the internal
// sum buffer, hex encodes that buffer into the internal encode
// buffer, and returns the encoded text as a freshly copied string.
func (h *hashWithBufs) EncodedSum() string {
	raw, enc := h.hbuf[:], h.ebuf[:]

	// Sum() appends to the empty prefix of the
	// sum buffer, so a digest of up to hashLen
	// bytes lands in place without allocating.
	_ = h.hash.Sum(raw[:0])

	// Encode the whole sum buffer
	// and hand back a string copy.
	hex.Encode(enc, raw)
	return string(enc)
}
// Reset returns the wrapped hash to its initial state and
// zeroes both the raw sum buffer and the hex encode buffer,
// so no bytes of a previous sum remain readable.
func (h *hashWithBufs) Reset() {
	h.hash.Reset()
	clear(h.hbuf[:])
	clear(h.ebuf[:])
}
type nollamas struct {
// our instance cookie policy.
policy apiutil.CookiePolicy
// unique token seed
// unique entropy
// to prevent hashes
// being guessable
seed []byte
entropy [32]byte
// success cookie TTL
ttl time.Duration
// algorithm difficulty knobs.
// diff determines the number
// of leading zeroes required.
diff uint8
// rounds determines roughly how
// many hash-encode rounds each
// client is required to complete.
rounds uint32
// extra fields required for
// our template rendering.
@ -134,18 +166,8 @@ func (m *nollamas) Serve(c *gin.Context) {
return
}
// i.e. outputted hash slice length.
const hashLen = sha256.Size
// i.e. hex.EncodedLen(hashLen).
const encodedHashLen = 2 * hashLen
// Prepare hash + buffers.
hash := hashWithBufs{
hash: sha256.New(),
hbuf: make([]byte, 0, hashLen),
ebuf: make([]byte, encodedHashLen),
}
// Prepare new hash with buffers.
hash := hashWithBufs{hash: sha256.New()}
// Extract client fingerprint data.
userAgent := c.GetHeader("User-Agent")
@ -153,15 +175,7 @@ func (m *nollamas) Serve(c *gin.Context) {
// Generate a unique token for this request,
// only valid for a period of now +- m.ttl.
token := m.token(&hash, userAgent, clientIP)
// For unique challenge string just use a
// single portion of their 'success' token.
// SHA256 is not yet cracked, this is not an
// application of a hash requiring serious
// cryptographic security and it rotates on
// a TTL basis, so it should be fine.
challenge := token[:len(token)/4]
token := m.getToken(&hash, userAgent, clientIP)
// Check for a provided success token.
cookie, _ := c.Cookie("gts-nollamas")
@ -169,8 +183,8 @@ func (m *nollamas) Serve(c *gin.Context) {
// Check whether passed cookie
// is the expected success token.
if subtle.ConstantTimeCompare(
byteutil.S2B(token),
byteutil.S2B(cookie),
byteutil.S2B(token),
) == 1 {
// They passed us a valid, expected
@ -185,10 +199,15 @@ func (m *nollamas) Serve(c *gin.Context) {
// handlers from being called.
c.Abort()
// Generate challenge for this unique (yet deterministic) token,
// returning seed, wanted 'challenge' result and expected solution.
seed, challenge, solution := m.getChallenge(&hash, token)
// Prepare new log entry.
l := log.WithContext(ctx).
WithField("userAgent", userAgent).
WithField("challenge", challenge)
WithField("seed", seed).
WithField("rounds", solution)
// Extract and parse query.
query := c.Request.URL.Query()
@ -196,32 +215,28 @@ func (m *nollamas) Serve(c *gin.Context) {
// Check query to see if an in-progress
// challenge solution has been provided.
nonce := query.Get("nollamas_solution")
if nonce == "" || len(nonce) > 20 {
if nonce == "" {
// noting that here, 20 is
// max integer string len.
//
// An invalid solution string, just
// present them with new challenge.
// No solution given, likely new client!
// Simply present them with challenge.
m.renderChallenge(c, seed, challenge)
l.Info("posing new challenge")
m.renderChallenge(c, challenge)
return
}
// Reset the hash.
hash.hash.Reset()
// Check nonce matches expected.
if subtle.ConstantTimeCompare(
byteutil.S2B(solution),
byteutil.S2B(nonce),
) != 1 {
// Check challenge+nonce as possible solution.
if !m.checkChallenge(&hash, challenge, nonce) {
// They failed challenge,
// re-present challenge page.
l.Info("invalid solution provided")
m.renderChallenge(c, challenge)
// Their nonce failed, re-challenge them. This must pass the
// same (seed, challenge) pair as a first-time challenge does:
// renderChallenge() takes the seed and then the challenge, and
// the expected solution must never be rendered to the client.
m.renderChallenge(c, seed, challenge)
l.Infof("invalid solution provided: %s", nonce)
return
}
l.Infof("challenge passed: %s", nonce)
l.Info("challenge passed")
// Drop solution query and encode.
query.Del("nollamas_solution")
@ -233,7 +248,7 @@ func (m *nollamas) Serve(c *gin.Context) {
c.Redirect(http.StatusTemporaryRedirect, c.Request.URL.RequestURI())
}
func (m *nollamas) renderChallenge(c *gin.Context, challenge string) {
func (m *nollamas) renderChallenge(c *gin.Context, seed, challenge string) {
// Fetch current instance information for templating vars.
instance, errWithCode := m.getInstanceV1(c.Request.Context())
if errWithCode != nil {
@ -252,8 +267,8 @@ func (m *nollamas) renderChallenge(c *gin.Context, challenge string) {
"/assets/Fork-Awesome/css/fork-awesome.min.css",
},
Extra: map[string]any{
"challenge": challenge,
"difficulty": m.diff,
"seed": seed,
"challenge": challenge,
},
Javascript: []apiutil.JavascriptEntry{
{
@ -264,23 +279,25 @@ func (m *nollamas) renderChallenge(c *gin.Context, challenge string) {
})
}
func (m *nollamas) token(hash *hashWithBufs, userAgent, clientIP string) string {
// Use our unique seed to seed hash,
// getToken generates a unique yet deterministic token for given HTTP request
// details, seeded by runtime generated entropy data and ttl rounded timestamp.
func (m *nollamas) getToken(hash *hashWithBufs, userAgent, clientIP string) string {
// Reset before
// using hash.
hash.Reset()
// Use our unique entropy to seed hash,
// to ensure we have cryptographically
// unique, yet deterministic, tokens
// generated for a given http client.
hash.hash.Write(m.seed)
// Include difficulty level in
// hash input data so if config
// changes then token invalidates.
hash.hash.Write([]byte{m.diff})
hash.write(m.entropy[:])
// Also seed the generated input with
// current time rounded to TTL, so our
// single comparison handles expiries.
now := time.Now().Round(m.ttl).Unix()
hash.hash.Write([]byte{
hash.write([]byte{
byte(now >> 56),
byte(now >> 48),
byte(now >> 40),
@ -291,37 +308,78 @@ func (m *nollamas) token(hash *hashWithBufs, userAgent, clientIP string) string
byte(now),
})
// Finally, append unique client request data.
hash.hash.Write(byteutil.S2B(userAgent))
hash.hash.Write(byteutil.S2B(clientIP))
// Append client request data.
hash.writeString(userAgent)
hash.writeString(clientIP)
// Return hex encoded hash output.
hash.hbuf = hash.hash.Sum(hash.hbuf[:0])
hex.Encode(hash.ebuf, hash.hbuf)
return string(hash.ebuf)
// Return hex encoded hash.
return hash.EncodedSum()
}
func (m *nollamas) checkChallenge(hash *hashWithBufs, challenge, nonce string) bool {
// Hash and encode input challenge with
// proposed nonce as a possible solution.
hash.hash.Write(byteutil.S2B(challenge))
hash.hash.Write(byteutil.S2B(nonce))
hash.hbuf = hash.hash.Sum(hash.hbuf[:0])
hex.Encode(hash.ebuf, hash.hbuf)
solution := hash.ebuf
// getChallenge prepares a new challenge given the deterministic input token for this request.
// it will return an input seed string, a challenge string which is the end result the client
// should be looking for, and the solution for this such that challenge = hex(sha256(seed + solution)).
// the solution will always be a string-encoded 64bit integer calculated from m.rounds + random jitter.
func (m *nollamas) getChallenge(hash *hashWithBufs, token string) (seed, challenge, solution string) {
// Compiler bound-check hint.
if len(solution) < int(m.diff) {
panic(gtserror.New("BCE"))
// For their unique seed string just use a
// single portion of their 'success' token.
// SHA256 is not yet cracked, this is not an
// application of a hash requiring serious
// cryptographic security and it rotates on
// a TTL basis, so it should be fine.
seed = token[:len(token)/4]
// BEFORE resetting the hash, get the last
// two bytes of NON-hex-encoded data from
// token generation to use for random jitter.
// This is taken from the end of the hash as
// this is the "unseen" end part of token.
//
// (if we used hex-encoded data it would
// only ever be '0-9' or 'a-f' ASCII chars).
//
// Security-wise, same applies as-above.
jitter := int16(hash.hbuf[len(hash.hbuf)-2]) |
int16(hash.hbuf[len(hash.hbuf)-1])<<8
var rounds int64
switch {
// For some small percentage of
// clients we purposely low-ball
// their rounds required, to make
// it so gaming it with a starting
// nonce value may suddenly fail.
case jitter%37 == 0:
rounds = int64(m.rounds/10) + int64(jitter/10)
case jitter%31 == 0:
rounds = int64(m.rounds/5) + int64(jitter/5)
case jitter%29 == 0:
rounds = int64(m.rounds/3) + int64(jitter/3)
case jitter%13 == 0:
rounds = int64(m.rounds/2) + int64(jitter/2)
// Determine an appropriate number of hash rounds
// we want the client to perform on input seed. This
// is determined as configured m.rounds +- jitter.
// This will be the 'solution' to create 'challenge'.
default:
rounds = int64(m.rounds) + int64(jitter) //nolint:gosec
}
// Check that the first 'diff'
// many chars are indeed zeroes.
for i := range m.diff {
if solution[i] != '0' {
return false
}
}
// Encode (positive) determined hash rounds as string.
solution = strconv.FormatInt(bitutil.Abs64(rounds), 10)
return true
// Reset before
// using hash.
hash.Reset()
// Calculate the expected result
// of hex(sha256(seed + solution)),
// i.e. the proposed 'challenge'.
hash.writeString(seed)
hash.writeString(solution)
challenge = hash.EncodedSum()
return
}

View file

@ -95,41 +95,39 @@ func testNoLLaMasMiddleware(t *testing.T, e *gin.Engine, userAgent string) {
panic(err)
}
var seed string
var challenge string
var difficulty uint64
// Parse output body and find the challenge / difficulty.
for _, line := range strings.Split(string(b), "\n") {
line = strings.TrimSpace(line)
switch {
case strings.HasPrefix(line, "data-nollamas-seed=\""):
line = line[20:]
line = line[:len(line)-1]
seed = line
case strings.HasPrefix(line, "data-nollamas-challenge=\""):
line = line[25:]
line = line[:len(line)-1]
challenge = line
case strings.HasPrefix(line, "data-nollamas-difficulty=\""):
line = line[26:]
line = line[:len(line)-1]
var err error
difficulty, err = strconv.ParseUint(line, 10, 8)
assert.NoError(t, err)
}
}
// Ensure valid posed challenge.
assert.NotZero(t, difficulty)
assert.NotEmpty(t, challenge)
assert.NotEmpty(t, seed)
// Prepare a test request for gin engine.
r = httptest.NewRequest("GET", "/", nil)
r.Header.Set("User-Agent", userAgent)
rw = httptest.NewRecorder()
// Now compute and set solution query parameter.
solution := computeSolution(challenge, difficulty)
r.URL.RawQuery = "nollamas_solution=" + solution
t.Logf("seed=%s", seed)
t.Logf("challenge=%s", challenge)
t.Logf("difficulty=%d", difficulty)
// Now compute and set solution query parameter.
solution := computeSolution(seed, challenge)
r.URL.RawQuery = "nollamas_solution=" + solution
t.Logf("solution=%s", solution)
// Pass req through
@ -152,17 +150,14 @@ func testNoLLaMasMiddleware(t *testing.T, e *gin.Engine, userAgent string) {
}
// computeSolution does the functional equivalent of our nollamas workerTask.js.
func computeSolution(challenge string, diff uint64) string {
outer:
func computeSolution(seed, challenge string) string {
for i := 0; ; i++ {
solution := strconv.Itoa(i)
combined := challenge + solution
combined := seed + solution
hash := sha256.Sum256(byteutil.S2B(combined))
encoded := hex.EncodeToString(hash[:])
for i := range diff {
if encoded[i] != '0' {
continue outer
}
if encoded != challenge {
continue
}
return solution
}

View file

@ -217,10 +217,6 @@ func (p *Processor) Create(
return nil, errWithCode
}
if errWithCode := p.processThreadID(ctx, status); errWithCode != nil {
return nil, errWithCode
}
// Process the incoming created status visibility.
processVisibility(form, requester.Settings.Privacy, status)
@ -444,46 +440,6 @@ func (p *Processor) processInReplyTo(
return nil
}
// processThreadID sets the thread ID on the given status. When the
// status replies to a parent that already has a thread ID, that ID is
// inherited. Otherwise a new thread is generated, stored in the
// database and assigned, so that future replies can inherit it.
//
// Returns an internal server error (with code) if the new thread
// could not be inserted into the database, else nil.
func (p *Processor) processThreadID(ctx context.Context, status *gtsmodel.Status) gtserror.WithCode {
	// The parent's thread ID may be unset, e.g. for a
	// local status replying to a remote status that was
	// never threaded. In that case we fall through and
	// thread from this status onwards instead, as this
	// is where the relevant part of the thread starts
	// from the perspective of our instance.
	if parent := status.InReplyTo; parent != nil {
		if parentThreadID := parent.ThreadID; parentThreadID != "" {
			status.ThreadID = parentThreadID
			return nil
		}
	}

	// Start a new thread (or threaded
	// subsection) at this very status.
	newID := id.NewULID()
	thread := gtsmodel.Thread{ID: newID}
	if dbErr := p.state.DB.PutThread(ctx, &thread); dbErr != nil {
		wrapped := gtserror.Newf("error inserting new thread in db: %w", dbErr)
		return gtserror.NewErrorInternalError(wrapped)
	}

	// Any later replies to this
	// status inherit this thread.
	status.ThreadID = newID
	return nil
}
func processVisibility(
form *apimodel.StatusCreateRequest,
accountDefaultVis gtsmodel.Visibility,