[performance] filter model and database table improvements (#4277)

- removes unnecessary fields / columns (created_at, updated_at)
- replaces filter.context_* columns with singular filter.contexts bit field which should save both struct memory and database space
- replaces filter.action string with integer enum type which should save both struct memory and database space
- adds links from filter to filter_* tables with Filter{}.KeywordIDs and Filter{}.StatusIDs fields (this also means we now have those ID slices cached, which reduces some lookups)
- removes account_id fields from filter_* tables, since there's a more direct connection between filter and filter_* tables, and filter.account_id already exists
- refactors a bunch of the filter processor logic to save on code repetition, factor in the above changes, fix a few bugs with missed error returns and bring it more in-line with some of our newer code

Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4277
Co-authored-by: kim <grufwub@gmail.com>
Co-committed-by: kim <grufwub@gmail.com>
This commit is contained in:
kim 2025-06-24 17:24:34 +02:00 committed by tobi
commit 996da6e029
82 changed files with 2440 additions and 1722 deletions

View file

@ -20,7 +20,7 @@ package migrations
import (
"context"
"code.superseriousbusiness.org/gotosocial/internal/gtsmodel"
gtsmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20241018151036_filter_unique_fix"
"github.com/uptrace/bun"
"github.com/uptrace/bun/dialect"
)

View file

@ -0,0 +1,77 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package gtsmodel
import (
"regexp"
"time"
)
// Filter stores a filter created by a local account.
//
// In this version of the model each context the filter applies in
// is stored as its own nullable-bool column (Context*), and Action
// is stored as a string enum.
type Filter struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
ExpiresAt time.Time `bun:"type:timestamptz,nullzero"` // Time filter should expire. If null, should not expire.
AccountID string `bun:"type:CHAR(26),notnull,nullzero,unique:filters_account_id_title_uniq"` // ID of the local account that created the filter. Unique together with Title.
Title string `bun:",nullzero,notnull,unique:filters_account_id_title_uniq"` // The name of the filter. Unique together with AccountID.
Action FilterAction `bun:",nullzero,notnull"` // The action to take.
Keywords []*FilterKeyword `bun:"-"` // Keywords for this filter (tagged "-", so not a column of this table).
Statuses []*FilterStatus `bun:"-"` // Statuses for this filter (tagged "-", so not a column of this table).
ContextHome *bool `bun:",nullzero,notnull,default:false"` // Apply filter to home timeline and lists.
ContextNotifications *bool `bun:",nullzero,notnull,default:false"` // Apply filter to notifications.
ContextPublic *bool `bun:",nullzero,notnull,default:false"` // Apply filter to public timelines.
ContextThread *bool `bun:",nullzero,notnull,default:false"` // Apply filter when viewing a status's associated thread.
ContextAccount *bool `bun:",nullzero,notnull,default:false"` // Apply filter when viewing an account profile.
}
// FilterKeyword stores a single keyword to filter statuses against.
//
// The (FilterID, Keyword) pair shares one unique constraint, so the
// same keyword cannot be stored twice for a single filter.
type FilterKeyword struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter keyword.
FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to.
Filter *Filter `bun:"-"` // Filter corresponding to FilterID (tagged "-", so not a column of this table).
Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against.
WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries?
Regexp *regexp.Regexp `bun:"-"` // pre-prepared regular expression (tagged "-", so not a column of this table).
}
// FilterStatus stores a single status to filter.
//
// The (FilterID, StatusID) pair shares one unique constraint, so the
// same status cannot be attached twice to a single filter.
type FilterStatus struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter status.
FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_statuses_filter_id_status_id_uniq"` // ID of the filter that this status entry belongs to.
Filter *Filter `bun:"-"` // Filter corresponding to FilterID (tagged "-", so not a column of this table).
StatusID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_statuses_filter_id_status_id_uniq"` // ID of the status to filter.
}
// FilterAction represents the action to take on a filtered status.
// In this version of the model it is a string-backed enum.
type FilterAction string
const (
// FilterActionNone filters should not exist, except internally, for partially constructed or invalid filters.
// It is the zero value (empty string).
FilterActionNone FilterAction = ""
// FilterActionWarn means that the status should be shown behind a warning.
FilterActionWarn FilterAction = "warn"
// FilterActionHide means that the status should be removed from timeline results entirely.
FilterActionHide FilterAction = "hide"
)

View file

@ -0,0 +1,303 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package migrations
import (
"context"
"database/sql"
"errors"
"reflect"
"strings"
oldmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20241018151036_filter_unique_fix"
newmodel "code.superseriousbusiness.org/gotosocial/internal/db/bundb/migrations/20250617122055_filter_improvements"
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
"github.com/uptrace/bun"
)
// init registers this migration's up / down functions with Migrations,
// panicking if registration fails.
//
// The up function runs three separate transactions, forcing a WAL
// checkpoint (via doWALCheckpoint, defined elsewhere in this package)
// after the first and second:
//
//  1. add the filters 'contexts' bit field and integer 'action' columns,
//     convert every existing row, then drop the old 'context_*' and
//     string 'action' columns and rename 'action_new' to 'action';
//  2. drop the account_id indices and the created_at / updated_at /
//     account_id columns listed below from the filter tables;
//  3. add the Filter{}.KeywordIDs and Filter{}.StatusIDs columns and
//     fill them from the rows that reference each filter by filter_id.
//
// The down function is a no-op.
func init() {
	up := func(ctx context.Context, db *bun.DB) error {

		// Replace 'context_*' and 'action' columns with space-saving enum / bitfields.
		if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
			newFilterType := reflect.TypeOf((*newmodel.Filter)(nil))

			// Generate bun definition for new filter table contexts column.
			newColDef, err := getBunColumnDef(tx, newFilterType, "Contexts")
			if err != nil {
				return gtserror.Newf("error getting bun column def: %w", err)
			}

			// Add new column type to table.
			if _, err := tx.NewAddColumn().
				Model((*oldmodel.Filter)(nil)).
				ColumnExpr(newColDef).
				Exec(ctx); err != nil {
				return gtserror.Newf("error adding filter.contexts column: %w", err)
			}

			// Generate bun definition for new filter table action column.
			newColDef, err = getBunColumnDef(tx, newFilterType, "Action")
			if err != nil {
				return gtserror.Newf("error getting bun column def: %w", err)
			}

			// For now, name it as '_new', since the old
			// string 'action' column still exists until
			// all rows have been converted below.
			newColDef = strings.ReplaceAll(
				newColDef,
				"action",
				"action_new",
			)

			// Add new column type to table.
			if _, err := tx.NewAddColumn().
				Model((*oldmodel.Filter)(nil)).
				ColumnExpr(newColDef).
				Exec(ctx); err != nil {
				return gtserror.Newf("error adding filter.action_new column: %w", err)
			}

			var oldFilters []*oldmodel.Filter

			// Select all filters. An empty table
			// is fine, there's just nothing to convert.
			if err := tx.NewSelect().
				Model(&oldFilters).
				Column("id",
					"context_home",
					"context_notifications",
					"context_public",
					"context_thread",
					"context_account",
					"action").
				Scan(ctx); err != nil && !errors.Is(err, sql.ErrNoRows) {
				return gtserror.Newf("error selecting filters: %w", err)
			}

			// isSet reports whether an old context flag is enabled,
			// treating an unexpected NULL (nil pointer) as false
			// instead of panicking part-way through the migration.
			isSet := func(b *bool) bool { return b != nil && *b }

			for _, oldFilter := range oldFilters {
				var newContexts newmodel.FilterContexts
				var newAction newmodel.FilterAction

				// Convert old contexts
				// to new contexts type.
				if isSet(oldFilter.ContextHome) {
					newContexts.SetHome()
				}
				if isSet(oldFilter.ContextNotifications) {
					newContexts.SetNotifications()
				}
				if isSet(oldFilter.ContextPublic) {
					newContexts.SetPublic()
				}
				if isSet(oldFilter.ContextThread) {
					newContexts.SetThread()
				}
				if isSet(oldFilter.ContextAccount) {
					newContexts.SetAccount()
				}

				// Convert old action
				// to new action type.
				switch oldFilter.Action {
				case oldmodel.FilterActionHide:
					newAction = newmodel.FilterActionHide
				case oldmodel.FilterActionWarn:
					newAction = newmodel.FilterActionWarn
				default:
					return gtserror.Newf("invalid filter action %q for %s", oldFilter.Action, oldFilter.ID)
				}

				// Update filter row with the new
				// contexts and new action values.
				if _, err := tx.NewUpdate().
					Model((*oldmodel.Filter)(nil)).
					Where("? = ?", bun.Ident("id"), oldFilter.ID).
					Set("? = ?", bun.Ident("contexts"), newContexts).
					Set("? = ?", bun.Ident("action_new"), newAction).
					Exec(ctx); err != nil {
					return gtserror.Newf("error updating filter.contexts: %w", err)
				}
			}

			// Drop the old updated columns.
			for _, col := range []string{
				"context_home",
				"context_notifications",
				"context_public",
				"context_thread",
				"context_account",
				"action",
			} {
				if _, err := tx.NewDropColumn().
					Model((*oldmodel.Filter)(nil)).
					Column(col).
					Exec(ctx); err != nil {
					return gtserror.Newf("error dropping filter.%s column: %w", col, err)
				}
			}

			// Rename the new action
			// column to correct name.
			if _, err := tx.NewRaw(
				"ALTER TABLE ? RENAME COLUMN ? TO ?",
				bun.Ident("filters"),
				bun.Ident("action_new"),
				bun.Ident("action"),
			).Exec(ctx); err != nil {
				return gtserror.Newf("error renaming new action column: %w", err)
			}

			return nil
		}); err != nil {
			return err
		}

		// SQLITE: force WAL checkpoint to merge writes.
		if err := doWALCheckpoint(ctx, db); err != nil {
			return err
		}

		// Drop a bunch of (now, and more generally) unused columns from filter tables.
		if err := db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {

			// Indices are dropped first, before
			// the columns they were built on.
			for model, indices := range map[any][]string{
				(*oldmodel.FilterKeyword)(nil): {"filter_keywords_account_id_idx"},
				(*oldmodel.FilterStatus)(nil):  {"filter_statuses_account_id_idx"},
			} {
				for _, index := range indices {
					if _, err := tx.NewDropIndex().
						Model(model).
						Index(index).
						Exec(ctx); err != nil {
						return gtserror.Newf("error dropping %s index: %w", index, err)
					}
				}
			}

			for model, cols := range map[any][]string{
				(*oldmodel.Filter)(nil):        {"created_at", "updated_at"},
				(*oldmodel.FilterKeyword)(nil): {"created_at", "updated_at", "account_id"},
				(*oldmodel.FilterStatus)(nil):  {"created_at", "updated_at", "account_id"},
			} {
				for _, col := range cols {
					if _, err := tx.NewDropColumn().
						Model(model).
						Column(col).
						Exec(ctx); err != nil {
						return gtserror.Newf("error dropping %T.%s column: %w", model, col, err)
					}
				}
			}

			return nil
		}); err != nil {
			return err
		}

		// SQLITE: force WAL checkpoint to merge writes.
		if err := doWALCheckpoint(ctx, db); err != nil {
			return err
		}

		// Create links from 'filters' table to 'filter_{keywords,statuses}' tables.
		return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
			newFilterType := reflect.TypeOf((*newmodel.Filter)(nil))

			// This must be a slice of IDs: a plain string
			// would hold at most a single row's ID, and
			// ranging over it below would yield runes
			// instead of filter IDs.
			var filterIDs []string

			// Select all filter IDs.
			if err := tx.NewSelect().
				Model((*newmodel.Filter)(nil)).
				Column("id").
				Scan(ctx, &filterIDs); err != nil && !errors.Is(err, sql.ErrNoRows) {
				return gtserror.Newf("error selecting filter ids: %w", err)
			}

			for _, data := range []struct {
				Field string
				Model any
			}{
				{
					Field: "KeywordIDs",
					Model: (*newmodel.FilterKeyword)(nil),
				},
				{
					Field: "StatusIDs",
					Model: (*newmodel.FilterStatus)(nil),
				},
			} {
				// Generate bun definition for new filter table field column.
				newColDef, err := getBunColumnDef(tx, newFilterType, data.Field)
				if err != nil {
					return gtserror.Newf("error getting bun column def: %w", err)
				}

				// Add new column type to table.
				if _, err := tx.NewAddColumn().
					Model((*oldmodel.Filter)(nil)).
					ColumnExpr(newColDef).
					Exec(ctx); err != nil {
					return gtserror.Newf("error adding filter.%s column: %w", data.Field, err)
				}

				// Get the SQL field information from bun for Filter{}.$Field.
				field, _, err := getModelField(tx, newFilterType, data.Field)
				if err != nil {
					return gtserror.Newf("error getting bun model field: %w", err)
				}

				// Extract column name.
				col := field.SQLName

				var relatedIDs []string
				for _, filterID := range filterIDs {
					// Reset related IDs.
					clear(relatedIDs)
					relatedIDs = relatedIDs[:0]

					// Select $Model IDs that
					// are attached to filterID.
					// A filter with none attached
					// is valid, so no rows is okay.
					if err := tx.NewSelect().
						Model(data.Model).
						Column("id").
						Where("? = ?", bun.Ident("filter_id"), filterID).
						Scan(ctx, &relatedIDs); err != nil && !errors.Is(err, sql.ErrNoRows) {
						return gtserror.Newf("error selecting %T ids: %w", data.Model, err)
					}

					// Now update the relevant filter
					// row to contain these related IDs.
					if _, err := tx.NewUpdate().
						Model((*newmodel.Filter)(nil)).
						Where("? = ?", bun.Ident("id"), filterID).
						Set("? = ?", bun.Ident(col), relatedIDs).
						Exec(ctx); err != nil {
						return gtserror.Newf("error updating filters.%s ids: %w", col, err)
					}
				}
			}

			return nil
		})
	}

	// Nothing is reverted on down-migration.
	down := func(ctx context.Context, db *bun.DB) error {
		return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
			return nil
		})
	}

	if err := Migrations.Register(up, down); err != nil {
		panic(err)
	}
}

View file

@ -0,0 +1,243 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package gtsmodel
import (
"regexp"
"time"
"code.superseriousbusiness.org/gotosocial/internal/util"
)
type (
	// smallint is sized to match a PostgreSQL
	// SMALLINT; an SQLite SMALLINT is variable
	// in size, so PostgreSQL sets the limit here.
	smallint int16

	// enumType is the integer type that database
	// enum values are (or at least should be)
	// declared with: the smallest int size.
	enumType smallint

	// bitFieldType is the integer type used for
	// database bit fields, for as long as the
	// smallest int size has enough bits for them.
	bitFieldType smallint
)

// FilterContext is a single context
// in which a Filter may be applied.
//
// Each value doubles as the bit mask for that
// context inside a FilterContexts bit field, and
// is also used internally to name the context a
// status is currently being filtered in.
type FilterContext bitFieldType

const (
	// FilterContextNone is for internal use only,
	// and means that no filters should be applied.
	FilterContextNone FilterContext = 0

	// FilterContextHome is the context of
	// filtering for a home or list timeline.
	FilterContextHome FilterContext = 1 << 1

	// FilterContextNotifications is the context
	// of filtering for the notifications timeline.
	FilterContextNotifications FilterContext = 1 << 2

	// FilterContextPublic is the context of
	// filtering for a public or tag timeline.
	FilterContextPublic FilterContext = 1 << 3

	// FilterContextThread is the context of
	// filtering for the context of a thread.
	FilterContextThread FilterContext = 1 << 4

	// FilterContextAccount is the context of
	// filtering for the statuses of an account.
	FilterContextAccount FilterContext = 1 << 5
)

// FilterContexts is a bit field holding the
// set of contexts in which a Filter applies.
type FilterContexts bitFieldType

// Applies returns whether the given single FilterContext is enabled
// in the receiving FilterContexts. Any value that is not exactly one
// of the known (non-none) contexts is reported as not applying.
func (ctxs FilterContexts) Applies(ctx FilterContext) bool {
	switch ctx {
	case FilterContextHome,
		FilterContextNotifications,
		FilterContextPublic,
		FilterContextThread,
		FilterContextAccount:
		return ctxs&FilterContexts(ctx) != 0
	}
	return false
}

// Home reports whether the FilterContextHome bit is set.
func (ctxs FilterContexts) Home() bool {
	const mask = FilterContexts(FilterContextHome)
	return ctxs&mask == mask
}

// SetHome switches the FilterContextHome bit on.
func (ctxs *FilterContexts) SetHome() {
	*ctxs = *ctxs | FilterContexts(FilterContextHome)
}

// UnsetHome switches the FilterContextHome bit off.
func (ctxs *FilterContexts) UnsetHome() {
	*ctxs &^= FilterContexts(FilterContextHome)
}

// Notifications reports whether the FilterContextNotifications bit is set.
func (ctxs FilterContexts) Notifications() bool {
	const mask = FilterContexts(FilterContextNotifications)
	return ctxs&mask == mask
}

// SetNotifications switches the FilterContextNotifications bit on.
func (ctxs *FilterContexts) SetNotifications() {
	*ctxs = *ctxs | FilterContexts(FilterContextNotifications)
}

// UnsetNotifications switches the FilterContextNotifications bit off.
func (ctxs *FilterContexts) UnsetNotifications() {
	*ctxs &^= FilterContexts(FilterContextNotifications)
}

// Public reports whether the FilterContextPublic bit is set.
func (ctxs FilterContexts) Public() bool {
	const mask = FilterContexts(FilterContextPublic)
	return ctxs&mask == mask
}

// SetPublic switches the FilterContextPublic bit on.
func (ctxs *FilterContexts) SetPublic() {
	*ctxs = *ctxs | FilterContexts(FilterContextPublic)
}

// UnsetPublic switches the FilterContextPublic bit off.
func (ctxs *FilterContexts) UnsetPublic() {
	*ctxs &^= FilterContexts(FilterContextPublic)
}

// Thread reports whether the FilterContextThread bit is set.
func (ctxs FilterContexts) Thread() bool {
	const mask = FilterContexts(FilterContextThread)
	return ctxs&mask == mask
}

// SetThread switches the FilterContextThread bit on.
func (ctxs *FilterContexts) SetThread() {
	*ctxs = *ctxs | FilterContexts(FilterContextThread)
}

// UnsetThread switches the FilterContextThread bit off.
func (ctxs *FilterContexts) UnsetThread() {
	*ctxs &^= FilterContexts(FilterContextThread)
}

// Account reports whether the FilterContextAccount bit is set.
func (ctxs FilterContexts) Account() bool {
	const mask = FilterContexts(FilterContextAccount)
	return ctxs&mask == mask
}

// SetAccount switches the FilterContextAccount bit on.
func (ctxs *FilterContexts) SetAccount() {
	*ctxs = *ctxs | FilterContexts(FilterContextAccount)
}

// UnsetAccount switches the FilterContextAccount bit off.
func (ctxs *FilterContexts) UnsetAccount() {
	*ctxs &^= FilterContexts(FilterContextAccount)
}
// FilterAction represents the action
// to take on a filtered status.
//
// In this version of the model it is an
// integer enum (enumType) rather than a string.
type FilterAction enumType
const (
// FilterActionNone filters should not exist, except
// internally, for partially constructed or invalid filters.
// It is the zero value.
FilterActionNone FilterAction = 0
// FilterActionWarn means that the
// status should be shown behind a warning.
FilterActionWarn FilterAction = 1
// FilterActionHide means that the status should
// be removed from timeline results entirely.
FilterActionHide FilterAction = 2
)
// Filter stores a filter created by a local account.
//
// In this version of the model the contexts the filter applies in
// are packed into the single Contexts bit field, Action is an integer
// enum, and the IDs of attached keywords / statuses are kept on the
// filter row itself in the 'keywords' / 'statuses' array columns.
type Filter struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
ExpiresAt time.Time `bun:"type:timestamptz,nullzero"` // Time filter should expire. If null, should not expire.
AccountID string `bun:"type:CHAR(26),notnull,nullzero,unique:filters_account_id_title_uniq"` // ID of the local account that created the filter. Unique together with Title.
Title string `bun:",nullzero,notnull,unique:filters_account_id_title_uniq"` // The name of the filter. Unique together with AccountID.
Action FilterAction `bun:",nullzero,notnull,default:0"` // The action to take.
Keywords []*FilterKeyword `bun:"-"` // Keywords for this filter (tagged "-", so not a column of this table).
KeywordIDs []string `bun:"keywords,array"` // IDs of the keywords for this filter, stored in the 'keywords' array column.
Statuses []*FilterStatus `bun:"-"` // Statuses for this filter (tagged "-", so not a column of this table).
StatusIDs []string `bun:"statuses,array"` // IDs of the statuses for this filter, stored in the 'statuses' array column.
Contexts FilterContexts `bun:",nullzero,notnull,default:0"` // Which contexts does this filter apply in?
}
// FilterKeyword stores a single keyword to filter statuses against.
//
// The (FilterID, Keyword) pair shares one unique constraint, so the
// same keyword cannot be stored twice for a single filter.
type FilterKeyword struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to.
Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against.
WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries?
Regexp *regexp.Regexp `bun:"-"` // pre-prepared regular expression, populated by Compile (tagged "-", so not a column of this table).
}
// Compile builds the case-insensitive regular expression for this
// FilterKeyword and stores the result in k.Regexp.
//
// The keyword itself is always matched literally. When WholeWord is
// set, the literal is additionally wrapped so that it must be preceded
// by a word boundary, whitespace or start of line, and followed by a
// word boundary, whitespace or end of line.
//
// The returned error comes straight from regexp.Compile;
// the caller is expected to wrap it.
func (k *FilterKeyword) Compile() (err error) {
	// Escape the keyword so that any regexp
	// metacharacters in it are matched as-is.
	pattern := regexp.QuoteMeta(k.Keyword)

	if util.PtrOrZero(k.WholeWord) {
		pattern = `(?:\b|\s|^)` + pattern + `(?:\b|\s|$)`
	}

	// Compile keyword filter regexp.
	k.Regexp, err = regexp.Compile(`(?i)` + pattern)
	return err
}
// FilterStatus stores a single status to filter.
//
// The (FilterID, StatusID) pair shares one unique constraint, so the
// same status cannot be attached twice to a single filter.
type FilterStatus struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_statuses_filter_id_status_id_uniq"` // ID of the filter that this status entry belongs to.
StatusID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_statuses_filter_id_status_id_uniq"` // ID of the status to filter.
}