[feature/performance] support uncaching remote emoji + scheduled cleanup functions (#1987)

This commit is contained in:
kim 2023-07-24 13:14:13 +01:00 committed by GitHub
commit 9eff0d46e4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 1287 additions and 219 deletions

View file

@ -73,6 +73,9 @@ type Account interface {
// GetAccountFaves fetches faves/likes created by the target accountID.
GetAccountFaves(ctx context.Context, accountID string) ([]*gtsmodel.StatusFave, Error)
// GetAccountsUsingEmoji fetches all account models using emoji with given ID stored in their 'emojis' column.
GetAccountsUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Account, error)
// CountAccountStatuses is a shortcut for the common action of counting statuses produced by accountID.
CountAccountStatuses(ctx context.Context, accountID string) (int, Error)

View file

@ -56,6 +56,27 @@ func (a *accountDB) GetAccountByID(ctx context.Context, id string) (*gtsmodel.Ac
)
}
// GetAccountsByIDs looks up each of the given account IDs in turn via
// GetAccountByID and collects the accounts that were found.
//
// This is best-effort: an ID whose lookup fails is logged and left out of
// the result, so the returned slice may be shorter than ids. The returned
// error is always nil.
func (a *accountDB) GetAccountsByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Account, error) {
	// Sized for the best case in which every ID resolves.
	found := make([]*gtsmodel.Account, 0, len(ids))

	for i := range ids {
		accountID := ids[i]

		// Every lookup runs with a context wrapped by gtscontext.SetBarebones.
		// NOTE(review): presumably this skips populating sub-models -- confirm
		// against the gtscontext package.
		acct, err := a.GetAccountByID(gtscontext.SetBarebones(ctx), accountID)
		if err == nil {
			found = append(found, acct)
			continue
		}

		// Log the failure and move on to the next ID.
		log.Errorf(ctx, "error getting account %q: %v", accountID, err)
	}

	return found, nil
}
func (a *accountDB) GetAccountByURI(ctx context.Context, uri string) (*gtsmodel.Account, db.Error) {
return a.getAccount(
ctx,
@ -444,6 +465,34 @@ func (a *accountDB) GetAccountCustomCSSByUsername(ctx context.Context, username
return account.CustomCSS, nil
}
// GetAccountsUsingEmoji selects account IDs from the `accounts` table,
// filtered through whereLike on `emojis` and the given emoji ID, and then
// converts those IDs into account models with GetAccountsByIDs.
//
// A LIKE match is used rather than `WHERE ? IN (emojis)` because the latter
// ends up being much slower, and the database stores this ID-array column
// as text anyway.
func (a *accountDB) GetAccountsUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Account, error) {
	// SELECT id FROM accounts, with the LIKE filter appended by whereLike.
	selectIDs := whereLike(
		a.conn.NewSelect().
			Table("accounts").
			Column("id"),
		"emojis",
		emojiID,
	)

	// Run the query, scanning the matching IDs into ids.
	var ids []string
	_, err := selectIDs.Exec(ctx, &ids)
	if err != nil {
		return nil, a.conn.ProcessError(err)
	}

	// Resolve the IDs to account objects.
	return a.GetAccountsByIDs(ctx, ids)
}
func (a *accountDB) GetAccountFaves(ctx context.Context, accountID string) ([]*gtsmodel.StatusFave, db.Error) {
faves := new([]*gtsmodel.StatusFave)

View file

@ -126,12 +126,20 @@ func (e *emojiDB) DeleteEmojiByID(ctx context.Context, id string) db.Error {
return err
}
// Select all accounts using this emoji.
if _, err := tx.NewSelect().
// Prepare SELECT accounts query.
aq := tx.NewSelect().
Table("accounts").
Column("id").
Where("? IN (emojis)", id).
Exec(ctx, &accountIDs); err != nil {
Column("id")
// Append a WHERE LIKE clause to the query
// that checks the `emojis` column for any
// text containing this specific emoji ID.
//
// (see GetStatusesUsingEmoji() for details.)
aq = whereLike(aq, "emojis", id)
// Select all accounts using this emoji into accountIDs.
if _, err := aq.Exec(ctx, &accountIDs); err != nil {
return err
}
@ -162,12 +170,20 @@ func (e *emojiDB) DeleteEmojiByID(ctx context.Context, id string) db.Error {
}
}
// Select all statuses using this emoji.
if _, err := tx.NewSelect().
// Prepare SELECT statuses query.
sq := tx.NewSelect().
Table("statuses").
Column("id").
Where("? IN (emojis)", id).
Exec(ctx, &statusIDs); err != nil {
Column("id")
// Append a WHERE LIKE clause to the query
// that checks the `emojis` column for any
// text containing this specific emoji ID.
//
// (see GetStatusesUsingEmoji() for details.)
sq = whereLike(sq, "emojis", id)
// Select all statuses using this emoji into statusIDs.
if _, err := sq.Exec(ctx, &statusIDs); err != nil {
return err
}
@ -328,7 +344,7 @@ func (e *emojiDB) GetEmojisBy(ctx context.Context, domain string, includeDisable
}
func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) {
emojiIDs := []string{}
var emojiIDs []string
q := e.conn.NewSelect().
Table("emojis").
@ -336,7 +352,7 @@ func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gt
Order("id DESC")
if maxID != "" {
q = q.Where("? < ?", bun.Ident("id"), maxID)
q = q.Where("id < ?", maxID)
}
if limit != 0 {
@ -350,6 +366,52 @@ func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gt
return e.GetEmojisByIDs(ctx, emojiIDs)
}
// GetRemoteEmojis selects the IDs of emojis whose `domain` column is not
// NULL, ordered by ID descending, and loads them via GetEmojisByIDs.
//
// A non-empty maxID restricts the selection to IDs below maxID; a non-zero
// limit caps the number of rows selected.
func (e *emojiDB) GetRemoteEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) {
	// Base query: IDs of emojis with a domain set, newest ID first.
	query := e.conn.NewSelect().Table("emojis").Column("id")
	query = query.Where("domain IS NOT NULL")
	query = query.Order("id DESC")

	// Optional paging cursor.
	if maxID != "" {
		query = query.Where("id < ?", maxID)
	}

	// Optional row cap; zero means no LIMIT is added.
	if limit != 0 {
		query = query.Limit(limit)
	}

	var ids []string
	err := query.Scan(ctx, &ids)
	if err != nil {
		return nil, e.conn.ProcessError(err)
	}

	return e.GetEmojisByIDs(ctx, ids)
}
// GetCachedEmojisOlderThan selects the IDs of emojis that are flagged as
// cached, have a non-NULL `domain`, and whose `created_at` is earlier than
// olderThan, then loads them via GetEmojisByIDs.
//
// Rows are ordered by `created_at` descending (newest first). A non-zero
// limit caps the number of rows selected.
func (e *emojiDB) GetCachedEmojisOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.Emoji, error) {
	// Base query with all three filters applied.
	query := e.conn.NewSelect().Table("emojis").Column("id")
	query = query.
		Where("cached = true").
		Where("domain IS NOT NULL").
		Where("created_at < ?", olderThan)
	query = query.Order("created_at DESC")

	// Optional row cap; zero means no LIMIT is added.
	if limit != 0 {
		query = query.Limit(limit)
	}

	var ids []string
	err := query.Scan(ctx, &ids)
	if err != nil {
		return nil, e.conn.ProcessError(err)
	}

	return e.GetEmojisByIDs(ctx, ids)
}
func (e *emojiDB) GetUseableEmojis(ctx context.Context) ([]*gtsmodel.Emoji, db.Error) {
emojiIDs := []string{}

View file

@ -232,29 +232,6 @@ func (m *mediaDB) DeleteAttachment(ctx context.Context, id string) error {
return m.conn.ProcessError(err)
}
// GetRemoteOlderThan selects the IDs of media attachments that are flagged
// as cached, have a non-NULL `remote_url`, and whose `created_at` is earlier
// than olderThan, then loads them via GetAttachmentsByIDs.
//
// Rows are ordered by `created_at` descending (newest first). A non-zero
// limit caps the number of rows selected.
func (m *mediaDB) GetRemoteOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
	// Select only the ID column from the aliased attachments table.
	query := m.conn.NewSelect().
		TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")).
		Column("media_attachment.id")

	// Apply the three filters, then the ordering.
	query = query.
		Where("? = ?", bun.Ident("media_attachment.cached"), true).
		Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan).
		Where("? IS NOT NULL", bun.Ident("media_attachment.remote_url"))
	query = query.Order("media_attachment.created_at DESC")

	// Optional row cap; zero means no LIMIT is added.
	if limit != 0 {
		query = query.Limit(limit)
	}

	ids := make([]string, 0)
	err := query.Scan(ctx, &ids)
	if err != nil {
		return nil, m.conn.ProcessError(err)
	}

	return m.GetAttachmentsByIDs(ctx, ids)
}
func (m *mediaDB) CountRemoteOlderThan(ctx context.Context, olderThan time.Time) (int, db.Error) {
q := m.conn.
NewSelect().
@ -273,7 +250,7 @@ func (m *mediaDB) CountRemoteOlderThan(ctx context.Context, olderThan time.Time)
}
func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) {
attachmentIDs := []string{}
attachmentIDs := make([]string, 0, limit)
q := m.conn.NewSelect().
Table("media_attachments").
@ -281,7 +258,7 @@ func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) (
Order("id DESC")
if maxID != "" {
q = q.Where("? < ?", bun.Ident("id"), maxID)
q = q.Where("id < ?", maxID)
}
if limit != 0 {
@ -295,8 +272,55 @@ func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) (
return m.GetAttachmentsByIDs(ctx, attachmentIDs)
}
// GetRemoteAttachments selects the IDs of media attachments whose
// `remote_url` column is not NULL, ordered by ID descending, and loads
// them via GetAttachmentsByIDs.
//
// A non-empty maxID restricts the selection to IDs below maxID; a non-zero
// limit caps the number of rows selected. A negative limit is handed to the
// query builder unchanged rather than causing a panic in this function.
func (m *mediaDB) GetRemoteAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) {
	// limit is only a capacity hint for the ID slice. make() panics on a
	// negative capacity, so clamp the hint before allocating.
	capHint := limit
	if capHint < 0 {
		capHint = 0
	}
	attachmentIDs := make([]string, 0, capHint)

	q := m.conn.NewSelect().
		Table("media_attachments").
		Column("id").
		Where("remote_url IS NOT NULL").
		Order("id DESC")

	// Optional paging cursor.
	if maxID != "" {
		q = q.Where("id < ?", maxID)
	}

	// Optional row cap; zero means no LIMIT is added.
	if limit != 0 {
		q = q.Limit(limit)
	}

	if err := q.Scan(ctx, &attachmentIDs); err != nil {
		return nil, m.conn.ProcessError(err)
	}

	return m.GetAttachmentsByIDs(ctx, attachmentIDs)
}
// GetCachedAttachmentsOlderThan selects the IDs of media attachments that
// are flagged as cached, have a non-NULL `remote_url`, and whose
// `created_at` is earlier than olderThan, then loads them via
// GetAttachmentsByIDs.
//
// Rows are ordered by `created_at` descending (newest first). A non-zero
// limit caps the number of rows selected. A negative limit is handed to the
// query builder unchanged rather than causing a panic in this function.
func (m *mediaDB) GetCachedAttachmentsOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
	// limit is only a capacity hint for the ID slice. make() panics on a
	// negative capacity, so clamp the hint before allocating.
	capHint := limit
	if capHint < 0 {
		capHint = 0
	}
	attachmentIDs := make([]string, 0, capHint)

	q := m.conn.
		NewSelect().
		Table("media_attachments").
		Column("id").
		Where("cached = true").
		Where("remote_url IS NOT NULL").
		Where("created_at < ?", olderThan).
		Order("created_at DESC")

	// Optional row cap; zero means no LIMIT is added.
	if limit != 0 {
		q = q.Limit(limit)
	}

	if err := q.Scan(ctx, &attachmentIDs); err != nil {
		return nil, m.conn.ProcessError(err)
	}

	return m.GetAttachmentsByIDs(ctx, attachmentIDs)
}
func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
attachmentIDs := []string{}
attachmentIDs := make([]string, 0, limit)
q := m.conn.NewSelect().
TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")).
@ -324,7 +348,7 @@ func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit
}
func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
attachmentIDs := []string{}
attachmentIDs := make([]string, 0, limit)
q := m.conn.
NewSelect().

View file

@ -38,7 +38,7 @@ func (suite *MediaTestSuite) TestGetAttachmentByID() {
}
func (suite *MediaTestSuite) TestGetOlder() {
attachments, err := suite.db.GetRemoteOlderThan(context.Background(), time.Now(), 20)
attachments, err := suite.db.GetCachedAttachmentsOlderThan(context.Background(), time.Now(), 20)
suite.NoError(err)
suite.Len(attachments, 2)
}

View file

@ -0,0 +1,55 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package migrations
import (
"context"
"strings"
"github.com/uptrace/bun"
)
// init registers the migration that adds a boolean `cached` column to the
// `emojis` table (default false) and then sets it to true for every emoji
// that is not disabled. The down migration is a no-op.
func init() {
	// columnExists reports whether err carries one of the error texts that
	// this migration tolerates when adding the column (i.e. the column is
	// already there, so the ALTER can be skipped).
	columnExists := func(err error) bool {
		msg := err.Error()
		for _, marker := range [...]string{
			"already exists",
			"duplicate column name",
			"SQLSTATE 42701",
		} {
			if strings.Contains(msg, marker) {
				return true
			}
		}
		return false
	}

	up := func(ctx context.Context, db *bun.DB) error {
		// Add the column; a failure is only fatal when it is not one of
		// the tolerated "already there" errors.
		if _, err := db.ExecContext(ctx, "ALTER TABLE emojis ADD COLUMN cached BOOLEAN DEFAULT false"); err != nil {
			if !columnExists(err) {
				return err
			}
		}

		// Mark every non-disabled emoji as cached.
		_, err := db.NewUpdate().
			Table("emojis").
			Where("disabled = false").
			Set("cached = true").
			Exec(ctx)
		return err
	}

	down := func(ctx context.Context, db *bun.DB) error {
		// Nothing to undo; run an empty transaction.
		return db.RunInTx(ctx, nil, func(context.Context, bun.Tx) error {
			return nil
		})
	}

	err := Migrations.Register(up, down)
	if err != nil {
		panic(err)
	}
}

View file

@ -149,7 +149,7 @@ func (r *reportDB) getReport(ctx context.Context, lookup string, dbQuery func(*g
if len(report.StatusIDs) > 0 {
// Fetch reported statuses
report.Statuses, err = r.state.DB.GetStatuses(ctx, report.StatusIDs)
report.Statuses, err = r.state.DB.GetStatusesByIDs(ctx, report.StatusIDs)
if err != nil {
return nil, fmt.Errorf("error getting status mentions: %w", err)
}

View file

@ -19,7 +19,6 @@ package bundb
import (
"context"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
@ -61,40 +60,6 @@ type searchDB struct {
state *state.State
}
// replacer escapes the LIKE wildcard characters common to SQLite and
// Postgres (`%` and `_`), as well as the escape character `\` itself, by
// prefixing each with `\`. It is declared once at package level so that
// it can be reused across queries.
var replacer = strings.NewReplacer([]string{
	// old, new
	`\`, `\\`, // The escape character itself.
	`%`, `\%`, // Wildcard: zero or more characters.
	`_`, `\_`, // Wildcard: exactly one character.
}...)
// whereSubqueryLike adds a WHERE clause to q that uses LIKE to look for
// searchQuery anywhere within the result of subQuery.
//
// Wildcard and escape characters already present in searchQuery are
// escaped first, so they are matched literally; the escaped text is then
// wrapped in `%` on both sides.
func whereSubqueryLike(
	q *bun.SelectQuery,
	subQuery *bun.SelectQuery,
	searchQuery string,
) *bun.SelectQuery {
	// Escape the caller's text, then surround it with
	// our own "zero or more characters" wildcards.
	pattern := `%` + replacer.Replace(searchQuery) + `%`

	// Attach the clause to the main query.
	q = q.Where("(?) LIKE ? ESCAPE ?", subQuery, pattern, `\`)
	return q
}
// Query example (SQLite):
//
// SELECT "account"."id" FROM "accounts" AS "account"
@ -167,7 +132,7 @@ func (s *searchDB) SearchForAccounts(
// Search using LIKE for matches of query
// string within accountText subquery.
q = whereSubqueryLike(q, accountTextSubq, query)
q = whereLike(q, accountTextSubq, query)
if limit > 0 {
// Limit amount of accounts returned.
@ -345,7 +310,7 @@ func (s *searchDB) SearchForStatuses(
// Search using LIKE for matches of query
// string within statusText subquery.
q = whereSubqueryLike(q, statusTextSubq, query)
q = whereLike(q, statusTextSubq, query)
if limit > 0 {
// Limit amount of statuses returned.

View file

@ -58,18 +58,18 @@ func (s *statusDB) GetStatusByID(ctx context.Context, id string) (*gtsmodel.Stat
)
}
func (s *statusDB) GetStatuses(ctx context.Context, ids []string) ([]*gtsmodel.Status, db.Error) {
func (s *statusDB) GetStatusesByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Status, error) {
statuses := make([]*gtsmodel.Status, 0, len(ids))
for _, id := range ids {
// Attempt fetch from DB
// Attempt to fetch status from DB.
status, err := s.GetStatusByID(ctx, id)
if err != nil {
log.Errorf(ctx, "error getting status %q: %v", id, err)
continue
}
// Append status
// Append status to return slice.
statuses = append(statuses, status)
}
@ -429,6 +429,34 @@ func (s *statusDB) DeleteStatusByID(ctx context.Context, id string) db.Error {
})
}
// GetStatusesUsingEmoji selects status IDs from the `statuses` table,
// filtered through whereLike on `emojis` and the given emoji ID, and then
// converts those IDs into status models with GetStatusesByIDs.
//
// A LIKE match is used rather than `WHERE ? IN (emojis)` because the latter
// ends up being much slower, and the database stores this ID-array column
// as text anyway.
func (s *statusDB) GetStatusesUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Status, error) {
	// SELECT id FROM statuses ...
	base := s.conn.NewSelect().Table("statuses").Column("id")

	// ... with the LIKE filter appended by whereLike.
	filtered := whereLike(base, "emojis", emojiID)

	// Run the query, scanning the matching IDs into ids.
	var ids []string
	_, err := filtered.Exec(ctx, &ids)
	if err != nil {
		return nil, s.conn.ProcessError(err)
	}

	// Resolve the IDs to status objects.
	return s.GetStatusesByIDs(ctx, ids)
}
func (s *statusDB) GetStatusParents(ctx context.Context, status *gtsmodel.Status, onlyDirect bool) ([]*gtsmodel.Status, db.Error) {
if onlyDirect {
// Only want the direct parent, no further than first level

View file

@ -50,13 +50,13 @@ func (suite *StatusTestSuite) TestGetStatusByID() {
suite.True(*status.Likeable)
}
func (suite *StatusTestSuite) TestGetStatusesByID() {
func (suite *StatusTestSuite) TestGetStatusesByIDs() {
ids := []string{
suite.testStatuses["local_account_1_status_1"].ID,
suite.testStatuses["local_account_2_status_3"].ID,
}
statuses, err := suite.db.GetStatuses(context.Background(), ids)
statuses, err := suite.db.GetStatusesByIDs(context.Background(), ids)
if err != nil {
suite.FailNow(err.Error())
}

View file

@ -18,10 +18,46 @@
package bundb
import (
"strings"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/uptrace/bun"
)
// likeEscaper escapes the LIKE wildcard characters common to SQLite and
// Postgres (`%` and `_`), as well as the escape character `\` itself, by
// prefixing each with `\`. It is built once at package level so that it
// can be reused across queries.
var likeEscaper = func() *strings.Replacer {
	// esc is the character used with `ESCAPE` in LIKE clauses.
	const esc = `\`

	return strings.NewReplacer(
		esc, esc+esc, // The escape character itself.
		`%`, esc+`%`, // Wildcard: zero or more characters.
		`_`, esc+`_`, // Wildcard: exactly one character.
	)
}()
// whereLike appends a WHERE clause to the given SelectQuery which
// searches for matches of `search` within `subject` using LIKE.
//
// subject may be either:
//   - a plain string, which is treated as a column name and
//     rendered as a quoted SQL identifier; or
//   - any other query argument (for example a *bun.SelectQuery
//     subquery), which is passed through unchanged.
//
// Wildcard and escape characters already present in search are
// escaped so that they match literally; the escaped text is then
// wrapped in `%` so it matches anywhere within subject.
func whereLike(
	query *bun.SelectQuery,
	subject interface{},
	search string,
) *bun.SelectQuery {
	// A plain Go string argument is rendered by bun as a quoted
	// SQL string literal, so a column name passed as a string
	// would be compared as constant text instead of the column's
	// contents. Wrap it so it is rendered as an identifier.
	if column, ok := subject.(string); ok {
		subject = bun.Ident(column)
	}

	// Escape existing wildcard + escape
	// chars in the search query string.
	search = likeEscaper.Replace(search)

	// Add our own wildcards back in; search
	// zero or more chars around the query.
	search = `%` + search + `%`

	// Append resulting WHERE
	// clause to the main query.
	return query.Where(
		"(?) LIKE ? ESCAPE ?",
		subject, search, `\`,
	)
}
// updateWhere parses []db.Where and adds it to the given update query.
func updateWhere(q *bun.UpdateQuery, where []db.Where) {
for _, w := range where {

View file

@ -19,6 +19,7 @@ package db
import (
"context"
"time"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
@ -40,8 +41,16 @@ type Emoji interface {
GetEmojisByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Emoji, Error)
// GetUseableEmojis gets all emojis which are useable by accounts on this instance.
GetUseableEmojis(ctx context.Context) ([]*gtsmodel.Emoji, Error)
// GetEmojis ...
// GetEmojis fetches all emojis with IDs less than 'maxID', up to a maximum of 'limit' emojis.
GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error)
// GetRemoteEmojis fetches all remote emojis with IDs less than 'maxID', up to a maximum of 'limit' emojis.
GetRemoteEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error)
// GetCachedEmojisOlderThan fetches all cached remote emojis with 'created_at' earlier than 'olderThan', newest first, up to a maximum of 'limit' emojis.
GetCachedEmojisOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.Emoji, error)
// GetEmojisBy gets emojis based on given parameters. Useful for admin actions.
GetEmojisBy(ctx context.Context, domain string, includeDisabled bool, includeEnabled bool, shortcode string, maxShortcodeDomain string, minShortcodeDomain string, limit int) ([]*gtsmodel.Emoji, error)
// GetEmojiByID gets a specific emoji by its database ID.

View file

@ -44,12 +44,12 @@ type Media interface {
// GetAttachments fetches media attachments with IDs less than 'maxID', up to a maximum of 'limit' attachments.
GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error)
// GetRemoteOlderThan gets limit n remote media attachments (including avatars and headers) older than the given
// olderThan time. These will be returned in order of attachment.created_at descending (newest to oldest in other words).
//
// The selected media attachments will be those with both a URL and a RemoteURL filled in.
// In other words, media attachments that originated remotely, and that we currently have cached locally.
GetRemoteOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, Error)
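// GetRemoteAttachments fetches remote media attachments (those with a remote URL set) with IDs less than 'maxID', up to a maximum of 'limit' attachments.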
GetRemoteAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error)
// GetCachedAttachmentsOlderThan gets limit n remote attachments (including avatars and headers) older than
// the given time. These will be returned in order of attachment.created_at descending (i.e. newest to oldest).
GetCachedAttachmentsOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, Error)
// CountRemoteOlderThan is like GetCachedAttachmentsOlderThan, except instead of getting limit n attachments,
// it just counts how many remote attachments in the database (including avatars and headers) meet

View file

@ -28,9 +28,6 @@ type Status interface {
// GetStatusByID returns one status from the database, with no rel fields populated, only their linking ID / URIs
GetStatusByID(ctx context.Context, id string) (*gtsmodel.Status, Error)
// GetStatuses gets a slice of statuses corresponding to the given status IDs.
GetStatuses(ctx context.Context, ids []string) ([]*gtsmodel.Status, Error)
// GetStatusByURI returns one status from the database, with no rel fields populated, only their linking ID / URIs
GetStatusByURI(ctx context.Context, uri string) (*gtsmodel.Status, Error)
@ -58,6 +55,12 @@ type Status interface {
// CountStatusFaves returns the amount of faves/likes recorded for a status, or an error if something goes wrong
CountStatusFaves(ctx context.Context, status *gtsmodel.Status) (int, Error)
// GetStatusesByIDs gets a slice of statuses corresponding to the given status IDs.
GetStatusesByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Status, error)
// GetStatusesUsingEmoji fetches all status models using emoji with given ID stored in their 'emojis' column.
GetStatusesUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Status, error)
// GetStatusParents gets the parent statuses of a given status.
//
// If onlyDirect is true, only the immediate parent will be returned.