mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-30 23:22:26 -05:00 
			
		
		
		
	[performance] cache v2 filter keyword regular expressions (#2903)
* add caching of filterkeyword regular expressions
* formatting
* fix WholeWord nil check
This commit is contained in:
		
					parent
					
						
							
								6c0d93c6cb
							
						
					
				
			
			
				commit
				
					
						b092da6d28
					
				
			
		
					 5 changed files with 85 additions and 36 deletions
				
			
		
							
								
								
									
										5
									
								
								internal/cache/db.go
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										5
									
								
								internal/cache/db.go
									
										
									
									
										vendored
									
									
								
							|  | @ -531,6 +531,11 @@ func (c *Caches) initFilterKeyword() { | ||||||
| 		// See internal/db/bundb/filter.go. | 		// See internal/db/bundb/filter.go. | ||||||
| 		filterKeyword2.Filter = nil | 		filterKeyword2.Filter = nil | ||||||
| 
 | 
 | ||||||
|  | 		// We specifically DO NOT unset | ||||||
|  | 		// the regexp field here, as any | ||||||
|  | 		// regexp.Regexp instance is safe | ||||||
|  | 		// for concurrent access. | ||||||
|  | 
 | ||||||
| 		return filterKeyword2 | 		return filterKeyword2 | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -25,6 +25,7 @@ import ( | ||||||
| 	"github.com/superseriousbusiness/gotosocial/internal/gtscontext" | 	"github.com/superseriousbusiness/gotosocial/internal/gtscontext" | ||||||
| 	"github.com/superseriousbusiness/gotosocial/internal/gtserror" | 	"github.com/superseriousbusiness/gotosocial/internal/gtserror" | ||||||
| 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" | 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" | ||||||
|  | 	"github.com/superseriousbusiness/gotosocial/internal/log" | ||||||
| 	"github.com/superseriousbusiness/gotosocial/internal/util" | 	"github.com/superseriousbusiness/gotosocial/internal/util" | ||||||
| 	"github.com/uptrace/bun" | 	"github.com/uptrace/bun" | ||||||
| ) | ) | ||||||
|  | @ -34,12 +35,22 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod | ||||||
| 		"ID", | 		"ID", | ||||||
| 		func() (*gtsmodel.FilterKeyword, error) { | 		func() (*gtsmodel.FilterKeyword, error) { | ||||||
| 			var filterKeyword gtsmodel.FilterKeyword | 			var filterKeyword gtsmodel.FilterKeyword | ||||||
| 			err := f.db. | 
 | ||||||
|  | 			// Scan from DB. | ||||||
|  | 			if err := f.db. | ||||||
| 				NewSelect(). | 				NewSelect(). | ||||||
| 				Model(&filterKeyword). | 				Model(&filterKeyword). | ||||||
| 				Where("? = ?", bun.Ident("id"), id). | 				Where("? = ?", bun.Ident("id"), id). | ||||||
| 				Scan(ctx) | 				Scan(ctx); err != nil { | ||||||
| 			return &filterKeyword, err | 				return nil, err | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			// Pre-compile filter keyword regular expression. | ||||||
|  | 			if err := filterKeyword.Compile(); err != nil { | ||||||
|  | 				return nil, gtserror.Newf("error compiling filter keyword regex: %w", err) | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			return &filterKeyword, nil | ||||||
| 		}, | 		}, | ||||||
| 		id, | 		id, | ||||||
| 	) | 	) | ||||||
|  | @ -57,20 +68,20 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod | ||||||
| 	return filterKeyword, nil | 	return filterKeyword, nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error { | func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) (err error) { | ||||||
| 	if filterKeyword.Filter == nil { | 	if filterKeyword.Filter == nil { | ||||||
| 		// Filter is not set, fetch from the cache or database. | 		// Filter is not set, fetch from the cache or database. | ||||||
| 		filter, err := f.state.DB.GetFilterByID( | 		filterKeyword.Filter, err = f.state.DB.GetFilterByID( | ||||||
| 			// Don't populate the filter with all of its keywords and statuses or we'll just end up back here. | 
 | ||||||
|  | 			// Don't populate the filter with all of its keywords | ||||||
|  | 			// and statuses or we'll just end up back here. | ||||||
| 			gtscontext.SetBarebones(ctx), | 			gtscontext.SetBarebones(ctx), | ||||||
| 			filterKeyword.FilterID, | 			filterKeyword.FilterID, | ||||||
| 		) | 		) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return err | 			return err | ||||||
| 		} | 		} | ||||||
| 		filterKeyword.Filter = filter |  | ||||||
| 	} | 	} | ||||||
| 
 |  | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -84,6 +95,7 @@ func (f *filterDB) GetFilterKeywordsForAccountID(ctx context.Context, accountID | ||||||
| 
 | 
 | ||||||
| func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id string) ([]*gtsmodel.FilterKeyword, error) { | func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id string) ([]*gtsmodel.FilterKeyword, error) { | ||||||
| 	var filterKeywordIDs []string | 	var filterKeywordIDs []string | ||||||
|  | 
 | ||||||
| 	if err := f.db. | 	if err := f.db. | ||||||
| 		NewSelect(). | 		NewSelect(). | ||||||
| 		Model((*gtsmodel.FilterKeyword)(nil)). | 		Model((*gtsmodel.FilterKeyword)(nil)). | ||||||
|  | @ -92,6 +104,7 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st | ||||||
| 		Scan(ctx, &filterKeywordIDs); err != nil { | 		Scan(ctx, &filterKeywordIDs); err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
| 	if len(filterKeywordIDs) == 0 { | 	if len(filterKeywordIDs) == 0 { | ||||||
| 		return nil, nil | 		return nil, nil | ||||||
| 	} | 	} | ||||||
|  | @ -101,6 +114,8 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st | ||||||
| 		filterKeywordIDs, | 		filterKeywordIDs, | ||||||
| 		func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) { | 		func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) { | ||||||
| 			uncachedFilterKeywords := make([]*gtsmodel.FilterKeyword, 0, len(uncachedFilterKeywordIDs)) | 			uncachedFilterKeywords := make([]*gtsmodel.FilterKeyword, 0, len(uncachedFilterKeywordIDs)) | ||||||
|  | 
 | ||||||
|  | 			// Scan from DB. | ||||||
| 			if err := f.db. | 			if err := f.db. | ||||||
| 				NewSelect(). | 				NewSelect(). | ||||||
| 				Model(&uncachedFilterKeywords). | 				Model(&uncachedFilterKeywords). | ||||||
|  | @ -108,6 +123,16 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st | ||||||
| 				Scan(ctx); err != nil { | 				Scan(ctx); err != nil { | ||||||
| 				return nil, err | 				return nil, err | ||||||
| 			} | 			} | ||||||
|  | 
 | ||||||
|  | 			// Compile all the keyword regular expressions. | ||||||
|  | 			uncachedFilterKeywords = slices.DeleteFunc(uncachedFilterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool { | ||||||
|  | 				if err := filterKeyword.Compile(); err != nil { | ||||||
|  | 					log.Errorf(ctx, "error compiling filter keyword regex: %v", err) | ||||||
|  | 					return true | ||||||
|  | 				} | ||||||
|  | 				return false | ||||||
|  | 			}) | ||||||
|  | 
 | ||||||
| 			return uncachedFilterKeywords, nil | 			return uncachedFilterKeywords, nil | ||||||
| 		}, | 		}, | ||||||
| 	) | 	) | ||||||
|  | @ -125,23 +150,26 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// Populate the filter keywords. Remove any that we can't populate from the return slice. | 	// Populate the filter keywords. Remove any that we can't populate from the return slice. | ||||||
| 	errs := gtserror.NewMultiError(len(filterKeywords)) |  | ||||||
| 	filterKeywords = slices.DeleteFunc(filterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool { | 	filterKeywords = slices.DeleteFunc(filterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool { | ||||||
| 		if err := f.populateFilterKeyword(ctx, filterKeyword); err != nil { | 		if err := f.populateFilterKeyword(ctx, filterKeyword); err != nil { | ||||||
| 			errs.Appendf( | 			log.Errorf(ctx, "error populating filter keyword: %v", err) | ||||||
| 				"error populating filter keyword %s: %w", |  | ||||||
| 				filterKeyword.ID, |  | ||||||
| 				err, |  | ||||||
| 			) |  | ||||||
| 			return true | 			return true | ||||||
| 		} | 		} | ||||||
| 		return false | 		return false | ||||||
| 	}) | 	}) | ||||||
| 
 | 
 | ||||||
| 	return filterKeywords, errs.Combine() | 	return filterKeywords, nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (f *filterDB) PutFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error { | func (f *filterDB) PutFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error { | ||||||
|  | 	if filterKeyword.Regexp == nil { | ||||||
|  | 		// Ensure regexp is compiled | ||||||
|  | 		// before attempted caching. | ||||||
|  | 		err := filterKeyword.Compile() | ||||||
|  | 		if err != nil { | ||||||
|  | 			return gtserror.Newf("error compiling filter keyword regex: %w", err) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
| 	return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error { | 	return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error { | ||||||
| 		_, err := f.db. | 		_, err := f.db. | ||||||
| 			NewInsert(). | 			NewInsert(). | ||||||
|  | @ -156,7 +184,14 @@ func (f *filterDB) UpdateFilterKeyword(ctx context.Context, filterKeyword *gtsmo | ||||||
| 	if len(columns) > 0 { | 	if len(columns) > 0 { | ||||||
| 		columns = append(columns, "updated_at") | 		columns = append(columns, "updated_at") | ||||||
| 	} | 	} | ||||||
| 
 | 	if filterKeyword.Regexp == nil { | ||||||
|  | 		// Ensure regexp is compiled | ||||||
|  | 		// before attempted caching. | ||||||
|  | 		err := filterKeyword.Compile() | ||||||
|  | 		if err != nil { | ||||||
|  | 			return gtserror.Newf("error compiling filter keyword regex: %w", err) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
| 	return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error { | 	return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error { | ||||||
| 		_, err := f.db. | 		_, err := f.db. | ||||||
| 			NewUpdate(). | 			NewUpdate(). | ||||||
|  |  | ||||||
|  | @ -17,7 +17,10 @@ | ||||||
| 
 | 
 | ||||||
| package gtsmodel | package gtsmodel | ||||||
| 
 | 
 | ||||||
| import "time" | import ( | ||||||
|  | 	"regexp" | ||||||
|  | 	"time" | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| // Filter stores a filter created by a local account. | // Filter stores a filter created by a local account. | ||||||
| type Filter struct { | type Filter struct { | ||||||
|  | @ -39,14 +42,28 @@ type Filter struct { | ||||||
| 
 | 
 | ||||||
| // FilterKeyword stores a single keyword to filter statuses against. | // FilterKeyword stores a single keyword to filter statuses against. | ||||||
| type FilterKeyword struct { | type FilterKeyword struct { | ||||||
| 	ID        string    `bun:"type:CHAR(26),pk,nullzero,notnull,unique"`                                     // id of this item in the database | 	ID        string         `bun:"type:CHAR(26),pk,nullzero,notnull,unique"`                                     // id of this item in the database | ||||||
| 	CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item created | 	CreatedAt time.Time      `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item created | ||||||
| 	UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item last updated | 	UpdatedAt time.Time      `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item last updated | ||||||
| 	AccountID string    `bun:"type:CHAR(26),notnull,nullzero"`                                               // ID of the local account that created the filter keyword. | 	AccountID string         `bun:"type:CHAR(26),notnull,nullzero"`                                               // ID of the local account that created the filter keyword. | ||||||
| 	FilterID  string    `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to. | 	FilterID  string         `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to. | ||||||
| 	Filter    *Filter   `bun:"-"`                                                                            // Filter corresponding to FilterID | 	Filter    *Filter        `bun:"-"`                                                                            // Filter corresponding to FilterID | ||||||
| 	Keyword   string    `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"`              // The keyword or phrase to filter against. | 	Keyword   string         `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"`              // The keyword or phrase to filter against. | ||||||
| 	WholeWord *bool     `bun:",nullzero,notnull,default:false"`                                              // Should the filter consider word boundaries? | 	WholeWord *bool          `bun:",nullzero,notnull,default:false"`                                              // Should the filter consider word boundaries? | ||||||
|  | 	Regexp    *regexp.Regexp `bun:"-"`                                                                            // pre-prepared regular expression | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Compile will compile this FilterKeyword as a prepared regular expression. | ||||||
|  | func (k *FilterKeyword) Compile() (err error) { | ||||||
|  | 	var wordBreak string | ||||||
|  | 	if k.WholeWord != nil && *k.WholeWord { | ||||||
|  | 		wordBreak = `\b` | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Compile keyword filter regexp. | ||||||
|  | 	quoted := regexp.QuoteMeta(k.Keyword) | ||||||
|  | 	k.Regexp, err = regexp.Compile(`(?i)` + wordBreak + quoted + wordBreak) | ||||||
|  | 	return // caller is expected to wrap this error | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // FilterStatus stores a single status to filter. | // FilterStatus stores a single status to filter. | ||||||
|  |  | ||||||
|  | @ -22,7 +22,6 @@ import ( | ||||||
| 	"errors" | 	"errors" | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"math" | 	"math" | ||||||
| 	"regexp" |  | ||||||
| 	"strconv" | 	"strconv" | ||||||
| 	"strings" | 	"strings" | ||||||
| 	"time" | 	"time" | ||||||
|  | @ -746,18 +745,9 @@ func (c *Converter) statusToAPIFilterResults( | ||||||
| 		keywordMatches := make([]string, 0, len(filter.Keywords)) | 		keywordMatches := make([]string, 0, len(filter.Keywords)) | ||||||
| 		fields := filterableTextFields(s) | 		fields := filterableTextFields(s) | ||||||
| 		for _, filterKeyword := range filter.Keywords { | 		for _, filterKeyword := range filter.Keywords { | ||||||
| 			wholeWord := util.PtrValueOr(filterKeyword.WholeWord, false) |  | ||||||
| 			wordBreak := `` |  | ||||||
| 			if wholeWord { |  | ||||||
| 				wordBreak = `\b` |  | ||||||
| 			} |  | ||||||
| 			re, err := regexp.Compile(`(?i)` + wordBreak + regexp.QuoteMeta(filterKeyword.Keyword) + wordBreak) |  | ||||||
| 			if err != nil { |  | ||||||
| 				return nil, err |  | ||||||
| 			} |  | ||||||
| 			var isMatch bool | 			var isMatch bool | ||||||
| 			for _, field := range fields { | 			for _, field := range fields { | ||||||
| 				if re.MatchString(field) { | 				if filterKeyword.Regexp.MatchString(field) { | ||||||
| 					isMatch = true | 					isMatch = true | ||||||
| 					break | 					break | ||||||
| 				} | 				} | ||||||
|  |  | ||||||
|  | @ -546,6 +546,7 @@ func (suite *InternalToFrontendTestSuite) TestWarnFilteredStatusToFrontend() { | ||||||
| 	requestingAccount := suite.testAccounts["local_account_1"] | 	requestingAccount := suite.testAccounts["local_account_1"] | ||||||
| 	expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"] | 	expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"] | ||||||
| 	expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] | 	expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] | ||||||
|  | 	suite.NoError(expectedMatchingFilterKeyword.Compile()) | ||||||
| 	expectedMatchingFilterKeyword.Filter = expectedMatchingFilter | 	expectedMatchingFilterKeyword.Filter = expectedMatchingFilter | ||||||
| 	expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword} | 	expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword} | ||||||
| 	requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter} | 	requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter} | ||||||
|  | @ -700,6 +701,7 @@ func (suite *InternalToFrontendTestSuite) TestHideFilteredStatusToFrontend() { | ||||||
| 	expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"] | 	expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"] | ||||||
| 	expectedMatchingFilter.Action = gtsmodel.FilterActionHide | 	expectedMatchingFilter.Action = gtsmodel.FilterActionHide | ||||||
| 	expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] | 	expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] | ||||||
|  | 	suite.NoError(expectedMatchingFilterKeyword.Compile()) | ||||||
| 	expectedMatchingFilterKeyword.Filter = expectedMatchingFilter | 	expectedMatchingFilterKeyword.Filter = expectedMatchingFilter | ||||||
| 	expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword} | 	expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword} | ||||||
| 	requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter} | 	requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter} | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue