mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-31 14:52:30 -05:00 
			
		
		
		
	[performance] cache v2 filter keyword regular expressions (#2903)
* add caching of filterkeyword regular expressions * formatting * fix WholeWord nil check
This commit is contained in:
		
					parent
					
						
							
								6c0d93c6cb
							
						
					
				
			
			
				commit
				
					
						b092da6d28
					
				
			
		
					 5 changed files with 85 additions and 36 deletions
				
			
		
							
								
								
									
										5
									
								
								internal/cache/db.go
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										5
									
								
								internal/cache/db.go
									
										
									
									
										vendored
									
									
								
							|  | @ -531,6 +531,11 @@ func (c *Caches) initFilterKeyword() { | |||
| 		// See internal/db/bundb/filter.go. | ||||
| 		filterKeyword2.Filter = nil | ||||
| 
 | ||||
| 		// We specifically DO NOT unset | ||||
| 		// the regexp field here, as any | ||||
| 		// regexp.Regexp instance is safe | ||||
| 		// for concurrent access. | ||||
| 
 | ||||
| 		return filterKeyword2 | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -25,6 +25,7 @@ import ( | |||
| 	"github.com/superseriousbusiness/gotosocial/internal/gtscontext" | ||||
| 	"github.com/superseriousbusiness/gotosocial/internal/gtserror" | ||||
| 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" | ||||
| 	"github.com/superseriousbusiness/gotosocial/internal/log" | ||||
| 	"github.com/superseriousbusiness/gotosocial/internal/util" | ||||
| 	"github.com/uptrace/bun" | ||||
| ) | ||||
|  | @ -34,12 +35,22 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod | |||
| 		"ID", | ||||
| 		func() (*gtsmodel.FilterKeyword, error) { | ||||
| 			var filterKeyword gtsmodel.FilterKeyword | ||||
| 			err := f.db. | ||||
| 
 | ||||
| 			// Scan from DB. | ||||
| 			if err := f.db. | ||||
| 				NewSelect(). | ||||
| 				Model(&filterKeyword). | ||||
| 				Where("? = ?", bun.Ident("id"), id). | ||||
| 				Scan(ctx) | ||||
| 			return &filterKeyword, err | ||||
| 				Scan(ctx); err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 
 | ||||
| 			// Pre-compile filter keyword regular expression. | ||||
| 			if err := filterKeyword.Compile(); err != nil { | ||||
| 				return nil, gtserror.Newf("error compiling filter keyword regex: %w", err) | ||||
| 			} | ||||
| 
 | ||||
| 			return &filterKeyword, nil | ||||
| 		}, | ||||
| 		id, | ||||
| 	) | ||||
|  | @ -57,20 +68,20 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod | |||
| 	return filterKeyword, nil | ||||
| } | ||||
| 
 | ||||
| func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error { | ||||
| func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) (err error) { | ||||
| 	if filterKeyword.Filter == nil { | ||||
| 		// Filter is not set, fetch from the cache or database. | ||||
| 		filter, err := f.state.DB.GetFilterByID( | ||||
| 			// Don't populate the filter with all of its keywords and statuses or we'll just end up back here. | ||||
| 		filterKeyword.Filter, err = f.state.DB.GetFilterByID( | ||||
| 
 | ||||
| 			// Don't populate the filter with all of its keywords | ||||
| 			// and statuses or we'll just end up back here. | ||||
| 			gtscontext.SetBarebones(ctx), | ||||
| 			filterKeyword.FilterID, | ||||
| 		) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		filterKeyword.Filter = filter | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
|  | @ -84,6 +95,7 @@ func (f *filterDB) GetFilterKeywordsForAccountID(ctx context.Context, accountID | |||
| 
 | ||||
| func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id string) ([]*gtsmodel.FilterKeyword, error) { | ||||
| 	var filterKeywordIDs []string | ||||
| 
 | ||||
| 	if err := f.db. | ||||
| 		NewSelect(). | ||||
| 		Model((*gtsmodel.FilterKeyword)(nil)). | ||||
|  | @ -92,6 +104,7 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st | |||
| 		Scan(ctx, &filterKeywordIDs); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if len(filterKeywordIDs) == 0 { | ||||
| 		return nil, nil | ||||
| 	} | ||||
|  | @ -101,6 +114,8 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st | |||
| 		filterKeywordIDs, | ||||
| 		func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) { | ||||
| 			uncachedFilterKeywords := make([]*gtsmodel.FilterKeyword, 0, len(uncachedFilterKeywordIDs)) | ||||
| 
 | ||||
| 			// Scan from DB. | ||||
| 			if err := f.db. | ||||
| 				NewSelect(). | ||||
| 				Model(&uncachedFilterKeywords). | ||||
|  | @ -108,6 +123,16 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st | |||
| 				Scan(ctx); err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 
 | ||||
| 			// Compile all the keyword regular expressions. | ||||
| 			uncachedFilterKeywords = slices.DeleteFunc(uncachedFilterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool { | ||||
| 				if err := filterKeyword.Compile(); err != nil { | ||||
| 					log.Errorf(ctx, "error compiling filter keyword regex: %v", err) | ||||
| 					return true | ||||
| 				} | ||||
| 				return false | ||||
| 			}) | ||||
| 
 | ||||
| 			return uncachedFilterKeywords, nil | ||||
| 		}, | ||||
| 	) | ||||
|  | @ -125,23 +150,26 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st | |||
| 	} | ||||
| 
 | ||||
| 	// Populate the filter keywords. Remove any that we can't populate from the return slice. | ||||
| 	errs := gtserror.NewMultiError(len(filterKeywords)) | ||||
| 	filterKeywords = slices.DeleteFunc(filterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool { | ||||
| 		if err := f.populateFilterKeyword(ctx, filterKeyword); err != nil { | ||||
| 			errs.Appendf( | ||||
| 				"error populating filter keyword %s: %w", | ||||
| 				filterKeyword.ID, | ||||
| 				err, | ||||
| 			) | ||||
| 			log.Errorf(ctx, "error populating filter keyword: %v", err) | ||||
| 			return true | ||||
| 		} | ||||
| 		return false | ||||
| 	}) | ||||
| 
 | ||||
| 	return filterKeywords, errs.Combine() | ||||
| 	return filterKeywords, nil | ||||
| } | ||||
| 
 | ||||
| func (f *filterDB) PutFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error { | ||||
| 	if filterKeyword.Regexp == nil { | ||||
| 		// Ensure regexp is compiled | ||||
| 		// before attempted caching. | ||||
| 		err := filterKeyword.Compile() | ||||
| 		if err != nil { | ||||
| 			return gtserror.Newf("error compiling filter keyword regex: %w", err) | ||||
| 		} | ||||
| 	} | ||||
| 	return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error { | ||||
| 		_, err := f.db. | ||||
| 			NewInsert(). | ||||
|  | @ -156,7 +184,14 @@ func (f *filterDB) UpdateFilterKeyword(ctx context.Context, filterKeyword *gtsmo | |||
| 	if len(columns) > 0 { | ||||
| 		columns = append(columns, "updated_at") | ||||
| 	} | ||||
| 
 | ||||
| 	if filterKeyword.Regexp == nil { | ||||
| 		// Ensure regexp is compiled | ||||
| 		// before attempted caching. | ||||
| 		err := filterKeyword.Compile() | ||||
| 		if err != nil { | ||||
| 			return gtserror.Newf("error compiling filter keyword regex: %w", err) | ||||
| 		} | ||||
| 	} | ||||
| 	return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error { | ||||
| 		_, err := f.db. | ||||
| 			NewUpdate(). | ||||
|  |  | |||
|  | @ -17,7 +17,10 @@ | |||
| 
 | ||||
| package gtsmodel | ||||
| 
 | ||||
| import "time" | ||||
| import ( | ||||
| 	"regexp" | ||||
| 	"time" | ||||
| ) | ||||
| 
 | ||||
| // Filter stores a filter created by a local account. | ||||
| type Filter struct { | ||||
|  | @ -39,14 +42,28 @@ type Filter struct { | |||
| 
 | ||||
| // FilterKeyword stores a single keyword to filter statuses against. | ||||
| type FilterKeyword struct { | ||||
| 	ID        string    `bun:"type:CHAR(26),pk,nullzero,notnull,unique"`                                     // id of this item in the database | ||||
| 	CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item created | ||||
| 	UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item last updated | ||||
| 	AccountID string    `bun:"type:CHAR(26),notnull,nullzero"`                                               // ID of the local account that created the filter keyword. | ||||
| 	FilterID  string    `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to. | ||||
| 	Filter    *Filter   `bun:"-"`                                                                            // Filter corresponding to FilterID | ||||
| 	Keyword   string    `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"`              // The keyword or phrase to filter against. | ||||
| 	WholeWord *bool     `bun:",nullzero,notnull,default:false"`                                              // Should the filter consider word boundaries? | ||||
| 	ID        string         `bun:"type:CHAR(26),pk,nullzero,notnull,unique"`                                     // id of this item in the database | ||||
| 	CreatedAt time.Time      `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item created | ||||
| 	UpdatedAt time.Time      `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item last updated | ||||
| 	AccountID string         `bun:"type:CHAR(26),notnull,nullzero"`                                               // ID of the local account that created the filter keyword. | ||||
| 	FilterID  string         `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to. | ||||
| 	Filter    *Filter        `bun:"-"`                                                                            // Filter corresponding to FilterID | ||||
| 	Keyword   string         `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"`              // The keyword or phrase to filter against. | ||||
| 	WholeWord *bool          `bun:",nullzero,notnull,default:false"`                                              // Should the filter consider word boundaries? | ||||
| 	Regexp    *regexp.Regexp `bun:"-"`                                                                            // pre-prepared regular expression | ||||
| } | ||||
| 
 | ||||
| // Compile will compile this FilterKeyword as a prepared regular expression. | ||||
| func (k *FilterKeyword) Compile() (err error) { | ||||
| 	var wordBreak string | ||||
| 	if k.WholeWord != nil && *k.WholeWord { | ||||
| 		wordBreak = `\b` | ||||
| 	} | ||||
| 
 | ||||
| 	// Compile keyword filter regexp. | ||||
| 	quoted := regexp.QuoteMeta(k.Keyword) | ||||
| 	k.Regexp, err = regexp.Compile(`(?i)` + wordBreak + quoted + wordBreak) | ||||
| 	return // caller is expected to wrap this error | ||||
| } | ||||
| 
 | ||||
| // FilterStatus stores a single status to filter. | ||||
|  |  | |||
|  | @ -22,7 +22,6 @@ import ( | |||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"math" | ||||
| 	"regexp" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"time" | ||||
|  | @ -746,18 +745,9 @@ func (c *Converter) statusToAPIFilterResults( | |||
| 		keywordMatches := make([]string, 0, len(filter.Keywords)) | ||||
| 		fields := filterableTextFields(s) | ||||
| 		for _, filterKeyword := range filter.Keywords { | ||||
| 			wholeWord := util.PtrValueOr(filterKeyword.WholeWord, false) | ||||
| 			wordBreak := `` | ||||
| 			if wholeWord { | ||||
| 				wordBreak = `\b` | ||||
| 			} | ||||
| 			re, err := regexp.Compile(`(?i)` + wordBreak + regexp.QuoteMeta(filterKeyword.Keyword) + wordBreak) | ||||
| 			if err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 			var isMatch bool | ||||
| 			for _, field := range fields { | ||||
| 				if re.MatchString(field) { | ||||
| 				if filterKeyword.Regexp.MatchString(field) { | ||||
| 					isMatch = true | ||||
| 					break | ||||
| 				} | ||||
|  |  | |||
|  | @ -546,6 +546,7 @@ func (suite *InternalToFrontendTestSuite) TestWarnFilteredStatusToFrontend() { | |||
| 	requestingAccount := suite.testAccounts["local_account_1"] | ||||
| 	expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"] | ||||
| 	expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] | ||||
| 	suite.NoError(expectedMatchingFilterKeyword.Compile()) | ||||
| 	expectedMatchingFilterKeyword.Filter = expectedMatchingFilter | ||||
| 	expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword} | ||||
| 	requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter} | ||||
|  | @ -700,6 +701,7 @@ func (suite *InternalToFrontendTestSuite) TestHideFilteredStatusToFrontend() { | |||
| 	expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"] | ||||
| 	expectedMatchingFilter.Action = gtsmodel.FilterActionHide | ||||
| 	expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] | ||||
| 	suite.NoError(expectedMatchingFilterKeyword.Compile()) | ||||
| 	expectedMatchingFilterKeyword.Filter = expectedMatchingFilter | ||||
| 	expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword} | ||||
| 	requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter} | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue