mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-30 22:42:24 -05:00 
			
		
		
		
	[feature] Cleanup unattached local media (#680)
* add localUnattached db function * add parseOlderThan util function * add pruneunusedlocalattachments to media manager * add unusedlocal pruning to schedule + admin call * set number of days to keep as a const * fix test
This commit is contained in:
		
					parent
					
						
							
								07620acc0e
							
						
					
				
			
			
				commit
				
					
						9e7d022a06
					
				
			
		
					 9 changed files with 253 additions and 10 deletions
				
			
		|  | @ -98,3 +98,29 @@ func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit | |||
| 
 | ||||
| 	return attachments, nil | ||||
| } | ||||
| 
 | ||||
| func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) { | ||||
| 	attachments := []*gtsmodel.MediaAttachment{} | ||||
| 
 | ||||
| 	q := m.newMediaQ(&attachments). | ||||
| 		Where("media_attachment.cached = true"). | ||||
| 		Where("media_attachment.avatar = false"). | ||||
| 		Where("media_attachment.header = false"). | ||||
| 		Where("media_attachment.created_at < ?", olderThan). | ||||
| 		Where("media_attachment.remote_url IS NULL"). | ||||
| 		Where("media_attachment.status_id IS NULL") | ||||
| 
 | ||||
| 	if maxID != "" { | ||||
| 		q = q.Where("media_attachment.id < ?", maxID) | ||||
| 	} | ||||
| 
 | ||||
| 	if limit != 0 { | ||||
| 		q = q.Limit(limit) | ||||
| 	} | ||||
| 
 | ||||
| 	if err := q.Scan(ctx); err != nil { | ||||
| 		return nil, m.conn.ProcessError(err) | ||||
| 	} | ||||
| 
 | ||||
| 	return attachments, nil | ||||
| } | ||||
|  |  | |||
|  | @ -24,6 +24,7 @@ import ( | |||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/stretchr/testify/suite" | ||||
| 	"github.com/superseriousbusiness/gotosocial/testrig" | ||||
| ) | ||||
| 
 | ||||
| type MediaTestSuite struct { | ||||
|  | @ -51,6 +52,14 @@ func (suite *MediaTestSuite) TestGetAvisAndHeaders() { | |||
| 	suite.Len(attachments, 2) | ||||
| } | ||||
| 
 | ||||
| func (suite *MediaTestSuite) TestGetLocalUnattachedOlderThan() { | ||||
| 	ctx := context.Background() | ||||
| 
 | ||||
| 	attachments, err := suite.db.GetLocalUnattachedOlderThan(ctx, testrig.TimeMustParse("2090-06-04T13:12:00Z"), "", 10) | ||||
| 	suite.NoError(err) | ||||
| 	suite.Len(attachments, 1) | ||||
| } | ||||
| 
 | ||||
| func TestMediaTestSuite(t *testing.T) { | ||||
| 	suite.Run(t, new(MediaTestSuite)) | ||||
| } | ||||
|  |  | |||
|  | @ -38,4 +38,9 @@ type Media interface { | |||
| 	// GetAvatarsAndHeaders fetches limit n avatars and headers with an id < maxID. These headers | ||||
| 	// and avis may be in use or not; the caller should check this if it's important. | ||||
| 	GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error) | ||||
| 	// GetLocalUnattachedOlderThan fetches limit n local media attachments, older than the given time, which | ||||
| 	// aren't header or avatars, and aren't attached to a status. In other words, attachments which were uploaded | ||||
| 	// but never used for whatever reason, or attachments that were attached to a status which was subsequently | ||||
| 	// deleted. | ||||
| 	GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error) | ||||
| } | ||||
|  |  | |||
|  | @ -34,6 +34,10 @@ import ( | |||
| // selectPruneLimit is the amount of media entries to select at a time from the db when pruning | ||||
| const selectPruneLimit = 20 | ||||
| 
 | ||||
| // UnusedLocalAttachmentCacheDays is the amount of days to keep local media in storage if it | ||||
| // is not attached to a status, or was never attached to a status. | ||||
| const UnusedLocalAttachmentCacheDays = 3 | ||||
| 
 | ||||
| // Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs. | ||||
| type Manager interface { | ||||
| 	// ProcessMedia begins the process of decoding and storing the given data as an attachment. | ||||
|  | @ -75,11 +79,16 @@ type Manager interface { | |||
| 	// | ||||
| 	// The returned int is the amount of media that was pruned by this function. | ||||
| 	PruneAllRemote(ctx context.Context, olderThanDays int) (int, error) | ||||
| 	// PruneAllMeta prunes unused meta media -- currently, this means unused avatars + headers, but can also be extended | ||||
| 	// to include things like attachments that were uploaded on this server but left unused, etc. | ||||
| 	// PruneAllMeta prunes unused/out of date headers and avatars cached on this instance. | ||||
| 	// | ||||
| 	// The returned int is the amount of media that was pruned by this function. | ||||
| 	PruneAllMeta(ctx context.Context) (int, error) | ||||
| 	// PruneUnusedLocalAttachments prunes unused media attachments that were uploaded by | ||||
| 	// a user on this instance, but never actually attached to a status, or attached but | ||||
| 	// later detached. | ||||
| 	// | ||||
| 	// The returned int is the amount of media that was pruned by this function. | ||||
| 	PruneUnusedLocalAttachments(ctx context.Context) (int, error) | ||||
| 
 | ||||
| 	// Stop stops the underlying worker pool of the manager. It should be called | ||||
| 	// when closing GoToSocial in order to cleanly finish any in-progress jobs. | ||||
|  | @ -210,6 +219,19 @@ func scheduleCleanupJobs(m *manager) error { | |||
| 		return fmt.Errorf("error starting media manager meta cleanup job: %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	if _, err := c.AddFunc("@midnight", func() { | ||||
| 		begin := time.Now() | ||||
| 		pruned, err := m.PruneUnusedLocalAttachments(pruneCtx) | ||||
| 		if err != nil { | ||||
| 			logrus.Errorf("media manager: error pruning unused local attachments: %s", err) | ||||
| 			return | ||||
| 		} | ||||
| 		logrus.Infof("media manager: pruned %d unused local attachments in %s", pruned, time.Since(begin)) | ||||
| 	}); err != nil { | ||||
| 		pruneCancel() | ||||
| 		return fmt.Errorf("error starting media manager unused local attachments cleanup job: %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	// start remote cache cleanup cronjob if configured | ||||
| 	if mediaRemoteCacheDays := config.GetMediaRemoteCacheDays(); mediaRemoteCacheDays > 0 { | ||||
| 		if _, err := c.AddFunc("@midnight", func() { | ||||
|  |  | |||
|  | @ -21,7 +21,6 @@ package media | |||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"codeberg.org/gruf/go-store/storage" | ||||
| 	"github.com/sirupsen/logrus" | ||||
|  | @ -32,15 +31,10 @@ import ( | |||
| func (m *manager) PruneAllRemote(ctx context.Context, olderThanDays int) (int, error) { | ||||
| 	var totalPruned int | ||||
| 
 | ||||
| 	// convert days into a duration string | ||||
| 	olderThanHoursString := fmt.Sprintf("%dh", olderThanDays*24) | ||||
| 	// parse the duration string into a duration | ||||
| 	olderThanHours, err := time.ParseDuration(olderThanHoursString) | ||||
| 	olderThan, err := parseOlderThan(olderThanDays) | ||||
| 	if err != nil { | ||||
| 		return totalPruned, fmt.Errorf("PruneAllRemote: %d", err) | ||||
| 		return totalPruned, fmt.Errorf("PruneAllRemote: error parsing olderThanDays %d: %s", olderThanDays, err) | ||||
| 	} | ||||
| 	// 'subtract' that from the time now to give our threshold | ||||
| 	olderThan := time.Now().Add(-olderThanHours) | ||||
| 	logrus.Infof("PruneAllRemote: pruning media older than %s", olderThan) | ||||
| 
 | ||||
| 	// select 20 attachments at a time and prune them | ||||
|  |  | |||
							
								
								
									
										86
									
								
								internal/media/pruneunusedlocal.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								internal/media/pruneunusedlocal.go
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,86 @@ | |||
| /* | ||||
|    GoToSocial | ||||
|    Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org | ||||
| 
 | ||||
|    This program is free software: you can redistribute it and/or modify | ||||
|    it under the terms of the GNU Affero General Public License as published by | ||||
|    the Free Software Foundation, either version 3 of the License, or | ||||
|    (at your option) any later version. | ||||
| 
 | ||||
|    This program is distributed in the hope that it will be useful, | ||||
|    but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|    GNU Affero General Public License for more details. | ||||
| 
 | ||||
|    You should have received a copy of the GNU Affero General Public License | ||||
|    along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | ||||
| 
 | ||||
| package media | ||||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 
 | ||||
| 	"codeberg.org/gruf/go-store/storage" | ||||
| 	"github.com/sirupsen/logrus" | ||||
| 	"github.com/superseriousbusiness/gotosocial/internal/db" | ||||
| 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" | ||||
| ) | ||||
| 
 | ||||
| func (m *manager) PruneUnusedLocalAttachments(ctx context.Context) (int, error) { | ||||
| 	var totalPruned int | ||||
| 	var maxID string | ||||
| 	var attachments []*gtsmodel.MediaAttachment | ||||
| 	var err error | ||||
| 
 | ||||
| 	olderThan, err := parseOlderThan(UnusedLocalAttachmentCacheDays) | ||||
| 	if err != nil { | ||||
| 		return totalPruned, fmt.Errorf("PruneUnusedLocalAttachments: error parsing olderThanDays %d: %s", UnusedLocalAttachmentCacheDays, err) | ||||
| 	} | ||||
| 	logrus.Infof("PruneUnusedLocalAttachments: pruning unused local attachments older than %s", olderThan) | ||||
| 
 | ||||
| 	// select 20 attachments at a time and prune them | ||||
| 	for attachments, err = m.db.GetLocalUnattachedOlderThan(ctx, olderThan, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.db.GetLocalUnattachedOlderThan(ctx, olderThan, maxID, selectPruneLimit) { | ||||
| 		// use the id of the last attachment in the slice as the next 'maxID' value | ||||
| 		l := len(attachments) | ||||
| 		maxID = attachments[l-1].ID | ||||
| 		logrus.Tracef("PruneUnusedLocalAttachments: got %d unused local attachments older than %s with maxID < %s", l, olderThan, maxID) | ||||
| 
 | ||||
| 		for _, attachment := range attachments { | ||||
| 			if err := m.pruneOneLocal(ctx, attachment); err != nil { | ||||
| 				return totalPruned, err | ||||
| 			} | ||||
| 			totalPruned++ | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	// make sure we don't have a real error when we leave the loop | ||||
| 	if err != nil && err != db.ErrNoEntries { | ||||
| 		return totalPruned, err | ||||
| 	} | ||||
| 
 | ||||
| 	logrus.Infof("PruneUnusedLocalAttachments: finished pruning: pruned %d entries", totalPruned) | ||||
| 	return totalPruned, nil | ||||
| } | ||||
| 
 | ||||
| func (m *manager) pruneOneLocal(ctx context.Context, attachment *gtsmodel.MediaAttachment) error { | ||||
| 	if attachment.File.Path != "" { | ||||
| 		// delete the full size attachment from storage | ||||
| 		logrus.Tracef("pruneOneLocal: deleting %s", attachment.File.Path) | ||||
| 		if err := m.storage.Delete(attachment.File.Path); err != nil && err != storage.ErrNotFound { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if attachment.Thumbnail.Path != "" { | ||||
| 		// delete the thumbnail from storage | ||||
| 		logrus.Tracef("pruneOneLocal: deleting %s", attachment.Thumbnail.Path) | ||||
| 		if err := m.storage.Delete(attachment.Thumbnail.Path); err != nil && err != storage.ErrNotFound { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	// delete the attachment completely | ||||
| 	return m.db.DeleteByID(ctx, attachment.ID, attachment) | ||||
| } | ||||
							
								
								
									
										75
									
								
								internal/media/pruneunusedlocal_test.go
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								internal/media/pruneunusedlocal_test.go
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,75 @@ | |||
| /* | ||||
|    GoToSocial | ||||
|    Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org | ||||
| 
 | ||||
|    This program is free software: you can redistribute it and/or modify | ||||
|    it under the terms of the GNU Affero General Public License as published by | ||||
|    the Free Software Foundation, either version 3 of the License, or | ||||
|    (at your option) any later version. | ||||
| 
 | ||||
|    This program is distributed in the hope that it will be useful, | ||||
|    but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|    GNU Affero General Public License for more details. | ||||
| 
 | ||||
|    You should have received a copy of the GNU Affero General Public License | ||||
|    along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | ||||
| 
 | ||||
| package media_test | ||||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/stretchr/testify/suite" | ||||
| 	"github.com/superseriousbusiness/gotosocial/internal/db" | ||||
| ) | ||||
| 
 | ||||
| type PruneUnusedLocalTestSuite struct { | ||||
| 	MediaStandardTestSuite | ||||
| } | ||||
| 
 | ||||
| func (suite *PruneUnusedLocalTestSuite) TestPruneUnusedLocal() { | ||||
| 	testAttachment := suite.testAttachments["local_account_1_unattached_1"] | ||||
| 	suite.True(testAttachment.Cached) | ||||
| 
 | ||||
| 	totalPruned, err := suite.manager.PruneUnusedLocalAttachments(context.Background()) | ||||
| 	suite.NoError(err) | ||||
| 	suite.Equal(1, totalPruned) | ||||
| 
 | ||||
| 	_, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID) | ||||
| 	suite.ErrorIs(err, db.ErrNoEntries) | ||||
| } | ||||
| 
 | ||||
| func (suite *PruneUnusedLocalTestSuite) TestPruneRemoteTwice() { | ||||
| 	totalPruned, err := suite.manager.PruneUnusedLocalAttachments(context.Background()) | ||||
| 	suite.NoError(err) | ||||
| 	suite.Equal(1, totalPruned) | ||||
| 
 | ||||
| 	// final prune should prune nothing, since the first prune already happened | ||||
| 	totalPrunedAgain, err := suite.manager.PruneUnusedLocalAttachments(context.Background()) | ||||
| 	suite.NoError(err) | ||||
| 	suite.Equal(0, totalPrunedAgain) | ||||
| } | ||||
| 
 | ||||
| func (suite *PruneUnusedLocalTestSuite) TestPruneOneNonExistent() { | ||||
| 	ctx := context.Background() | ||||
| 	testAttachment := suite.testAttachments["local_account_1_unattached_1"] | ||||
| 
 | ||||
| 	// Delete this attachment cached on disk | ||||
| 	media, err := suite.db.GetAttachmentByID(ctx, testAttachment.ID) | ||||
| 	suite.NoError(err) | ||||
| 	suite.True(media.Cached) | ||||
| 	err = suite.storage.Delete(media.File.Path) | ||||
| 	suite.NoError(err) | ||||
| 
 | ||||
| 	// Now attempt to prune for item with db entry no file | ||||
| 	totalPruned, err := suite.manager.PruneUnusedLocalAttachments(ctx) | ||||
| 	suite.NoError(err) | ||||
| 	suite.Equal(1, totalPruned) | ||||
| } | ||||
| 
 | ||||
| func TestPruneUnusedLocalTestSuite(t *testing.T) { | ||||
| 	suite.Run(t, &PruneUnusedLocalTestSuite{}) | ||||
| } | ||||
|  | @ -21,6 +21,7 @@ package media | |||
| import ( | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/h2non/filetype" | ||||
| 	"github.com/sirupsen/logrus" | ||||
|  | @ -128,3 +129,19 @@ func (l *logrusWrapper) Info(msg string, keysAndValues ...interface{}) { | |||
| func (l *logrusWrapper) Error(err error, msg string, keysAndValues ...interface{}) { | ||||
| 	logrus.Error("media manager cron logger: ", err, msg, keysAndValues) | ||||
| } | ||||
| 
 | ||||
// parseOlderThan returns the point in time that lies olderThanDays days before
// the current time, counting every day as exactly 24 hours.
//
// A negative olderThanDays yields a time in the future. An error is returned if
// olderThanDays is so large (in either direction) that the span cannot be
// represented as a time.Duration.
func parseOlderThan(olderThanDays int) (time.Time, error) {
	const (
		day = 24 * time.Hour
		// largest whole number of days that fits into a time.Duration
		maxDays = int64((1<<63 - 1) / day)
	)

	// check the range first so the multiplication below cannot silently wrap around
	if d := int64(olderThanDays); d > maxDays || d < -maxDays {
		return time.Time{}, fmt.Errorf("parseOlderThan: %d days does not fit in a time.Duration", olderThanDays)
	}

	// 'subtract' the span from the time now to give our threshold
	return time.Now().Add(-time.Duration(olderThanDays) * day), nil
}
|  |  | |||
|  | @ -41,6 +41,15 @@ func (p *processor) MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gt | |||
| 		} | ||||
| 	}() | ||||
| 
 | ||||
| 	go func() { | ||||
| 		pruned, err := p.mediaManager.PruneUnusedLocalAttachments(ctx) | ||||
| 		if err != nil { | ||||
| 			logrus.Errorf("MediaPrune: error pruning unused local cache: %s", err) | ||||
| 		} else { | ||||
| 			logrus.Infof("MediaPrune: pruned %d unused local cache entries", pruned) | ||||
| 		} | ||||
| 	}() | ||||
| 
 | ||||
| 	go func() { | ||||
| 		pruned, err := p.mediaManager.PruneAllMeta(ctx) | ||||
| 		if err != nil { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue