mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-30 23:12:25 -05:00 
			
		
		
		
	[chore] Duplicated media cleanup (#649)
* add migration to clean up duplicated media * use /tmp/gotosocial for testrig storage path * defer remove storage tempdir * skip if not attached to status or status not found * log errors at error level * only log delete as else clause if successful * just return nil on down * reword delete logic a little bit * check if storage base path is defined * check for status id more thoroughly * don't log error if just no rows * go fmt * break statusIDLoop when found * break currentlyUsedLoop when found
This commit is contained in:
		
					parent
					
						
							
								da2386bab1
							
						
					
				
			
			
				commit
				
					
						13e4bbdbfa
					
				
			
		
					 3 changed files with 171 additions and 1 deletions
				
			
		|  | @ -0,0 +1,164 @@ | ||||||
|  | /* | ||||||
|  |    GoToSocial | ||||||
|  |    Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org | ||||||
|  | 
 | ||||||
|  |    This program is free software: you can redistribute it and/or modify | ||||||
|  |    it under the terms of the GNU Affero General Public License as published by | ||||||
|  |    the Free Software Foundation, either version 3 of the License, or | ||||||
|  |    (at your option) any later version. | ||||||
|  | 
 | ||||||
|  |    This program is distributed in the hope that it will be useful, | ||||||
|  |    but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |    GNU Affero General Public License for more details. | ||||||
|  | 
 | ||||||
|  |    You should have received a copy of the GNU Affero General Public License | ||||||
|  |    along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||||
|  | */ | ||||||
|  | 
 | ||||||
|  | package migrations | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"context" | ||||||
|  | 	"database/sql" | ||||||
|  | 	"fmt" | ||||||
|  | 	"path" | ||||||
|  | 
 | ||||||
|  | 	"codeberg.org/gruf/go-store/kv" | ||||||
|  | 	"codeberg.org/gruf/go-store/storage" | ||||||
|  | 	"github.com/sirupsen/logrus" | ||||||
|  | 	"github.com/superseriousbusiness/gotosocial/internal/config" | ||||||
|  | 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" | ||||||
|  | 	"github.com/uptrace/bun" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | func init() { | ||||||
|  | 	deleteAttachment := func(ctx context.Context, l *logrus.Entry, a *gtsmodel.MediaAttachment, s *kv.KVStore, tx bun.Tx) { | ||||||
|  | 		if err := s.Delete(a.File.Path); err != nil && err != storage.ErrNotFound { | ||||||
|  | 			l.Errorf("error removing file %s: %s", a.File.Path, err) | ||||||
|  | 		} else { | ||||||
|  | 			l.Debugf("deleted %s", a.File.Path) | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if err := s.Delete(a.Thumbnail.Path); err != nil && err != storage.ErrNotFound { | ||||||
|  | 			l.Errorf("error removing file %s: %s", a.Thumbnail.Path, err) | ||||||
|  | 		} else { | ||||||
|  | 			l.Debugf("deleted %s", a.Thumbnail.Path) | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if _, err := tx.NewDelete(). | ||||||
|  | 			Model(a). | ||||||
|  | 			WherePK(). | ||||||
|  | 			Exec(ctx); err != nil { | ||||||
|  | 			l.Errorf("error deleting attachment with id %s: %s", a.ID, err) | ||||||
|  | 		} else { | ||||||
|  | 			l.Debugf("deleted attachment with id %s", a.ID) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	up := func(ctx context.Context, db *bun.DB) error { | ||||||
|  | 		l := logrus.WithField("migration", "20220612091800_duplicated_media_cleanup") | ||||||
|  | 
 | ||||||
|  | 		storageBasePath := config.GetStorageLocalBasePath() | ||||||
|  | 		if storageBasePath == "" { | ||||||
|  | 			return fmt.Errorf("%s must be set to do storage migration", config.StorageLocalBasePathFlag()) | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { | ||||||
|  | 			s, err := kv.OpenFile(storageBasePath, &storage.DiskConfig{ | ||||||
|  | 				LockFile: path.Join(storageBasePath, "store.lock"), | ||||||
|  | 			}) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return fmt.Errorf("error creating storage backend: %s", err) | ||||||
|  | 			} | ||||||
|  | 			defer s.Close() | ||||||
|  | 
 | ||||||
|  | 			// step 1. select all media attachment remote URLs that have duplicates | ||||||
|  | 			var dupes int | ||||||
|  | 			dupedRemoteURLs := []*gtsmodel.MediaAttachment{} | ||||||
|  | 			if err := tx.NewSelect(). | ||||||
|  | 				Model(&dupedRemoteURLs). | ||||||
|  | 				ColumnExpr("remote_url", "count(*)"). | ||||||
|  | 				Where("remote_url IS NOT NULL"). | ||||||
|  | 				Group("remote_url"). | ||||||
|  | 				Having("count(*) > 1"). | ||||||
|  | 				Scan(ctx); err != nil { | ||||||
|  | 				return err | ||||||
|  | 			} | ||||||
|  | 			dupes = len(dupedRemoteURLs) | ||||||
|  | 			l.Infof("found %d attachments with duplicate remote URLs", dupes) | ||||||
|  | 
 | ||||||
|  | 			for i, dupedRemoteURL := range dupedRemoteURLs { | ||||||
|  | 				if i%10 == 0 { | ||||||
|  | 					l.Infof("cleaning %d of %d", i, dupes) | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				// step 2: select all media attachments associated with this url | ||||||
|  | 				dupedAttachments := []*gtsmodel.MediaAttachment{} | ||||||
|  | 				if err := tx.NewSelect(). | ||||||
|  | 					Model(&dupedAttachments). | ||||||
|  | 					Where("remote_url = ?", dupedRemoteURL.RemoteURL). | ||||||
|  | 					Scan(ctx); err != nil { | ||||||
|  | 					l.Errorf("error running same attachments query: %s", err) | ||||||
|  | 					continue | ||||||
|  | 				} | ||||||
|  | 				l.Debugf("found %d duplicates of attachment with remote url %s", len(dupedAttachments), dupedRemoteURL.RemoteURL) | ||||||
|  | 
 | ||||||
|  | 				var statusID string | ||||||
|  | 			statusIDLoop: | ||||||
|  | 				for _, dupe := range dupedAttachments { | ||||||
|  | 					if dupe.StatusID != "" { | ||||||
|  | 						statusID = dupe.StatusID | ||||||
|  | 						break statusIDLoop | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				if statusID == "" { | ||||||
|  | 					l.Debugf("%s not associated with a status, moving on", dupedRemoteURL.RemoteURL) | ||||||
|  | 					continue | ||||||
|  | 				} | ||||||
|  | 				l.Debugf("%s is associated with status %s", dupedRemoteURL.RemoteURL, statusID) | ||||||
|  | 
 | ||||||
|  | 				// step 3: get the status that these attachments are supposedly associated with, bail if we can't get it | ||||||
|  | 				status := >smodel.Status{} | ||||||
|  | 				if err := tx.NewSelect(). | ||||||
|  | 					Model(status). | ||||||
|  | 					Where("id = ?", statusID). | ||||||
|  | 					Scan(ctx); err != nil { | ||||||
|  | 					if err != sql.ErrNoRows { | ||||||
|  | 						l.Errorf("error selecting status with id %s: %s", statusID, err) | ||||||
|  | 					} | ||||||
|  | 					continue | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				// step 4: for each attachment, check if it's actually one that the status is currently set to use, and delete if not | ||||||
|  | 				for _, dupe := range dupedAttachments { | ||||||
|  | 					var currentlyUsed bool | ||||||
|  | 				currentlyUsedLoop: | ||||||
|  | 					for _, attachmentID := range status.AttachmentIDs { | ||||||
|  | 						if attachmentID == dupe.ID { | ||||||
|  | 							currentlyUsed = true | ||||||
|  | 							break currentlyUsedLoop | ||||||
|  | 						} | ||||||
|  | 					} | ||||||
|  | 
 | ||||||
|  | 					if currentlyUsed { | ||||||
|  | 						l.Debugf("attachment with id %s is a correct current attachment, leaving it alone!", dupe.ID) | ||||||
|  | 						continue | ||||||
|  | 					} | ||||||
|  | 
 | ||||||
|  | 					deleteAttachment(ctx, l, dupe, s, tx) | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 			return nil | ||||||
|  | 		}) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	down := func(ctx context.Context, db *bun.DB) error { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if err := Migrations.Register(up, down); err != nil { | ||||||
|  | 		panic(err) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | @ -19,6 +19,9 @@ | ||||||
| package testrig | package testrig | ||||||
| 
 | 
 | ||||||
| import ( | import ( | ||||||
|  | 	"os" | ||||||
|  | 	"path" | ||||||
|  | 
 | ||||||
| 	"github.com/coreos/go-oidc/v3/oidc" | 	"github.com/coreos/go-oidc/v3/oidc" | ||||||
| 	"github.com/superseriousbusiness/gotosocial/internal/config" | 	"github.com/superseriousbusiness/gotosocial/internal/config" | ||||||
| ) | ) | ||||||
|  | @ -64,7 +67,7 @@ var TestDefaults = config.Configuration{ | ||||||
| 	MediaRemoteCacheDays:     30, | 	MediaRemoteCacheDays:     30, | ||||||
| 
 | 
 | ||||||
| 	StorageBackend:       "local", | 	StorageBackend:       "local", | ||||||
| 	StorageLocalBasePath: "/gotosocial/storage", | 	StorageLocalBasePath: path.Join(os.TempDir(), "gotosocial"), | ||||||
| 
 | 
 | ||||||
| 	StatusesMaxChars:           5000, | 	StatusesMaxChars:           5000, | ||||||
| 	StatusesCWMaxChars:         100, | 	StatusesCWMaxChars:         100, | ||||||
|  |  | ||||||
|  | @ -21,6 +21,7 @@ package testrig | ||||||
| import ( | import ( | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"os" | 	"os" | ||||||
|  | 	"path" | ||||||
| 
 | 
 | ||||||
| 	"codeberg.org/gruf/go-store/kv" | 	"codeberg.org/gruf/go-store/kv" | ||||||
| 	"codeberg.org/gruf/go-store/storage" | 	"codeberg.org/gruf/go-store/storage" | ||||||
|  | @ -94,6 +95,8 @@ func StandardStorageSetup(s *kv.KVStore, relativePath string) { | ||||||
| 
 | 
 | ||||||
| // StandardStorageTeardown deletes everything in storage so that it's clean for the next test | // StandardStorageTeardown deletes everything in storage so that it's clean for the next test | ||||||
| func StandardStorageTeardown(s *kv.KVStore) { | func StandardStorageTeardown(s *kv.KVStore) { | ||||||
|  | 	defer os.RemoveAll(path.Join(os.TempDir(), "gotosocial")) | ||||||
|  | 
 | ||||||
| 	iter, err := s.Iterator(nil) | 	iter, err := s.Iterator(nil) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		panic(err) | 		panic(err) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue