[feature] status refetch support (#1690)

* revamp http client to not limit requests, instead use sender worker

Signed-off-by: kim <grufwub@gmail.com>

* remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping

Signed-off-by: kim <grufwub@gmail.com>

* improve batch senders to keep popping recipients until remote URL found

Signed-off-by: kim <grufwub@gmail.com>

* fix recipient looping issue

Signed-off-by: kim <grufwub@gmail.com>

* move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP

Signed-off-by: kim <grufwub@gmail.com>

* first draft of status refetching logic

Signed-off-by: kim <grufwub@gmail.com>

* fix testrig to use new federation alloc func signature

Signed-off-by: kim <grufwub@gmail.com>

* fix log format directive

Signed-off-by: kim <grufwub@gmail.com>

* add status fetched_at migration

Signed-off-by: kim <grufwub@gmail.com>

* remove unused / unchecked for error types

Signed-off-by: kim <grufwub@gmail.com>

* add back the used type...

Signed-off-by: kim <grufwub@gmail.com>

* add separate internal getStatus() function for derefThread() that doesn't recurse

Signed-off-by: kim <grufwub@gmail.com>

* improved mention and media attachment error handling

Signed-off-by: kim <grufwub@gmail.com>

* fix log and error format directives

Signed-off-by: kim <grufwub@gmail.com>

* update account deref to match status deref changes

Signed-off-by: kim <grufwub@gmail.com>

* very small code formatting change to make things clearer

Signed-off-by: kim <grufwub@gmail.com>

* add more code comments

Signed-off-by: kim <grufwub@gmail.com>

* improved code commenting

Signed-off-by: kim <grufwub@gmail.com>

* only check for required further derefs if needed

Signed-off-by: kim <grufwub@gmail.com>

* improved cache invalidation

Signed-off-by: kim <grufwub@gmail.com>

* tweak cache restarting to use a (very small) backoff

Signed-off-by: kim <grufwub@gmail.com>

* small readability changes and fixes

Signed-off-by: kim <grufwub@gmail.com>

* fix account sync issues

Signed-off-by: kim <grufwub@gmail.com>

* fix merge conflicts + update account enrichment to accept already-passed accountable

Signed-off-by: kim <grufwub@gmail.com>

* remove secondary function declaration

Signed-off-by: kim <grufwub@gmail.com>

* normalise dereferencer get status / account behaviour, fix remaining tests

Signed-off-by: kim <grufwub@gmail.com>

* fix remaining rebase conflicts, finish commenting code

Signed-off-by: kim <grufwub@gmail.com>

* appease the linter

Signed-off-by: kim <grufwub@gmail.com>

* add source file header

Signed-off-by: kim <grufwub@gmail.com>

* update to use TIMESTAMPTZ column type instead of just TIMESTAMP

Signed-off-by: kim <grufwub@gmail.com>

* don't pass in 'updated_at' to UpdateEmoji()

Signed-off-by: kim <grufwub@gmail.com>

* use new ap.Resolve{Account,Status}able() functions

Signed-off-by: kim <grufwub@gmail.com>

* remove the somewhat confusing rescoping of the same variable names

Signed-off-by: kim <grufwub@gmail.com>

* update migration file name, improved database delete error returns

Signed-off-by: kim <grufwub@gmail.com>

* formatting

Signed-off-by: kim <grufwub@gmail.com>

* improved multi-delete database functions to minimise DB calls

Signed-off-by: kim <grufwub@gmail.com>

* remove unused type

Signed-off-by: kim <grufwub@gmail.com>

* fix delete statements

Signed-off-by: kim <grufwub@gmail.com>

---------

Signed-off-by: kim <grufwub@gmail.com>
This commit is contained in:
kim 2023-05-12 10:15:54 +01:00 committed by GitHub
commit 6c9d8e78eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
55 changed files with 1552 additions and 1118 deletions

View file

@ -28,6 +28,7 @@ type Status struct {
ID string `validate:"required,ulid" bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `validate:"-" bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
UpdatedAt time.Time `validate:"-" bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
FetchedAt time.Time `validate:"required_with=!Local" bun:"type:timestamptz,nullzero"` // when was item (remote) last fetched.
PinnedAt time.Time `validate:"-" bun:"type:timestamptz,nullzero"` // Status was pinned by owning account at this time.
URI string `validate:"required,url" bun:",unique,nullzero,notnull"` // activitypub URI of this status
URL string `validate:"url" bun:",nullzero"` // web url for viewing this status
@ -87,24 +88,43 @@ func (s *Status) GetBoostOfAccountID() string {
return s.BoostOfAccountID
}
// GetAttachmentByID looks through s.Attachments for the first entry whose ID
// equals the given id. It returns that attachment and true on a match, or
// nil and false when no entry matches. Nil entries in the slice are reported
// through log.Warnf and otherwise ignored.
func (s *Status) GetAttachmentByID(id string) (*MediaAttachment, bool) {
	for idx := range s.Attachments {
		attachment := s.Attachments[idx]
		switch {
		case attachment == nil:
			// Nothing to compare against; warn and keep searching.
			log.Warnf(nil, "nil attachment in slice for status %s", s.URI)
		case attachment.ID == id:
			return attachment, true
		}
	}
	return nil, false
}
// GetAttachmentByRemoteURL looks through s.Attachments for the first entry
// whose RemoteURL equals the given url. It returns that attachment and true
// on a match, or nil and false when no entry matches. Nil entries in the
// slice are reported through log.Warnf and otherwise ignored.
func (s *Status) GetAttachmentByRemoteURL(url string) (*MediaAttachment, bool) {
	for idx := 0; idx < len(s.Attachments); idx++ {
		candidate := s.Attachments[idx]
		if candidate != nil {
			if candidate.RemoteURL == url {
				return candidate, true
			}
			continue
		}
		// Nothing to compare against; warn and keep searching.
		log.Warnf(nil, "nil attachment in slice for status %s", s.URI)
	}
	return nil, false
}
// AttachmentsPopulated returns whether media attachments are populated according to current AttachmentIDs.
// A length mismatch between s.AttachmentIDs and s.Attachments is reported as "not populated" straight away.
//
// NOTE(review): the lines below look like the removed side (index-ordered ID comparison) and the
// added side (lookup via GetAttachmentByID) of a diff hunk shown together without +/- markers, so
// the braces do not balance as displayed — confirm against the real file before relying on this text.
func (s *Status) AttachmentsPopulated() bool {
if len(s.AttachmentIDs) != len(s.Attachments) {
// this is the quickest indicator.
return false
}
// Attachments must be in same order.
for i, id := range s.AttachmentIDs {
if s.Attachments[i] == nil {
log.Warnf(nil, "nil attachment in slice for status %s", s.URI)
continue
}
if s.Attachments[i].ID != id {
for _, id := range s.AttachmentIDs {
if _, ok := s.GetAttachmentByID(id); !ok {
return false
}
}
return true
}
@ -129,24 +149,43 @@ func (s *Status) TagsPopulated() bool {
return true
}
// GetMentionByID looks through s.Mentions for the first entry whose ID equals
// the given id. It returns that mention and true on a match, or nil and false
// when no entry matches. Nil entries in the slice are reported through
// log.Warnf and otherwise ignored.
func (s *Status) GetMentionByID(id string) (*Mention, bool) {
	for idx := range s.Mentions {
		found := s.Mentions[idx]
		switch {
		case found == nil:
			// Nothing to compare against; warn and keep searching.
			log.Warnf(nil, "nil mention in slice for status %s", s.URI)
		case found.ID == id:
			return found, true
		}
	}
	return nil, false
}
// GetMentionByTargetURI looks through s.Mentions for the first entry whose
// TargetAccountURI equals the given uri. It returns that mention and true on
// a match, or nil and false when no entry matches. Nil entries in the slice
// are reported through log.Warnf and otherwise ignored.
func (s *Status) GetMentionByTargetURI(uri string) (*Mention, bool) {
	for idx := 0; idx < len(s.Mentions); idx++ {
		candidate := s.Mentions[idx]
		if candidate != nil {
			if candidate.TargetAccountURI == uri {
				return candidate, true
			}
			continue
		}
		// Nothing to compare against; warn and keep searching.
		log.Warnf(nil, "nil mention in slice for status %s", s.URI)
	}
	return nil, false
}
// MentionsPopulated returns whether mentions are populated according to current MentionIDs.
// A length mismatch between s.MentionIDs and s.Mentions is reported as "not populated" straight away.
//
// NOTE(review): the lines below look like the removed side (index-ordered ID comparison) and the
// added side (lookup via GetMentionByID) of a diff hunk shown together without +/- markers, so
// the braces do not balance as displayed — confirm against the real file before relying on this text.
func (s *Status) MentionsPopulated() bool {
if len(s.MentionIDs) != len(s.Mentions) {
// this is the quickest indicator.
return false
}
// Mentions must be in same order.
for i, id := range s.MentionIDs {
if s.Mentions[i] == nil {
log.Warnf(nil, "nil mention in slice for status %s", s.URI)
continue
}
if s.Mentions[i].ID != id {
for _, id := range s.MentionIDs {
if _, ok := s.GetMentionByID(id); !ok {
return false
}
}
return true
}
@ -171,6 +210,36 @@ func (s *Status) EmojisPopulated() bool {
return true
}
// EmojisUpToDate reports whether the emojis attached to the receiving status
// still match those attached to the passed status. The two Emojis slices are
// compared position by position on emoji URI; IDs are deliberately not used,
// since this check exists to decide whether there are new emojis to fetch.
//
// It returns false when the slices differ in length, when either slice holds
// a nil entry (which is also reported through log.Warnf), or when any pair of
// URIs at the same index differs.
func (s *Status) EmojisUpToDate(other *Status) bool {
	mine, theirs := s.Emojis, other.Emojis

	// Differing counts can never be up-to-date; cheapest check first.
	if len(mine) != len(theirs) {
		return false
	}

	// Compare pairwise: emojis are expected in the same order.
	for idx, own := range mine {
		their := theirs[idx]
		switch {
		case own == nil:
			log.Warnf(nil, "nil emoji in slice for status %s", s.URI)
			return false
		case their == nil:
			log.Warnf(nil, "nil emoji in slice for status %s", other.URI)
			return false
		case own.URI != their.URI:
			// URI at this position changed, so not up-to-date.
			return false
		}
	}

	return true
}
// MentionsAccount returns whether status mentions the given account ID.
func (s *Status) MentionsAccount(id string) bool {
for _, mention := range s.Mentions {