[feature] persist worker queues to db (#3042)

* persist queued worker tasks to database on shutdown, fill worker queues from database on startup (a sketch of this lifecycle follows this list)

* ensure the tasks are sorted by creation time before pushing them

* add migration to insert WorkerTask{} into database, add test for worker task persistence

* add test for recovering worker queues from database

* quick tweak

* whoops we ended up with double cleaner job scheduling

* insert each task separately, because bun is throwing some reflection error??

* add specific checking of cancelled worker contexts

* add http request signing to deliveries recovered from database

* add test for outgoing public key ID being correctly set on delivery

* replace select with Queue.PopCtx()

* get rid of loop now we don't use it

* remove field now we don't use it

* ensure that signing func is set

* header values weren't being copied over 🤦

* use ptr for httpclient.Request in delivery

* move worker queue filling to later in server init process

* fix rebase issues

* make logging less shouty

* use slices.Delete() instead of copying / reslicing

* have database return tasks in ascending order instead of sorting them

* add a 1 minute timeout to persisting worker queues
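
Taken together, these commits describe a simple lifecycle: on shutdown, each worker queue is drained and its remaining tasks are written to the database, bounded by the 1 minute timeout from the last commit above; on startup, the tasks are read back in ascending creation order and pushed onto the worker queues. Recovered deliveries also need their HTTP request signing re-attached (the signing commits above), since a signing function cannot be serialized to the database. Below is a minimal, hypothetical sketch of the shutdown half in Go; the names (persistWorkerQueues, DB.PutWorkerTask, Queue, Task) are illustrative stand-ins, not GoToSocial's actual APIs.

package sketch

import (
	"context"
	"time"
)

// Illustrative stand-ins for the real types.
type Task struct{ CreatedAt time.Time }

type Queue interface{ Pop() (*Task, bool) }

type DB interface {
	PutWorkerTask(ctx context.Context, t *Task) error
}

// persistWorkerQueues drains each queue and writes the remaining
// tasks to the database, so they can be requeued on next startup.
func persistWorkerQueues(db DB, queues []Queue) error {
	// Bound the whole operation by 1 minute so a slow or
	// wedged database cannot block server shutdown forever.
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()

	for _, q := range queues {
		for {
			task, ok := q.Pop()
			if !ok {
				break // queue drained
			}
			// Insert each task separately: per the commit
			// message above, a bulk insert tripped a
			// reflection error in bun.
			if err := db.PutWorkerTask(ctx, task); err != nil {
				return err
			}
		}
	}
	return nil
}

The startup half is the mirror image: select the persisted tasks ordered by creation time ascending (the database does the ordering, per the commit above), push each onto its worker queue, and presumably remove the persisted rows so they are not replayed twice.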
Authored by kim on 2024-07-30 11:58:31 +00:00, committed by GitHub.
Commit: 87cff71af9
20 changed files with 1191 additions and 93 deletions


@@ -19,6 +19,7 @@ package delivery
 import (
 	"context"
 	"errors"
+	"slices"
 	"time"
@@ -160,6 +161,13 @@ func (w *Worker) process(ctx context.Context) bool {
 loop:
 	for {
+		// Before trying to get
+		// next delivery, check
+		// context still valid.
+		if ctx.Err() != nil {
+			return true
+		}
+
 		// Get next delivery.
 		dlv, ok := w.next(ctx)
 		if !ok {
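
This hunk adds an early exit: before popping the next delivery, the worker checks whether its own context has already been cancelled, and if so reports itself done. For reference, the guard relies only on standard library behavior:

package main

import (
	"context"
	"fmt"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())

	// Before cancellation, ctx.Err() is nil.
	fmt.Println(ctx.Err()) // prints: <nil>

	cancel()

	// Afterwards it is non-nil (context.Canceled), which is
	// what the loop guard above relies on to return early.
	fmt.Println(ctx.Err()) // prints: context canceled
}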
@@ -195,16 +203,30 @@
 		// Attempt delivery of AP request.
 		rsp, retry, err := w.Client.DoOnce(
-			&dlv.Request,
+			dlv.Request,
 		)
-		if err == nil {
+
+		switch {
+		case err == nil:
 			// Ensure body closed.
 			_ = rsp.Body.Close()
 			continue loop
-		}
 
-		if !retry {
+		case errors.Is(err, context.Canceled) &&
+			ctx.Err() != nil:
+			// In the case of our own context
+			// being cancelled, push delivery
+			// back onto queue for persisting.
+			//
+			// Note we specifically check against
+			// context.Canceled here as it will
+			// be faster than the mutex lock of
+			// ctx.Err(), so gives an initial
+			// faster check in the if-clause.
+			w.Queue.Push(dlv)
+			continue loop
+
+		case !retry:
			// Drop deliveries when no
			// retry requested, or they
			// reached max (either).
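
The rewritten error handling is now a three-way triage plus a default. The interesting case is the middle one: if the delivery failed because the worker's own context was cancelled mid-flight, the delivery is pushed back onto the queue so the shutdown persistence pass can save it. A self-contained sketch of the same triage follows, using an illustrative outcome() helper that does not exist in the real code:

package main

import (
	"context"
	"errors"
	"fmt"
)

// outcome classifies a delivery attempt the way the switch above
// does. The cheap errors.Is check runs first; ctx.Err(), which
// takes a mutex internally, only confirms that the cancellation
// came from OUR context (worker shutdown), not the remote end.
func outcome(ctx context.Context, err error, retry bool) string {
	switch {
	case err == nil:
		return "delivered"
	case errors.Is(err, context.Canceled) && ctx.Err() != nil:
		return "requeue for persistence"
	case !retry:
		return "drop"
	default:
		return "push to retry backlog"
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	cancel()
	fmt.Println(outcome(ctx, context.Canceled, true)) // requeue for persistence
}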
@@ -222,42 +244,36 @@
 // next gets the next available delivery, blocking until available if necessary.
 func (w *Worker) next(ctx context.Context) (*Delivery, bool) {
-loop:
-	for {
-		// Try pop next queued.
-		dlv, ok := w.Queue.Pop()
+	// Try a fast-pop of queued
+	// delivery before anything.
+	dlv, ok := w.Queue.Pop()
 
-		if !ok {
-			// Check the backlog.
-			if len(w.backlog) > 0 {
-				// Sort by 'next' time.
-				sortDeliveries(w.backlog)
+	if !ok {
+		// Check the backlog.
+		if len(w.backlog) > 0 {
+			// Sort by 'next' time.
+			sortDeliveries(w.backlog)
 
-				// Pop next delivery.
-				dlv := w.popBacklog()
+			// Pop next delivery.
+			dlv := w.popBacklog()
 
-				return dlv, true
-			}
+			return dlv, true
+		}
 
-			select {
-			// Backlog is empty, we MUST
-			// block until next enqueued.
-			case <-w.Queue.Wait():
-				continue loop
-
-			// Worker was stopped.
-			case <-ctx.Done():
-				return nil, false
-			}
-		}
+		// Block on next delivery push
+		// OR worker context canceled.
+		dlv, ok = w.Queue.PopCtx(ctx)
+		if !ok {
+			return nil, false
+		}
+	}
 
-		// Replace request context for worker state canceling.
-		ctx := gtscontext.WithValues(ctx, dlv.Request.Context())
-		dlv.Request.Request = dlv.Request.Request.WithContext(ctx)
+	// Replace request context for worker state canceling.
+	ctx = gtscontext.WithValues(ctx, dlv.Request.Context())
+	dlv.Request.Request = dlv.Request.Request.WithContext(ctx)
 
-		return dlv, true
-	}
+	return dlv, true
 }
 
 // popBacklog pops next available from the backlog.
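
The structural change in this last hunk is that Queue.PopCtx(ctx) replaces the hand-rolled select over Queue.Wait() and ctx.Done(), which in turn removes the need for the outer labeled loop. The real queue type lives in a separate package; the toy generic queue below only illustrates the PopCtx contract (block until an item arrives or the context is done), not the actual implementation:

package main

import (
	"context"
	"fmt"
)

// ctxQueue is a toy channel-backed queue, used here only to
// demonstrate the PopCtx contract. It is NOT the queue type
// that GoToSocial's delivery worker actually uses.
type ctxQueue[T any] struct{ ch chan T }

func (q *ctxQueue[T]) Push(v T) { q.ch <- v }

// PopCtx blocks for the next value, returning ok=false if the
// context is cancelled first: exactly the two cases the removed
// select statement used to handle by hand.
func (q *ctxQueue[T]) PopCtx(ctx context.Context) (T, bool) {
	select {
	case v := <-q.ch:
		return v, true
	case <-ctx.Done():
		var zero T
		return zero, false
	}
}

func main() {
	q := &ctxQueue[int]{ch: make(chan int, 1)}
	q.Push(42)
	v, ok := q.PopCtx(context.Background())
	fmt.Println(v, ok) // 42 true
}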