mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-11-03 18:02:26 -06:00 
			
		
		
		
	* search for mentions also by username,domain in status deref, handle deleted statuses in enrichStatusSafely() * return d.enrichStatusSafely() directly
		
			
				
	
	
		
			420 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			420 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// GoToSocial
 | 
						|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
 | 
						|
// SPDX-License-Identifier: AGPL-3.0-or-later
 | 
						|
//
 | 
						|
// This program is free software: you can redistribute it and/or modify
 | 
						|
// it under the terms of the GNU Affero General Public License as published by
 | 
						|
// the Free Software Foundation, either version 3 of the License, or
 | 
						|
// (at your option) any later version.
 | 
						|
//
 | 
						|
// This program is distributed in the hope that it will be useful,
 | 
						|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
// GNU Affero General Public License for more details.
 | 
						|
//
 | 
						|
// You should have received a copy of the GNU Affero General Public License
 | 
						|
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
						|
 | 
						|
package dereferencing
 | 
						|
 | 
						|
import (
 | 
						|
	"context"
 | 
						|
	"net/http"
 | 
						|
	"net/url"
 | 
						|
 | 
						|
	"codeberg.org/gruf/go-kv"
 | 
						|
	"github.com/superseriousbusiness/activity/pub"
 | 
						|
	"github.com/superseriousbusiness/gotosocial/internal/ap"
 | 
						|
	"github.com/superseriousbusiness/gotosocial/internal/config"
 | 
						|
	"github.com/superseriousbusiness/gotosocial/internal/gtserror"
 | 
						|
	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
 | 
						|
	"github.com/superseriousbusiness/gotosocial/internal/log"
 | 
						|
)
 | 
						|
 | 
						|
// maxIter defines how many iterations of descendants or
 | 
						|
// ancestors we are willing to follow before returning error.
 | 
						|
const maxIter = 512
 | 
						|
 | 
						|
// dereferenceThread handles dereferencing status thread after
 | 
						|
// fetch. Passing off appropriate parts to be enqueued for async
 | 
						|
// processing, or handling some parts synchronously when required.
 | 
						|
func (d *Dereferencer) dereferenceThread(
 | 
						|
	ctx context.Context,
 | 
						|
	requestUser string,
 | 
						|
	uri *url.URL,
 | 
						|
	status *gtsmodel.Status,
 | 
						|
	statusable ap.Statusable,
 | 
						|
	isNew bool,
 | 
						|
) {
 | 
						|
	if isNew {
 | 
						|
		// This is a new status that we need the ancestors of in
 | 
						|
		// order to determine visibility. Perform the initial part
 | 
						|
		// of thread dereferencing, i.e. parents, synchronously.
 | 
						|
		err := d.DereferenceStatusAncestors(ctx, requestUser, status)
 | 
						|
		if err != nil {
 | 
						|
			log.Error(ctx, err)
 | 
						|
		}
 | 
						|
 | 
						|
		// Enqueue dereferencing remaining status thread, (children), asychronously .
 | 
						|
		d.state.Workers.Dereference.Queue.Push(func(ctx context.Context) {
 | 
						|
			if err := d.DereferenceStatusDescendants(ctx, requestUser, uri, statusable); err != nil {
 | 
						|
				log.Error(ctx, err)
 | 
						|
			}
 | 
						|
		})
 | 
						|
	} else {
 | 
						|
		// This is an existing status, dereference the WHOLE thread asynchronously.
 | 
						|
		d.state.Workers.Dereference.Queue.Push(func(ctx context.Context) {
 | 
						|
			if err := d.DereferenceStatusAncestors(ctx, requestUser, status); err != nil {
 | 
						|
				log.Error(ctx, err)
 | 
						|
			}
 | 
						|
			if err := d.DereferenceStatusDescendants(ctx, requestUser, uri, statusable); err != nil {
 | 
						|
				log.Error(ctx, err)
 | 
						|
			}
 | 
						|
		})
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// DereferenceStatusAncestors iterates upwards from the given status, using InReplyToURI, to ensure that as many parent statuses as possible are dereferenced.
 | 
						|
func (d *Dereferencer) DereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error {
 | 
						|
	// Start log entry with fields
 | 
						|
	l := log.WithContext(ctx).
 | 
						|
		WithFields(kv.Fields{
 | 
						|
			{"username", username},
 | 
						|
			{"original", status.URI},
 | 
						|
		}...)
 | 
						|
 | 
						|
	// Keep track of already dereferenced statuses
 | 
						|
	// for this ancestor thread to prevent recursion.
 | 
						|
	derefdStatuses := make(map[string]struct{}, 10)
 | 
						|
 | 
						|
	// Mark given status as the one
 | 
						|
	// we're currently working on.
 | 
						|
	current := status
 | 
						|
 | 
						|
	for i := 0; i < maxIter; i++ {
 | 
						|
		if current.InReplyToURI == "" {
 | 
						|
			// Status has no parent, we've
 | 
						|
			// reached the top of the chain.
 | 
						|
			return nil
 | 
						|
		}
 | 
						|
 | 
						|
		// Apparent current parent URI to log fields.
 | 
						|
		l = l.WithField("parent", current.InReplyToURI)
 | 
						|
		l.Trace("following status ancestor")
 | 
						|
 | 
						|
		// Check whether this parent has already been deref'd.
 | 
						|
		if _, ok := derefdStatuses[current.InReplyToURI]; ok {
 | 
						|
			l.Warn("self referencing status ancestor")
 | 
						|
			return nil
 | 
						|
		}
 | 
						|
 | 
						|
		// Add this status's parent URI to map of deref'd.
 | 
						|
		derefdStatuses[current.InReplyToURI] = struct{}{}
 | 
						|
 | 
						|
		// Parse status parent URI for later use.
 | 
						|
		uri, err := url.Parse(current.InReplyToURI)
 | 
						|
		if err != nil {
 | 
						|
			l.Warnf("invalid uri: %v", err)
 | 
						|
			return nil
 | 
						|
		}
 | 
						|
 | 
						|
		// Fetch parent status by current's reply URI, this handles
 | 
						|
		// case of existing (updating if necessary) or a new status.
 | 
						|
		parent, _, _, err := d.getStatusByURI(ctx, username, uri)
 | 
						|
 | 
						|
		// Check for a returned HTTP code via error.
 | 
						|
		switch code := gtserror.StatusCode(err); {
 | 
						|
 | 
						|
		// 404 may indicate deletion, but can also
 | 
						|
		// indicate that we don't have permission to
 | 
						|
		// view the status (it's followers-only and
 | 
						|
		// we don't follow, for example).
 | 
						|
		case code == http.StatusNotFound:
 | 
						|
 | 
						|
			// If this reply is followers-only or stricter,
 | 
						|
			// we can safely assume the status it replies
 | 
						|
			// to is also followers only or stricter.
 | 
						|
			//
 | 
						|
			// In this case we should leave the inReplyTo
 | 
						|
			// URI in place for visibility filtering,
 | 
						|
			// and just return since we can go no further.
 | 
						|
			if status.Visibility == gtsmodel.VisibilityFollowersOnly ||
 | 
						|
				status.Visibility == gtsmodel.VisibilityMutualsOnly ||
 | 
						|
				status.Visibility == gtsmodel.VisibilityDirect {
 | 
						|
				return nil
 | 
						|
			}
 | 
						|
 | 
						|
			// If the reply is public or unlisted then
 | 
						|
			// likely the replied-to status is/was public
 | 
						|
			// or unlisted and has indeed been deleted,
 | 
						|
			// fall through to the Gone case to clean up.
 | 
						|
			fallthrough
 | 
						|
 | 
						|
		// Gone (410) definitely indicates deletion.
 | 
						|
		// Update the status to remove references to
 | 
						|
		// the now-gone parent.
 | 
						|
		case code == http.StatusGone:
 | 
						|
			l.Trace("status orphaned")
 | 
						|
			current.InReplyTo = nil
 | 
						|
			current.InReplyToAccount = nil
 | 
						|
			return d.updateStatusParent(ctx,
 | 
						|
				current,
 | 
						|
				"", // status ID
 | 
						|
				"", // status URI
 | 
						|
				"", // account ID
 | 
						|
			)
 | 
						|
 | 
						|
		// An error was returned for a status during
 | 
						|
		// an attempted NEW dereference, return here.
 | 
						|
		//
 | 
						|
		// NOTE: this will catch all cases of a nil
 | 
						|
		// parent, all cases below can safely assume
 | 
						|
		// a non-nil parent in their code logic.
 | 
						|
		case err != nil && parent == nil:
 | 
						|
			return gtserror.Newf("error dereferencing new %s: %w", current.InReplyToURI, err)
 | 
						|
 | 
						|
		// An error was returned for an existing parent,
 | 
						|
		// we simply treat this as a temporary situation.
 | 
						|
		case err != nil:
 | 
						|
			l.Errorf("error getting parent: %v", err)
 | 
						|
		}
 | 
						|
 | 
						|
		// Start a new switch case
 | 
						|
		// as the following scenarios
 | 
						|
		// are possible with / without
 | 
						|
		// any returned error.
 | 
						|
		switch {
 | 
						|
 | 
						|
		// The current status is using an indirect URL
 | 
						|
		// in order to reference the parent. This is just
 | 
						|
		// weird and broken... Leave the URI in place but
 | 
						|
		// don't link the statuses via database IDs as it
 | 
						|
		// could cause all sorts of unexpected situations.
 | 
						|
		case current.InReplyToURI != parent.URI:
 | 
						|
			l.Errorf("indirect in_reply_to_uri => %s", parent.URI)
 | 
						|
 | 
						|
		// The ID has changed for currently stored parent ID
 | 
						|
		// (which may be empty, if new!) and fetched version.
 | 
						|
		//
 | 
						|
		// Update the current's inReplyTo fields to parent.
 | 
						|
		case current.InReplyToID != parent.ID:
 | 
						|
			l.Tracef("parent changed %s => %s", current.InReplyToID, parent.ID)
 | 
						|
			current.InReplyToAccount = parent.Account
 | 
						|
			if err := d.updateStatusParent(ctx,
 | 
						|
				current,
 | 
						|
				parent.ID,
 | 
						|
				parent.URI,
 | 
						|
				parent.AccountID,
 | 
						|
			); err != nil {
 | 
						|
				return err
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		// Set next parent to use.
 | 
						|
		current.InReplyTo = parent
 | 
						|
		current = current.InReplyTo
 | 
						|
	}
 | 
						|
 | 
						|
	return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, status.URI)
 | 
						|
}
 | 
						|
 | 
						|
// DereferenceStatusDescendents iterates downwards from the given status, using its replies, to ensure that as many children statuses as possible are dereferenced.
 | 
						|
func (d *Dereferencer) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
 | 
						|
	statusIRIStr := statusIRI.String()
 | 
						|
 | 
						|
	// Start log entry with fields
 | 
						|
	l := log.WithContext(ctx).
 | 
						|
		WithFields(kv.Fields{
 | 
						|
			{"username", username},
 | 
						|
			{"status", statusIRIStr},
 | 
						|
		}...)
 | 
						|
 | 
						|
	// Log function start
 | 
						|
	l.Trace("beginning")
 | 
						|
 | 
						|
	// OUR instance hostname.
 | 
						|
	localhost := config.GetHost()
 | 
						|
 | 
						|
	// Keep track of already dereferenced collection
 | 
						|
	// pages for this thread to prevent recursion.
 | 
						|
	derefdPages := make(map[string]struct{}, 16)
 | 
						|
 | 
						|
	// frame represents a single stack frame when
 | 
						|
	// iteratively derefencing status descendants.
 | 
						|
	type frame struct {
 | 
						|
		// page is the current activity streams
 | 
						|
		// collection page we are on (as we often
 | 
						|
		// push a frame to stack mid-paging).
 | 
						|
		page ap.CollectionPageIterator
 | 
						|
 | 
						|
		// pageURI is the URI string of
 | 
						|
		// the frame's collection page
 | 
						|
		// (is useful for logging).
 | 
						|
		pageURI string
 | 
						|
	}
 | 
						|
 | 
						|
	var (
 | 
						|
		// current stack frame
 | 
						|
		current *frame
 | 
						|
 | 
						|
		// stack is a list of "shelved" descendand iterator
 | 
						|
		// frames. this is pushed to when a child status frame
 | 
						|
		// is found that we need to further iterate down, and
 | 
						|
		// popped from into 'current' when that child's tree
 | 
						|
		// of further descendants is exhausted.
 | 
						|
		stack = []*frame{
 | 
						|
			func() *frame {
 | 
						|
				// Start input frame is built from the first input.
 | 
						|
				page, pageURI := getAttachedStatusCollectionPage(parent)
 | 
						|
				if page == nil {
 | 
						|
					return nil
 | 
						|
				}
 | 
						|
				return &frame{page: page, pageURI: pageURI}
 | 
						|
			}(),
 | 
						|
		}
 | 
						|
 | 
						|
		// popStack will remove and return the top frame
 | 
						|
		// from the stack, or nil if currently empty.
 | 
						|
		popStack = func() *frame {
 | 
						|
			if len(stack) == 0 {
 | 
						|
				return nil
 | 
						|
			}
 | 
						|
 | 
						|
			// Get frame index
 | 
						|
			idx := len(stack) - 1
 | 
						|
 | 
						|
			// Pop last frame
 | 
						|
			frame := stack[idx]
 | 
						|
			stack = stack[:idx]
 | 
						|
 | 
						|
			return frame
 | 
						|
		}
 | 
						|
	)
 | 
						|
 | 
						|
stackLoop:
 | 
						|
	for i := 0; i < maxIter; i++ {
 | 
						|
		// Pop next frame, nil means we are at end
 | 
						|
		if current = popStack(); current == nil {
 | 
						|
			return nil
 | 
						|
		}
 | 
						|
 | 
						|
	pageLoop:
 | 
						|
		for {
 | 
						|
			l.Tracef("following collection page: %s", current.pageURI)
 | 
						|
 | 
						|
		itemLoop:
 | 
						|
			for {
 | 
						|
				// Get next item from page iter.
 | 
						|
				next := current.page.NextItem()
 | 
						|
				if next == nil {
 | 
						|
					break itemLoop
 | 
						|
				}
 | 
						|
 | 
						|
				// Check for available IRI.
 | 
						|
				itemIRI, _ := pub.ToId(next)
 | 
						|
				if itemIRI == nil {
 | 
						|
					continue itemLoop
 | 
						|
				}
 | 
						|
 | 
						|
				if itemIRI.Host == localhost {
 | 
						|
					// This child is one of ours,
 | 
						|
					continue itemLoop
 | 
						|
				}
 | 
						|
 | 
						|
				// Dereference the remote status and store in the database.
 | 
						|
				// getStatusByURI guards against the following conditions:
 | 
						|
				//   - refetching recently fetched statuses (recursion!)
 | 
						|
				//   - remote domain is blocked (will return unretrievable)
 | 
						|
				//   - any http type error for a new status returns unretrievable
 | 
						|
				_, statusable, _, err := d.getStatusByURI(ctx, username, itemIRI)
 | 
						|
				if err != nil {
 | 
						|
					l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
 | 
						|
					continue itemLoop
 | 
						|
				}
 | 
						|
 | 
						|
				if statusable == nil {
 | 
						|
					// A nil statusable return from
 | 
						|
					// getStatusByURI() indicates a
 | 
						|
					// remote status that was already
 | 
						|
					// dereferenced recently (so no
 | 
						|
					// need to go through descendents).
 | 
						|
					continue itemLoop
 | 
						|
				}
 | 
						|
 | 
						|
				// Extract any attached collection + ID URI from status.
 | 
						|
				page, pageURI := getAttachedStatusCollectionPage(statusable)
 | 
						|
				if page == nil {
 | 
						|
					continue itemLoop
 | 
						|
				}
 | 
						|
 | 
						|
				// Put current and next frame at top of stack
 | 
						|
				stack = append(stack, current, &frame{
 | 
						|
					pageURI: pageURI,
 | 
						|
					page:    page,
 | 
						|
				})
 | 
						|
 | 
						|
				// Now start at top of loop
 | 
						|
				continue stackLoop
 | 
						|
			}
 | 
						|
 | 
						|
			// Get the next page from iterator.
 | 
						|
			next := current.page.NextPage()
 | 
						|
			if next == nil || !next.IsIRI() {
 | 
						|
				continue stackLoop
 | 
						|
			}
 | 
						|
 | 
						|
			// Get the next page IRI.
 | 
						|
			nextURI := next.GetIRI()
 | 
						|
			nextURIStr := nextURI.String()
 | 
						|
 | 
						|
			// Check whether this page has already been deref'd.
 | 
						|
			if _, ok := derefdPages[nextURIStr]; ok {
 | 
						|
				l.Warnf("self referencing collection page(s): %s", nextURIStr)
 | 
						|
				continue stackLoop
 | 
						|
			}
 | 
						|
 | 
						|
			// Mark this collection page as deref'd.
 | 
						|
			derefdPages[nextURIStr] = struct{}{}
 | 
						|
 | 
						|
			// Dereference this next collection page by its IRI.
 | 
						|
			collectionPage, err := d.dereferenceCollectionPage(ctx,
 | 
						|
				username,
 | 
						|
				nextURI,
 | 
						|
			)
 | 
						|
			if err != nil {
 | 
						|
				l.Errorf("error dereferencing collection page %q: %s", nextURIStr, err)
 | 
						|
				continue stackLoop
 | 
						|
			}
 | 
						|
 | 
						|
			// Set the next collection page.
 | 
						|
			current.page = collectionPage
 | 
						|
			current.pageURI = nextURIStr
 | 
						|
			continue pageLoop
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	return gtserror.Newf("reached %d descendant iterations for %q", maxIter, statusIRIStr)
 | 
						|
}
 | 
						|
 | 
						|
// updateStatusParent updates the given status' parent
 | 
						|
// status URI, ID and account ID to given values in DB.
 | 
						|
func (d *Dereferencer) updateStatusParent(
 | 
						|
	ctx context.Context,
 | 
						|
	status *gtsmodel.Status,
 | 
						|
	parentStatusID string,
 | 
						|
	parentStatusURI string,
 | 
						|
	parentAccountID string,
 | 
						|
) error {
 | 
						|
	status.InReplyToAccountID = parentAccountID
 | 
						|
	status.InReplyToURI = parentStatusURI
 | 
						|
	status.InReplyToID = parentStatusID
 | 
						|
	if err := d.state.DB.UpdateStatus(ctx,
 | 
						|
		status,
 | 
						|
		"in_reply_to_id",
 | 
						|
		"in_reply_to_uri",
 | 
						|
		"in_reply_to_account_id",
 | 
						|
	); err != nil {
 | 
						|
		return gtserror.Newf("error updating status %s: %w", status.URI, err)
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 |