mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-10-29 01:02:25 -05:00
~~Still WIP!~~ This PR allows v0.20.0 of GtS to be forward-compatible with the interaction request / authorization flow that will fully replace the current flow in v0.21.0. Basically, this means we need to recognize LikeRequest, ReplyRequest, and AnnounceRequest, and in response to those requests, deliver either a Reject or an Accept, with the latter pointing towards a LikeAuthorization, ReplyAuthorization, or AnnounceAuthorization, respectively. This can then be used by the remote instance to prove to third parties that the interaction has been accepted by the interactee. These Authorization types need to be dereferenceable to third parties, so we need to serve them. As well as recognizing the above "polite" interaction request types, we also need to still serve appropriate responses to "impolite" interaction request types, where an instance that's unaware of interaction policies tries to interact with a post by sending a reply, like, or boost directly, without wrapping it in a WhateverRequest type. Doesn't fully close https://codeberg.org/superseriousbusiness/gotosocial/issues/4026 but gets damn near (just gotta update the federating with GtS documentation). Migrations tested on both Postgres and SQLite. Co-authored-by: kim <grufwub@gmail.com> Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4394 Co-authored-by: tobi <tobi.smethurst@protonmail.com> Co-committed-by: tobi <tobi.smethurst@protonmail.com>
422 lines
12 KiB
Go
422 lines
12 KiB
Go
// GoToSocial
|
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package transport
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"slices"
|
|
"strings"
|
|
|
|
apimodel "code.superseriousbusiness.org/gotosocial/internal/api/model"
|
|
apiutil "code.superseriousbusiness.org/gotosocial/internal/api/util"
|
|
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
|
|
"code.superseriousbusiness.org/gotosocial/internal/gtsmodel"
|
|
"code.superseriousbusiness.org/gotosocial/internal/id"
|
|
"code.superseriousbusiness.org/gotosocial/internal/log"
|
|
"code.superseriousbusiness.org/gotosocial/internal/util"
|
|
"code.superseriousbusiness.org/gotosocial/internal/validate"
|
|
"github.com/temoto/robotstxt"
|
|
)
|
|
|
|
func (t *transport) DereferenceInstance(ctx context.Context, iri *url.URL) (*gtsmodel.Instance, error) {
|
|
// Try to fetch robots.txt to check
|
|
// if we're allowed to try endpoints:
|
|
//
|
|
// - /api/v1/instance
|
|
// - /.well-known/nodeinfo
|
|
// - /nodeinfo/2.0|2.1 endpoints
|
|
robotsTxt, err := t.DereferenceRobots(ctx, iri.Scheme, iri.Host)
|
|
if err != nil {
|
|
log.Debugf(ctx, "couldn't fetch robots.txt from %s: %v", iri.Host, err)
|
|
}
|
|
|
|
var i *gtsmodel.Instance
|
|
|
|
// First try to dereference using /api/v1/instance.
|
|
// This will provide the most complete picture of an instance, and avoid unnecessary api calls.
|
|
//
|
|
// This will only work with Mastodon-api compatible instances: Mastodon, some Pleroma instances, GoToSocial.
|
|
log.Debugf(ctx, "trying to dereference instance %s by /api/v1/instance", iri.Host)
|
|
i, err = t.dereferenceByAPIV1Instance(ctx, iri, robotsTxt)
|
|
if err == nil {
|
|
log.Debugf(ctx, "successfully dereferenced instance using /api/v1/instance")
|
|
return i, nil
|
|
}
|
|
log.Debugf(ctx, "couldn't dereference instance using /api/v1/instance: %s", err)
|
|
|
|
// If that doesn't work, try to dereference using /.well-known/nodeinfo.
|
|
// This will involve two API calls and return less info overall, but should be more widely compatible.
|
|
log.Debugf(ctx, "trying to dereference instance %s by /.well-known/nodeinfo", iri.Host)
|
|
i, err = t.dereferenceByNodeInfo(ctx, iri, robotsTxt)
|
|
if err == nil {
|
|
log.Debugf(ctx, "successfully dereferenced instance using /.well-known/nodeinfo")
|
|
return i, nil
|
|
}
|
|
log.Debugf(ctx, "couldn't dereference instance using /.well-known/nodeinfo: %s", err)
|
|
|
|
// we couldn't dereference the instance using any of the known methods, so just return a minimal representation
|
|
log.Debugf(ctx, "returning minimal representation of instance %s", iri.Host)
|
|
return >smodel.Instance{
|
|
ID: id.NewRandomULID(),
|
|
Domain: iri.Host,
|
|
URI: iri.String(),
|
|
}, nil
|
|
}
|
|
|
|
func (t *transport) dereferenceByAPIV1Instance(
|
|
ctx context.Context,
|
|
iri *url.URL,
|
|
robotsTxt *robotstxt.RobotsData,
|
|
) (*gtsmodel.Instance, error) {
|
|
const path = "api/v1/instance"
|
|
|
|
// Bail if we're not allowed to fetch this endpoint.
|
|
if robotsTxt != nil && !robotsTxt.TestAgent("/"+path, t.controller.userAgent) {
|
|
err := gtserror.Newf("can't fetch %s: robots.txt disallows it", path)
|
|
return nil, gtserror.SetNotPermitted(err)
|
|
}
|
|
|
|
cleanIRI := &url.URL{
|
|
Scheme: iri.Scheme,
|
|
Host: iri.Host,
|
|
Path: path,
|
|
}
|
|
|
|
// Build IRI just once
|
|
iriStr := cleanIRI.String()
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", iriStr, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
req.Header.Add("Accept", string(apiutil.AppJSON))
|
|
|
|
resp, err := t.GET(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// Ensure a non-error status response.
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, gtserror.NewFromResponse(resp)
|
|
}
|
|
|
|
// Ensure that we can use data returned from this endpoint.
|
|
robots := resp.Header.Values("X-Robots-Tag")
|
|
if slices.ContainsFunc(
|
|
robots,
|
|
func(key string) bool {
|
|
return strings.Contains(key, "noindex")
|
|
},
|
|
) {
|
|
err := gtserror.Newf("can't use fetched %s: robots tags disallows it", path)
|
|
return nil, gtserror.SetNotPermitted(err)
|
|
}
|
|
|
|
// Ensure that the incoming request content-type is expected.
|
|
if ct := resp.Header.Get("Content-Type"); !apiutil.JSONContentType(ct) {
|
|
err := gtserror.Newf("non json response type: %s", ct)
|
|
return nil, gtserror.SetMalformed(err)
|
|
}
|
|
|
|
b, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
} else if len(b) == 0 {
|
|
return nil, errors.New("response bytes was len 0")
|
|
}
|
|
|
|
// Try to parse the returned bytes
|
|
// directly into an Instance model.
|
|
apiResp := &apimodel.InstanceV1{}
|
|
if err := json.Unmarshal(b, apiResp); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var contactUsername string
|
|
if apiResp.ContactAccount != nil {
|
|
contactUsername = apiResp.ContactAccount.Username
|
|
}
|
|
|
|
i := >smodel.Instance{
|
|
ID: id.NewRandomULID(),
|
|
Domain: iri.Host,
|
|
Title: apiResp.Title,
|
|
URI: iri.Scheme + "://" + iri.Host,
|
|
ShortDescription: apiResp.ShortDescription,
|
|
Description: apiResp.Description,
|
|
ContactEmail: apiResp.Email,
|
|
ContactAccountUsername: contactUsername,
|
|
Version: apiResp.Version,
|
|
}
|
|
|
|
return i, nil
|
|
}
|
|
|
|
func (t *transport) dereferenceByNodeInfo(
|
|
ctx context.Context,
|
|
iri *url.URL,
|
|
robotsTxt *robotstxt.RobotsData,
|
|
) (*gtsmodel.Instance, error) {
|
|
// Retrieve the nodeinfo IRI from .well-known/nodeinfo.
|
|
niIRI, err := t.callNodeInfoWellKnown(ctx, iri, robotsTxt)
|
|
if err != nil {
|
|
return nil, gtserror.Newf("error during initial call to .well-known: %w", err)
|
|
}
|
|
|
|
// Use the returned nodeinfo IRI to make a followup call.
|
|
ni, err := t.callNodeInfo(ctx, niIRI, robotsTxt)
|
|
if err != nil {
|
|
return nil, gtserror.Newf("error during call to %s: %w", niIRI.String(), err)
|
|
}
|
|
|
|
// We got a response of some kind!
|
|
//
|
|
// Start building out the bare minimum
|
|
// instance model, we'll add to it if we can.
|
|
i := >smodel.Instance{
|
|
ID: id.NewRandomULID(),
|
|
Domain: iri.Host,
|
|
URI: iri.String(),
|
|
}
|
|
|
|
var title string
|
|
if i, present := ni.Metadata["nodeName"]; present {
|
|
// it's present, check it's a string
|
|
if v, ok := i.(string); ok {
|
|
// it is a string!
|
|
title = v
|
|
}
|
|
}
|
|
i.Title = title
|
|
|
|
var shortDescription string
|
|
if i, present := ni.Metadata["nodeDescription"]; present {
|
|
// it's present, check it's a string
|
|
if v, ok := i.(string); ok {
|
|
// it is a string!
|
|
shortDescription = v
|
|
}
|
|
}
|
|
i.ShortDescription = shortDescription
|
|
|
|
var contactEmail string
|
|
var contactAccountUsername string
|
|
if i, present := ni.Metadata["maintainer"]; present {
|
|
// it's present, check it's a map
|
|
if v, ok := i.(map[string]string); ok {
|
|
// see if there's an email in the map
|
|
if email, present := v["email"]; present {
|
|
if err := validate.Email(email); err == nil {
|
|
// valid email address
|
|
contactEmail = email
|
|
}
|
|
}
|
|
// see if there's a 'name' in the map
|
|
if name, present := v["name"]; present {
|
|
// name could be just a username, or could be a mention string eg @whatever@aaaa.com
|
|
username, _, err := util.ExtractNamestringParts(name)
|
|
if err == nil {
|
|
// it was a mention string
|
|
contactAccountUsername = username
|
|
} else {
|
|
// not a mention string
|
|
contactAccountUsername = name
|
|
}
|
|
}
|
|
}
|
|
}
|
|
i.ContactEmail = contactEmail
|
|
i.ContactAccountUsername = contactAccountUsername
|
|
|
|
var software string
|
|
if ni.Software.Name != "" {
|
|
software = ni.Software.Name
|
|
}
|
|
if ni.Software.Version != "" {
|
|
software = software + " " + ni.Software.Version
|
|
}
|
|
i.Version = software
|
|
|
|
return i, nil
|
|
}
|
|
|
|
func (t *transport) callNodeInfoWellKnown(
|
|
ctx context.Context,
|
|
iri *url.URL,
|
|
robotsTxt *robotstxt.RobotsData,
|
|
) (*url.URL, error) {
|
|
const path = ".well-known/nodeinfo"
|
|
|
|
// Bail if we're not allowed to fetch this endpoint.
|
|
if robotsTxt != nil && !robotsTxt.TestAgent("/"+path, t.controller.userAgent) {
|
|
err := gtserror.Newf("can't fetch %s: robots.txt disallows it", path)
|
|
return nil, gtserror.SetNotPermitted(err)
|
|
}
|
|
|
|
cleanIRI := &url.URL{
|
|
Scheme: iri.Scheme,
|
|
Host: iri.Host,
|
|
Path: path,
|
|
}
|
|
|
|
// Build IRI just once
|
|
iriStr := cleanIRI.String()
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", iriStr, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Add("Accept", string(apiutil.AppJSON))
|
|
|
|
resp, err := t.GET(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// Ensure a non-error status response.
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, gtserror.NewFromResponse(resp)
|
|
}
|
|
|
|
// Ensure that we can use data returned from this endpoint.
|
|
robots := resp.Header.Values("X-Robots-Tag")
|
|
if slices.ContainsFunc(
|
|
robots,
|
|
func(key string) bool {
|
|
return strings.Contains(key, "noindex")
|
|
},
|
|
) {
|
|
err := gtserror.Newf("can't use fetched %s: robots tags disallows it", path)
|
|
return nil, gtserror.SetNotPermitted(err)
|
|
}
|
|
|
|
// Ensure that the returned content-type is expected.
|
|
if ct := resp.Header.Get("Content-Type"); !apiutil.JSONContentType(ct) {
|
|
err := gtserror.Newf("non json response type: %s", ct)
|
|
return nil, gtserror.SetMalformed(err)
|
|
}
|
|
|
|
b, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
} else if len(b) == 0 {
|
|
return nil, gtserror.New("response bytes was len 0")
|
|
}
|
|
|
|
wellKnownResp := &apimodel.WellKnownResponse{}
|
|
if err := json.Unmarshal(b, wellKnownResp); err != nil {
|
|
return nil, gtserror.Newf("could not unmarshal server response as WellKnownResponse: %w", err)
|
|
}
|
|
|
|
// Look through the links for the first one that
|
|
// matches nodeinfo schema, this is what we need.
|
|
var nodeinfoHref *url.URL
|
|
for _, l := range wellKnownResp.Links {
|
|
if l.Href == "" || !strings.HasPrefix(l.Rel, "http://nodeinfo.diaspora.software/ns/schema/2") {
|
|
continue
|
|
}
|
|
nodeinfoHref, err = url.Parse(l.Href)
|
|
if err != nil {
|
|
return nil, gtserror.Newf("couldn't parse url %s: %w", l.Href, err)
|
|
}
|
|
}
|
|
if nodeinfoHref == nil {
|
|
return nil, gtserror.New("could not find nodeinfo rel in well known response")
|
|
}
|
|
|
|
return nodeinfoHref, nil
|
|
}
|
|
|
|
func (t *transport) callNodeInfo(
|
|
ctx context.Context,
|
|
iri *url.URL,
|
|
robotsTxt *robotstxt.RobotsData,
|
|
) (*apimodel.Nodeinfo, error) {
|
|
// Normalize robots.txt test path.
|
|
testPath := iri.Path
|
|
if !strings.HasPrefix(testPath, "/") {
|
|
testPath = "/" + testPath
|
|
}
|
|
|
|
// Bail if we're not allowed to fetch this endpoint.
|
|
if robotsTxt != nil && !robotsTxt.TestAgent(testPath, t.controller.userAgent) {
|
|
err := gtserror.Newf("can't fetch %s: robots.txt disallows it", testPath)
|
|
return nil, gtserror.SetNotPermitted(err)
|
|
}
|
|
|
|
// Build IRI just once
|
|
iriStr := iri.String()
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", iriStr, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Add("Accept", string(apiutil.AppJSON))
|
|
|
|
resp, err := t.GET(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// Ensure a non-error status response.
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, gtserror.NewFromResponse(resp)
|
|
}
|
|
|
|
// Ensure that the incoming request content-type is expected.
|
|
if ct := resp.Header.Get("Content-Type"); !apiutil.NodeInfo2ContentType(ct) {
|
|
err := gtserror.Newf("non nodeinfo schema 2.0 response: %s", ct)
|
|
return nil, gtserror.SetMalformed(err)
|
|
}
|
|
|
|
// Ensure that we can use data returned from this endpoint.
|
|
robots := resp.Header.Values("X-Robots-Tag")
|
|
if slices.ContainsFunc(
|
|
robots,
|
|
func(key string) bool {
|
|
return strings.Contains(key, "noindex")
|
|
},
|
|
) {
|
|
err := gtserror.Newf("can't use fetched %s: robots tags disallows it", iri.Path)
|
|
return nil, gtserror.SetNotPermitted(err)
|
|
}
|
|
|
|
b, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
} else if len(b) == 0 {
|
|
return nil, gtserror.New("response bytes was len 0")
|
|
}
|
|
|
|
niResp := &apimodel.Nodeinfo{}
|
|
if err := json.Unmarshal(b, niResp); err != nil {
|
|
return nil, gtserror.Newf("could not unmarshal server response as Nodeinfo: %w", err)
|
|
}
|
|
|
|
return niResp, nil
|
|
}
|