[bugfix] Update GetURL to extract url from Link objects with href (#4249)

# Description

> If this is a code change, please include a summary of what you've coded, and link to the issue(s) it closes/implements.
>
> If this is a documentation change, please briefly describe what you've changed and why.

This pull request updates our parsing of the `url` property in incoming ActivityPub items to also include Link items, and not just bare URIs.

The first discovered URL is still used as the `url` property of the *gtsmodel.Account or *gtsmodel.Status, so in practice this change only affects the URL anti-spoofing check we perform when dereferencing.

~~Should fix https://codeberg.org/superseriousbusiness/gotosocial/issues/4248 but I need to run it and test it myself first to be sure.~~

Fixes https://codeberg.org/superseriousbusiness/gotosocial/issues/4248

## Checklist

Please put an x inside each checkbox to indicate that you've read and followed it: `[ ]` -> `[x]`

If this is a documentation change, only the first checkbox must be filled (you can delete the others if you want).

- [x] I/we have read the [GoToSocial contribution guidelines](https://codeberg.org/superseriousbusiness/gotosocial/src/branch/main/CONTRIBUTING.md).
- [x] I/we have discussed the proposed changes already, either in an issue on the repository, or in the Matrix chat.
- [x] I/we have not leveraged AI to create the proposed changes.
- [x] I/we have performed a self-review of added code.
- [x] I/we have written code that is legible and maintainable by others.
- [x] I/we have commented the added code, particularly in hard-to-understand areas.
- [ ] I/we have made any necessary changes to documentation.
- [x] I/we have added tests that cover new code.
- [x] I/we have run tests and they pass locally with the changes.
- [x] I/we have run `go fmt ./...` and `golangci-lint run`.

Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4249
Co-authored-by: tobi <tobi.smethurst@protonmail.com>
Co-committed-by: tobi <tobi.smethurst@protonmail.com>
This commit is contained in:
tobi 2025-06-09 12:32:00 +02:00 committed by tobi
commit 32738d83a8
5 changed files with 332 additions and 38 deletions

View file

@ -357,10 +357,14 @@ func ExtractIconURI(i WithIcon) (*url.URL, error) {
continue
}
imageURL, err := ExtractURL(image)
if err == nil && imageURL != nil {
return imageURL, nil
imageURL := GetURL(image)
if len(imageURL) == 0 {
// Nothing here.
continue
}
// Got a hit.
return imageURL[0], nil
}
return nil, gtserror.New("could not extract valid image URI from icon")
@ -399,10 +403,14 @@ func ExtractImageURI(i WithImage) (*url.URL, error) {
continue
}
imageURL, err := ExtractURL(image)
if err == nil && imageURL != nil {
return imageURL, nil
imageURL := GetURL(image)
if len(imageURL) == 0 {
// Nothing here.
continue
}
// Got a hit.
return imageURL[0], nil
}
return nil, gtserror.New("could not extract valid image URI from image")
@ -488,28 +496,6 @@ func ExtractFields(i WithAttachment) []*gtsmodel.Field {
return fields
}
// ExtractURL walks the url property of the given WithURL
// and returns the first entry that is a plain IRI. Entries
// that are not IRIs are skipped. An error is returned if the
// url property is not set at all, or if none of its entries
// is an IRI. Note that this looks only at the url property;
// the ID of the type is not considered.
func ExtractURL(i WithURL) (*url.URL, error) {
	prop := i.GetActivityStreamsUrl()
	if prop == nil {
		return nil, gtserror.New("url property was nil")
	}

	for it := prop.Begin(); it != prop.End(); it = it.Next() {
		if it.IsIRI() {
			// First IRI wins.
			return it.GetIRI(), nil
		}
	}

	return nil, gtserror.New("no valid URL property found")
}
// ExtractPubKeyFromActor extracts the public key, public key ID, and public
// key owner ID from an interface, or an error if something goes wrong.
func ExtractPubKeyFromActor(i WithPublicKey) (
@ -676,15 +662,15 @@ func ExtractAttachments(i WithAttachment) ([]*gtsmodel.MediaAttachment, error) {
// (just remote URL, description, and blurhash) from the given
// Attachmentable interface, or an error if no remote URL is set.
func ExtractAttachment(i Attachmentable) (*gtsmodel.MediaAttachment, error) {
// Get the URL for the attachment file.
// Get the first URL for the attachment file.
// If no URL is set, we can't do anything.
remoteURL, err := ExtractURL(i)
if err != nil {
return nil, gtserror.Newf("error extracting attachment URL: %w", err)
remoteURL := GetURL(i)
if len(remoteURL) == 0 {
return nil, gtserror.New("empty attachment URL")
}
return &gtsmodel.MediaAttachment{
RemoteURL: remoteURL.String(),
RemoteURL: remoteURL[0].String(),
Description: ExtractDescription(i),
Blurhash: ExtractBlurhash(i),
FileMeta: gtsmodel.FileMeta{

View file

@ -35,7 +35,7 @@ func (suite *ExtractAttachmentsTestSuite) TestExtractAttachmentMissingURL() {
d1.SetActivityStreamsUrl(streams.NewActivityStreamsUrlProperty())
attachment, err := ap.ExtractAttachment(d1)
suite.EqualError(err, "ExtractAttachment: error extracting attachment URL: ExtractURL: no valid URL property found")
suite.EqualError(err, "ExtractAttachment: empty attachment URL")
suite.Nil(attachment)
}

View file

@ -135,7 +135,8 @@ func AppendBcc(with WithBcc, bcc ...*url.URL) {
}, bcc...)
}
// GetURL returns the IRIs contained in the URL property of 'with'.
// GetURL returns IRIs contained
// in the URL property of 'with'.
func GetURL(with WithURL) []*url.URL {
urlProp := with.GetActivityStreamsUrl()
if urlProp == nil || urlProp.Len() == 0 {
@ -144,9 +145,31 @@ func GetURL(with WithURL) []*url.URL {
urls := make([]*url.URL, 0, urlProp.Len())
for i := 0; i < urlProp.Len(); i++ {
at := urlProp.At(i)
// See if it's a plain URI.
if at.IsXMLSchemaAnyURI() {
u := at.GetXMLSchemaAnyURI()
urls = append(urls, u)
continue
}
// See if it's a Link obj
// with an href property.
if at.IsActivityStreamsLink() {
l := at.GetActivityStreamsLink()
hr := l.GetActivityStreamsHref()
if hr == nil {
// No href.
continue
}
if hr.IsXMLSchemaAnyURI() {
u := hr.Get()
urls = append(urls, u)
} else if hr.IsIRI() {
u := hr.GetIRI()
urls = append(urls, u)
}
}
}
return urls

View file

@ -0,0 +1,283 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package ap_test
import (
"bytes"
"io"
"net/url"
"testing"
"code.superseriousbusiness.org/gotosocial/internal/ap"
"code.superseriousbusiness.org/gotosocial/testrig"
"github.com/stretchr/testify/suite"
)
// PropertiesTestSuite is a testify suite holding the
// tests in this file; it carries no state of its own
// beyond the embedded suite.Suite.
type PropertiesTestSuite struct {
suite.Suite
}
// TestGetStatusableURL parses JSON fixtures into statusables
// using ap.ResolveStatusable, then checks that ap.GetURL returns
// the expected URLs, in order, for `url` entries that are given
// as Link objects with an href and/or as plain URIs.
func (suite *PropertiesTestSuite) TestGetStatusableURL() {
// Pretty good representation of
// how a peertube video is federated.
const peertubeVideo = `{
"@context": [
"https://www.w3.org/ns/activitystreams"
],
"to": [
"https://www.w3.org/ns/activitystreams#Public"
],
"cc": [
"https://example.org/accounts/someone/followers"
],
"type": "Video",
"id": "https://example.org/videos/watch/942d51e6-9320-4f40-980b-76bba0652bc2",
"url": [
{
"type": "Link",
"mediaType": "text/html",
"href": "https://example.org/w/jifTXYpdLJSU269svW8Jdb"
},
{
"type": "Link",
"mediaType": "text/html",
"href": "https://example.org/videos/watch/942d51e6-9320-4f40-980b-76bba0652bc2"
},
{
"type": "Link",
"mediaType": "application/x-mpegURL",
"href": "https://example.org/static/streaming-playlists/hls/942d51e6-9320-4f40-980b-76bba0652bc2/3d412b0f-3f2e-4509-9d0f-0142223b1752-master.m3u8",
"tag": [
{
"type": "Infohash",
"name": "4b5a702f76333963655575616e627a5261426269"
},
{
"type": "Infohash",
"name": "4f6c5552324a39324a55447036735649586b4875"
},
{
"type": "Infohash",
"name": "476d4154793667574d6d594c7276523471364732"
},
{
"type": "Link",
"name": "sha256",
"mediaType": "application/json",
"href": "https://example.org/static/streaming-playlists/hls/942d51e6-9320-4f40-980b-76bba0652bc2/0c607a4c-ab78-4bed-aeef-9970abd88e77-segments-sha256.json"
},
{
"type": "Link",
"mediaType": "video/mp4",
"href": "https://example.org/static/streaming-playlists/hls/942d51e6-9320-4f40-980b-76bba0652bc2/c6b6c9fb-83da-425c-9ce6-680e00eb9ecb-480-fragmented.mp4",
"height": 480,
"width": 854,
"size": 11260985,
"fps": 30,
"attachment": [
{
"type": "PropertyValue",
"name": "ffprobe_codec_type",
"value": "video"
},
{
"type": "PropertyValue",
"name": "peertube_format_flag",
"value": "fragmented"
}
]
},
{
"type": "Link",
"rel": [
"metadata",
"video/mp4"
],
"mediaType": "application/json",
"href": "https://example.org/api/v1/videos/942d51e6-9320-4f40-980b-76bba0652bc2/metadata/236424",
"height": 480,
"width": 854,
"fps": 30
},
{
"type": "Link",
"mediaType": "application/x-bittorrent",
"href": "https://example.org/lazy-static/torrents/fac3fb9c-55a6-4e56-82f5-8de8a3f62d8f-480-hls.torrent",
"height": 480,
"width": 854,
"fps": 30
},
{
"type": "Link",
"mediaType": "application/x-bittorrent;x-scheme-handler/magnet",
"href": "magnet:?xs=https%3A%2F%2Fexample.org%2Flazy-static%2Ftorrents%2Ffac3fb9c-55a6-4e56-82f5-8de8a3f62d8f-480-hls.torrent&xt=urn:btih:b5a55918c3a05c2459156b6f34570ea64c69fd5a&dn=Na+proch%C3%A1zce+%E2%99%A5%EF%B8%8F+Walking+with+our+gang+%F0%9F%98%83&tr=https%3A%2F%2Fexample.org%2Ftracker%2Fannounce&tr=wss%3A%2F%2Fexample.org%3A443%2Ftracker%2Fsocket&ws=https%3A%2F%2Fexample.org%2Fstatic%2Fstreaming-playlists%2Fhls%2F942d51e6-9320-4f40-980b-76bba0652bc2%2Fc6b6c9fb-83da-425c-9ce6-680e00eb9ecb-480-fragmented.mp4",
"height": 480,
"width": 854,
"fps": 30
},
{
"type": "Link",
"mediaType": "video/mp4",
"href": "https://example.org/static/streaming-playlists/hls/942d51e6-9320-4f40-980b-76bba0652bc2/113ebf59-8e27-42f5-b971-d315f3fec77d-0-fragmented.mp4",
"height": 0,
"width": 0,
"size": 1472647,
"fps": 0,
"attachment": [
{
"type": "PropertyValue",
"name": "ffprobe_codec_type",
"value": "audio"
},
{
"type": "PropertyValue",
"name": "peertube_format_flag",
"value": "fragmented"
}
]
},
{
"type": "Link",
"rel": [
"metadata",
"video/mp4"
],
"mediaType": "application/json",
"href": "https://example.org/api/v1/videos/942d51e6-9320-4f40-980b-76bba0652bc2/metadata/236425",
"height": 0,
"width": 0,
"fps": 0
},
{
"type": "Link",
"mediaType": "application/x-bittorrent",
"href": "https://example.org/lazy-static/torrents/babc50e0-4643-4467-bde0-5d837d71fed5-0-hls.torrent",
"height": 0,
"width": 0,
"fps": 0
},
{
"type": "Link",
"mediaType": "application/x-bittorrent;x-scheme-handler/magnet",
"href": "magnet:?xs=https%3A%2F%2Fexample.org%2Flazy-static%2Ftorrents%2Fbabc50e0-4643-4467-bde0-5d837d71fed5-0-hls.torrent&xt=urn:btih:395086b81fae8b1b9f7d0a03375c66214acff459&dn=Na+proch%C3%A1zce+%E2%99%A5%EF%B8%8F+Walking+with+our+gang+%F0%9F%98%83&tr=https%3A%2F%2Fexample.org%2Ftracker%2Fannounce&tr=wss%3A%2F%2Fexample.org%3A443%2Ftracker%2Fsocket&ws=https%3A%2F%2Fexample.org%2Fstatic%2Fstreaming-playlists%2Fhls%2F942d51e6-9320-4f40-980b-76bba0652bc2%2F113ebf59-8e27-42f5-b971-d315f3fec77d-0-fragmented.mp4",
"height": 0,
"width": 0,
"fps": 0
}
]
},
{
"type": "Link",
"name": "tracker-http",
"rel": [
"tracker",
"http"
],
"href": "https://example.org/tracker/announce"
},
{
"type": "Link",
"name": "tracker-websocket",
"rel": [
"tracker",
"websocket"
],
"href": "wss://example.org:443/tracker/socket"
}
]
}`
// Mix of plain IRIs and Links,
// we should be able to parse this.
//
// The last one with no href should be ignored.
const mixedPlainURIsAndLinks = `{
"@context": [
"https://www.w3.org/ns/activitystreams"
],
"to": [
"https://www.w3.org/ns/activitystreams#Public"
],
"cc": [
"https://example.org/accounts/someone/followers"
],
"type": "Video",
"id": "https://example.org/videos/watch/942d51e6-9320-4f40-980b-76bba0652bc2",
"url": [
"https://example.org/videos/watch/942d51e6-9320-4f40-980b-76bba0652bc2",
{
"type": "Link",
"mediaType": "text/html",
"href": "https://example.org/w/jifTXYpdLJSU269svW8Jdb"
},
{
"type": "Link",
"mediaType": "text/html",
"href": "https://example.org/videos/watch/942d51e6-9320-4f40-980b-76bba0652bc2"
},
{
"type": "Link",
"mediaType": "text/html"
}
]
}`
// Table of test cases: each pairs a JSON fixture
// with the URLs expected back from ap.GetURL,
// in the order they appear in the fixture.
for i, test := range []struct {
in string
expectedURLs []*url.URL
}{
{
in: peertubeVideo,
// Only the hrefs of the top-level `url` entries
// are expected here; the Links nested under the
// "tag" of the third entry are not.
expectedURLs: []*url.URL{
testrig.URLMustParse("https://example.org/w/jifTXYpdLJSU269svW8Jdb"),
testrig.URLMustParse("https://example.org/videos/watch/942d51e6-9320-4f40-980b-76bba0652bc2"),
testrig.URLMustParse("https://example.org/static/streaming-playlists/hls/942d51e6-9320-4f40-980b-76bba0652bc2/3d412b0f-3f2e-4509-9d0f-0142223b1752-master.m3u8"),
testrig.URLMustParse("https://example.org/tracker/announce"),
testrig.URLMustParse("wss://example.org:443/tracker/socket"),
},
},
{
in: mixedPlainURIsAndLinks,
// The plain URI comes first, followed by the Link hrefs.
// The same URL appears as both a plain URI and a Link href
// in the fixture, and is expected twice (no deduplication).
expectedURLs: []*url.URL{
testrig.URLMustParse("https://example.org/videos/watch/942d51e6-9320-4f40-980b-76bba0652bc2"),
testrig.URLMustParse("https://example.org/w/jifTXYpdLJSU269svW8Jdb"),
testrig.URLMustParse("https://example.org/videos/watch/942d51e6-9320-4f40-980b-76bba0652bc2"),
},
},
} {
// Parse input to statusable.
statusable, err := ap.ResolveStatusable(
suite.T().Context(),
io.NopCloser(bytes.NewBufferString(test.in)),
)
if err != nil {
// Nothing to compare against
// if the fixture didn't parse.
suite.FailNow(err.Error())
}
// Ensure URL fields as expected.
suite.EqualValues(
test.expectedURLs,
ap.GetURL(statusable),
"mismatch in test case %d", i,
)
}
}
// TestPropertiesTestSuite is the `go test` entry point
// that runs every test method on PropertiesTestSuite.
func TestPropertiesTestSuite(t *testing.T) {
	suite.Run(t, &PropertiesTestSuite{})
}

View file

@ -268,10 +268,12 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
// status.URL
//
// Web URL of this status (optional).
if statusURL, err := ap.ExtractURL(statusable); err == nil {
status.URL = statusURL.String()
if statusURL := ap.GetURL(statusable); len(statusURL) != 0 {
// Take the first hit.
status.URL = statusURL[0].String()
} else {
status.URL = status.URI // Fall back to the URI.
// Fall back to the URI.
status.URL = status.URI
}
// status.Content