diff --git a/internal/ap/normalize.go b/internal/ap/normalize.go index 23035419f..5b88d9085 100644 --- a/internal/ap/normalize.go +++ b/internal/ap/normalize.go @@ -113,7 +113,7 @@ func normalizeContent(rawContent interface{}) string { // // TODO: sanitize differently based on mediaType. // https://www.w3.org/TR/activitystreams-vocabulary/#dfn-mediatype - content = text.SanitizeToHTML(content) + content = text.SanitizeHTML(content) content = text.MinifyHTML(content) return content } @@ -248,7 +248,7 @@ func NormalizeIncomingSummary(item WithSummary, rawJSON map[string]interface{}) // Summary should be HTML encoded: // https://www.w3.org/TR/activitystreams-vocabulary/#dfn-summary - summary = text.SanitizeToHTML(summary) + summary = text.SanitizeHTML(summary) summary = text.MinifyHTML(summary) // Set normalized summary property from the raw string; this @@ -339,7 +339,7 @@ func NormalizeIncomingName(item WithName, rawJSON map[string]interface{}) { // // todo: We probably want to update this to allow // *escaped* HTML markup, but for now just nuke it. - name = text.SanitizeToPlaintext(name) + name = text.RemoveHTML(name) // Set normalized name property from the raw string; this // will replace any existing name property on the item. @@ -369,7 +369,7 @@ func NormalizeIncomingValue(item WithValue, rawJSON map[string]interface{}) { // Value often contains links or // mentions or other little snippets. // Sanitize to HTML to allow these. - value = text.SanitizeToHTML(value) + value = text.SanitizeHTML(value) // Set normalized name property from the raw string; this // will replace any existing value property on the item. 
diff --git a/internal/api/client/admin/reportsget_test.go b/internal/api/client/admin/reportsget_test.go index 8639e0c6e..ec15b05d3 100644 --- a/internal/api/client/admin/reportsget_test.go +++ b/internal/api/client/admin/reportsget_test.go @@ -508,7 +508,7 @@ func (suite *ReportsGetTestSuite) TestReportsGetAll() { "muted": false, "bookmarked": false, "pinned": false, - "content": "dark souls status bot: \"thoughts of dog\"", + "content": "\u003cp\u003edark souls status bot: \"thoughts of dog\"\u003c/p\u003e", "reblog": null, "account": { "id": "01F8MH5ZK5VRH73AKHQM6Y9VNX", @@ -765,7 +765,7 @@ func (suite *ReportsGetTestSuite) TestReportsGetCreatedByAccount() { "muted": false, "bookmarked": false, "pinned": false, - "content": "dark souls status bot: \"thoughts of dog\"", + "content": "\u003cp\u003edark souls status bot: \"thoughts of dog\"\u003c/p\u003e", "reblog": null, "account": { "id": "01F8MH5ZK5VRH73AKHQM6Y9VNX", @@ -1022,7 +1022,7 @@ func (suite *ReportsGetTestSuite) TestReportsGetTargetAccount() { "muted": false, "bookmarked": false, "pinned": false, - "content": "dark souls status bot: \"thoughts of dog\"", + "content": "\u003cp\u003edark souls status bot: \"thoughts of dog\"\u003c/p\u003e", "reblog": null, "account": { "id": "01F8MH5ZK5VRH73AKHQM6Y9VNX", diff --git a/internal/api/client/search/searchget_test.go b/internal/api/client/search/searchget_test.go index 2c4efd19c..318010387 100644 --- a/internal/api/client/search/searchget_test.go +++ b/internal/api/client/search/searchget_test.go @@ -916,7 +916,7 @@ func (suite *SearchGetTestSuite) TestSearchAAny() { } suite.Len(searchResult.Accounts, 5) - suite.Len(searchResult.Statuses, 8) + suite.Len(searchResult.Statuses, 9) suite.Len(searchResult.Hashtags, 0) } @@ -959,7 +959,7 @@ func (suite *SearchGetTestSuite) TestSearchAAnyFollowingOnly() { } suite.Len(searchResult.Accounts, 2) - suite.Len(searchResult.Statuses, 8) + suite.Len(searchResult.Statuses, 9) suite.Len(searchResult.Hashtags, 0) } @@ -1002,7 
+1002,7 @@ func (suite *SearchGetTestSuite) TestSearchAStatuses() { } suite.Len(searchResult.Accounts, 0) - suite.Len(searchResult.Statuses, 8) + suite.Len(searchResult.Statuses, 9) suite.Len(searchResult.Hashtags, 0) } diff --git a/internal/api/client/statuses/statusboost_test.go b/internal/api/client/statuses/statusboost_test.go index 51b7d7652..03bc10513 100644 --- a/internal/api/client/statuses/statusboost_test.go +++ b/internal/api/client/statuses/statusboost_test.go @@ -144,7 +144,7 @@ func (suite *StatusBoostTestSuite) TestPostBoost() { }, "bookmarked": true, "card": null, - "content": "hello world! #welcome ! first post on the instance :rainbow: !", + "content": "

hello world! #welcome ! first post on the instance :rainbow: !

", "created_at": "right the hell just now babyee", "edited_at": null, "emojis": [ @@ -330,7 +330,7 @@ func (suite *StatusBoostTestSuite) TestPostBoostOwnFollowersOnly() { }, "bookmarked": false, "card": null, - "content": "hi!", + "content": "

<p>hi!</p>

", "created_at": "right the hell just now babyee", "edited_at": null, "emojis": [], diff --git a/internal/api/client/statuses/statusfave_test.go b/internal/api/client/statuses/statusfave_test.go index 8851b4d58..5862039f7 100644 --- a/internal/api/client/statuses/statusfave_test.go +++ b/internal/api/client/statuses/statusfave_test.go @@ -103,7 +103,7 @@ func (suite *StatusFaveTestSuite) TestPostFave() { }, "bookmarked": false, "card": null, - "content": "🐕🐕🐕🐕🐕", + "content": "

๐Ÿ•๐Ÿ•๐Ÿ•๐Ÿ•๐Ÿ•

", "created_at": "right the hell just now babyee", "edited_at": null, "emojis": [], diff --git a/internal/api/client/statuses/statushistory_test.go b/internal/api/client/statuses/statushistory_test.go index 61c15b58a..fe650402f 100644 --- a/internal/api/client/statuses/statushistory_test.go +++ b/internal/api/client/statuses/statushistory_test.go @@ -91,7 +91,7 @@ func (suite *StatusHistoryTestSuite) TestGetHistory() { suite.Equal(`[ { - "content": "hello everyone!", + "content": "\u003cp\u003ehello everyone!\u003c/p\u003e", "spoiler_text": "introduction post", "sensitive": true, "created_at": "2021-10-20T10:40:37.000Z", diff --git a/internal/api/client/statuses/statusmute_test.go b/internal/api/client/statuses/statusmute_test.go index 20ee590f8..bdc0cc0ad 100644 --- a/internal/api/client/statuses/statusmute_test.go +++ b/internal/api/client/statuses/statusmute_test.go @@ -108,7 +108,7 @@ func (suite *StatusMuteTestSuite) TestMuteUnmuteStatus() { "muted": true, "bookmarked": false, "pinned": false, - "content": "hello everyone!", + "content": "\u003cp\u003ehello everyone!\u003c/p\u003e", "reblog": null, "application": { "name": "really cool gts application", @@ -197,7 +197,7 @@ func (suite *StatusMuteTestSuite) TestMuteUnmuteStatus() { "muted": false, "bookmarked": false, "pinned": false, - "content": "hello everyone!", + "content": "\u003cp\u003ehello everyone!\u003c/p\u003e", "reblog": null, "application": { "name": "really cool gts application", diff --git a/internal/api/model/status.go b/internal/api/model/status.go index 2ee3123e6..a461b945f 100644 --- a/internal/api/model/status.go +++ b/internal/api/model/status.go @@ -123,6 +123,10 @@ type Status struct { type WebStatus struct { *Status + // HTML version of spoiler content + // (ie., not converted to plaintext). + SpoilerContent string `json:"-"` + // Override API account with web account. 
Account *WebAccount `json:"account"` diff --git a/internal/api/util/opengraph.go b/internal/api/util/opengraph.go index 094c80021..121f29595 100644 --- a/internal/api/util/opengraph.go +++ b/internal/api/util/opengraph.go @@ -67,7 +67,7 @@ func OGBase(instance *apimodel.InstanceV1) *OGMeta { } og := &OGMeta{ - Title: text.SanitizeToPlaintext(instance.Title) + " - GoToSocial", + Title: text.RemoveHTML(instance.Title) + " - GoToSocial", Type: "website", Locale: locale, URL: instance.URI, @@ -161,7 +161,7 @@ func AccountTitle(account *apimodel.WebAccount, accountDomain string) string { // ParseDescription returns a string description which is // safe to use as a template.HTMLAttr inside templates. func ParseDescription(in string) string { - i := text.SanitizeToPlaintext(in) + i := text.RemoveHTML(in) i = strings.ReplaceAll(i, "\n", " ") i = strings.Join(strings.Fields(i), " ") i = html.EscapeString(i) diff --git a/internal/cache/size.go b/internal/cache/size.go index 1c8c5fe2e..7641d6cf8 100644 --- a/internal/cache/size.go +++ b/internal/cache/size.go @@ -665,6 +665,7 @@ func sizeofStatus() uintptr { BoostOfID: exampleID, BoostOfAccountID: exampleID, ContentWarning: exampleUsername, // similar length + ContentWarningText: exampleUsername, // similar length Visibility: gtsmodel.VisibilityPublic, Sensitive: func() *bool { ok := false; return &ok }(), Language: "en", diff --git a/internal/db/bundb/migrations/20250305205820_content_warning_fixes.go b/internal/db/bundb/migrations/20250305205820_content_warning_fixes.go new file mode 100644 index 000000000..cf4de834c --- /dev/null +++ b/internal/db/bundb/migrations/20250305205820_content_warning_fixes.go @@ -0,0 +1,61 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software 
Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package migrations + +import ( + "context" + "fmt" + "reflect" + + newmodel "github.com/superseriousbusiness/gotosocial/internal/db/bundb/migrations/20250305205820_content_warning_fixes" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/uptrace/bun" +) + +func init() { + up := func(ctx context.Context, db *bun.DB) error { + return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + var newStatus *newmodel.Status + newStatusType := reflect.TypeOf(newStatus) + + // Generate new Status.ContentWarningText column definition from bun. + colDef, err := getBunColumnDef(tx, newStatusType, "ContentWarningText") + if err != nil { + return fmt.Errorf("error making column def: %w", err) + } + + log.Info(ctx, "adding statuses.content_warning_text column...") + _, err = tx.NewAddColumn().Model(newStatus). + ColumnExpr(colDef). 
+ Exec(ctx) + if err != nil { + return fmt.Errorf("error adding column: %w", err) + } + + return nil + }) + } + + down := func(ctx context.Context, db *bun.DB) error { + return nil + } + + if err := Migrations.Register(up, down); err != nil { + panic(err) + } +} diff --git a/internal/db/bundb/migrations/20250305205820_content_warning_fixes/interactionpolicy.go b/internal/db/bundb/migrations/20250305205820_content_warning_fixes/interactionpolicy.go new file mode 100644 index 000000000..9895acc22 --- /dev/null +++ b/internal/db/bundb/migrations/20250305205820_content_warning_fixes/interactionpolicy.go @@ -0,0 +1,99 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package gtsmodel + +// A policy URI is GoToSocial's internal representation of +// one ActivityPub URI for an Actor or a Collection of Actors, +// specific to the domain of enforcing interaction policies. +// +// A PolicyValue can be stored in the database either as one +// of the Value constants defined below (to save space), OR as +// a full-fledged ActivityPub URI. +// +// A PolicyValue should be translated to the canonical string +// value of the represented URI when federating an item, or +// from the canonical string value of the URI when receiving +// or retrieving an item. 
+// +// For example, if the PolicyValue `followers` was being +// federated outwards in an interaction policy attached to an +// item created by the actor `https://example.org/users/someone`, +// then it should be translated to their followers URI when sent, +// eg., `https://example.org/users/someone/followers`. +// +// Likewise, if GoToSocial receives an item with an interaction +// policy containing `https://example.org/users/someone/followers`, +// and the item was created by `https://example.org/users/someone`, +// then the followers URI would be converted to `followers` +// for internal storage. +type PolicyValue string + +const ( + // Stand-in for ActivityPub magic public URI, + // which encompasses every possible Actor URI. + PolicyValuePublic PolicyValue = "public" + // Stand-in for the Followers Collection of + // the item owner's Actor. + PolicyValueFollowers PolicyValue = "followers" + // Stand-in for the Following Collection of + // the item owner's Actor. + PolicyValueFollowing PolicyValue = "following" + // Stand-in for the Mutuals Collection of + // the item owner's Actor. + // + // (TODO: Reserved, currently unused). + PolicyValueMutuals PolicyValue = "mutuals" + // Stand-in for Actor URIs tagged in the item. + PolicyValueMentioned PolicyValue = "mentioned" + // Stand-in for the Actor URI of the item owner. + PolicyValueAuthor PolicyValue = "author" +) + +type PolicyValues []PolicyValue + +// An InteractionPolicy determines which +// interactions will be accepted for an +// item, and according to what rules. +type InteractionPolicy struct { + // Conditions in which a Like + // interaction will be accepted + // for an item with this policy. + CanLike PolicyRules + // Conditions in which a Reply + // interaction will be accepted + // for an item with this policy. + CanReply PolicyRules + // Conditions in which an Announce + // interaction will be accepted + // for an item with this policy. 
+ CanAnnounce PolicyRules +} + +// PolicyRules represents the rules according +// to which a certain interaction is permitted +// to various Actor and Actor Collection URIs. +type PolicyRules struct { + // Always is for PolicyValues who are + // permitted to do an interaction + // without requiring approval. + Always PolicyValues + // WithApproval is for PolicyValues who + // are conditionally permitted to do + // an interaction, pending approval. + WithApproval PolicyValues +} diff --git a/internal/db/bundb/migrations/20250305205820_content_warning_fixes/status.go b/internal/db/bundb/migrations/20250305205820_content_warning_fixes/status.go new file mode 100644 index 000000000..b48591937 --- /dev/null +++ b/internal/db/bundb/migrations/20250305205820_content_warning_fixes/status.go @@ -0,0 +1,62 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +package gtsmodel + +import ( + "time" +) + +type Status struct { + ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` + CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` + EditedAt time.Time `bun:"type:timestamptz,nullzero"` + FetchedAt time.Time `bun:"type:timestamptz,nullzero"` + PinnedAt time.Time `bun:"type:timestamptz,nullzero"` + URI string `bun:",unique,nullzero,notnull"` + URL string `bun:",nullzero"` + Content string `bun:""` + AttachmentIDs []string `bun:"attachments,array"` + TagIDs []string `bun:"tags,array"` + MentionIDs []string `bun:"mentions,array"` + EmojiIDs []string `bun:"emojis,array"` + Local *bool `bun:",nullzero,notnull,default:false"` + AccountID string `bun:"type:CHAR(26),nullzero,notnull"` + AccountURI string `bun:",nullzero,notnull"` + InReplyToID string `bun:"type:CHAR(26),nullzero"` + InReplyToURI string `bun:",nullzero"` + InReplyToAccountID string `bun:"type:CHAR(26),nullzero"` + BoostOfID string `bun:"type:CHAR(26),nullzero"` + BoostOfAccountID string `bun:"type:CHAR(26),nullzero"` + ThreadID string `bun:"type:CHAR(26),nullzero"` + EditIDs []string `bun:"edits,array"` + PollID string `bun:"type:CHAR(26),nullzero"` + ContentWarning string `bun:",nullzero"` + ContentWarningText string `bun:""` + Visibility Visibility `bun:",nullzero,notnull"` + Sensitive *bool `bun:",nullzero,notnull,default:false"` + Language string `bun:",nullzero"` + CreatedWithApplicationID string `bun:"type:CHAR(26),nullzero"` + ActivityStreamsType string `bun:",nullzero,notnull"` + Text string `bun:""` + Federated *bool `bun:",notnull"` + InteractionPolicy *InteractionPolicy `bun:""` + PendingApproval *bool `bun:",nullzero,notnull,default:false"` + ApprovedByURI string `bun:",nullzero"` +} + +type Visibility int16 diff --git a/internal/gtsmodel/status.go b/internal/gtsmodel/status.go index e170e7464..006ba06b6 100644 --- a/internal/gtsmodel/status.go +++ b/internal/gtsmodel/status.go @@ -33,7 +33,7 @@ type Status 
struct { PinnedAt time.Time `bun:"type:timestamptz,nullzero"` // Status was pinned by owning account at this time. URI string `bun:",unique,nullzero,notnull"` // activitypub URI of this status URL string `bun:",nullzero"` // web url for viewing this status - Content string `bun:""` // content of this status; likely html-formatted but not guaranteed + Content string `bun:""` // Content HTML for this status. AttachmentIDs []string `bun:"attachments,array"` // Database IDs of any media attachments associated with this status Attachments []*MediaAttachment `bun:"attached_media,rel:has-many"` // Attachments corresponding to attachmentIDs TagIDs []string `bun:"tags,array"` // Database IDs of any tags used in this status @@ -61,7 +61,8 @@ type Status struct { Edits []*StatusEdit `bun:"-"` // PollID string `bun:"type:CHAR(26),nullzero"` // Poll *Poll `bun:"-"` // - ContentWarning string `bun:",nullzero"` // cw string for this status + ContentWarning string `bun:",nullzero"` // Content warning HTML for this status. + ContentWarningText string `bun:""` // Original text of the content warning without formatting Visibility Visibility `bun:",nullzero,notnull"` // visibility entry for this status Sensitive *bool `bun:",nullzero,notnull,default:false"` // mark the status as sensitive? Language string `bun:",nullzero"` // what language is this status written in? diff --git a/internal/processing/account/rss_test.go b/internal/processing/account/rss_test.go index 5606151c2..0733b9d79 100644 --- a/internal/processing/account/rss_test.go +++ b/internal/processing/account/rss_test.go @@ -43,10 +43,10 @@ func (suite *GetRSSTestSuite) TestGetAccountRSSAdmin() { Wed, 20 Oct 2021 10:41:37 +0000 Wed, 20 Oct 2021 10:41:37 +0000 - open to see some puppies + open to see some <strong>puppies</strong> http://localhost:8080/@admin/statuses/01F8MHAAY43M6RJ473VQFCVH37 @admin@localhost:8080 made a new post: "๐Ÿ•๐Ÿ•๐Ÿ•๐Ÿ•๐Ÿ•" - + ๐Ÿ•๐Ÿ•๐Ÿ•๐Ÿ•๐Ÿ•

]]>
@admin@localhost:8080 http://localhost:8080/@admin/statuses/01F8MHAAY43M6RJ473VQFCVH37 Wed, 20 Oct 2021 12:36:45 +0000 @@ -56,7 +56,7 @@ func (suite *GetRSSTestSuite) TestGetAccountRSSAdmin() { hello world! #welcome ! first post on the instance :rainbow: ! http://localhost:8080/@admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R @admin@localhost:8080 posted 1 attachment: "hello world! #welcome ! first post on the instance :rainbow: !" - !]]> + hello world! #welcome ! first post on the instance :rainbow: !

]]>
@admin@localhost:8080 http://localhost:8080/@admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R @@ -145,7 +145,7 @@ func (suite *GetRSSTestSuite) TestGetAccountRSSZork() { introduction post http://localhost:8080/@the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY @the_mighty_zork@localhost:8080 made a new post: "hello everyone!" - + hello everyone!

]]>
@the_mighty_zork@localhost:8080 http://localhost:8080/@the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY Wed, 20 Oct 2021 10:40:37 +0000 diff --git a/internal/processing/account/update.go b/internal/processing/account/update.go index 2bdbf96f4..e65589fc5 100644 --- a/internal/processing/account/update.go +++ b/internal/processing/account/update.go @@ -98,7 +98,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form } // Parse new display name (always from plaintext). - account.DisplayName = text.SanitizeToPlaintext(displayName) + account.DisplayName = text.RemoveHTML(displayName) acctColumns = append(acctColumns, "display_name") } @@ -145,7 +145,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form } if form.AvatarDescription != nil { - desc := text.SanitizeToPlaintext(*form.AvatarDescription) + desc := text.RemoveHTML(*form.AvatarDescription) form.AvatarDescription = &desc } @@ -175,7 +175,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form } if form.HeaderDescription != nil { - desc := text.SanitizeToPlaintext(*form.HeaderDescription) + desc := text.RemoveHTML(*form.HeaderDescription) form.HeaderDescription = util.Ptr(desc) } @@ -265,7 +265,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form return nil, gtserror.NewErrorBadRequest(err, err.Error()) } - account.Settings.CustomCSS = text.SanitizeToPlaintext(customCSS) + account.Settings.CustomCSS = text.RemoveHTML(customCSS) settingsColumns = append(settingsColumns, "custom_css") } @@ -356,8 +356,8 @@ func (p *Processor) updateFields( // Sanitize raw field values. 
fieldRaw := >smodel.Field{ - Name: text.SanitizeToPlaintext(name), - Value: text.SanitizeToPlaintext(value), + Name: text.RemoveHTML(name), + Value: text.RemoveHTML(value), } fieldsRaw = append(fieldsRaw, fieldRaw) } @@ -385,7 +385,7 @@ func (p *Processor) processAccountText( emojis := make(map[string]*gtsmodel.Emoji) // Retrieve display name emojis. - for _, emoji := range p.formatter.FromPlainEmojiOnly( + for _, emoji := range p.formatter.FromPlainBasic( ctx, p.parseMention, account.ID, @@ -413,7 +413,7 @@ func (p *Processor) processAccountText( // Name stays plain, but we still need to // see if there are any emojis set in it. field.Name = fieldRaw.Name - for _, emoji := range p.formatter.FromPlainEmojiOnly( + for _, emoji := range p.formatter.FromPlainBasic( ctx, p.parseMention, account.ID, diff --git a/internal/processing/admin/domainallow.go b/internal/processing/admin/domainallow.go index 13f0307f2..d752ef202 100644 --- a/internal/processing/admin/domainallow.go +++ b/internal/processing/admin/domainallow.go @@ -53,8 +53,8 @@ func (p *Processor) createDomainAllow( ID: id.NewULID(), Domain: domain, CreatedByAccountID: adminAcct.ID, - PrivateComment: text.SanitizeToPlaintext(privateComment), - PublicComment: text.SanitizeToPlaintext(publicComment), + PrivateComment: text.RemoveHTML(privateComment), + PublicComment: text.RemoveHTML(publicComment), Obfuscate: &obfuscate, SubscriptionID: subscriptionID, } diff --git a/internal/processing/admin/domainblock.go b/internal/processing/admin/domainblock.go index f8c1a6708..62a6d5fea 100644 --- a/internal/processing/admin/domainblock.go +++ b/internal/processing/admin/domainblock.go @@ -53,8 +53,8 @@ func (p *Processor) createDomainBlock( ID: id.NewULID(), Domain: domain, CreatedByAccountID: adminAcct.ID, - PrivateComment: text.SanitizeToPlaintext(privateComment), - PublicComment: text.SanitizeToPlaintext(publicComment), + PrivateComment: text.RemoveHTML(privateComment), + PublicComment: text.RemoveHTML(publicComment), 
Obfuscate: &obfuscate, SubscriptionID: subscriptionID, } diff --git a/internal/processing/instance.go b/internal/processing/instance.go index 2f4c40416..62a1685a0 100644 --- a/internal/processing/instance.go +++ b/internal/processing/instance.go @@ -165,7 +165,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe } // Don't allow html in site title. - instance.Title = text.SanitizeToPlaintext(title) + instance.Title = text.RemoveHTML(title) columns = append(columns, "title") } @@ -235,7 +235,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe return nil, gtserror.NewErrorBadRequest(err, err.Error()) } - instance.CustomCSS = text.SanitizeToPlaintext(customCSS) + instance.CustomCSS = text.RemoveHTML(customCSS) columns = append(columns, []string{"custom_css"}...) } diff --git a/internal/processing/media/update.go b/internal/processing/media/update.go index c8592395f..1eaa74764 100644 --- a/internal/processing/media/update.go +++ b/internal/processing/media/update.go @@ -87,7 +87,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, media // processDescription will sanitize and valid description against server configuration. func processDescription(description string) (string, gtserror.WithCode) { - description = text.SanitizeToPlaintext(description) + description = text.RemoveHTML(description) chars := len([]rune(description)) if min := config.GetMediaDescriptionMinChars(); chars < min { diff --git a/internal/processing/status/common.go b/internal/processing/status/common.go index 3f2b7b6cb..e1e6e1902 100644 --- a/internal/processing/status/common.go +++ b/internal/processing/status/common.go @@ -142,9 +142,13 @@ func (p *Processor) processContent( ) } - // format is the currently set text formatting - // function, according to the provided content-type. 
- var format text.FormatFunc + var ( + // format is the currently set text formatting + // function, according to the provided content-type. + format text.FormatFunc + // formatCW is like format, but for content warning. + formatCW text.FormatFunc + ) if contentType == "" { // If content type wasn't specified, use @@ -157,10 +161,12 @@ func (p *Processor) processContent( // Format status according to text/plain. case "", string(apimodel.StatusContentTypePlain): format = p.formatter.FromPlain + formatCW = p.formatter.FromPlainBasic // Format status according to text/markdown. case string(apimodel.StatusContentTypeMarkdown): format = p.formatter.FromMarkdown + formatCW = p.formatter.FromMarkdownBasic // Unknown. default: @@ -192,26 +198,23 @@ func (p *Processor) processContent( status.Emojis = contentRes.Emojis status.Tags = contentRes.Tags - // From here-on-out just use emoji-only - // plain-text formatting as the FormatFunc. - format = p.formatter.FromPlainEmojiOnly - // Sanitize content warning and format. - warning := text.SanitizeToPlaintext(contentWarning) - warningRes := formatInput(format, warning) + cwRes := formatInput(formatCW, contentWarning) // Gather results of the formatted. - status.ContentWarning = warningRes.HTML - status.Emojis = append(status.Emojis, warningRes.Emojis...) + status.ContentWarning = cwRes.HTML + status.Emojis = append(status.Emojis, cwRes.Emojis...) if poll != nil { // Pre-allocate slice of poll options of expected length. status.PollOptions = make([]string, len(poll.Options)) for i, option := range poll.Options { - // Sanitize each poll option and format. - option = text.SanitizeToPlaintext(option) - optionRes := formatInput(format, option) + // Strip each poll option and format. + // + // For polls just use basic formatting. + option = text.RemoveHTML(option) + optionRes := formatInput(p.formatter.FromPlainBasic, option) // Gather results of the formatted. 
status.PollOptions[i] = optionRes.HTML diff --git a/internal/processing/status/create.go b/internal/processing/status/create.go index 727c12084..b6c265511 100644 --- a/internal/processing/status/create.go +++ b/internal/processing/status/create.go @@ -143,6 +143,14 @@ func (p *Processor) Create( } } + // Only store ContentWarningText if the parsed + // result is different from the given SpoilerText, + // otherwise skip to avoid duplicating db columns. + var contentWarningText string + if content.ContentWarning != form.SpoilerText { + contentWarningText = form.SpoilerText + } + status := >smodel.Status{ ID: statusID, URI: accountURIs.StatusesURI + "/" + statusID, @@ -160,9 +168,10 @@ func (p *Processor) Create( Language: content.Language, // Set formatted status content. - Content: content.Content, - ContentWarning: content.ContentWarning, - Text: form.Status, // raw + Content: content.Content, + ContentWarning: content.ContentWarning, + Text: form.Status, // raw + ContentWarningText: contentWarningText, // raw // Set gathered mentions. 
MentionIDs: content.MentionIDs, diff --git a/internal/processing/status/create_test.go b/internal/processing/status/create_test.go index 16cefcebf..6fe1c24d6 100644 --- a/internal/processing/status/create_test.go +++ b/internal/processing/status/create_test.go @@ -60,33 +60,6 @@ func (suite *StatusCreateTestSuite) TestProcessContentWarningWithQuotationMarks( suite.Equal("\"test\"", apiStatus.SpoilerText) } -func (suite *StatusCreateTestSuite) TestProcessContentWarningWithHTMLEscapedQuotationMarks() { - ctx := context.Background() - - creatingAccount := suite.testAccounts["local_account_1"] - creatingApplication := suite.testApplications["application_1"] - - statusCreateForm := &apimodel.StatusCreateRequest{ - Status: "poopoo peepee", - MediaIDs: []string{}, - Poll: nil, - InReplyToID: "", - Sensitive: false, - SpoilerText: ""test"", // the html-escaped quotation marks should appear as normal quotation marks in the finished text - Visibility: apimodel.VisibilityPublic, - LocalOnly: util.Ptr(false), - ScheduledAt: nil, - Language: "en", - ContentType: apimodel.StatusContentTypePlain, - } - - apiStatus, err := suite.status.Create(ctx, creatingAccount, creatingApplication, statusCreateForm) - suite.NoError(err) - suite.NotNil(apiStatus) - - suite.Equal("\"test\"", apiStatus.SpoilerText) -} - func (suite *StatusCreateTestSuite) TestProcessStatusMarkdownWithUnderscoreEmoji() { ctx := context.Background() diff --git a/internal/processing/status/delete.go b/internal/processing/status/delete.go index 700909f44..8fec8fc5e 100644 --- a/internal/processing/status/delete.go +++ b/internal/processing/status/delete.go @@ -50,6 +50,13 @@ func (p *Processor) Delete(ctx context.Context, requestingAccount *gtsmodel.Acco return nil, errWithCode } + // Replace content warning with raw + // version if it's available, to make + // delete + redraft work nicer. 
+ if targetStatus.ContentWarningText != "" { + apiStatus.SpoilerText = targetStatus.ContentWarningText + } + // Process delete side effects. p.state.Workers.Client.Queue.Push(&messages.FromClientAPI{ APObjectType: ap.ObjectNote, diff --git a/internal/processing/status/edit.go b/internal/processing/status/edit.go index 95665074e..96fbbee27 100644 --- a/internal/processing/status/edit.go +++ b/internal/processing/status/edit.go @@ -297,13 +297,22 @@ func (p *Processor) Edit( // update the other necessary status fields. status.Content = content.Content status.ContentWarning = content.ContentWarning - status.Text = form.Status + status.Text = form.Status // raw status.Language = content.Language status.Sensitive = &form.Sensitive status.AttachmentIDs = form.MediaIDs status.Attachments = media status.EditedAt = now + // Only store ContentWarningText if the parsed + // result is different from the given SpoilerText, + // otherwise skip to avoid duplicating db columns. + var contentWarningText string + if content.ContentWarning != form.SpoilerText { + contentWarningText = form.SpoilerText + } + status.ContentWarningText = contentWarningText // raw + if poll != nil { // Set relevent fields for latest with poll. status.ActivityStreamsType = ap.ActivityQuestion diff --git a/internal/processing/status/get.go b/internal/processing/status/get.go index 812f01683..9ef52e0a6 100644 --- a/internal/processing/status/get.go +++ b/internal/processing/status/get.go @@ -52,9 +52,20 @@ func (p *Processor) SourceGet(ctx context.Context, requester *gtsmodel.Account, "target status not found", ) } + + // Try to use unparsed content + // warning text if available, + // fall back to parsed cw html. 
+ var spoilerText string + if status.ContentWarningText != "" { + spoilerText = status.ContentWarningText + } else { + spoilerText = status.ContentWarning + } + return &apimodel.StatusSource{ ID: status.ID, Text: status.Text, - SpoilerText: status.ContentWarning, + SpoilerText: spoilerText, }, nil } diff --git a/internal/processing/stream/statusupdate_test.go b/internal/processing/stream/statusupdate_test.go index 180538c60..1f0bcd142 100644 --- a/internal/processing/stream/statusupdate_test.go +++ b/internal/processing/stream/statusupdate_test.go @@ -71,7 +71,7 @@ func (suite *StatusUpdateTestSuite) TestStreamNotification() { "muted": false, "bookmarked": false, "pinned": false, - "content": "dark souls status bot: \"thoughts of dog\"", + "content": "\u003cp\u003edark souls status bot: \"thoughts of dog\"\u003c/p\u003e", "reblog": null, "account": { "id": "01F8MH5ZK5VRH73AKHQM6Y9VNX", diff --git a/internal/processing/user/create.go b/internal/processing/user/create.go index d2891ef0e..fb7188ab9 100644 --- a/internal/processing/user/create.go +++ b/internal/processing/user/create.go @@ -122,7 +122,7 @@ func (p *Processor) Create( Username: form.Username, Email: form.Email, Password: form.Password, - Reason: text.SanitizeToPlaintext(reason), + Reason: text.RemoveHTML(reason), SignUpIP: form.IP, Locale: form.Locale, AppID: app.ID, diff --git a/internal/text/formatter_test.go b/internal/text/formatter_test.go index 07e176278..a078111c1 100644 --- a/internal/text/formatter_test.go +++ b/internal/text/formatter_test.go @@ -93,6 +93,16 @@ func (suite *TextStandardTestSuite) FromMarkdown(input string) *text.FormatResul ) } +func (suite *TextStandardTestSuite) FromMarkdownBasic(input string) *text.FormatResult { + return suite.formatter.FromMarkdownBasic( + context.Background(), + suite.parseMention, + suite.testAccounts["local_account_1"].ID, + "dummy_status_ID", + input, + ) +} + func (suite *TextStandardTestSuite) FromPlain(input string) *text.FormatResult { return 
suite.formatter.FromPlain( context.Background(), diff --git a/internal/text/markdown.go b/internal/text/markdown.go index 50cd6a141..4fa8bf9f8 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -20,6 +20,8 @@ package text import ( "bytes" "context" + "regexp" + "strings" "codeberg.org/gruf/go-byteutil" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -27,11 +29,15 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/regexes" "github.com/yuin/goldmark" "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/renderer/html" ) // FromMarkdown fulfils FormatFunc by parsing // the given markdown input into a FormatResult. +// +// Inline (aka unsafe) HTML elements are allowed, +// as they should be sanitized afterwards anyway. func (f *Formatter) FromMarkdown( ctx context.Context, parseMention gtsmodel.ParseMentionFunc, @@ -39,18 +45,79 @@ func (f *Formatter) FromMarkdown( statusID string, input string, ) *FormatResult { - result := new(FormatResult) + return f.fromMarkdown( + ctx, + false, // basic = false + parseMention, + authorID, + statusID, + input, + ) +} + +// FromMarkdownBasic fulfils FormatFunc by parsing +// the given markdown input into a FormatResult. +// +// Unlike FromMarkdown, it will only parse emojis with +// the custom renderer, leaving aside mentions and tags. +// +// Inline (aka unsafe) HTML elements are not allowed. +// +// If the result is a single paragraph, +// it will not be wrapped in

tags. +func (f *Formatter) FromMarkdownBasic( + ctx context.Context, + parseMention gtsmodel.ParseMentionFunc, + authorID string, + statusID string, + input string, +) *FormatResult { + res := f.fromMarkdown( + ctx, + true, // basic = true + parseMention, + authorID, + statusID, + input, + ) + + res.HTML = unwrapParagraph(res.HTML) + return res +} + +// fromMarkdown parses the given input text either +// with or without emojis, and returns the result. +func (f *Formatter) fromMarkdown( + ctx context.Context, + basic bool, + parseMention gtsmodel.ParseMentionFunc, + authorID string, + statusID string, + input string, +) *FormatResult { + var ( + result = new(FormatResult) + rendererOptions = []renderer.Option{ + html.WithXHTML(), + html.WithHardWraps(), + } + ) + + if !basic { + // Allow raw HTML. We sanitize + // at the end so this is OK. + rendererOptions = append( + rendererOptions, + html.WithUnsafe(), + ) + } // Instantiate goldmark parser for // markdown, using custom renderer // to add hashtag/mention links. md := goldmark.New( goldmark.WithRendererOptions( - html.WithXHTML(), - html.WithHardWraps(), - // Allows raw HTML. We sanitize - // at the end so this is OK. - html.WithUnsafe(), + rendererOptions..., ), goldmark.WithExtensions( &customRenderer{ @@ -59,7 +126,9 @@ func (f *Formatter) FromMarkdown( parseMention, authorID, statusID, - false, // emojiOnly = false. + // If basic, pass + // emojiOnly = true. + basic, result, }, // Turns URLs into links. @@ -85,8 +154,36 @@ func (f *Formatter) FromMarkdown( // Clean and shrink HTML. result.HTML = byteutil.B2S(htmlBytes.Bytes()) - result.HTML = SanitizeToHTML(result.HTML) + result.HTML = SanitizeHTML(result.HTML) result.HTML = MinifyHTML(result.HTML) return result } + +var parasRegexp = regexp.MustCompile(``) + +// unwrapParagraph removes opening and closing paragraph tags +// of input HTML, if input html is a single paragraph only. +func unwrapParagraph(html string) string { + if !strings.HasPrefix(html, "

") { + return html + } + + if !strings.HasSuffix(html, "

") { + return html + } + + // Make a substring excluding the + // opening and closing paragraph tags. + sub := html[3 : len(html)-4] + + // If there are still other paragraph tags left + // inside the substring, return html unchanged. + containsOtherParas := parasRegexp.MatchString(sub) + if containsOtherParas { + return html + } + + // Return the substring. + return sub +} diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go index 153673415..0aed299ae 100644 --- a/internal/text/markdown_test.go +++ b/internal/text/markdown_test.go @@ -41,43 +41,45 @@ that was some JSON :) ` const ( - simpleMarkdown = "# Title\n\nHere's a simple text in markdown.\n\nHere's a [link](https://example.org)." - simpleMarkdownExpected = "

Title

Here's a simple text in markdown.

Here's a link.

" - withCodeBlockExpected = "

Title

Below is some JSON.

{\n  "key": "value",\n  "another_key": [\n    "value1",\n    "value2"\n  ]\n}\n

that was some JSON :)

" - withInlineCode = "`Nobody tells you about the SECRET CODE, do they?`" - withInlineCodeExpected = "

Nobody tells you about the <code><del>SECRET CODE</del></code>, do they?

" - withInlineCode2 = "`Nobody tells you about the SECRET CODE, do they?`" - withInlineCode2Expected = "

Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?

" - withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!" - withHashtagExpected = "

Title

here's a simple status that uses hashtag #Hashtag!

" - withTamilHashtag = "here's a simple status that uses a hashtag in Tamil #தமிழ்" - withTamilHashtagExpected = "

here's a simple status that uses a hashtag in Tamil #தமிழ்

" - mdWithHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a link.\n\nHere's an image: \"The" - mdWithHTMLExpected = "

Title

Here's a simple text in markdown.

Here's a link.

Here's an image:

" - mdWithCheekyHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a cheeky little script: " - mdWithCheekyHTMLExpected = "

Title

Here's a simple text in markdown.

Here's a cheeky little script:

" - mdWithHashtagInitial = "#welcome #Hashtag" - mdWithHashtagInitialExpected = "

#welcome #Hashtag

" - mdCodeBlockWithNewlines = "some code coming up\n\n```\n\n\n\n```\nthat was some code" - mdCodeBlockWithNewlinesExpected = "

some code coming up

\n\n\n

that was some code

" - mdWithFootnote = "fox mulder,fbi.[^1]\n\n[^1]: federated bureau of investigation" - mdWithFootnoteExpected = "

fox mulder,fbi.[^1]

[^1]: federated bureau of investigation

" - mdWithBlockQuote = "get ready, there's a block quote coming:\n\n>line1\n>line2\n>\n>line3\n\n" - mdWithBlockQuoteExpected = "

get ready, there's a block quote coming:

line1
line2

line3

" - mdHashtagAndCodeBlock = "#Hashtag\n\n```\n#Hashtag\n```" - mdHashtagAndCodeBlockExpected = "

#Hashtag

#Hashtag\n
" - mdMentionAndCodeBlock = "@the_mighty_zork\n\n```\n@the_mighty_zork\n```" - mdMentionAndCodeBlockExpected = "

@the_mighty_zork

@the_mighty_zork\n
" - mdWithSmartypants = "\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping" - mdWithSmartypantsExpected = "

\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping

" - mdWithAsciiHeart = "hello <3 old friend <3 i loved u hello <3 old friend <3 i loved u </3 :(( you stole my heart

" - mdWithStrikethrough = "I have ~~mdae~~ made an error" - mdWithStrikethroughExpected = "

I have mdae made an error

" - mdWithLink = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial" - mdWithLinkExpected = "

Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial

" - mdObjectInCodeBlock = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps" - mdObjectInCodeBlockExpected = "

@foss_satan this is how to mention a user

@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n

hope that helps

" + simpleMarkdown = "# Title\n\nHere's a simple text in markdown.\n\nHere's a [link](https://example.org)." + simpleMarkdownExpected = "

Title

Here's a simple text in markdown.

Here's a link.

" + withCodeBlockExpected = "

Title

Below is some JSON.

{\n  "key": "value",\n  "another_key": [\n    "value1",\n    "value2"\n  ]\n}\n

that was some JSON :)

" + withInlineCode = "`Nobody tells you about the SECRET CODE, do they?`" + withInlineCodeExpected = "

Nobody tells you about the <code><del>SECRET CODE</del></code>, do they?

" + withInlineCode2 = "`Nobody tells you about the
SECRET CODE, do they?`" + withInlineCode2Expected = "

Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?

" + withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!" + withHashtagExpected = "

Title

here's a simple status that uses hashtag #Hashtag!

" + withTamilHashtag = "here's a simple status that uses a hashtag in Tamil #தமிழ்" + withTamilHashtagExpected = "

here's a simple status that uses a hashtag in Tamil #தமிழ்

" + mdWithHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a link.\n\nHere's an image: \"The" + mdWithHTMLExpected = "

Title

Here's a simple text in markdown.

Here's a link.

Here's an image:

" + mdWithCheekyHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a cheeky little script: " + mdWithCheekyHTMLExpected = "

Title

Here's a simple text in markdown.

Here's a cheeky little script:

" + mdWithHashtagInitial = "#welcome #Hashtag" + mdWithHashtagInitialExpected = "

#welcome #Hashtag

" + mdCodeBlockWithNewlines = "some code coming up\n\n```\n\n\n\n```\nthat was some code" + mdCodeBlockWithNewlinesExpected = "

some code coming up

\n\n\n

that was some code

" + mdWithFootnote = "fox mulder,fbi.[^1]\n\n[^1]: federated bureau of investigation" + mdWithFootnoteExpected = "

fox mulder,fbi.[^1]

[^1]: federated bureau of investigation

" + mdWithBlockQuote = "get ready, there's a block quote coming:\n\n>line1\n>line2\n>\n>line3\n\n" + mdWithBlockQuoteExpected = "

get ready, there's a block quote coming:

line1
line2

line3

" + mdHashtagAndCodeBlock = "#Hashtag\n\n```\n#Hashtag\n```" + mdHashtagAndCodeBlockExpected = "

#Hashtag

#Hashtag\n
" + mdMentionAndCodeBlock = "@the_mighty_zork\n\n```\n@the_mighty_zork\n```" + mdMentionAndCodeBlockExpected = "

@the_mighty_zork

@the_mighty_zork\n
" + mdMentionAndCodeBlockBasicExpected = "

@the_mighty_zork

@the_mighty_zork\n
" + mdWithSmartypants = "\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping" + mdWithSmartypantsExpected = "

\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping

" + mdWithAsciiHeart = "hello <3 old friend <3 i loved u hello <3 old friend <3 i loved u </3 :(( you stole my heart

" + mdWithStrikethrough = "I have ~~mdae~~ made an error" + mdWithStrikethroughExpected = "

I have mdae made an error

" + mdWithLink = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial" + mdWithLinkExpected = "

Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial

" + mdWithLinkBasicExpected = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial" + mdObjectInCodeBlock = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps" + mdObjectInCodeBlockExpected = "

@foss_satan this is how to mention a user

@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n

hope that helps

" // Hashtags can be italicized but only with *, not _. mdItalicHashtag = "*#hashtag*" mdItalicHashtagExpected = "

#hashtag

" @@ -169,6 +171,11 @@ func (suite *MarkdownTestSuite) TestParseMentionWithCodeBlock() { suite.Equal(mdMentionAndCodeBlockExpected, formatted.HTML) } +func (suite *MarkdownTestSuite) TestParseMentionWithCodeBlockBasic() { + formatted := suite.FromMarkdownBasic(mdMentionAndCodeBlock) + suite.Equal(mdMentionAndCodeBlockBasicExpected, formatted.HTML) +} + func (suite *MarkdownTestSuite) TestParseSmartypants() { formatted := suite.FromMarkdown(mdWithSmartypants) suite.Equal(mdWithSmartypantsExpected, formatted.HTML) @@ -189,6 +196,11 @@ func (suite *MarkdownTestSuite) TestParseLink() { suite.Equal(mdWithLinkExpected, formatted.HTML) } +func (suite *MarkdownTestSuite) TestParseLinkBasic() { + formatted := suite.FromMarkdownBasic(mdWithLink) + suite.Equal(mdWithLinkBasicExpected, formatted.HTML) +} + func (suite *MarkdownTestSuite) TestParseObjectInCodeBlock() { formatted := suite.FromMarkdown(mdObjectInCodeBlock) suite.Equal(mdObjectInCodeBlockExpected, formatted.HTML) diff --git a/internal/text/plain.go b/internal/text/plain.go index 362941773..246d0001c 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -20,8 +20,10 @@ package text import ( "bytes" "context" + "strings" "codeberg.org/gruf/go-byteutil" + "github.com/k3a/html2text" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/regexes" @@ -52,7 +54,7 @@ func (f *Formatter) FromPlain( return f.fromPlain( ctx, plainTextParser, - false, // emojiOnly = false + false, // basic = false parseMention, authorID, statusID, @@ -85,7 +87,7 @@ func (f *Formatter) FromPlainNoParagraph( return f.fromPlain( ctx, plainTextParser, - false, // emojiOnly = false + false, // basic = false parseMention, authorID, statusID, @@ -93,12 +95,14 @@ func (f *Formatter) FromPlainNoParagraph( ) } -// FromPlainEmojiOnly fulfils FormatFunc by parsing +// FromPlainBasic fulfils FormatFunc by parsing // the 
given plaintext input into a FormatResult. // // Unlike FromPlain, it will only parse emojis with // the custom renderer, leaving aside mentions and tags. -func (f *Formatter) FromPlainEmojiOnly( +// +// Resulting HTML will also NOT be wrapped in

tags. +func (f *Formatter) FromPlainBasic( ctx context.Context, parseMention gtsmodel.ParseMentionFunc, authorID string, @@ -116,7 +120,7 @@ func (f *Formatter) FromPlainEmojiOnly( return f.fromPlain( ctx, plainTextParser, - true, // emojiOnly = true + true, // basic = true parseMention, authorID, statusID, @@ -130,7 +134,7 @@ func (f *Formatter) FromPlainEmojiOnly( func (f *Formatter) fromPlain( ctx context.Context, plainTextParser parser.Parser, - emojiOnly bool, + basic bool, parseMention gtsmodel.ParseMentionFunc, authorID string, statusID string, @@ -156,7 +160,9 @@ func (f *Formatter) fromPlain( parseMention, authorID, statusID, - emojiOnly, + // If basic, pass + // emojiOnly = true. + basic, result, }, // Turns URLs into links. @@ -181,8 +187,20 @@ func (f *Formatter) fromPlain( // Clean and shrink HTML. result.HTML = byteutil.B2S(htmlBytes.Bytes()) - result.HTML = SanitizeToHTML(result.HTML) + result.HTML = SanitizeHTML(result.HTML) result.HTML = MinifyHTML(result.HTML) return result } + +// HTMLToPlain parses the given HTML and then outputs +// it to close-as-possible equivalent plaintext. +func HTMLToPlain(html string) string { + plain := html2text.HTML2TextWithOptions( + html, + html2text.WithLinksInnerText(), + html2text.WithUnixLineBreaks(), + html2text.WithListSupport(), + ) + return strings.TrimSpace(plain) +} diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go index ffa64ce44..594c5e13e 100644 --- a/internal/text/plain_test.go +++ b/internal/text/plain_test.go @@ -21,6 +21,7 @@ import ( "testing" "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/text" ) const ( @@ -28,8 +29,8 @@ const ( simpleExpected = "

this is a plain and simple status

" simpleExpectedNoParagraph = "this is a plain and simple status" withTag = "here's a simple status that uses hashtag #welcome!" - withTagExpected = "

here's a simple status that uses hashtag #welcome!

" - withTagExpectedNoParagraph = "here's a simple status that uses hashtag #welcome!" + withTagExpected = "

here's a simple status that uses hashtag #welcome!

" + withTagExpectedNoParagraph = "here's a simple status that uses hashtag #welcome!" withHTML = "
blah this should just be html escaped blah
" withHTMLExpected = "

<div>blah this should just be html escaped blah</div>

" moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText\n\n:rainbow:" @@ -183,6 +184,73 @@ func (suite *PlainTestSuite) TestNumbersAreNotHashtags() { suite.Len(f.Tags, 0) } +func (suite *PlainTestSuite) TestHTMLToPlain() { + for _, t := range []struct { + html string + expectedPlain string + }{ + { + // Check newlines between paras preserved. + html: "

butting into a serious discussion about programming languages*: \"elixir? I barely know 'er! honk honk!\"

*insofar as any discussion about programming languages can truly be considered \"serious\" since programmers are fucking clowns

", + expectedPlain: `butting into a serious discussion about programming languages*: "elixir? I barely know 'er! honk honk!" + +*insofar as any discussion about programming languages can truly be considered "serious" since programmers are fucking clowns`, + }, + { + // This one looks a bit wacky but nobody should + // be putting definition lists in summaries *really*. + html: "
Published
Replies
0
Favourites
4
Reblogs
0
Language
Englishen
", + expectedPlain: `PublishedJan 16, 2025, 00:49Replies0Favourites4Reblogs0LanguageEnglishen`, + }, + { + // Check
converted to newlines and leading / trailing space removed. + html: "

i'm a milf,
i'm a lover,
do your mom,
do your brother

i'm a sinner,
i'm a saint,
i will not be ashamed!



", + expectedPlain: `i'm a milf, +i'm a lover, +do your mom, +do your brother + +i'm a sinner, +i'm a saint, +i will not be ashamed!`, + }, + { + // Check newlines, links, lists still more or less readable as such. + html: "

Hello everyone, after a week or two down the release candidate mines, we've emerged blinking into the light carrying with us #GoToSocial v0.18.0 Scroingly Sloth!

https://github.com/superseriousbusiness/gotosocial/releases/tag/v0.18.0

Please read the migration notes carefully for instructions on how to upgrade to this version. This version contains several very long migrations so you will need to be patient when upgrading, and backup your database first!!

Release highlights

  • Status edit support: one of our most-requested features! You can now edit your own statuses, and see instance edit history from other accounts too (if your instance has them stored).
  • Push notifications: probably the second most-requested feature! GoToSocial can now send push notifications to clients via their configured push providers.
    You may need to uninstall / reinstall client applications, or log out and back in again, for this feature to work. (And if you're using Tusky, make sure you've got ntfy installed).
  • Global instance css customization: admins can now apply custom CSS across their entire instance via the settings panel.
  • Domain permission subscriptions: it's now possible to configure your instance to subscribe to CSV, JSON, or plaintext lists of domain permissions.
    Each night, your instance will fetch and automatically create domain permissions (or permission drafts) based on what it finds in a subscribed list.
    See the domain permission subscription documentation for more information.
  • Trusted-proxies helper: instances with improperly configured trusted-proxies settings will now show a warning on the homepage, so admins can make sure their instance is configured correctly. Check your own instance homepage after updating to see if you need to do anything.
  • Better outbox sorting: messages from GoToSocial are now delivered more quickly to people you mention, so conversations across instances should feel a bit snappier.
  • Log in button: there's now a login button in the top right of the instance homepage, which leads to a helpful page about clients, with a link to the settings panel. Should make things less confusing for new users!
  • Granular stats controls: with the instance-stats-mode setting, admins can now choose if and how their instance serves stats via the nodeinfo endpoints. Existing behavior from v0.17.0 is the default.
  • Post backdating: via the API you can now backdate posts (if enabled in config.yaml). This is our first step towards making it possible to import your post history from elsewhere into your GoToSocial instance. While there's no way to do this in the settings panel yet, you can already use third-party tools like Slurp to import posts from a Mastodon export (see Slurp).
  • Configurable sign-up limits: you can now configure your sign-up backlog length and sign-up throttling (defaults remain the same).
  • NetBSD and FreeBSD builds: yep!
  • Respect users prefers-color-scheme preference: there's now a light mode default theme to complement our trusty dark mode theme, and the theme will switch based on a visitor's prefers-color-scheme configuration. This applies to all page and profiles, with the exception of some custom themes. Works in the settings panel too!

Thanks for reading! And seriously back up your database.

", + expectedPlain: `Hello everyone, after a week or two down the release candidate mines, we've emerged blinking into the light carrying with us #GoToSocial v0.18.0 Scroingly Sloth! + +https://github.com/superseriousbusiness/gotosocial/releases/tag/v0.18.0 + +Please read the migration notes carefully for instructions on how to upgrade to this version. This version contains several very long migrations so you will need to be patient when upgrading, and backup your database first!! + +Release highlights + + + - Status edit support: one of our most-requested features! You can now edit your own statuses, and see instance edit history from other accounts too (if your instance has them stored). + - Push notifications: probably the second most-requested feature! GoToSocial can now send push notifications to clients via their configured push providers. +You may need to uninstall / reinstall client applications, or log out and back in again, for this feature to work. (And if you're using Tusky, make sure you've got ntfy installed ). + - Global instance css customization: admins can now apply custom CSS across their entire instance via the settings panel. + - Domain permission subscriptions: it's now possible to configure your instance to subscribe to CSV, JSON, or plaintext lists of domain permissions. +Each night, your instance will fetch and automatically create domain permissions (or permission drafts) based on what it finds in a subscribed list. +See the domain permission subscription documentation for more information. + - Trusted-proxies helper: instances with improperly configured trusted-proxies settings will now show a warning on the homepage, so admins can make sure their instance is configured correctly. Check your own instance homepage after updating to see if you need to do anything. + - Better outbox sorting: messages from GoToSocial are now delivered more quickly to people you mention, so conversations across instances should feel a bit snappier. 
+ - Log in button: there's now a login button in the top right of the instance homepage, which leads to a helpful page about clients, with a link to the settings panel. Should make things less confusing for new users! + - Granular stats controls: with the instance-stats-mode setting, admins can now choose if and how their instance serves stats via the nodeinfo endpoints. Existing behavior from v0.17.0 is the default. + - Post backdating: via the API you can now backdate posts (if enabled in config.yaml). This is our first step towards making it possible to import your post history from elsewhere into your GoToSocial instance. While there's no way to do this in the settings panel yet, you can already use third-party tools like Slurp to import posts from a Mastodon export (see Slurp ). + - Configurable sign-up limits: you can now configure your sign-up backlog length and sign-up throttling (defaults remain the same). + - NetBSD and FreeBSD builds: yep! + - Respect users prefers-color-scheme preference: there's now a light mode default theme to complement our trusty dark mode theme, and the theme will switch based on a visitor's prefers-color-scheme configuration. This applies to all page and profiles, with the exception of some custom themes. Works in the settings panel too! + + +Thanks for reading! 
And seriously back up your database.`, + }, + } { + plain := text.HTMLToPlain(t.html) + suite.Equal(t.expectedPlain, plain) + } +} + func TestPlainTestSuite(t *testing.T) { suite.Run(t, new(PlainTestSuite)) } diff --git a/internal/text/removehtml_test.go b/internal/text/removehtml_test.go deleted file mode 100644 index 43a3504b3..000000000 --- a/internal/text/removehtml_test.go +++ /dev/null @@ -1,56 +0,0 @@ -// GoToSocial -// Copyright (C) GoToSocial Authors admin@gotosocial.org -// SPDX-License-Identifier: AGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package text - -import ( - "testing" - - "github.com/stretchr/testify/suite" -) - -const ( - test_removeHTML = `

Another test @foss_satan

#Hashtag

Text

` - test_removedHTML = `Another test @foss_satan#HashtagText` - test_withEscapedLiteral = `it\u0026amp;#39;s its it is` - test_withEscapedLiteralExpected = `it\u0026amp;#39;s its it is` - test_withEscaped = "it\u0026amp;#39;s its it is" - test_withEscapedExpected = "it&#39;s its it is" -) - -type RemoveHTMLTestSuite struct { - suite.Suite -} - -func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscapedLiteral() { - s := removeHTML(test_withEscapedLiteral) - suite.Equal(test_withEscapedLiteralExpected, s) -} - -func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscaped() { - s := removeHTML(test_withEscaped) - suite.Equal(test_withEscapedExpected, s) -} - -func (suite *RemoveHTMLTestSuite) TestRemoveHTML() { - s := removeHTML(test_removeHTML) - suite.Equal(test_removedHTML, s) -} - -func TestRemoveHTMLTestSuite(t *testing.T) { - suite.Run(t, &RemoveHTMLTestSuite{}) -} diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go index 81c436264..b532bec9a 100644 --- a/internal/text/sanitize.go +++ b/internal/text/sanitize.go @@ -163,27 +163,21 @@ var regular *bluemonday.Policy = func() *bluemonday.Policy { // Source: https://github.com/microcosm-cc/bluemonday#usage var strict *bluemonday.Policy = bluemonday.StrictPolicy() -// removeHTML strictly removes *all* recognized -// HTML elements from the given string. -func removeHTML(in string) string { - return strict.Sanitize(in) -} - -// SanitizeToHTML sanitizes only risky html elements +// SanitizeHTML sanitizes only risky html elements // from the given string, allowing safe ones through. -func SanitizeToHTML(in string) string { +func SanitizeHTML(in string) string { return regular.Sanitize(in) } -// SanitizeToPlaintext runs text through basic sanitization. -// This removes any html elements that were in the string, -// and returns clean plaintext. -func SanitizeToPlaintext(in string) string { +// RemoveHTML runs text through strict sanitization. 
+// This removes any html elements that were in the +// string, and returns pruned plaintext. +func RemoveHTML(in string) string { // Unescape first to catch any tricky critters. content := html.UnescapeString(in) // Remove all detected HTML. - content = removeHTML(content) + content = strict.Sanitize(content) // Unescape again to return plaintext. content = html.UnescapeString(content) diff --git a/internal/text/sanitize_test.go b/internal/text/sanitize_test.go index ae49c942c..68f9fefce 100644 --- a/internal/text/sanitize_test.go +++ b/internal/text/sanitize_test.go @@ -36,30 +36,30 @@ type SanitizeTestSuite struct { } func (suite *SanitizeTestSuite) TestSanitizeOutgoing() { - s := text.SanitizeToHTML(sanitizeOutgoing) + s := text.SanitizeHTML(sanitizeOutgoing) suite.Equal(sanitizedOutgoing, s) } func (suite *SanitizeTestSuite) TestSanitizeHTML() { - s := text.SanitizeToHTML(sanitizeHTML) + s := text.SanitizeHTML(sanitizeHTML) suite.Equal(sanitizedHTML, s) } func (suite *SanitizeTestSuite) TestSanitizeCaption1() { dodgyCaption := "this is just a normal caption ;)" - sanitized := text.SanitizeToPlaintext(dodgyCaption) + sanitized := text.RemoveHTML(dodgyCaption) suite.Equal("this is just a normal caption ;)", sanitized) } func (suite *SanitizeTestSuite) TestSanitizeCaption2() { dodgyCaption := "here's a LOUD caption" - sanitized := text.SanitizeToPlaintext(dodgyCaption) + sanitized := text.RemoveHTML(dodgyCaption) suite.Equal("here's a LOUD caption", sanitized) } func (suite *SanitizeTestSuite) TestSanitizeCaption3() { dodgyCaption := "" - sanitized := text.SanitizeToPlaintext(dodgyCaption) + sanitized := text.RemoveHTML(dodgyCaption) suite.Equal("", sanitized) } @@ -75,21 +75,21 @@ with some newlines ` - sanitized := text.SanitizeToPlaintext(dodgyCaption) + sanitized := text.RemoveHTML(dodgyCaption) suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized) } func (suite *SanitizeTestSuite) TestSanitizeCaption5() { // html-escaped: " hello 
world" dodgyCaption := `<script>console.log('aha!')</script> hello world` - sanitized := text.SanitizeToPlaintext(dodgyCaption) + sanitized := text.RemoveHTML(dodgyCaption) suite.Equal("hello world", sanitized) } func (suite *SanitizeTestSuite) TestSanitizeCaption6() { // html-encoded: " hello world" dodgyCaption := `<script>console.log('aha!')</script> hello world` - sanitized := text.SanitizeToPlaintext(dodgyCaption) + sanitized := text.RemoveHTML(dodgyCaption) suite.Equal("hello world", sanitized) } @@ -104,27 +104,27 @@ func (suite *SanitizeTestSuite) TestSanitizeCustomCSS() { overflow: hidden; text-overflow: ellipsis; }` - sanitized := text.SanitizeToPlaintext(customCSS) + sanitized := text.RemoveHTML(customCSS) suite.Equal(customCSS, sanitized) // should be the same as it was before } func (suite *SanitizeTestSuite) TestSanitizeNaughtyCustomCSS1() { // try to break out of pee pee poo poo