[feature] Add opt-in RSS feed for account's latest Public posts (#897)

* start adding rss functionality

* add gorilla/feeds dependency

* first bash at building rss feed
still needs work, this is an interim commit

* tidy up a bit

* add publicOnly option to GetAccountLastPosted

* implement rss endpoint

* fix test

* add initial user docs for rss

* update rss logo

* docs update

* add rssFeed to frontend

* feed -> feed.rss

* enableRSS

* increase rss logo size a lil bit

* add rss toggle

* move emojify to text package

* fiddle with rss feed formatting

* add Text field to test statuses

* move status to rss item to typeconverter

* update bun schema for enablerss

* simplify 304 checking

* assume account not rss

* update tests

* update swagger docs

* allow more characters in title, trim nicer

* update last posted to be more consistent
This commit is contained in:
tobi 2022-10-08 14:00:39 +02:00 committed by GitHub
commit 80663061d8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
58 changed files with 2282 additions and 211 deletions

View file

@ -19,7 +19,9 @@
package web
import (
"fmt"
"net/http"
"path"
"path/filepath"
"strings"
@ -60,9 +62,91 @@ func (m *Module) mountAssetsFilesystem(group *gin.RouterGroup) {
fs := fileSystem{http.Dir(webAssetsAbsFilePath)}
// use the cache middleware on all handlers in this group
group.Use(m.cacheControlMiddleware(fs))
group.Use(m.assetsCacheControlMiddleware(fs))
// serve static file system in the root of this group,
// will end up being something like "/assets/"
group.StaticFS("/", fs)
}
// getAssetETag returns the ETag for the file at filePath inside fs.
//
// The file is always opened and statted so that its modification time can be
// compared against the module's eTagCache. If the cache holds an entry for
// filePath and the file has not been modified after that entry was stored,
// the cached ETag is returned. Otherwise a new ETag is generated from the
// file's contents, stored in the cache together with the file's modification
// time, and returned.
//
// An error is returned if the file can't be opened, statted, or read; the
// underlying error is wrapped so callers can inspect it with errors.Is/As.
func (m *Module) getAssetETag(filePath string, fs http.FileSystem) (string, error) {
	file, err := fs.Open(filePath)
	if err != nil {
		return "", fmt.Errorf("error opening %s: %w", filePath, err)
	}
	defer file.Close()

	fileInfo, err := file.Stat()
	if err != nil {
		return "", fmt.Errorf("error statting %s: %w", filePath, err)
	}

	fileLastModified := fileInfo.ModTime()

	if cachedETag, ok := m.eTagCache.Get(filePath); ok && !fileLastModified.After(cachedETag.lastModified) {
		// only return our cached etag if the file wasn't
		// modified since last time, otherwise generate a
		// new one; eat fresh!
		return cachedETag.eTag, nil
	}

	eTag, err := generateEtag(file)
	if err != nil {
		return "", fmt.Errorf("error generating etag: %w", err)
	}

	// put new entry in cache before we return
	m.eTagCache.Set(filePath, eTagCacheEntry{
		eTag:         eTag,
		lastModified: fileLastModified,
	})

	return eTag, nil
}
// assetsCacheControlMiddleware returns a gin handler that sets Cache-Control
// and ETag headers for files served out of the given http.FileSystem, and
// answers conditional requests.
//
// If the request carries an If-None-Match header equal to the asset's current
// ETag, the request is aborted with 304 Not Modified. If the ETag can't be
// determined, the error is logged and the request carries on down the handler
// chain without an ETag header.
//
// See: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match
// and: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
func (m *Module) assetsCacheControlMiddleware(fs http.FileSystem) gin.HandlerFunc {
	return func(c *gin.Context) {
		// Tell clients to revalidate with us before each reuse of a cached copy
		// (https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control).
		c.Header(cacheControlHeader, cacheControlNoCache)

		clientETag := c.Request.Header.Get(ifNoneMatchHeader)

		// Work out where the requested asset lives inside the filesystem:
		// make the URL path rooted, clean it, then drop the assets prefix.
		requestPath := c.Request.URL.Path
		if !strings.HasPrefix(requestPath, "/") {
			requestPath = "/" + requestPath
		}
		cleanedPath := path.Clean(requestPath)
		assetFilePath := strings.TrimPrefix(cleanedPath, assetsPathPrefix)

		// Cached or freshly generated, whichever getAssetETag gives us.
		currentETag, err := m.getAssetETag(assetFilePath, fs)
		if err != nil {
			log.Errorf("error getting ETag for %s: %s", assetFilePath, err)
			return
		}

		// Always advertise the current ETag, whatever the outcome below.
		c.Header(eTagHeader, currentETag)

		if clientETag != currentETag {
			// Client has nothing, or something outdated:
			// let the remaining handlers serve the asset.
			return
		}

		// Client is already up to date; 304 and stop here.
		c.AbortWithStatus(http.StatusNotModified)
	}
}

View file

@ -1,138 +0,0 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package web
import (
// nolint:gosec
"crypto/sha1"
"encoding/hex"
"fmt"
"io"
"net/http"
"path"
"strings"
"time"
"github.com/gin-gonic/gin"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
// eTagCacheEntry is the value type stored in the module's assets ETag cache.
// It pairs a generated ETag with the modification time of the file the ETag
// was generated from, so a later lookup can tell whether the file changed.
type eTagCacheEntry struct {
// eTag is the entity tag value, as returned by generateEtag.
eTag string
// fileLastModified is the file's ModTime at the moment eTag was generated.
fileLastModified time.Time
}
// generateEtag reads r to the end and returns a strong (byte-for-byte) ETag
// for what it read: the hex-encoded SHA-1 digest of the content, wrapped in
// double quotes. If reading from r fails, that error is returned unchanged
// together with an empty string.
func generateEtag(r io.Reader) (string, error) {
	// nolint:gosec
	hasher := sha1.New()

	_, err := io.Copy(hasher, r)
	if err != nil {
		return "", err
	}

	digest := hasher.Sum(nil)
	return `"` + hex.EncodeToString(digest) + `"`, nil
}
// getAssetETag returns the ETag for the file at filePath inside fs.
//
// The file is always opened and statted so that its modification time can be
// compared against the module's assetsETagCache. If the cache holds an entry
// for filePath and the file has not been modified after that entry was
// stored, the cached ETag is returned. Otherwise a new ETag is generated from
// the file's contents, stored in the cache together with the file's
// modification time, and returned.
//
// An error is returned if the file can't be opened, statted, or read; the
// underlying error is wrapped so callers can inspect it with errors.Is/As.
func (m *Module) getAssetETag(filePath string, fs http.FileSystem) (string, error) {
	file, err := fs.Open(filePath)
	if err != nil {
		return "", fmt.Errorf("error opening %s: %w", filePath, err)
	}
	defer file.Close()

	fileInfo, err := file.Stat()
	if err != nil {
		return "", fmt.Errorf("error statting %s: %w", filePath, err)
	}

	fileLastModified := fileInfo.ModTime()

	if cachedETag, ok := m.assetsETagCache.Get(filePath); ok && !fileLastModified.After(cachedETag.fileLastModified) {
		// only return our cached etag if the file wasn't
		// modified since last time, otherwise generate a
		// new one; eat fresh!
		return cachedETag.eTag, nil
	}

	eTag, err := generateEtag(file)
	if err != nil {
		return "", fmt.Errorf("error generating etag: %w", err)
	}

	// put new entry in cache before we return
	m.assetsETagCache.Set(filePath, eTagCacheEntry{
		eTag:             eTag,
		fileLastModified: fileLastModified,
	})

	return eTag, nil
}
// cacheControlMiddleware returns a gin handler that sets Cache-Control and
// Etag headers for files inside the given http.FileSystem, and answers
// conditional requests.
//
// When the request's If-None-Match header equals the file's current ETag the
// request is aborted with 304 Not Modified. When the ETag can't be obtained,
// the error is logged and the request is left to the remaining handlers.
//
// See: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match
// and: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
func (m *Module) cacheControlMiddleware(fs http.FileSystem) gin.HandlerFunc {
	return func(c *gin.Context) {
		// no-cache stops clients from applying default or heuristic caching,
		// and makes them validate their cached copy against the server's
		// version before reusing it.
		c.Header("Cache-Control", "no-cache")

		presentedETag := c.Request.Header.Get("If-None-Match")

		// Locate the requested asset inside the provided filesystem.
		urlPath := c.Request.URL.Path
		if !strings.HasPrefix(urlPath, "/") {
			urlPath = "/" + urlPath
		}
		urlPath = path.Clean(urlPath)
		assetFilePath := strings.TrimPrefix(urlPath, assetsPathPrefix)

		// Take the etag from the cache, or have it generated.
		eTag, err := m.getAssetETag(assetFilePath, fs)
		if err != nil {
			log.Errorf("error getting ETag for %s: %s", assetFilePath, err)
			return
		}

		// Whatever happens next, the client gets told the current etag.
		c.Header("Etag", eTag)

		switch presentedETag {
		case eTag:
			// Client already holds the latest version: 304 + bail.
			c.AbortWithStatus(http.StatusNotModified)
		default:
			// Otherwise the request is processed normally further down.
		}
	}
}

View file

@ -29,6 +29,8 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
)
const textCSSUTF8 = string(api.TextCSS + "; charset=utf-8")
func (m *Module) customCSSGETHandler(c *gin.Context) {
if !config.GetAccountsAllowCustomCSS() {
err := errors.New("accounts-allow-custom-css is not enabled on this instance")
@ -55,6 +57,6 @@ func (m *Module) customCSSGETHandler(c *gin.Context) {
return
}
c.Header("Cache-Control", "no-cache")
c.Data(http.StatusOK, "text/css; charset=utf-8", []byte(customCSS))
c.Header(cacheControlHeader, cacheControlNoCache)
c.Data(http.StatusOK, textCSSUTF8, []byte(customCSS))
}

61
internal/web/etag.go Normal file
View file

@ -0,0 +1,61 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package web
import (
// nolint:gosec
"crypto/sha1"
"encoding/hex"
"io"
"time"
"github.com/superseriousbusiness/gotosocial/internal/log"
"codeberg.org/gruf/go-cache/v2"
)
// newETagCache creates the cache used to hold eTagCacheEntry values keyed by
// string, configures it with a TTL of one hour, and starts it with a
// one-minute interval. It panics via log.Panic if the cache reports that it
// could not be started.
//
// NOTE(review): the second SetTTL argument and the exact meaning of the Start
// interval are defined by the go-cache library — confirm against its docs.
func newETagCache() cache.Cache[string, eTagCacheEntry] {
	c := cache.New[string, eTagCacheEntry]()
	c.SetTTL(time.Hour, false)

	started := c.Start(time.Minute)
	if !started {
		log.Panic("could not start eTagCache")
	}

	return c
}
// eTagCacheEntry is the value type stored in the module's ETag cache. It
// pairs a generated ETag with the modification time of the content it was
// generated from, so a later lookup can tell whether the content has changed.
type eTagCacheEntry struct {
// eTag is the entity tag value, as returned by generateEtag.
eTag string
// lastModified is the modification time of the underlying content at the
// moment eTag was generated (a file's ModTime for assets, or the account's
// last public post time for RSS feeds).
lastModified time.Time
}
// generateEtag consumes the whole of r and produces a strong (byte-for-byte)
// ETag from it: the SHA-1 digest of everything read, hex-encoded and enclosed
// in double quotes. A failure while reading r is handed back as-is, with an
// empty string in place of the ETag.
func generateEtag(r io.Reader) (string, error) {
	// nolint:gosec
	h := sha1.New()
	if _, err := io.Copy(h, r); err != nil {
		return "", err
	}

	sum := h.Sum(nil)

	// Assemble `"` + hex(sum) + `"` directly in one buffer.
	quoted := make([]byte, hex.EncodedLen(len(sum))+2)
	quoted[0] = '"'
	hex.Encode(quoted[1:], sum)
	quoted[len(quoted)-1] = '"'

	return string(quoted), nil
}

View file

@ -82,6 +82,11 @@ func (m *Module) profileGETHandler(c *gin.Context) {
return
}
var rssFeed string
if account.EnableRSS {
rssFeed = "/@" + account.Username + "/feed.rss"
}
// only allow search engines / robots to view this page if account is discoverable
var robotsMeta string
if account.Discoverable {
@ -118,6 +123,7 @@ func (m *Module) profileGETHandler(c *gin.Context) {
"instance": instance,
"account": account,
"ogMeta": ogBase(instance).withAccount(account),
"rssFeed": rssFeed,
"robotsMeta": robotsMeta,
"statuses": statusResp.Items,
"statuses_next": statusResp.NextLink,

154
internal/web/rss.go Normal file
View file

@ -0,0 +1,154 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package web
import (
"bytes"
"errors"
"fmt"
"net/http"
"strings"
"time"
"github.com/gin-gonic/gin"
"github.com/superseriousbusiness/gotosocial/internal/api"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/log"
)
const appRSSUTF8 = string(api.AppRSSXML + "; charset=utf-8")
// GetRSSETag returns the ETag for the RSS feed served at urlPath.
//
// If the module's eTagCache holds an entry for urlPath, and the given
// lastModified is not after the lastModified stored in that entry, the cached
// ETag is returned and getRSSFeed is not called. Otherwise getRSSFeed is
// called to build the feed, an ETag is generated from the feed's contents,
// and the result is stored in the cache under urlPath together with
// lastModified before being returned.
//
// An error is returned if getRSSFeed fails or the ETag can't be generated.
func (m *Module) GetRSSETag(urlPath string, lastModified time.Time, getRSSFeed func() (string, gtserror.WithCode)) (string, error) {
	if cachedETag, ok := m.eTagCache.Get(urlPath); ok && !lastModified.After(cachedETag.lastModified) {
		// only return our cached etag if the feed wasn't
		// modified since last time, otherwise generate a
		// new one; eat fresh!
		return cachedETag.eTag, nil
	}

	rssFeed, errWithCode := getRSSFeed()
	if errWithCode != nil {
		return "", fmt.Errorf("error getting rss feed: %s", errWithCode)
	}

	eTag, err := generateEtag(bytes.NewReader([]byte(rssFeed)))
	if err != nil {
		// wrap so that callers can still inspect the underlying error
		return "", fmt.Errorf("error generating etag: %w", err)
	}

	// put new entry in cache before we return
	m.eTagCache.Set(urlPath, eTagCacheEntry{
		eTag:         eTag,
		lastModified: lastModified,
	})

	return eTag, nil
}
// extractIfModifiedSince turns the raw value of an If-Modified-Since header
// into a time.Time using http.ParseTime. An empty header yields the zero
// time. A header that can't be parsed is logged and also yields the zero
// time, so callers can treat "absent" and "unusable" the same way.
func extractIfModifiedSince(header string) time.Time {
	var none time.Time

	if header == "" {
		return none
	}

	parsed, err := http.ParseTime(header)
	if err == nil {
		return parsed
	}

	log.Errorf("couldn't parse if-modified-since %s: %s", header, err)
	return none
}
// rssFeedGETHandler serves the RSS feed for the account whose username is
// given in the request path.
//
// The handler:
//   - responds 406 if the client doesn't accept RSS XML, and 400 if no
//     username was provided;
//   - asks the processor for a feed-building function and the time of the
//     account's last public post, passing on any error the processor returns;
//   - keeps an ETag per request path in the module's eTagCache, regenerating
//     the feed + ETag when there's no entry yet or the entry is older than
//     the account's last public post;
//   - always sets ETag and Last-Modified on the response;
//   - responds 304 Not Modified to conditional requests whose validators
//     show the client is up to date, otherwise 200 with the feed as body.
func (m *Module) rssFeedGETHandler(c *gin.Context) {
	// set this Cache-Control header to instruct clients to validate the response with us
	// before each reuse (https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control)
	c.Header(cacheControlHeader, cacheControlNoCache)

	ctx := c.Request.Context()

	if _, err := api.NegotiateAccept(c, api.AppRSSXML); err != nil {
		api.ErrorHandler(c, gtserror.NewErrorNotAcceptable(err, err.Error()), m.processor.InstanceGet)
		return
	}

	// usernames on our instance will always be lowercase
	username := strings.ToLower(c.Param(usernameKey))
	if username == "" {
		err := errors.New("no account username specified")
		api.ErrorHandler(c, gtserror.NewErrorBadRequest(err, err.Error()), m.processor.InstanceGet)
		return
	}

	ifNoneMatch := c.Request.Header.Get(ifNoneMatchHeader)
	ifModifiedSince := extractIfModifiedSince(c.Request.Header.Get(ifModifiedSinceHeader))

	getRssFeed, accountLastPostedPublic, errWithCode := m.processor.AccountGetRSSFeedForUsername(ctx, username)
	if errWithCode != nil {
		api.ErrorHandler(c, errWithCode, m.processor.InstanceGet)
		return
	}

	var rssFeed string
	cacheKey := c.Request.URL.Path
	cacheEntry, ok := m.eTagCache.Get(cacheKey)

	if !ok || cacheEntry.lastModified.Before(accountLastPostedPublic) {
		// we either have no cache entry for this, or we have an expired cache entry; generate a new one
		rssFeed, errWithCode = getRssFeed()
		if errWithCode != nil {
			api.ErrorHandler(c, errWithCode, m.processor.InstanceGet)
			return
		}

		eTag, err := generateEtag(bytes.NewBufferString(rssFeed))
		if err != nil {
			api.ErrorHandler(c, gtserror.NewErrorInternalError(err), m.processor.InstanceGet)
			return
		}

		cacheEntry.lastModified = accountLastPostedPublic
		cacheEntry.eTag = eTag

		// Use Set rather than Put here: we may be replacing an outdated
		// entry, and Put leaves an already-present key untouched, which
		// would keep the stale entry around and force the feed to be
		// rebuilt on every request until the entry's TTL ran out.
		m.eTagCache.Set(cacheKey, cacheEntry)
	}

	c.Header(eTagHeader, cacheEntry.eTag)
	// http.TimeFormat hard-codes "GMT" as the zone, so the time must be in UTC
	// for the formatted value to be correct.
	c.Header(lastModifiedHeader, accountLastPostedPublic.UTC().Format(http.TimeFormat))

	if ifNoneMatch != "" {
		// If-None-Match takes precedence: when it's present,
		// If-Modified-Since must not be evaluated at all.
		if ifNoneMatch == cacheEntry.eTag {
			c.AbortWithStatus(http.StatusNotModified)
			return
		}
	} else if !ifModifiedSince.IsZero() {
		// Only honour If-Modified-Since when the client actually sent a
		// parseable value; otherwise the zero time would compare equal to
		// the zero lastModified of an account that has never posted, and
		// an unconditional request would wrongly receive a 304.
		//
		// HTTP dates have one-second resolution, so compare whole seconds.
		if cacheEntry.lastModified.Unix() <= ifModifiedSince.Unix() {
			c.AbortWithStatus(http.StatusNotModified)
			return
		}
	}

	if rssFeed == "" {
		// we had a cache entry already so we didn't call to get the rss feed yet
		rssFeed, errWithCode = getRssFeed()
		if errWithCode != nil {
			api.ErrorHandler(c, errWithCode, m.processor.InstanceGet)
			return
		}
	}

	c.Data(http.StatusOK, appRSSUTF8, []byte(rssFeed))
}

View file

@ -21,7 +21,6 @@ package web
import (
"errors"
"net/http"
"time"
"codeberg.org/gruf/go-cache/v2"
"github.com/gin-gonic/gin"
@ -36,6 +35,7 @@ const (
confirmEmailPath = "/" + uris.ConfirmEmailPath
profilePath = "/@:" + usernameKey
customCSSPath = profilePath + "/custom.css"
rssFeedPath = profilePath + "/feed.rss"
statusPath = profilePath + "/statuses/:" + statusIDKey
assetsPathPrefix = "/assets"
userPanelPath = "/settings/user"
@ -44,23 +44,26 @@ const (
tokenParam = "token"
usernameKey = "username"
statusIDKey = "status"
cacheControlHeader = "Cache-Control" // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
cacheControlNoCache = "no-cache" // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
ifModifiedSinceHeader = "If-Modified-Since" // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since
ifNoneMatchHeader = "If-None-Match" // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match
eTagHeader = "ETag" // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag
lastModifiedHeader = "Last-Modified" // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified
)
// Module implements the api.ClientModule interface for web pages.
type Module struct {
processor processing.Processor
assetsETagCache cache.Cache[string, eTagCacheEntry]
processor processing.Processor
eTagCache cache.Cache[string, eTagCacheEntry]
}
// New returns a new api.ClientModule for web pages.
func New(processor processing.Processor) api.ClientModule {
assetsETagCache := cache.New[string, eTagCacheEntry]()
assetsETagCache.SetTTL(time.Hour, false)
assetsETagCache.Start(time.Minute)
return &Module{
processor: processor,
assetsETagCache: assetsETagCache,
processor: processor,
eTagCache: newETagCache(),
}
}
@ -99,6 +102,8 @@ func (m *Module) Route(s router.Router) error {
// serve custom css at /@username/custom.css
s.AttachHandler(http.MethodGet, customCSSPath, m.customCSSGETHandler)
s.AttachHandler(http.MethodGet, rssFeedPath, m.rssFeedGETHandler)
// serve statuses
s.AttachHandler(http.MethodGet, statusPath, m.threadGETHandler)