[chore] remove nollamas middleware for now (after discussions with a security advisor) (#4433)

i'll keep this on a separate branch for now while i experiment with other possible alternatives, but for now both our hacky implementation especially, and more popular ones (like anubis) aren't looking too great on the deterrent front: https://github.com/eternal-flame-AD/pow-buster

Co-authored-by: tobi <tobi.smethurst@protonmail.com>
Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4433
Co-authored-by: kim <grufwub@gmail.com>
Co-committed-by: kim <grufwub@gmail.com>
This commit is contained in:
kim 2025-09-17 14:16:53 +02:00 committed by kim
commit 6801ce299a
28 changed files with 207 additions and 1395 deletions

View file

@ -279,13 +279,12 @@ type CacheConfiguration struct {
}
type AdvancedConfig struct {
CookiesSamesite string `name:"cookies-samesite" usage:"'strict' or 'lax', see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite"`
SenderMultiplier int `name:"sender-multiplier" usage:"Multiplier to use per cpu for batching outgoing fedi messages. 0 or less turns batching off (not recommended)."`
CSPExtraURIs []string `name:"csp-extra-uris" usage:"Additional URIs to allow when building content-security-policy for media + images."`
HeaderFilterMode string `name:"header-filter-mode" usage:"Set incoming request header filtering mode."`
RateLimit RateLimitConfig `name:"rate-limit"`
Throttling ThrottlingConfig `name:"throttling"`
ScraperDeterrence ScraperDeterrenceConfig `name:"scraper-deterrence"`
CookiesSamesite string `name:"cookies-samesite" usage:"'strict' or 'lax', see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite"`
SenderMultiplier int `name:"sender-multiplier" usage:"Multiplier to use per cpu for batching outgoing fedi messages. 0 or less turns batching off (not recommended)."`
CSPExtraURIs []string `name:"csp-extra-uris" usage:"Additional URIs to allow when building content-security-policy for media + images."`
HeaderFilterMode string `name:"header-filter-mode" usage:"Set incoming request header filtering mode."`
RateLimit RateLimitConfig `name:"rate-limit"`
Throttling ThrottlingConfig `name:"throttling"`
}
type RateLimitConfig struct {
@ -297,8 +296,3 @@ type ThrottlingConfig struct {
Multiplier int `name:"multiplier" usage:"Multiplier to use per cpu for http request throttling. 0 or less turns throttling off."`
RetryAfter time.Duration `name:"retry-after" usage:"Retry-After duration response to send for throttled requests."`
}
type ScraperDeterrenceConfig struct {
Enabled bool `name:"enabled" usage:"Enable proof-of-work based scraper deterrence on profile / status pages"`
Difficulty uint32 `name:"difficulty" usage:"The proof-of-work difficulty, which determines roughly how many hash-encode rounds required of each client."`
}

View file

@ -175,7 +175,6 @@ func TestCLIParsing(t *testing.T) {
"--config-path", "testdata/test3.yaml",
},
expected: expectedKV(
kv.Field{"advanced-scraper-deterrence-enabled", true},
kv.Field{"advanced-rate-limit-requests", 5000},
),
},

View file

@ -154,11 +154,6 @@ var Defaults = Configuration{
Multiplier: 8, // 8 open requests per CPU
RetryAfter: 30 * time.Second,
},
ScraperDeterrence: ScraperDeterrenceConfig{
Enabled: false,
Difficulty: 100000,
},
},
Cache: CacheConfiguration{

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,3 @@
advanced:
scraper-deterrence:
enabled: true
rate-limit:
requests: 5000

View file

@ -1,385 +0,0 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package middleware
import (
"context"
"crypto/rand"
"crypto/sha256"
"crypto/subtle"
"encoding/hex"
"hash"
"io"
"net/http"
"strconv"
"time"
apimodel "code.superseriousbusiness.org/gotosocial/internal/api/model"
apiutil "code.superseriousbusiness.org/gotosocial/internal/api/util"
"code.superseriousbusiness.org/gotosocial/internal/config"
"code.superseriousbusiness.org/gotosocial/internal/gtscontext"
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
"code.superseriousbusiness.org/gotosocial/internal/log"
"code.superseriousbusiness.org/gotosocial/internal/oauth"
"codeberg.org/gruf/go-bitutil"
"codeberg.org/gruf/go-byteutil"
"github.com/gin-gonic/gin"
)
// NoLLaMas returns a piece of HTTP middleware that provides a deterrence
// on routes it is applied to, against bots and scrapers. It generates a
// unique but deterministic challenge for each HTTP client within an hour
// TTL that requires a proof-of-work solution to pass onto the next handler.
// On successful solution, the client is provided a cookie that allows them
// to bypass this check within that hour TTL. The outcome of this is that it
// should make scraping of these endpoints economically unfeasible, when enabled,
// and with an absurdly minimal performance impact. The downside is that it
// requires javascript to be enabled on the client to pass the middleware check.
//
// Heavily inspired by: https://github.com/TecharoHQ/anubis
func NoLLaMas(
cookiePolicy apiutil.CookiePolicy,
getInstanceV1 func(context.Context) (*apimodel.InstanceV1, gtserror.WithCode),
) gin.HandlerFunc {
if !config.GetAdvancedScraperDeterrenceEnabled() {
// NoLLaMas middleware disabled.
return func(*gin.Context) {}
}
var seed [32]byte
// Read random data for the token seed.
_, err := io.ReadFull(rand.Reader, seed[:])
if err != nil {
panic(err)
}
// Configure nollamas.
var nollamas nollamas
nollamas.entropy = seed
nollamas.ttl = time.Hour
nollamas.rounds = config.GetAdvancedScraperDeterrenceDifficulty()
nollamas.getInstanceV1 = getInstanceV1
nollamas.policy = cookiePolicy
return nollamas.Serve
}
// i.e. hash slice length.
const hashLen = sha256.Size
// i.e. hex.EncodedLen(hashLen).
const encodedHashLen = 2 * hashLen
// hashWithBufs encompasses a hash along
// with the necessary buffers to generate
// a hashsum and then encode that sum.
type hashWithBufs struct {
hash hash.Hash
hbuf [hashLen]byte
ebuf [encodedHashLen]byte
}
// write is a passthrough to hash.Hash{}.Write().
func (h *hashWithBufs) write(b []byte) {
_, _ = h.hash.Write(b)
}
// writeString is a passthrough to hash.Hash{}.Write([]byte(s)).
func (h *hashWithBufs) writeString(s string) {
_, _ = h.hash.Write(byteutil.S2B(s))
}
// EncodedSum returns the hex encoded sum of hash.Sum().
func (h *hashWithBufs) EncodedSum() string {
_ = h.hash.Sum(h.hbuf[:0])
hex.Encode(h.ebuf[:], h.hbuf[:])
return string(h.ebuf[:])
}
// Reset will reset hash and buffers.
func (h *hashWithBufs) Reset() {
h.ebuf = [encodedHashLen]byte{}
h.hbuf = [hashLen]byte{}
h.hash.Reset()
}
type nollamas struct {
// our instance cookie policy.
policy apiutil.CookiePolicy
// unique entropy
// to prevent hashes
// being guessable
entropy [32]byte
// success cookie TTL
ttl time.Duration
// rounds determines roughly how
// many hash-encode rounds each
// client is required to complete.
rounds uint32
// extra fields required for
// our template rendering.
getInstanceV1 func(ctx context.Context) (*apimodel.InstanceV1, gtserror.WithCode)
}
func (m *nollamas) Serve(c *gin.Context) {
if c.Request.Method != http.MethodGet {
// Only interested in protecting
// crawlable 'GET' endpoints.
c.Next()
return
}
// Extract request context.
ctx := c.Request.Context()
if ctx.Value(oauth.SessionAuthorizedToken) != nil {
// Don't guard against requests
// providing valid OAuth tokens.
c.Next()
return
}
if gtscontext.HTTPSignature(ctx) != "" {
// Don't guard against requests
// providing HTTP signatures.
c.Next()
return
}
// Prepare new hash with buffers.
hash := hashWithBufs{hash: sha256.New()}
// Extract client fingerprint data.
userAgent := c.GetHeader("User-Agent")
clientIP := c.ClientIP()
// Generate a unique token for this request,
// only valid for a period of now +- m.ttl.
token := m.getToken(&hash, userAgent, clientIP)
// Check for a provided success token.
cookie, _ := c.Cookie("gts-nollamas")
// Check whether passed cookie
// is the expected success token.
if subtle.ConstantTimeCompare(
byteutil.S2B(cookie),
byteutil.S2B(token),
) == 1 {
// They passed us a valid, expected
// token. They already passed checks.
c.Next()
return
}
// From here-on out, all
// possibilities are handled
// by us. Prevent further http
// handlers from being called.
c.Abort()
// Generate challenge for this unique (yet deterministic) token,
// returning seed, wanted 'challenge' result and expected solution.
seed, challenge, solution := m.getChallenge(&hash, token)
// Prepare new log entry.
l := log.WithContext(ctx).
WithField("userAgent", userAgent).
WithField("seed", seed).
WithField("rounds", solution)
// Extract and parse query.
query := c.Request.URL.Query()
// Check query to see if an in-progress
// challenge solution has been provided.
nonce := query.Get("nollamas_solution")
if nonce == "" {
// No solution given, likely new client!
// Simply present them with challenge.
m.renderChallenge(c, seed, challenge)
l.Info("posing new challenge")
return
}
// Check nonce matches expected.
if subtle.ConstantTimeCompare(
byteutil.S2B(solution),
byteutil.S2B(nonce),
) != 1 {
// Their nonce failed, re-challenge them.
m.renderChallenge(c, challenge, solution)
l.Infof("invalid solution provided: %s", nonce)
return
}
l.Info("challenge passed")
// Drop solution query and encode.
query.Del("nollamas_solution")
c.Request.URL.RawQuery = query.Encode()
// They passed the challenge! Set success token
// cookie and allow them to continue to next handlers.
m.policy.SetCookie(c, "gts-nollamas", token, int(m.ttl/time.Second), "/")
c.Redirect(http.StatusTemporaryRedirect, c.Request.URL.RequestURI())
}
func (m *nollamas) renderChallenge(c *gin.Context, seed, challenge string) {
// Fetch current instance information for templating vars.
instance, errWithCode := m.getInstanceV1(c.Request.Context())
if errWithCode != nil {
apiutil.ErrorHandler(c, errWithCode, m.getInstanceV1)
return
}
// Write templated challenge response to client.
apiutil.TemplateWebPage(c, apiutil.WebPage{
Template: "nollamas.tmpl",
Instance: instance,
Stylesheets: []string{
"/assets/dist/nollamas.css",
// Include fork-awesome stylesheet
// to get nice loading spinner.
"/assets/Fork-Awesome/css/fork-awesome.min.css",
},
Extra: map[string]any{
"seed": seed,
"challenge": challenge,
},
Javascript: []apiutil.JavascriptEntry{
{
Src: "/assets/dist/nollamas.js",
Defer: true,
},
},
})
}
// getToken generates a unique yet deterministic token for given HTTP request
// details, seeded by runtime generated entropy data and ttl rounded timestamp.
func (m *nollamas) getToken(hash *hashWithBufs, userAgent, clientIP string) string {
// Reset before
// using hash.
hash.Reset()
// Use our unique entropy to seed hash,
// to ensure we have cryptographically
// unique, yet deterministic, tokens
// generated for a given http client.
hash.write(m.entropy[:])
// Also seed the generated input with
// current time rounded to TTL, so our
// single comparison handles expiries.
now := time.Now().Round(m.ttl).Unix()
hash.write([]byte{
byte(now >> 56),
byte(now >> 48),
byte(now >> 40),
byte(now >> 32),
byte(now >> 24),
byte(now >> 16),
byte(now >> 8),
byte(now),
})
// Append client request data.
hash.writeString(userAgent)
hash.writeString(clientIP)
// Return hex encoded hash.
return hash.EncodedSum()
}
// getChallenge prepares a new challenge given the deterministic input token for this request.
// it will return an input seed string, a challenge string which is the end result the client
// should be looking for, and the solution for this such that challenge = hex(sha256(seed + solution)).
// the solution will always be a string-encoded 64bit integer calculated from m.rounds + random jitter.
func (m *nollamas) getChallenge(hash *hashWithBufs, token string) (seed, challenge, solution string) {
// For their unique seed string just use a
// single portion of their 'success' token.
// SHA256 is not yet cracked, this is not an
// application of a hash requiring serious
// cryptographic security and it rotates on
// a TTL basis, so it should be fine.
seed = token[:len(token)/4]
// BEFORE resetting the hash, get the last
// two bytes of NON-hex-encoded data from
// token generation to use for random jitter.
// This is taken from the end of the hash as
// this is the "unseen" end part of token.
//
// (if we used hex-encoded data it would
// only ever be '0-9' or 'a-z' ASCII chars).
//
// Security-wise, same applies as-above.
jitter := int16(hash.hbuf[len(hash.hbuf)-2]) |
int16(hash.hbuf[len(hash.hbuf)-1])<<8
var rounds int64
switch {
// For some small percentage of
// clients we purposely low-ball
// their rounds required, to make
// it so gaming it with a starting
// nonce value may suddenly fail.
case jitter%37 == 0:
rounds = int64(m.rounds/10) + int64(jitter/10)
case jitter%31 == 0:
rounds = int64(m.rounds/5) + int64(jitter/5)
case jitter%29 == 0:
rounds = int64(m.rounds/3) + int64(jitter/3)
case jitter%13 == 0:
rounds = int64(m.rounds/2) + int64(jitter/2)
// Determine an appropriate number of hash rounds
// we want the client to perform on input seed. This
// is determined as configured m.rounds +- jitter.
// This will be the 'solution' to create 'challenge'.
default:
rounds = int64(m.rounds) + int64(jitter) //nolint:gosec
}
// Encode (positive) determined hash rounds as string.
solution = strconv.FormatInt(bitutil.Abs64(rounds), 10)
// Reset before
// using hash.
hash.Reset()
// Calculate the expected result
// of hex(sha256(seed + solution)),
// i.e. the proposed 'challenge'.
hash.writeString(seed)
hash.writeString(solution)
challenge = hash.EncodedSum()
return
}

View file

@ -1,178 +0,0 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package middleware_test
import (
"context"
"crypto/sha256"
"encoding/hex"
"io"
"net/http"
"net/http/httptest"
"slices"
"strconv"
"strings"
"testing"
"code.superseriousbusiness.org/gotosocial/internal/api/model"
apiutil "code.superseriousbusiness.org/gotosocial/internal/api/util"
"code.superseriousbusiness.org/gotosocial/internal/config"
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
"code.superseriousbusiness.org/gotosocial/internal/middleware"
"code.superseriousbusiness.org/gotosocial/internal/router"
"codeberg.org/gruf/go-byteutil"
"github.com/gin-gonic/gin"
"github.com/stretchr/testify/assert"
)
func TestNoLLaMasMiddleware(t *testing.T) {
// Gin test engine.
e := gin.New()
// Setup necessary configuration variables.
config.SetAdvancedScraperDeterrenceEnabled(true)
config.SetWebTemplateBaseDir("../../web/template")
// Load templates into engine.
err := router.LoadTemplates(e)
assert.NoError(t, err)
// Add middleware to the gin engine handler stack.
middleware := middleware.NoLLaMas(apiutil.CookiePolicy{}, getInstanceV1)
e.Use(middleware)
// Set test handler we can
// easily check if was used.
e.Handle("GET", "/", testHandler)
// Test with differing user-agents.
for _, userAgent := range []string{
"CURL",
"Mozilla FireSox",
"Google Gnome",
} {
testNoLLaMasMiddleware(t, e, userAgent)
}
}
func testNoLLaMasMiddleware(t *testing.T, e *gin.Engine, userAgent string) {
// Prepare a test request for gin engine.
r := httptest.NewRequest("GET", "/", nil)
r.Header.Set("User-Agent", userAgent)
rw := httptest.NewRecorder()
// Pass req through
// engine handler.
e.ServeHTTP(rw, r)
// Get http result.
res := rw.Result()
// It should have been stopped
// by middleware and NOT used
// the expected test handler.
ok := usedTestHandler(res)
assert.False(t, ok)
// Read entire response body.
b, err := io.ReadAll(res.Body)
if err != nil {
panic(err)
}
var seed string
var challenge string
// Parse output body and find the challenge / difficulty.
for _, line := range strings.Split(string(b), "\n") {
line = strings.TrimSpace(line)
switch {
case strings.HasPrefix(line, "data-nollamas-seed=\""):
line = line[20:]
line = line[:len(line)-1]
seed = line
case strings.HasPrefix(line, "data-nollamas-challenge=\""):
line = line[25:]
line = line[:len(line)-1]
challenge = line
}
}
// Ensure valid posed challenge.
assert.NotEmpty(t, challenge)
assert.NotEmpty(t, seed)
// Prepare a test request for gin engine.
r = httptest.NewRequest("GET", "/", nil)
r.Header.Set("User-Agent", userAgent)
rw = httptest.NewRecorder()
t.Logf("seed=%s", seed)
t.Logf("challenge=%s", challenge)
// Now compute and set solution query paramater.
solution := computeSolution(seed, challenge)
r.URL.RawQuery = "nollamas_solution=" + solution
t.Logf("solution=%s", solution)
// Pass req through
// engine handler.
e.ServeHTTP(rw, r)
// Get http result.
res = rw.Result()
// Should have received redirect.
uri, err := res.Location()
assert.NoError(t, err)
assert.Equal(t, uri.String(), "/")
// Ensure our expected solution cookie (to bypass challenge) was set.
ok = slices.ContainsFunc(res.Cookies(), func(c *http.Cookie) bool {
return c.Name == "gts-nollamas"
})
assert.True(t, ok)
}
// computeSolution does the functional equivalent of our nollamas workerTask.js.
func computeSolution(seed, challenge string) string {
for i := 0; ; i++ {
solution := strconv.Itoa(i)
combined := seed + solution
hash := sha256.Sum256(byteutil.S2B(combined))
encoded := hex.EncodeToString(hash[:])
if encoded != challenge {
continue
}
return solution
}
}
// usedTestHandler returns whether testHandler() was used.
func usedTestHandler(res *http.Response) bool {
return res.Header.Get("test-handler") == "ok"
}
func testHandler(c *gin.Context) {
c.Writer.Header().Set("test-handler", "ok")
c.Writer.WriteHeader(http.StatusOK)
}
func getInstanceV1(context.Context) (*model.InstanceV1, gtserror.WithCode) {
return &model.InstanceV1{}, nil
}

View file

@ -101,16 +101,12 @@ func (m *Module) Route(r *router.Router, mi ...gin.HandlerFunc) {
// Handlers that serve profiles and statuses should use
// the SignatureCheck middleware, so that requests with
// content-type application/activity+json can be served,
// and (if enabled) the nollamas middleware, to protect
// against scraping by shitty LLM bullshit.
// content-type application/activity+json can be served.
profileGroup := r.AttachGroup(profileGroupPath)
profileGroup.Use(mi...)
profileGroup.Use(middleware.SignatureCheck(m.isURIBlocked), middleware.CacheControl(middleware.CacheControlConfig{
Directives: []string{"no-store"},
}))
nollamas := middleware.NoLLaMas(m.cookiePolicy, m.processor.InstanceGetV1)
profileGroup.Use(nollamas)
profileGroup.Handle(http.MethodGet, "", m.profileGETHandler) // use empty path here since it's the base of the group
profileGroup.Handle(http.MethodGet, statusPath, m.threadGETHandler)