mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-10-29 15:52:26 -05:00
# Description This updates our proof-of-work middleware, NoLLaMas, to work on a more easily configurable algorithm (thank you f0x for bringing this to my attention!). Instead of requiring that a solution with pre-determined number of '0' chars be found, it now pre-computes a result with a pre-determined nonce value that it expects the client to iterate up-to. (though with some level of jitter applied, to prevent it being too-easily gamed). This allows the user to configure roughly how many hash-encode rounds they want their clients to have to complete. ## Checklist - [x] I/we have read the [GoToSocial contribution guidelines](https://codeberg.org/superseriousbusiness/gotosocial/src/branch/main/CONTRIBUTING.md). - [x] I/we have discussed the proposed changes already, either in an issue on the repository, or in the Matrix chat. - [x] I/we have not leveraged AI to create the proposed changes. - [x] I/we have performed a self-review of added code. - [x] I/we have written code that is legible and maintainable by others. - [x] I/we have commented the added code, particularly in hard-to-understand areas. - [x] I/we have made any necessary changes to documentation. - [ ] I/we have added tests that cover new code. - [x] I/we have run tests and they pass locally with the changes. - [x] I/we have run `go fmt ./...` and `golangci-lint run`. Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4186 Co-authored-by: kim <grufwub@gmail.com> Co-committed-by: kim <grufwub@gmail.com>
385 lines
11 KiB
Go
385 lines
11 KiB
Go
// GoToSocial
|
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package middleware
|
|
|
|
import (
|
|
"context"
|
|
"crypto/rand"
|
|
"crypto/sha256"
|
|
"crypto/subtle"
|
|
"encoding/hex"
|
|
"hash"
|
|
"io"
|
|
"net/http"
|
|
"strconv"
|
|
"time"
|
|
|
|
apimodel "code.superseriousbusiness.org/gotosocial/internal/api/model"
|
|
apiutil "code.superseriousbusiness.org/gotosocial/internal/api/util"
|
|
"code.superseriousbusiness.org/gotosocial/internal/config"
|
|
"code.superseriousbusiness.org/gotosocial/internal/gtscontext"
|
|
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
|
|
"code.superseriousbusiness.org/gotosocial/internal/log"
|
|
"code.superseriousbusiness.org/gotosocial/internal/oauth"
|
|
"codeberg.org/gruf/go-bitutil"
|
|
"codeberg.org/gruf/go-byteutil"
|
|
"github.com/gin-gonic/gin"
|
|
)
|
|
|
|
// NoLLaMas returns a piece of HTTP middleware that provides a deterrence
|
|
// on routes it is applied to, against bots and scrapers. It generates a
|
|
// unique but deterministic challenge for each HTTP client within an hour
|
|
// TTL that requires a proof-of-work solution to pass onto the next handler.
|
|
// On successful solution, the client is provided a cookie that allows them
|
|
// to bypass this check within that hour TTL. The outcome of this is that it
|
|
// should make scraping of these endpoints economically unfeasible, when enabled,
|
|
// and with an absurdly minimal performance impact. The downside is that it
|
|
// requires javascript to be enabled on the client to pass the middleware check.
|
|
//
|
|
// Heavily inspired by: https://github.com/TecharoHQ/anubis
|
|
func NoLLaMas(
|
|
cookiePolicy apiutil.CookiePolicy,
|
|
getInstanceV1 func(context.Context) (*apimodel.InstanceV1, gtserror.WithCode),
|
|
) gin.HandlerFunc {
|
|
|
|
if !config.GetAdvancedScraperDeterrenceEnabled() {
|
|
// NoLLaMas middleware disabled.
|
|
return func(*gin.Context) {}
|
|
}
|
|
|
|
var seed [32]byte
|
|
|
|
// Read random data for the token seed.
|
|
_, err := io.ReadFull(rand.Reader, seed[:])
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
// Configure nollamas.
|
|
var nollamas nollamas
|
|
nollamas.entropy = seed
|
|
nollamas.ttl = time.Hour
|
|
nollamas.rounds = config.GetAdvancedScraperDeterrenceDifficulty()
|
|
nollamas.getInstanceV1 = getInstanceV1
|
|
nollamas.policy = cookiePolicy
|
|
return nollamas.Serve
|
|
}
|
|
|
|
const (
	// hashLen is the length in bytes
	// of a raw (unencoded) SHA256 sum.
	hashLen = sha256.Size

	// encodedHashLen is the length in bytes of a
	// hex encoded sum, i.e. hex.EncodedLen(hashLen).
	encodedHashLen = 2 * hashLen
)
|
|
|
|
// hashWithBufs encompasses a hash along
|
|
// with the necessary buffers to generate
|
|
// a hashsum and then encode that sum.
|
|
type hashWithBufs struct {
|
|
hash hash.Hash
|
|
hbuf [hashLen]byte
|
|
ebuf [encodedHashLen]byte
|
|
}
|
|
|
|
// write is a passthrough to hash.Hash{}.Write().
|
|
func (h *hashWithBufs) write(b []byte) {
|
|
_, _ = h.hash.Write(b)
|
|
}
|
|
|
|
// writeString is a passthrough to hash.Hash{}.Write([]byte(s)).
|
|
func (h *hashWithBufs) writeString(s string) {
|
|
_, _ = h.hash.Write(byteutil.S2B(s))
|
|
}
|
|
|
|
// EncodedSum returns the hex encoded sum of hash.Sum().
|
|
func (h *hashWithBufs) EncodedSum() string {
|
|
_ = h.hash.Sum(h.hbuf[:0])
|
|
hex.Encode(h.ebuf[:], h.hbuf[:])
|
|
return string(h.ebuf[:])
|
|
}
|
|
|
|
// Reset will reset hash and buffers.
|
|
func (h *hashWithBufs) Reset() {
|
|
h.ebuf = [encodedHashLen]byte{}
|
|
h.hbuf = [hashLen]byte{}
|
|
h.hash.Reset()
|
|
}
|
|
|
|
type nollamas struct {
|
|
// our instance cookie policy.
|
|
policy apiutil.CookiePolicy
|
|
|
|
// unique entropy
|
|
// to prevent hashes
|
|
// being guessable
|
|
entropy [32]byte
|
|
|
|
// success cookie TTL
|
|
ttl time.Duration
|
|
|
|
// rounds determines roughly how
|
|
// many hash-encode rounds each
|
|
// client is required to complete.
|
|
rounds uint32
|
|
|
|
// extra fields required for
|
|
// our template rendering.
|
|
getInstanceV1 func(ctx context.Context) (*apimodel.InstanceV1, gtserror.WithCode)
|
|
}
|
|
|
|
func (m *nollamas) Serve(c *gin.Context) {
|
|
if c.Request.Method != http.MethodGet {
|
|
// Only interested in protecting
|
|
// crawlable 'GET' endpoints.
|
|
c.Next()
|
|
return
|
|
}
|
|
|
|
// Extract request context.
|
|
ctx := c.Request.Context()
|
|
|
|
if ctx.Value(oauth.SessionAuthorizedToken) != nil {
|
|
// Don't guard against requests
|
|
// providing valid OAuth tokens.
|
|
c.Next()
|
|
return
|
|
}
|
|
|
|
if gtscontext.HTTPSignature(ctx) != "" {
|
|
// Don't guard against requests
|
|
// providing HTTP signatures.
|
|
c.Next()
|
|
return
|
|
}
|
|
|
|
// Prepare new hash with buffers.
|
|
hash := hashWithBufs{hash: sha256.New()}
|
|
|
|
// Extract client fingerprint data.
|
|
userAgent := c.GetHeader("User-Agent")
|
|
clientIP := c.ClientIP()
|
|
|
|
// Generate a unique token for this request,
|
|
// only valid for a period of now +- m.ttl.
|
|
token := m.getToken(&hash, userAgent, clientIP)
|
|
|
|
// Check for a provided success token.
|
|
cookie, _ := c.Cookie("gts-nollamas")
|
|
|
|
// Check whether passed cookie
|
|
// is the expected success token.
|
|
if subtle.ConstantTimeCompare(
|
|
byteutil.S2B(cookie),
|
|
byteutil.S2B(token),
|
|
) == 1 {
|
|
|
|
// They passed us a valid, expected
|
|
// token. They already passed checks.
|
|
c.Next()
|
|
return
|
|
}
|
|
|
|
// From here-on out, all
|
|
// possibilities are handled
|
|
// by us. Prevent further http
|
|
// handlers from being called.
|
|
c.Abort()
|
|
|
|
// Generate challenge for this unique (yet deterministic) token,
|
|
// returning seed, wanted 'challenge' result and expected solution.
|
|
seed, challenge, solution := m.getChallenge(&hash, token)
|
|
|
|
// Prepare new log entry.
|
|
l := log.WithContext(ctx).
|
|
WithField("userAgent", userAgent).
|
|
WithField("seed", seed).
|
|
WithField("rounds", solution)
|
|
|
|
// Extract and parse query.
|
|
query := c.Request.URL.Query()
|
|
|
|
// Check query to see if an in-progress
|
|
// challenge solution has been provided.
|
|
nonce := query.Get("nollamas_solution")
|
|
if nonce == "" {
|
|
|
|
// No solution given, likely new client!
|
|
// Simply present them with challenge.
|
|
m.renderChallenge(c, seed, challenge)
|
|
l.Info("posing new challenge")
|
|
return
|
|
}
|
|
|
|
// Check nonce matches expected.
|
|
if subtle.ConstantTimeCompare(
|
|
byteutil.S2B(solution),
|
|
byteutil.S2B(nonce),
|
|
) != 1 {
|
|
|
|
// Their nonce failed, re-challenge them.
|
|
m.renderChallenge(c, challenge, solution)
|
|
l.Infof("invalid solution provided: %s", nonce)
|
|
return
|
|
}
|
|
|
|
l.Info("challenge passed")
|
|
|
|
// Drop solution query and encode.
|
|
query.Del("nollamas_solution")
|
|
c.Request.URL.RawQuery = query.Encode()
|
|
|
|
// They passed the challenge! Set success token
|
|
// cookie and allow them to continue to next handlers.
|
|
m.policy.SetCookie(c, "gts-nollamas", token, int(m.ttl/time.Second), "/")
|
|
c.Redirect(http.StatusTemporaryRedirect, c.Request.URL.RequestURI())
|
|
}
|
|
|
|
func (m *nollamas) renderChallenge(c *gin.Context, seed, challenge string) {
|
|
// Fetch current instance information for templating vars.
|
|
instance, errWithCode := m.getInstanceV1(c.Request.Context())
|
|
if errWithCode != nil {
|
|
apiutil.ErrorHandler(c, errWithCode, m.getInstanceV1)
|
|
return
|
|
}
|
|
|
|
// Write templated challenge response to client.
|
|
apiutil.TemplateWebPage(c, apiutil.WebPage{
|
|
Template: "nollamas.tmpl",
|
|
Instance: instance,
|
|
Stylesheets: []string{
|
|
"/assets/dist/nollamas.css",
|
|
// Include fork-awesome stylesheet
|
|
// to get nice loading spinner.
|
|
"/assets/Fork-Awesome/css/fork-awesome.min.css",
|
|
},
|
|
Extra: map[string]any{
|
|
"seed": seed,
|
|
"challenge": challenge,
|
|
},
|
|
Javascript: []apiutil.JavascriptEntry{
|
|
{
|
|
Src: "/assets/dist/nollamas.js",
|
|
Defer: true,
|
|
},
|
|
},
|
|
})
|
|
}
|
|
|
|
// getToken generates a unique yet deterministic token for given HTTP request
|
|
// details, seeded by runtime generated entropy data and ttl rounded timestamp.
|
|
func (m *nollamas) getToken(hash *hashWithBufs, userAgent, clientIP string) string {
|
|
|
|
// Reset before
|
|
// using hash.
|
|
hash.Reset()
|
|
|
|
// Use our unique entropy to seed hash,
|
|
// to ensure we have cryptographically
|
|
// unique, yet deterministic, tokens
|
|
// generated for a given http client.
|
|
hash.write(m.entropy[:])
|
|
|
|
// Also seed the generated input with
|
|
// current time rounded to TTL, so our
|
|
// single comparison handles expiries.
|
|
now := time.Now().Round(m.ttl).Unix()
|
|
hash.write([]byte{
|
|
byte(now >> 56),
|
|
byte(now >> 48),
|
|
byte(now >> 40),
|
|
byte(now >> 32),
|
|
byte(now >> 24),
|
|
byte(now >> 16),
|
|
byte(now >> 8),
|
|
byte(now),
|
|
})
|
|
|
|
// Append client request data.
|
|
hash.writeString(userAgent)
|
|
hash.writeString(clientIP)
|
|
|
|
// Return hex encoded hash.
|
|
return hash.EncodedSum()
|
|
}
|
|
|
|
// getChallenge prepares a new challenge given the deterministic input token for this request.
|
|
// it will return an input seed string, a challenge string which is the end result the client
|
|
// should be looking for, and the solution for this such that challenge = hex(sha256(seed + solution)).
|
|
// the solution will always be a string-encoded 64bit integer calculated from m.rounds + random jitter.
|
|
func (m *nollamas) getChallenge(hash *hashWithBufs, token string) (seed, challenge, solution string) {
|
|
|
|
// For their unique seed string just use a
|
|
// single portion of their 'success' token.
|
|
// SHA256 is not yet cracked, this is not an
|
|
// application of a hash requiring serious
|
|
// cryptographic security and it rotates on
|
|
// a TTL basis, so it should be fine.
|
|
seed = token[:len(token)/4]
|
|
|
|
// BEFORE resetting the hash, get the last
|
|
// two bytes of NON-hex-encoded data from
|
|
// token generation to use for random jitter.
|
|
// This is taken from the end of the hash as
|
|
// this is the "unseen" end part of token.
|
|
//
|
|
// (if we used hex-encoded data it would
|
|
// only ever be '0-9' or 'a-z' ASCII chars).
|
|
//
|
|
// Security-wise, same applies as-above.
|
|
jitter := int16(hash.hbuf[len(hash.hbuf)-2]) |
|
|
int16(hash.hbuf[len(hash.hbuf)-1])<<8
|
|
|
|
var rounds int64
|
|
switch {
|
|
// For some small percentage of
|
|
// clients we purposely low-ball
|
|
// their rounds required, to make
|
|
// it so gaming it with a starting
|
|
// nonce value may suddenly fail.
|
|
case jitter%37 == 0:
|
|
rounds = int64(m.rounds/10) + int64(jitter/10)
|
|
case jitter%31 == 0:
|
|
rounds = int64(m.rounds/5) + int64(jitter/5)
|
|
case jitter%29 == 0:
|
|
rounds = int64(m.rounds/3) + int64(jitter/3)
|
|
case jitter%13 == 0:
|
|
rounds = int64(m.rounds/2) + int64(jitter/2)
|
|
|
|
// Determine an appropriate number of hash rounds
|
|
// we want the client to perform on input seed. This
|
|
// is determined as configured m.rounds +- jitter.
|
|
// This will be the 'solution' to create 'challenge'.
|
|
default:
|
|
rounds = int64(m.rounds) + int64(jitter) //nolint:gosec
|
|
}
|
|
|
|
// Encode (positive) determined hash rounds as string.
|
|
solution = strconv.FormatInt(bitutil.Abs64(rounds), 10)
|
|
|
|
// Reset before
|
|
// using hash.
|
|
hash.Reset()
|
|
|
|
// Calculate the expected result
|
|
// of hex(sha256(seed + solution)),
|
|
// i.e. the proposed 'challenge'.
|
|
hash.writeString(seed)
|
|
hash.writeString(solution)
|
|
challenge = hash.EncodedSum()
|
|
|
|
return
|
|
}
|