From ab4089d674ae9a70d98cc9ff5af956d6f256cbe6 Mon Sep 17 00:00:00 2001 From: tobi Date: Mon, 16 Sep 2024 11:21:16 +0200 Subject: [PATCH] address review comments --- .drone.yml | 4 +- .goreleaser.yml | 1 - go.mod | 1 - go.sum | 6 - internal/typeutils/converter.go | 28 ++- internal/typeutils/internaltofrontend.go | 47 ++-- internal/typeutils/internaltofrontend_test.go | 1 - internal/typeutils/util.go | 85 +++---- internal/typeutils/util_test.go | 87 +++---- scripts/build.sh | 3 +- vendor/github.com/cespare/xxhash/LICENSE.txt | 22 -- vendor/github.com/cespare/xxhash/README.md | 50 ---- vendor/github.com/cespare/xxhash/rotate.go | 14 -- vendor/github.com/cespare/xxhash/rotate19.go | 14 -- vendor/github.com/cespare/xxhash/xxhash.go | 168 ------------- .../github.com/cespare/xxhash/xxhash_amd64.go | 12 - .../github.com/cespare/xxhash/xxhash_amd64.s | 233 ------------------ .../github.com/cespare/xxhash/xxhash_other.go | 75 ------ .../github.com/cespare/xxhash/xxhash_safe.go | 10 - .../cespare/xxhash/xxhash_unsafe.go | 30 --- vendor/modules.txt | 3 - 21 files changed, 115 insertions(+), 779 deletions(-) delete mode 100644 vendor/github.com/cespare/xxhash/LICENSE.txt delete mode 100644 vendor/github.com/cespare/xxhash/README.md delete mode 100644 vendor/github.com/cespare/xxhash/rotate.go delete mode 100644 vendor/github.com/cespare/xxhash/rotate19.go delete mode 100644 vendor/github.com/cespare/xxhash/xxhash.go delete mode 100644 vendor/github.com/cespare/xxhash/xxhash_amd64.go delete mode 100644 vendor/github.com/cespare/xxhash/xxhash_amd64.s delete mode 100644 vendor/github.com/cespare/xxhash/xxhash_other.go delete mode 100644 vendor/github.com/cespare/xxhash/xxhash_safe.go delete mode 100644 vendor/github.com/cespare/xxhash/xxhash_unsafe.go diff --git a/.drone.yml b/.drone.yml index ea945db12..1a5239a13 100644 --- a/.drone.yml +++ b/.drone.yml @@ -45,7 +45,7 @@ steps: go test -failfast -timeout=20m - -tags "netgo osusergo static_build kvformat timetzdata purego" + -tags "netgo osusergo static_build kvformat timetzdata" ./... - ./test/envparsing.sh - ./test/swagger.sh @@ -207,6 +207,6 @@ steps: --- kind: signature -hmac: 3f3a24557b67760dd0c4091eaaed4842b0545f5aa65f90ce70d5e45da23c5260 +hmac: f4008d87e4e5b67251eb89f255c1224e6ab5818828cab24fc319b8f829176058 ... diff --git a/.goreleaser.yml b/.goreleaser.yml index 3d1bedd11..6a7fccfd0 100644 --- a/.goreleaser.yml +++ b/.goreleaser.yml @@ -27,7 +27,6 @@ builds: - static_build - kvformat - timetzdata - - purego - >- {{ if and (index .Env "DEBUG") (.Env.DEBUG) }}debugenv{{ end }} - >- diff --git a/go.mod b/go.mod index 8d23218d6..67fe84d22 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,6 @@ require ( github.com/DmitriyVTitov/size v1.5.0 github.com/KimMachineGun/automemlimit v0.6.1 github.com/buckket/go-blurhash v1.1.0 - github.com/cespare/xxhash v1.1.0 github.com/coreos/go-oidc/v3 v3.11.0 github.com/gin-contrib/cors v1.7.2 github.com/gin-contrib/gzip v1.0.1 diff --git a/go.sum b/go.sum index db3fc7812..749487a68 100644 --- a/go.sum +++ b/go.sum @@ -98,8 +98,6 @@ github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0 github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA= github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM= -github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= -github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/ajg/form v1.5.1 h1:t9c7v8JUKu/XxOGBU0yjNpaMloxGEJhUkqFRq0ibGeU= github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY= github.com/andybalholm/brotli v1.0.0/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= @@ -120,8 +118,6 @@ github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4 github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= -github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= @@ -512,8 +508,6 @@ github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIK github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= diff --git a/internal/typeutils/converter.go b/internal/typeutils/converter.go index a4395163d..8284bda96 100644 --- a/internal/typeutils/converter.go +++ b/internal/typeutils/converter.go @@ -29,26 +29,30 @@ import ( ) type Converter struct { - state *state.State - defaultAvatars []string - randAvatars sync.Map - visFilter *visibility.Filter - intFilter *interaction.Filter - statusHashesToFilterableText cache.TTLCache[string, string] + state *state.State + defaultAvatars []string + randAvatars sync.Map + visFilter *visibility.Filter + intFilter *interaction.Filter + + // TTL cache of statuses -> filterable text fields. + // To ensure up-to-date fields, cache is keyed as: + // [status.ID][status.UpdatedAt.Unix()]` + statusesFilterableFields cache.TTLCache[string, []string] } func NewConverter(state *state.State) *Converter { - statusHashesToFilterableText := cache.NewTTL[string, string](0, 512, 0) + statusHashesToFilterableText := cache.NewTTL[string, []string](0, 512, 0) statusHashesToFilterableText.SetTTL(time.Hour, true) if !statusHashesToFilterableText.Start(time.Minute) { log.Panic(nil, "failed to start statusHashesToFilterableText cache") } return &Converter{ - state: state, - defaultAvatars: populateDefaultAvatars(), - visFilter: visibility.NewFilter(state), - intFilter: interaction.NewFilter(state), - statusHashesToFilterableText: statusHashesToFilterableText, + state: state, + defaultAvatars: populateDefaultAvatars(), + visFilter: visibility.NewFilter(state), + intFilter: interaction.NewFilter(state), + statusesFilterableFields: statusHashesToFilterableText, } } diff --git a/internal/typeutils/internaltofrontend.go b/internal/typeutils/internaltofrontend.go index 261d71bb4..58797e790 100644 --- a/internal/typeutils/internaltofrontend.go +++ b/internal/typeutils/internaltofrontend.go @@ -21,6 +21,7 @@ import ( "context" "errors" "fmt" + "slices" "strings" "time" @@ -938,35 +939,47 @@ func (c *Converter) statusToAPIFilterResults( return nil, nil } - // Derive a hash of this status. - statusHash := StatusHash(s) + // Key this status based on ID + last updated time, + // to ensure we always filter on latest version. + statusKey := fmt.Sprintf("%s%d", s.ID, s.UpdatedAt.Unix()) - // Check if we have the filterable - // text stored already for this hash. - statusText, stored := c.statusHashesToFilterableText.Get(statusHash) + // Check if we have filterable fields cached for this status. + fields, stored := c.statusesFilterableFields.Get(statusKey) if !stored { - // We don't have this filterable text - // cached, calculate + cache it now. - statusText = filterableText(s) - c.statusHashesToFilterableText.Set(statusHash, statusText) + // We don't have filterable fields + // cached, calculate + cache now. + fields = filterableFields(s) + c.statusesFilterableFields.Set(statusKey, fields) } // Record all matching warn filters and the reasons they matched. filterResults := make([]apimodel.FilterResult, 0, len(filters)) for _, filter := range filters { if !filterAppliesInContext(filter, filterContext) { - // Filter doesn't apply to this context. - continue - } - if filter.Expired(now) { + // Filter doesn't apply + // to this context. continue } - // List all matching keywords. + if filter.Expired(now) { + // Filter doesn't + // apply anymore. + continue + } + + // Assemble matching keywords (if any) from this filter. keywordMatches := make([]string, 0, len(filter.Keywords)) - for _, filterKeyword := range filter.Keywords { - if filterKeyword.Regexp.MatchString(statusText) { - keywordMatches = append(keywordMatches, filterKeyword.Keyword) + for _, keyword := range filter.Keywords { + // Check if at least one filterable field + // in the status matches on this filter. + if slices.ContainsFunc( + fields, + func(field string) bool { + return keyword.Regexp.MatchString(field) + }, + ) { + // At least one field matched on this filter. + keywordMatches = append(keywordMatches, keyword.Keyword) } } diff --git a/internal/typeutils/internaltofrontend_test.go b/internal/typeutils/internaltofrontend_test.go index 569aef83a..a44afe67e 100644 --- a/internal/typeutils/internaltofrontend_test.go +++ b/internal/typeutils/internaltofrontend_test.go @@ -1066,7 +1066,6 @@ func (suite *InternalToFrontendTestSuite) testHashtagFilteredStatusToFrontend(wh testStatus := new(gtsmodel.Status) *testStatus = *suite.testStatuses["admin_account_status_1"] testStatus.Content = `

doggo doggin' it

#dogsofmastodon

` - testStatus.Text = "doggo doggin' it\n\n#dogsofmastodon" if boost { boost, err := suite.typeconverter.StatusToBoost( diff --git a/internal/typeutils/util.go b/internal/typeutils/util.go index bb61ffb66..3a867ba35 100644 --- a/internal/typeutils/util.go +++ b/internal/typeutils/util.go @@ -19,7 +19,6 @@ package typeutils import ( "context" - "encoding/hex" "fmt" "math" "net/url" @@ -28,7 +27,6 @@ import ( "strconv" "strings" - "github.com/cespare/xxhash/v2" "github.com/k3a/html2text" apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" "github.com/superseriousbusiness/gotosocial/internal/config" @@ -288,74 +286,63 @@ func ContentToContentLanguage( return contentStr, langTagStr } -// StatusHash returns an xxhash of text -// from a status, taking account of: -// -// - content warning -// - content -// - media IDs + descriptions -// - poll options -func StatusHash(s *gtsmodel.Status) string { - hash := xxhash.New() - - // Content warning / title. - hash.WriteString(s.ContentWarning) // nolint:errcheck - - // Status content. - hash.WriteString(s.Content) // nolint:errcheck - - // Media IDs + descriptions. - for _, attachment := range s.Attachments { - hash.WriteString(attachment.ID) // nolint:errcheck - hash.WriteString(attachment.Description) // nolint:errcheck - } - - // Poll options. - if s.Poll != nil { - for _, option := range s.Poll.Options { - hash.WriteString(option) // nolint:errcheck - } - } - - sum := hash.Sum(nil) - return hex.EncodeToString(sum) -} - -// filterableText concatenates text from a -// status that we might want to filter on: +// filterableFields returns text fields from +// a status that we might want to filter on: // // - content warning // - content (converted to plaintext from HTML) // - media descriptions // - poll options -func filterableText(s *gtsmodel.Status) string { - fields := []string{} +// +// Each field should be filtered separately. +// This avoids scenarios where false-positive +// multiple-word matches can be made by matching +// the last word of one field + the first word +// of the next field together. +func filterableFields(s *gtsmodel.Status) []string { + // Estimate length of fields. + fieldCount := 2 + len(s.Attachments) + if s.Poll != nil { + fieldCount += len(s.Poll.Options) + } + fields := make([]string, 0, fieldCount) // Content warning / title. - fields = append(fields, s.ContentWarning) + if s.ContentWarning != "" { + fields = append(fields, s.ContentWarning) + } - // Status content; use raw text if available, - // else use text parsed from content HTML. - if s.Text != "" { - fields = append(fields, s.Text) - } else { + // Status content. Though we have raw text + // available for statuses created on our + // instance, use the html2text version to + // remove markdown-formatting characters + // and ensure more consistent filtering. + if s.Content != "" { text := html2text.HTML2TextWithOptions( s.Content, html2text.WithLinksInnerText(), html2text.WithUnixLineBreaks(), ) - fields = append(fields, text) + if text != "" { + fields = append(fields, text) + } } // Media descriptions. for _, attachment := range s.Attachments { - fields = append(fields, attachment.Description) + if attachment.Description != "" { + fields = append(fields, attachment.Description) + } } // Poll options. if s.Poll != nil { - fields = append(fields, s.Poll.Options...) + for _, opt := range s.Poll.Options { + if opt != "" { + fields = append(fields, opt) + } + } } - return strings.Join(fields, " ") + return fields } diff --git a/internal/typeutils/util_test.go b/internal/typeutils/util_test.go index 23be0bbe6..ea6667519 100644 --- a/internal/typeutils/util_test.go +++ b/internal/typeutils/util_test.go @@ -21,6 +21,7 @@ import ( "context" "testing" + "github.com/stretchr/testify/assert" "github.com/superseriousbusiness/gotosocial/internal/config" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/language" @@ -161,87 +162,59 @@ func TestContentToContentLanguage(t *testing.T) { func TestFilterableText(t *testing.T) { type testcase struct { - status *gtsmodel.Status - expectedText string + status *gtsmodel.Status + expectedFields []string } - for i, testcase := range []testcase{ + for _, testcase := range []testcase{ { status: >smodel.Status{ ContentWarning: "This is a test status", Content: `

Import / export of account data via CSV files will be coming in 0.17.0 :) No more having to run scripts + CLI tools to import a list of accounts you follow, after doing a migration to a #GoToSocial instance.

`, }, - expectedText: `This is a test status Import / export of account data via CSV files will be coming in 0.17.0 :) No more having to run scripts + CLI tools to import a list of accounts you follow, after doing a migration to a #GoToSocial instance.`, + expectedFields: []string{ + "This is a test status", + "Import / export of account data via CSV files will be coming in 0.17.0 :) No more having to run scripts + CLI tools to import a list of accounts you follow, after doing a migration to a #GoToSocial instance.", + }, }, { status: >smodel.Status{ Content: `

@zlatko currently we used modernc/sqlite3 for our sqlite driver, but we've been experimenting with wasm sqlite, and will likely move to that permanently in future; in the meantime, both options are available (the latter with a build tag)

https://github.com/superseriousbusiness/gotosocial/pull/2863

`, }, - expectedText: ` @zlatko currently we used modernc/sqlite3 for our sqlite driver, but we've been experimenting with wasm sqlite, and will likely move to that permanently in future; in the meantime, both options are available (the latter with a build tag) - -https://github.com/superseriousbusiness/gotosocial/pull/2863 `, + expectedFields: []string{ + "@zlatko currently we used modernc/sqlite3 for our sqlite driver, but we've been experimenting with wasm sqlite, and will likely move to that permanently in future; in the meantime, both options are available (the latter with a build tag)\n\nhttps://github.com/superseriousbusiness/gotosocial/pull/2863 ", + }, }, { status: >smodel.Status{ - Content: `

Latest graphs for #GoToSocial on Wasm sqlite3 with embedded Wasm ffmpeg, both running on Wazero, and configured with a 50MiB db cache target. This is the version we'll be releasing soonish, now we're happy with how we've tamed everything.

`, + ContentWarning: "Nerd stuff", + Content: `

Latest graphs for #GoToSocial on Wasm sqlite3 with embedded Wasm ffmpeg, both running on Wazero, and configured with a 50MiB db cache target. This is the version we'll be releasing soonish, now we're happy with how we've tamed everything.

`, Attachments: []*gtsmodel.MediaAttachment{ { Description: `Graph showing GtS using between 150-300 MiB of memory, steadily, over a few days.`, }, - }, - }, - expectedText: ` Latest graphs for #GoToSocial on Wasm sqlite3 with embedded Wasm ffmpeg , both running on Wazero , and configured with a 50MiB db cache target . This is the version we'll be releasing soonish, now we're happy with how we've tamed everything. Graph showing GtS using between 150-300 MiB of memory, steadily, over a few days.`, - }, - } { - text := filterableText(testcase.status) - if text != testcase.expectedText { - t.Errorf( - "test %d expected text '%s' got '%s'", - i, testcase.expectedText, text, - ) - } - } -} - -func TestStatusHash(t *testing.T) { - type testcase struct { - status *gtsmodel.Status - expectedHash string - } - - for i, testcase := range []testcase{ - { - status: >smodel.Status{ - ContentWarning: "This is a test status", - Content: `

Import / export of account data via CSV files will be coming in 0.17.0 :) No more having to run scripts + CLI tools to import a list of accounts you follow, after doing a migration to a #GoToSocial instance.

`, - }, - expectedHash: `8bbb5439dbe62ae0`, - }, - { - status: >smodel.Status{ - Content: `

@zlatko currently we used modernc/sqlite3 for our sqlite driver, but we've been experimenting with wasm sqlite, and will likely move to that permanently in future; in the meantime, both options are available (the latter with a build tag)

https://github.com/superseriousbusiness/gotosocial/pull/2863

`, - }, - expectedHash: `d039dfb4d04752d5`, - }, - { - status: >smodel.Status{ - Content: `

Latest graphs for #GoToSocial on Wasm sqlite3 with embedded Wasm ffmpeg, both running on Wazero, and configured with a 50MiB db cache target. This is the version we'll be releasing soonish, now we're happy with how we've tamed everything.

`, - Attachments: []*gtsmodel.MediaAttachment{ { - ID: "01J7TYSH1V5V4DCTVPASH3K9PQ", - Description: `Graph showing GtS using between 150-300 MiB of memory, steadily, over a few days.`, + Description: `Another media attachment`, + }, + }, + Poll: >smodel.Poll{ + Options: []string{ + "Poll option 1", + "Poll option 2", }, }, }, - expectedHash: `414d975b2ef9d112`, + expectedFields: []string{ + "Nerd stuff", + "Latest graphs for #GoToSocial on Wasm sqlite3 with embedded Wasm ffmpeg , both running on Wazero , and configured with a 50MiB db cache target . This is the version we'll be releasing soonish, now we're happy with how we've tamed everything.", + "Graph showing GtS using between 150-300 MiB of memory, steadily, over a few days.", + "Another media attachment", + "Poll option 1", + "Poll option 2", + }, }, } { - hash := StatusHash(testcase.status) - if hash != testcase.expectedHash { - t.Errorf( - "test %d expected hash '%s' got '%s'", - i, testcase.expectedHash, hash, - ) - } + fields := filterableFields(testcase.status) + assert.Equal(t, testcase.expectedFields, fields) } } diff --git a/scripts/build.sh b/scripts/build.sh index 1781731e3..5b10a5493 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -6,7 +6,7 @@ set -e log_exec() { echo "$ ${*}"; "$@"; } # Grab environment variables and set defaults + requirements. -GO_BUILDTAGS="${GO_BUILDTAGS-} netgo osusergo static_build kvformat timetzdata purego" +GO_BUILDTAGS="${GO_BUILDTAGS-} netgo osusergo static_build kvformat timetzdata" GO_LDFLAGS="${GO_LDFLAGS-} -s -w -extldflags '-static' -X 'main.Version=${VERSION:-$(git describe --tags --abbrev=0)}'" GO_GCFLAGS=${GO_GCFLAGS-} @@ -17,7 +17,6 @@ GO_GCFLAGS=${GO_GCFLAGS-} # Available Go build tags, with explanation, followed by benefits of enabling it: # - kvformat: enables prettier output of log fields (slightly better performance) # - timetzdata: embed timezone database inside binary (allow setting local time inside Docker containers, at cost of 450KB) -# - purego: disable amd64/arm64 assembly implementation for xxhash (increase portability at marginal performance cost) # - notracing: disables compiling-in otel tracing support (reduced binary size, better performance) # - nometrics: disables compiling-in otel metrics support (reduced binary size, better performance) # - noerrcaller: disables caller function prefix in errors (slightly better performance, at cost of err readability) diff --git a/vendor/github.com/cespare/xxhash/LICENSE.txt b/vendor/github.com/cespare/xxhash/LICENSE.txt deleted file mode 100644 index 24b53065f..000000000 --- a/vendor/github.com/cespare/xxhash/LICENSE.txt +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (c) 2016 Caleb Spare - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/cespare/xxhash/README.md b/vendor/github.com/cespare/xxhash/README.md deleted file mode 100644 index 0982fd25e..000000000 --- a/vendor/github.com/cespare/xxhash/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# xxhash - -[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash) - -xxhash is a Go implementation of the 64-bit -[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a -high-quality hashing algorithm that is much faster than anything in the Go -standard library. - -The API is very small, taking its cue from the other hashing packages in the -standard library: - - $ go doc github.com/cespare/xxhash ! - package xxhash // import "github.com/cespare/xxhash" - - Package xxhash implements the 64-bit variant of xxHash (XXH64) as described - at http://cyan4973.github.io/xxHash/. - - func New() hash.Hash64 - func Sum64(b []byte) uint64 - func Sum64String(s string) uint64 - -This implementation provides a fast pure-Go implementation and an even faster -assembly implementation for amd64. - -## Benchmarks - -Here are some quick benchmarks comparing the pure-Go and assembly -implementations of Sum64 against another popular Go XXH64 implementation, -[github.com/OneOfOne/xxhash](https://github.com/OneOfOne/xxhash): - -| input size | OneOfOne | cespare (purego) | cespare | -| --- | --- | --- | --- | -| 5 B | 416 MB/s | 720 MB/s | 872 MB/s | -| 100 B | 3980 MB/s | 5013 MB/s | 5252 MB/s | -| 4 KB | 12727 MB/s | 12999 MB/s | 13026 MB/s | -| 10 MB | 9879 MB/s | 10775 MB/s | 10913 MB/s | - -These numbers were generated with: - -``` -$ go test -benchtime 10s -bench '/OneOfOne,' -$ go test -tags purego -benchtime 10s -bench '/xxhash,' -$ go test -benchtime 10s -bench '/xxhash,' -``` - -## Projects using this package - -- [InfluxDB](https://github.com/influxdata/influxdb) -- [Prometheus](https://github.com/prometheus/prometheus) diff --git a/vendor/github.com/cespare/xxhash/rotate.go b/vendor/github.com/cespare/xxhash/rotate.go deleted file mode 100644 index f3eac5ebc..000000000 --- a/vendor/github.com/cespare/xxhash/rotate.go +++ /dev/null @@ -1,14 +0,0 @@ -// +build !go1.9 - -package xxhash - -// TODO(caleb): After Go 1.10 comes out, remove this fallback code. - -func rol1(x uint64) uint64 { return (x << 1) | (x >> (64 - 1)) } -func rol7(x uint64) uint64 { return (x << 7) | (x >> (64 - 7)) } -func rol11(x uint64) uint64 { return (x << 11) | (x >> (64 - 11)) } -func rol12(x uint64) uint64 { return (x << 12) | (x >> (64 - 12)) } -func rol18(x uint64) uint64 { return (x << 18) | (x >> (64 - 18)) } -func rol23(x uint64) uint64 { return (x << 23) | (x >> (64 - 23)) } -func rol27(x uint64) uint64 { return (x << 27) | (x >> (64 - 27)) } -func rol31(x uint64) uint64 { return (x << 31) | (x >> (64 - 31)) } diff --git a/vendor/github.com/cespare/xxhash/rotate19.go b/vendor/github.com/cespare/xxhash/rotate19.go deleted file mode 100644 index b99612bab..000000000 --- a/vendor/github.com/cespare/xxhash/rotate19.go +++ /dev/null @@ -1,14 +0,0 @@ -// +build go1.9 - -package xxhash - -import "math/bits" - -func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) } -func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) } -func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) } -func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) } -func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) } -func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) } -func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) } -func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) } diff --git a/vendor/github.com/cespare/xxhash/xxhash.go b/vendor/github.com/cespare/xxhash/xxhash.go deleted file mode 100644 index f896bd28f..000000000 --- a/vendor/github.com/cespare/xxhash/xxhash.go +++ /dev/null @@ -1,168 +0,0 @@ -// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described -// at http://cyan4973.github.io/xxHash/. -package xxhash - -import ( - "encoding/binary" - "hash" -) - -const ( - prime1 uint64 = 11400714785074694791 - prime2 uint64 = 14029467366897019727 - prime3 uint64 = 1609587929392839161 - prime4 uint64 = 9650029242287828579 - prime5 uint64 = 2870177450012600261 -) - -// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where -// possible in the Go code is worth a small (but measurable) performance boost -// by avoiding some MOVQs. Vars are needed for the asm and also are useful for -// convenience in the Go code in a few places where we need to intentionally -// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the -// result overflows a uint64). -var ( - prime1v = prime1 - prime2v = prime2 - prime3v = prime3 - prime4v = prime4 - prime5v = prime5 -) - -type xxh struct { - v1 uint64 - v2 uint64 - v3 uint64 - v4 uint64 - total int - mem [32]byte - n int // how much of mem is used -} - -// New creates a new hash.Hash64 that implements the 64-bit xxHash algorithm. -func New() hash.Hash64 { - var x xxh - x.Reset() - return &x -} - -func (x *xxh) Reset() { - x.n = 0 - x.total = 0 - x.v1 = prime1v + prime2 - x.v2 = prime2 - x.v3 = 0 - x.v4 = -prime1v -} - -func (x *xxh) Size() int { return 8 } -func (x *xxh) BlockSize() int { return 32 } - -// Write adds more data to x. It always returns len(b), nil. -func (x *xxh) Write(b []byte) (n int, err error) { - n = len(b) - x.total += len(b) - - if x.n+len(b) < 32 { - // This new data doesn't even fill the current block. - copy(x.mem[x.n:], b) - x.n += len(b) - return - } - - if x.n > 0 { - // Finish off the partial block. - copy(x.mem[x.n:], b) - x.v1 = round(x.v1, u64(x.mem[0:8])) - x.v2 = round(x.v2, u64(x.mem[8:16])) - x.v3 = round(x.v3, u64(x.mem[16:24])) - x.v4 = round(x.v4, u64(x.mem[24:32])) - b = b[32-x.n:] - x.n = 0 - } - - if len(b) >= 32 { - // One or more full blocks left. - b = writeBlocks(x, b) - } - - // Store any remaining partial block. - copy(x.mem[:], b) - x.n = len(b) - - return -} - -func (x *xxh) Sum(b []byte) []byte { - s := x.Sum64() - return append( - b, - byte(s>>56), - byte(s>>48), - byte(s>>40), - byte(s>>32), - byte(s>>24), - byte(s>>16), - byte(s>>8), - byte(s), - ) -} - -func (x *xxh) Sum64() uint64 { - var h uint64 - - if x.total >= 32 { - v1, v2, v3, v4 := x.v1, x.v2, x.v3, x.v4 - h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) - h = mergeRound(h, v1) - h = mergeRound(h, v2) - h = mergeRound(h, v3) - h = mergeRound(h, v4) - } else { - h = x.v3 + prime5 - } - - h += uint64(x.total) - - i, end := 0, x.n - for ; i+8 <= end; i += 8 { - k1 := round(0, u64(x.mem[i:i+8])) - h ^= k1 - h = rol27(h)*prime1 + prime4 - } - if i+4 <= end { - h ^= uint64(u32(x.mem[i:i+4])) * prime1 - h = rol23(h)*prime2 + prime3 - i += 4 - } - for i < end { - h ^= uint64(x.mem[i]) * prime5 - h = rol11(h) * prime1 - i++ - } - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return h -} - -func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) } -func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) } - -func round(acc, input uint64) uint64 { - acc += input * prime2 - acc = rol31(acc) - acc *= prime1 - return acc -} - -func mergeRound(acc, val uint64) uint64 { - val = round(0, val) - acc ^= val - acc = acc*prime1 + prime4 - return acc -} diff --git a/vendor/github.com/cespare/xxhash/xxhash_amd64.go b/vendor/github.com/cespare/xxhash/xxhash_amd64.go deleted file mode 100644 index d61765268..000000000 --- a/vendor/github.com/cespare/xxhash/xxhash_amd64.go +++ /dev/null @@ -1,12 +0,0 @@ -// +build !appengine -// +build gc -// +build !purego - -package xxhash - -// Sum64 computes the 64-bit xxHash digest of b. -// -//go:noescape -func Sum64(b []byte) uint64 - -func writeBlocks(x *xxh, b []byte) []byte diff --git a/vendor/github.com/cespare/xxhash/xxhash_amd64.s b/vendor/github.com/cespare/xxhash/xxhash_amd64.s deleted file mode 100644 index 757f2011f..000000000 --- a/vendor/github.com/cespare/xxhash/xxhash_amd64.s +++ /dev/null @@ -1,233 +0,0 @@ -// +build !appengine -// +build gc -// +build !purego - -#include "textflag.h" - -// Register allocation: -// AX h -// CX pointer to advance through b -// DX n -// BX loop end -// R8 v1, k1 -// R9 v2 -// R10 v3 -// R11 v4 -// R12 tmp -// R13 prime1v -// R14 prime2v -// R15 prime4v - -// round reads from and advances the buffer pointer in CX. -// It assumes that R13 has prime1v and R14 has prime2v. -#define round(r) \ - MOVQ (CX), R12 \ - ADDQ $8, CX \ - IMULQ R14, R12 \ - ADDQ R12, r \ - ROLQ $31, r \ - IMULQ R13, r - -// mergeRound applies a merge round on the two registers acc and val. -// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v. -#define mergeRound(acc, val) \ - IMULQ R14, val \ - ROLQ $31, val \ - IMULQ R13, val \ - XORQ val, acc \ - IMULQ R13, acc \ - ADDQ R15, acc - -// func Sum64(b []byte) uint64 -TEXT ·Sum64(SB), NOSPLIT, $0-32 - // Load fixed primes. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 - MOVQ ·prime4v(SB), R15 - - // Load slice. - MOVQ b_base+0(FP), CX - MOVQ b_len+8(FP), DX - LEAQ (CX)(DX*1), BX - - // The first loop limit will be len(b)-32. - SUBQ $32, BX - - // Check whether we have at least one block. - CMPQ DX, $32 - JLT noBlocks - - // Set up initial state (v1, v2, v3, v4). - MOVQ R13, R8 - ADDQ R14, R8 - MOVQ R14, R9 - XORQ R10, R10 - XORQ R11, R11 - SUBQ R13, R11 - - // Loop until CX > BX. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) - - CMPQ CX, BX - JLE blockLoop - - MOVQ R8, AX - ROLQ $1, AX - MOVQ R9, R12 - ROLQ $7, R12 - ADDQ R12, AX - MOVQ R10, R12 - ROLQ $12, R12 - ADDQ R12, AX - MOVQ R11, R12 - ROLQ $18, R12 - ADDQ R12, AX - - mergeRound(AX, R8) - mergeRound(AX, R9) - mergeRound(AX, R10) - mergeRound(AX, R11) - - JMP afterBlocks - -noBlocks: - MOVQ ·prime5v(SB), AX - -afterBlocks: - ADDQ DX, AX - - // Right now BX has len(b)-32, and we want to loop until CX > len(b)-8. - ADDQ $24, BX - - CMPQ CX, BX - JG fourByte - -wordLoop: - // Calculate k1. - MOVQ (CX), R8 - ADDQ $8, CX - IMULQ R14, R8 - ROLQ $31, R8 - IMULQ R13, R8 - - XORQ R8, AX - ROLQ $27, AX - IMULQ R13, AX - ADDQ R15, AX - - CMPQ CX, BX - JLE wordLoop - -fourByte: - ADDQ $4, BX - CMPQ CX, BX - JG singles - - MOVL (CX), R8 - ADDQ $4, CX - IMULQ R13, R8 - XORQ R8, AX - - ROLQ $23, AX - IMULQ R14, AX - ADDQ ·prime3v(SB), AX - -singles: - ADDQ $4, BX - CMPQ CX, BX - JGE finalize - -singlesLoop: - MOVBQZX (CX), R12 - ADDQ $1, CX - IMULQ ·prime5v(SB), R12 - XORQ R12, AX - - ROLQ $11, AX - IMULQ R13, AX - - CMPQ CX, BX - JL singlesLoop - -finalize: - MOVQ AX, R12 - SHRQ $33, R12 - XORQ R12, AX - IMULQ R14, AX - MOVQ AX, R12 - SHRQ $29, R12 - XORQ R12, AX - IMULQ ·prime3v(SB), AX - MOVQ AX, R12 - SHRQ $32, R12 - XORQ R12, AX - - MOVQ AX, ret+24(FP) - RET - -// writeBlocks uses the same registers as above except that it uses AX to store -// the x pointer. - -// func writeBlocks(x *xxh, b []byte) []byte -TEXT ·writeBlocks(SB), NOSPLIT, $0-56 - // Load fixed primes needed for round. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 - - // Load slice. - MOVQ b_base+8(FP), CX - MOVQ CX, ret_base+32(FP) // initialize return base pointer; see NOTE below - MOVQ b_len+16(FP), DX - LEAQ (CX)(DX*1), BX - SUBQ $32, BX - - // Load vN from x. - MOVQ x+0(FP), AX - MOVQ 0(AX), R8 // v1 - MOVQ 8(AX), R9 // v2 - MOVQ 16(AX), R10 // v3 - MOVQ 24(AX), R11 // v4 - - // We don't need to check the loop condition here; this function is - // always called with at least one block of data to process. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) - - CMPQ CX, BX - JLE blockLoop - - // Copy vN back to x. - MOVQ R8, 0(AX) - MOVQ R9, 8(AX) - MOVQ R10, 16(AX) - MOVQ R11, 24(AX) - - // Construct return slice. - // NOTE: It's important that we don't construct a slice that has a base - // pointer off the end of the original slice, as in Go 1.7+ this will - // cause runtime crashes. (See discussion in, for example, - // https://github.com/golang/go/issues/16772.) - // Therefore, we calculate the length/cap first, and if they're zero, we - // keep the old base. This is what the compiler does as well if you - // write code like - // b = b[len(b):] - - // New length is 32 - (CX - BX) -> BX+32 - CX. - ADDQ $32, BX - SUBQ CX, BX - JZ afterSetBase - - MOVQ CX, ret_base+32(FP) - -afterSetBase: - MOVQ BX, ret_len+40(FP) - MOVQ BX, ret_cap+48(FP) // set cap == len - - RET diff --git a/vendor/github.com/cespare/xxhash/xxhash_other.go b/vendor/github.com/cespare/xxhash/xxhash_other.go deleted file mode 100644 index c68d13f89..000000000 --- a/vendor/github.com/cespare/xxhash/xxhash_other.go +++ /dev/null @@ -1,75 +0,0 @@ -// +build !amd64 appengine !gc purego - -package xxhash - -// Sum64 computes the 64-bit xxHash digest of b. -func Sum64(b []byte) uint64 { - // A simpler version would be - // x := New() - // x.Write(b) - // return x.Sum64() - // but this is faster, particularly for small inputs. - - n := len(b) - var h uint64 - - if n >= 32 { - v1 := prime1v + prime2 - v2 := prime2 - v3 := uint64(0) - v4 := -prime1v - for len(b) >= 32 { - v1 = round(v1, u64(b[0:8:len(b)])) - v2 = round(v2, u64(b[8:16:len(b)])) - v3 = round(v3, u64(b[16:24:len(b)])) - v4 = round(v4, u64(b[24:32:len(b)])) - b = b[32:len(b):len(b)] - } - h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) - h = mergeRound(h, v1) - h = mergeRound(h, v2) - h = mergeRound(h, v3) - h = mergeRound(h, v4) - } else { - h = prime5 - } - - h += uint64(n) - - i, end := 0, len(b) - for ; i+8 <= end; i += 8 { - k1 := round(0, u64(b[i:i+8:len(b)])) - h ^= k1 - h = rol27(h)*prime1 + prime4 - } - if i+4 <= end { - h ^= uint64(u32(b[i:i+4:len(b)])) * prime1 - h = rol23(h)*prime2 + prime3 - i += 4 - } - for ; i < end; i++ { - h ^= uint64(b[i]) * prime5 - h = rol11(h) * prime1 - } - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return h -} - -func writeBlocks(x *xxh, b []byte) []byte { - v1, v2, v3, v4 := x.v1, x.v2, x.v3, x.v4 - for len(b) >= 32 { - v1 = round(v1, u64(b[0:8:len(b)])) - v2 = round(v2, u64(b[8:16:len(b)])) - v3 = round(v3, u64(b[16:24:len(b)])) - v4 = round(v4, u64(b[24:32:len(b)])) - b = b[32:len(b):len(b)] - } - x.v1, x.v2, x.v3, x.v4 = v1, v2, v3, v4 - return b -} diff --git a/vendor/github.com/cespare/xxhash/xxhash_safe.go b/vendor/github.com/cespare/xxhash/xxhash_safe.go deleted file mode 100644 index dfa15ab7e..000000000 --- a/vendor/github.com/cespare/xxhash/xxhash_safe.go +++ /dev/null @@ -1,10 +0,0 @@ -// +build appengine - -// This file contains the safe implementations of otherwise unsafe-using code. - -package xxhash - -// Sum64String computes the 64-bit xxHash digest of s. -func Sum64String(s string) uint64 { - return Sum64([]byte(s)) -} diff --git a/vendor/github.com/cespare/xxhash/xxhash_unsafe.go b/vendor/github.com/cespare/xxhash/xxhash_unsafe.go deleted file mode 100644 index d2b64e8bb..000000000 --- a/vendor/github.com/cespare/xxhash/xxhash_unsafe.go +++ /dev/null @@ -1,30 +0,0 @@ -// +build !appengine - -// This file encapsulates usage of unsafe. -// xxhash_safe.go contains the safe implementations. - -package xxhash - -import ( - "reflect" - "unsafe" -) - -// Sum64String computes the 64-bit xxHash digest of s. -// It may be faster than Sum64([]byte(s)) by avoiding a copy. -// -// TODO(caleb): Consider removing this if an optimization is ever added to make -// it unnecessary: https://golang.org/issue/2205. -// -// TODO(caleb): We still have a function call; we could instead write Go/asm -// copies of Sum64 for strings to squeeze out a bit more speed. -func Sum64String(s string) uint64 { - // See https://groups.google.com/d/msg/golang-nuts/dcjzJy-bSpw/tcZYBzQqAQAJ - // for some discussion about this unsafe conversion. - var b []byte - bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data - bh.Len = len(s) - bh.Cap = len(s) - return Sum64(b) -} diff --git a/vendor/modules.txt b/vendor/modules.txt index a0734eabb..92905c133 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -140,9 +140,6 @@ github.com/bytedance/sonic/loader/internal/rt # github.com/cenkalti/backoff/v4 v4.3.0 ## explicit; go 1.18 github.com/cenkalti/backoff/v4 -# github.com/cespare/xxhash v1.1.0 -## explicit -github.com/cespare/xxhash # github.com/cespare/xxhash/v2 v2.3.0 ## explicit; go 1.11 github.com/cespare/xxhash/v2