gotosocial/vendor/codeberg.org/gruf/go-split/splitter.go

package split

import (
	"errors"
	"strings"
	"unicode"
	"unicode/utf8"
)

// Splitter splits comma separated strings via SplitFunc(), keeping hold of
// a byte buffer between uses in order to minimise allocations.
type Splitter struct {
	// B is the buffer that each split element is built in. It is
	// truncated to zero length (capacity kept) before every element.
	B []byte
}

// SplitFunc splits the input string on commas, passing each element in order
// to the given function hook.
//
// Whitespace before each element is stripped. An element that starts with a
// single or double quote runs up to the matching unescaped end quote, and may
// therefore contain commas; the quotes themselves are not passed to fn. Empty
// unquoted segments (e.g. the middle of "a,,b") are skipped, while an empty
// quoted element ("") is passed to fn as an empty string.
//
// It returns the first non-nil error returned by fn, an error if a quoted
// element has no end quote, or an error if a quoted element is followed by
// anything other than a comma or the end of input. Otherwise it returns nil.
func (s *Splitter) SplitFunc(str string, fn func(string) error) error {
	for {
		// Start every element with an empty
		// buffer, holding onto its capacity.
		s.B = s.B[:0]

		// Trim leading space
		str = trimLeadingSpace(str)

		if len(str) == 0 {
			// Reached end
			return nil
		}

		switch first := str[0]; first {
		// Empty segment, skip the separator
		case ',':
			str = str[1:]

		// Single / double quoted
		case '\'', '"':
			// Build the element up to the end quote; end is
			// an index into str[1:], so the quote is str[end+1].
			end := s.next(str[1:], first)
			if end < 0 {
				return errors.New("missing end quote")
			}

			// Pass next element to callback func
			if err := fn(string(s.B)); err != nil {
				return err
			}

			// Reslice past the end quote + trim leading space
			str = trimLeadingSpace(str[end+2:])

			if len(str) == 0 {
				// Reached end
				return nil
			}

			if str[0] != ',' {
				// malformed element without comma after quote
				return errors.New("missing comma separator")
			}

			// Skip comma
			str = str[1:]

		// No quoting
		default:
			// Build the element up to the next unescaped comma.
			// The index returned here is never zero: str[0] is
			// known not to be a comma, so no empty-element case
			// is needed in this branch.
			end := s.next(str, ',')
			if end < 0 {
				// No further comma, the rest of the input
				// (we know len > 0) is the final element.
				return fn(string(s.B))
			}

			// Pass next element to callback
			if err := fn(string(s.B)); err != nil {
				return err
			}

			// Skip past element and its comma
			str = str[end+1:]
		}
	}
}

// next builds the next string element by appending to s.B, scanning str up to
// the first unescaped instance of the delimiter byte c. It returns the index
// in str of that delimiter, or -1 if the end of the string was reached first.
//
// Backslash handling:
//   - a run of backslashes directly before c is read as escape pairs: every
//     two backslashes produce one literal backslash, and a left-over odd
//     backslash escapes c, which is then written to s.B as an ordinary byte.
//     An even run therefore leaves c unescaped and ends the element.
//   - backslashes before any other byte, or at the very end of str, are
//     copied to s.B unchanged.
func (s *Splitter) next(str string, c byte) int {
	var delims int

	// Guarantee buf large enough: at most
	// len(str) bytes get appended below.
	if len(str) > cap(s.B)-len(s.B) {
		nb := make([]byte, 2*cap(s.B)+len(str))
		_ = copy(nb, s.B)
		s.B = nb[:len(s.B)]
	}

	for i := 0; i < len(str); i++ {
		// Count up the current run of backslashes,
		// what they mean depends on what follows.
		if str[i] == '\\' {
			delims++
			continue
		}

		if str[i] == c {
			// Each pair of backslashes is one literal backslash.
			if pairs := delims / 2; pairs > 0 {
				s.B = append(s.B, backslashes(pairs)...)
			}

			// No odd backslash left over to escape
			// c, so this is the end of the element.
			if delims%2 == 0 {
				return i
			}
		} else if delims > 0 {
			// Not before a delimiter, keep backslashes as-is.
			s.B = append(s.B, backslashes(delims)...)
		}

		// Write byte to buffer
		s.B = append(s.B, str[i])

		// Reset count
		delims = 0
	}

	if delims > 0 {
		// Input ended on a run of backslashes,
		// keep them rather than dropping input.
		s.B = append(s.B, backslashes(delims)...)
	}

	return -1
}

// asciiSpace is a bitset lookup table of the ascii space chars (see:
// strings.asciiSet): the bit for byte b lives in word b/32 at position b%32.
var asciiSpace = func() (as [8]uint32) {
	for _, c := range [...]byte{'\t', '\n', '\v', '\f', '\r', ' '} {
		as[c/32] |= 1 << (c % 32)
	}
	return as
}()

// trimLeadingSpace returns str with its leading whitespace removed. ASCII
// bytes are checked against the asciiSpace lookup table; the first non-ASCII
// byte hands the remainder of the string over to trimLeadingSpaceSlow.
func trimLeadingSpace(str string) string {
	for i := 0; i < len(str); i++ {
		b := str[i]

		// Beyond the ascii range, the rest needs the slower rune check.
		if b >= utf8.RuneSelf {
			return trimLeadingSpaceSlow(str[i:])
		}

		// First non-space ascii char, everything before it is trimmed.
		if asciiSpace[b/32]&(1<<(b%32)) == 0 {
			return str[i:]
		}
	}

	// Nothing but ascii space (or nothing at all).
	return str[len(str):]
}

// trimLeadingSpaceSlow trims leading space using the slower unicode.IsSpace
// check, returning str from its first non-space rune onwards. A string made
// up entirely of whitespace trims down to the empty string.
func trimLeadingSpaceSlow(str string) string {
	return strings.TrimLeftFunc(str, unicode.IsSpace)
}

// backslashes returns a string of backslashes of the given length. Lengths up
// to and including that of the constant below are served by slicing it, so no
// new string is built; anything longer falls back to backslashSlow.
// count must not be negative, slicing with a negative length panics.
func backslashes(count int) string {
	// Named so as not to shadow this function within its own body.
	const run = `\\\\\\\\\\\\\\\\\\\\`

	// Fast-path, slice the string const. The bound is inclusive:
	// a request for exactly len(run) is still covered by the const.
	if count <= len(run) {
		return run[:count]
	}

	// Slow-path, build custom string
	return backslashSlow(count)
}

// backslashSlow builds a new string of backslashes of custom length.
// A count of zero or less gives the empty string.
func backslashSlow(count int) string {
	if count <= 0 {
		// strings.Repeat panics on a negative
		// count, keep returning empty instead.
		return ""
	}
	return strings.Repeat(`\`, count)
}
[feature] support nested configuration files, and setting ALL configuration variables by CLI and env (#4109) This updates our configuration code generator to now also include map marshal and unmarshalers. So we now have much more control over how things get read from pflags, and stored / read from viper configuration. This allows us to set ALL configuration variables by CLI and environment now, AND support nested configuration files. e.g. ```yaml advanced: scraper-deterrence = true http-client: allow-ips = ["127.0.0.1"] ``` is the same as ```yaml advanced-scraper-deterrence = true http-client-allow-ips = ["127.0.0.1"] ``` This also starts cleaning up of our jumbled Configuration{} type by moving the advanced configuration options into their own nested structs, also as a way to show what it's capable of. It's worth noting however that nesting only works if the Go types are nested too (as this is how we hint to our code generator to generate the necessary flattening code :p). closes #3195 Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4109 Co-authored-by: kim <grufwub@gmail.com> Co-committed-by: kim <grufwub@gmail.com> 2025-05-06 15:51:45 +00:00			`package split`

			`import (`
			`"errors"`
			`"strings"`
			`"unicode"`
			`"unicode/utf8"`
			`)`

			`// Splitter holds onto a byte buffer for use in minimising allocations during SplitFunc().`
			`type Splitter struct{ B []byte }`

			`// SplitFunc will split input string on commas, taking into account string quoting and`
			`// stripping extra whitespace, passing each split to the given function hook.`
			`func (s *Splitter) SplitFunc(str string, fn func(string) error) error {`
			`for {`
			`// Reset buffer`
			`s.B = s.B[0:0]`

			`// Trim leading space`
			`str = trimLeadingSpace(str)`

			`if len(str) < 1 {`
			`// Reached end`
			`return nil`
			`}`

			`switch {`
			`// Single / double quoted`
			`case str[0] == '\'', str[0] == '"':`
			`// Calculate next string elem`
			`i := 1 + s.next(str[1:], str[0])`
			`if i == 0 /* i.e. if .next() returned -1 */ {`
			`return errors.New("missing end quote")`
			`}`

			`// Pass next element to callback func`
			`if err := fn(string(s.B)); err != nil {`
			`return err`
			`}`

			`// Reslice + trim leading space`
			`str = trimLeadingSpace(str[i+1:])`

			`if len(str) < 1 {`
			`// reached end`
			`return nil`
			`}`

			`if str[0] != ',' {`
			`// malformed element without comma after quote`
			`return errors.New("missing comma separator")`
			`}`

			`// Skip comma`
			`str = str[1:]`

			`// Empty segment`
			`case str[0] == ',':`
			`str = str[1:]`

			`// No quoting`
			`default:`
			`// Calculate next string elem`
			`i := s.next(str, ',')`

			`switch i {`
			`// Reached end`
			`case -1:`
			`// we know len > 0`

			`// Pass to callback`
			`return fn(string(s.B))`

			`// Empty elem`
			`case 0:`
			`str = str[1:]`

			`// Non-zero elem`
			`default:`
			`// Pass next element to callback`
			`if err := fn(string(s.B)); err != nil {`
			`return err`
			`}`

			`// Skip past eleme`
			`str = str[i+1:]`
			`}`
			`}`
			`}`
			`}`

			`// next will build the next string element in s.B up to non-delimited instance of c,`
			`// returning number of characters iterated, or -1 if the end of the string was reached.`
			`func (s *Splitter) next(str string, c byte) int {`
			`var delims int`

			`// Guarantee buf large enough`
			`if len(str) > cap(s.B)-len(s.B) {`
			`nb := make([]byte, 2*cap(s.B)+len(str))`
			`_ = copy(nb, s.B)`
			`s.B = nb[:len(s.B)]`
			`}`

			`for i := 0; i < len(str); i++ {`
			`// Increment delims`
			`if str[i] == '\\' {`
			`delims++`
			`continue`
			`}`

			`if str[i] == c {`
			`var count int`

			`if count = delims / 2; count > 0 {`
			`// Add backslashes to buffer`
			`slashes := backslashes(count)`
			`s.B = append(s.B, slashes...)`
			`}`

			`// Reached delim'd char`
			`if delims-count == 0 {`
			`return i`
			`}`
			`} else if delims > 0 {`
			`// Add backslashes to buffer`
			`slashes := backslashes(delims)`
			`s.B = append(s.B, slashes...)`
			`}`

			`// Write byte to buffer`
			`s.B = append(s.B, str[i])`

			`// Reset count`
			`delims = 0`
			`}`

			`return -1`
			`}`

			`// asciiSpace is a lookup table of ascii space chars (see: strings.asciiSet).`
			`var asciiSpace = func() (as [8]uint32) {`
			`as['\t'/32] \|= 1 << ('\t' % 32)`
			`as['\n'/32] \|= 1 << ('\n' % 32)`
			`as['\v'/32] \|= 1 << ('\v' % 32)`
			`as['\f'/32] \|= 1 << ('\f' % 32)`
			`as['\r'/32] \|= 1 << ('\r' % 32)`
			`as[' '/32] \|= 1 << (' ' % 32)`
			`return`
			`}()`

			`// trimLeadingSpace trims the leading space from a string.`
			`func trimLeadingSpace(str string) string {`
			`var start int`

			`for ; start < len(str); start++ {`
			`// If beyond ascii range, trim using slower rune check.`
			`if str[start] >= utf8.RuneSelf {`
			`return trimLeadingSpaceSlow(str[start:])`
			`}`

			`// Ascii character`
			`char := str[start]`

			`// This is first non-space ASCII, trim up to here`
			`if (asciiSpace[char/32] & (1 << (char % 32))) == 0 {`
			`break`
			`}`
			`}`

			`return str[start:]`
			`}`

			`// trimLeadingSpaceSlow trims leading space using the slower unicode.IsSpace check.`
			`func trimLeadingSpaceSlow(str string) string {`
			`for i, r := range str {`
			`if !unicode.IsSpace(r) {`
			`return str[i:]`
			`}`
			`}`
			`return str`
			`}`

			`// backslashes will return a string of backslashes of given length.`
			`func backslashes(count int) string {`
			const backslashes = `\\\\\\\\\\\\\\\\\\\\`

			`// Fast-path, use string const`
			`if count < len(backslashes) {`
			`return backslashes[:count]`
			`}`

			`// Slow-path, build custom string`
			`return backslashSlow(count)`
			`}`

			`// backslashSlow will build a string of backslashes of custom length.`
			`func backslashSlow(count int) string {`
			`var buf strings.Builder`
			`for i := 0; i < count; i++ {`
			`buf.WriteByte('\\')`
			`}`
			`return buf.String()`
			`}`