gotosocial/vendor/codeberg.org/gruf/go-split/splitter.go

205 lines
4.2 KiB
Go
Raw Normal View History

package split
import (
"errors"
"strings"
"unicode"
"unicode/utf8"
)
// Splitter carries a reusable byte buffer so that successive SplitFunc() calls can
// assemble each element without allocating a fresh scratch slice every time.
type Splitter struct {
	// B is the scratch buffer that elements are built in. SplitFunc() truncates
	// it before every element, so its contents are only meaningful while a
	// split is in progress.
	B []byte
}
// SplitFunc walks the comma separated elements of str and hands each one to fn,
// in order of appearance.
//
// Whitespace in front of an element is discarded. An element that starts with a
// single or double quote runs up to the matching unescaped closing quote, and
// may only be followed by optional whitespace and then a comma or the end of
// the input. Empty segments (two separators with nothing but whitespace between
// them) are skipped without calling fn.
//
// The first non-nil error returned by fn stops the split and is returned as-is.
// Malformed input yields "missing end quote" or "missing comma separator".
func (s *Splitter) SplitFunc(str string, fn func(string) error) error {
	for {
		// Every element is assembled from scratch in the shared buffer.
		s.B = s.B[:0]

		// Discard whitespace in front of the upcoming element.
		str = trimLeadingSpace(str)
		if len(str) == 0 {
			// Input exhausted.
			return nil
		}

		first := str[0]

		// Separator with nothing before it: an empty segment, nothing to report.
		if first == ',' {
			str = str[1:]
			continue
		}

		// Unquoted element: runs up to the next unescaped comma.
		if first != '\'' && first != '"' {
			end := s.next(str, ',')
			if end == -1 {
				// No further separator, so this is the final element.
				return fn(string(s.B))
			}
			if end != 0 {
				if err := fn(string(s.B)); err != nil {
					return err
				}
			}

			// Continue after the separator (a zero-length element is
			// stepped over without being reported).
			str = str[end+1:]
			continue
		}

		// Quoted element: the content starts after the opening quote and runs
		// up to the next unescaped quote of the same kind.
		body := str[1:]
		end := s.next(body, first)
		if end == -1 {
			return errors.New("missing end quote")
		}
		if err := fn(string(s.B)); err != nil {
			return err
		}

		// Move past the closing quote and any whitespace that follows it.
		str = trimLeadingSpace(body[end+1:])
		if len(str) == 0 {
			// The quoted element was the last one.
			return nil
		}
		if str[0] != ',' {
			// Something other than a separator follows the closing quote.
			return errors.New("missing comma separator")
		}

		// Step over the separator.
		str = str[1:]
	}
}
// next builds the next element of str into s.B, stopping at the first unescaped
// occurrence of the delimiter byte c.
//
// Backslash handling:
//   - A run of backslashes directly in front of c is read as escape pairs: each
//     pair contributes one literal backslash. If the run is odd, the leftover
//     backslash escapes c itself, which is then written as an ordinary byte.
//     If the run is even (including zero), c terminates the element.
//   - Backslashes in front of any other byte, or at the very end of str, are
//     copied through unchanged.
//
// It returns the index within str of the terminating delimiter, or -1 when str
// is exhausted without finding one. Output is appended to whatever s.B already
// holds.
func (s *Splitter) next(str string, c byte) int {
	// The element can never be longer than str, so growing once up front
	// means none of the appends below have to reallocate.
	if len(str) > cap(s.B)-len(s.B) {
		nb := make([]byte, len(s.B), 2*cap(s.B)+len(str))
		copy(nb, s.B)
		s.B = nb
	}

	// delims counts the backslashes seen since the last non-backslash byte.
	var delims int

	for i := 0; i < len(str); i++ {
		b := str[i]

		if b == '\\' {
			// Hold back until we know what the run is in front of.
			delims++
			continue
		}

		if b == c {
			// Every pair of backslashes collapses to one literal backslash.
			if pairs := delims / 2; pairs > 0 {
				s.B = append(s.B, backslashes(pairs)...)
			}

			// An even run leaves the delimiter unescaped: element complete.
			if delims%2 == 0 {
				return i
			}

			// Odd run: the delimiter is escaped, fall through and emit it.
		} else if delims > 0 {
			// Backslashes in front of an ordinary byte are literal.
			s.B = append(s.B, backslashes(delims)...)
		}

		s.B = append(s.B, b)
		delims = 0
	}

	// Backslashes at the end of the input escape nothing; keep them rather
	// than silently dropping them.
	if delims > 0 {
		s.B = append(s.B, backslashes(delims)...)
	}

	return -1
}
// asciiSpace is a 256-bit membership table (see: strings.asciiSet) with one bit
// set for each ASCII whitespace byte: tab, newline, vertical tab, form feed,
// carriage return and space. Byte b is a member when bit b%32 of word b/32 is set.
var asciiSpace = func() [8]uint32 {
	var table [8]uint32
	for _, b := range []byte{'\t', '\n', '\v', '\f', '\r', ' '} {
		table[b/32] |= 1 << (b % 32)
	}
	return table
}()
// trimLeadingSpace returns str with its leading whitespace removed.
//
// ASCII whitespace is recognised with a lookup in the asciiSpace table. As soon
// as a non-ASCII byte is encountered, the rest of the work is handed to
// trimLeadingSpaceSlow, which decodes runes.
func trimLeadingSpace(str string) string {
	for i := 0; i < len(str); i++ {
		b := str[i]

		switch {
		// Start of a multi-byte rune: the byte table cannot answer this one.
		case b >= utf8.RuneSelf:
			return trimLeadingSpaceSlow(str[i:])

		// First ASCII byte that is not whitespace: everything from here stays.
		case asciiSpace[b/32]&(1<<(b%32)) == 0:
			return str[i:]
		}
	}

	// Nothing but ASCII whitespace (or nothing at all).
	return ""
}
// trimLeadingSpaceSlow returns str with every leading rune for which
// unicode.IsSpace reports true removed. It is the rune-decoding counterpart of
// the ASCII table lookup and therefore the slower of the two paths.
//
// A string made up entirely of space runes yields the empty string.
func trimLeadingSpaceSlow(str string) string {
	return strings.TrimLeftFunc(str, unicode.IsSpace)
}
// backslashes returns a string consisting of count backslash characters.
//
// Counts up to the length of a built-in constant are served by slicing that
// constant, which does not allocate; anything longer is built on demand by
// backslashSlow. count must not be negative.
func backslashes(count int) string {
	// Named differently from the function so the identifier is not shadowed.
	const run = `\\\\\\\\\\\\\\\\\\\\`

	// Fast path: the constant covers every count up to AND including its
	// own length.
	if count <= len(run) {
		return run[:count]
	}

	// Slow path: longer than the constant, build a custom string.
	return backslashSlow(count)
}
// backslashSlow builds a string of count backslash characters, for lengths not
// covered by the constant in backslashes. A count of zero or less yields the
// empty string.
func backslashSlow(count int) string {
	// strings.Repeat panics on a negative count; keep returning "" instead.
	if count <= 0 {
		return ""
	}

	// Repeat sizes the result once instead of growing a builder byte by byte.
	return strings.Repeat(`\`, count)
}