mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-10-29 16:22:24 -05:00
205 lines
4.2 KiB
Go
205 lines
4.2 KiB
Go
|
|
package split
|
||
|
|
|
||
|
|
import (
|
||
|
|
"errors"
|
||
|
|
"strings"
|
||
|
|
"unicode"
|
||
|
|
"unicode/utf8"
|
||
|
|
)
|
||
|
|
|
||
|
|
// Splitter carries a reusable scratch buffer so that repeated calls to
// SplitFunc() can build elements without allocating a new buffer each time.
// The zero value is ready to use.
type Splitter struct {
	// B is the scratch buffer each element is assembled in. It is reset
	// to zero length (capacity retained) before every element.
	B []byte
}
|
||
|
|
|
||
|
|
// SplitFunc will split input string on commas, taking into account string quoting and
|
||
|
|
// stripping extra whitespace, passing each split to the given function hook.
|
||
|
|
func (s *Splitter) SplitFunc(str string, fn func(string) error) error {
|
||
|
|
for {
|
||
|
|
// Reset buffer
|
||
|
|
s.B = s.B[0:0]
|
||
|
|
|
||
|
|
// Trim leading space
|
||
|
|
str = trimLeadingSpace(str)
|
||
|
|
|
||
|
|
if len(str) < 1 {
|
||
|
|
// Reached end
|
||
|
|
return nil
|
||
|
|
}
|
||
|
|
|
||
|
|
switch {
|
||
|
|
// Single / double quoted
|
||
|
|
case str[0] == '\'', str[0] == '"':
|
||
|
|
// Calculate next string elem
|
||
|
|
i := 1 + s.next(str[1:], str[0])
|
||
|
|
if i == 0 /* i.e. if .next() returned -1 */ {
|
||
|
|
return errors.New("missing end quote")
|
||
|
|
}
|
||
|
|
|
||
|
|
// Pass next element to callback func
|
||
|
|
if err := fn(string(s.B)); err != nil {
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
|
||
|
|
// Reslice + trim leading space
|
||
|
|
str = trimLeadingSpace(str[i+1:])
|
||
|
|
|
||
|
|
if len(str) < 1 {
|
||
|
|
// reached end
|
||
|
|
return nil
|
||
|
|
}
|
||
|
|
|
||
|
|
if str[0] != ',' {
|
||
|
|
// malformed element without comma after quote
|
||
|
|
return errors.New("missing comma separator")
|
||
|
|
}
|
||
|
|
|
||
|
|
// Skip comma
|
||
|
|
str = str[1:]
|
||
|
|
|
||
|
|
// Empty segment
|
||
|
|
case str[0] == ',':
|
||
|
|
str = str[1:]
|
||
|
|
|
||
|
|
// No quoting
|
||
|
|
default:
|
||
|
|
// Calculate next string elem
|
||
|
|
i := s.next(str, ',')
|
||
|
|
|
||
|
|
switch i {
|
||
|
|
// Reached end
|
||
|
|
case -1:
|
||
|
|
// we know len > 0
|
||
|
|
|
||
|
|
// Pass to callback
|
||
|
|
return fn(string(s.B))
|
||
|
|
|
||
|
|
// Empty elem
|
||
|
|
case 0:
|
||
|
|
str = str[1:]
|
||
|
|
|
||
|
|
// Non-zero elem
|
||
|
|
default:
|
||
|
|
// Pass next element to callback
|
||
|
|
if err := fn(string(s.B)); err != nil {
|
||
|
|
return err
|
||
|
|
}
|
||
|
|
|
||
|
|
// Skip past eleme
|
||
|
|
str = str[i+1:]
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// next will build the next string element in s.B up to non-delimited instance of c,
|
||
|
|
// returning number of characters iterated, or -1 if the end of the string was reached.
|
||
|
|
func (s *Splitter) next(str string, c byte) int {
|
||
|
|
var delims int
|
||
|
|
|
||
|
|
// Guarantee buf large enough
|
||
|
|
if len(str) > cap(s.B)-len(s.B) {
|
||
|
|
nb := make([]byte, 2*cap(s.B)+len(str))
|
||
|
|
_ = copy(nb, s.B)
|
||
|
|
s.B = nb[:len(s.B)]
|
||
|
|
}
|
||
|
|
|
||
|
|
for i := 0; i < len(str); i++ {
|
||
|
|
// Increment delims
|
||
|
|
if str[i] == '\\' {
|
||
|
|
delims++
|
||
|
|
continue
|
||
|
|
}
|
||
|
|
|
||
|
|
if str[i] == c {
|
||
|
|
var count int
|
||
|
|
|
||
|
|
if count = delims / 2; count > 0 {
|
||
|
|
// Add backslashes to buffer
|
||
|
|
slashes := backslashes(count)
|
||
|
|
s.B = append(s.B, slashes...)
|
||
|
|
}
|
||
|
|
|
||
|
|
// Reached delim'd char
|
||
|
|
if delims-count == 0 {
|
||
|
|
return i
|
||
|
|
}
|
||
|
|
} else if delims > 0 {
|
||
|
|
// Add backslashes to buffer
|
||
|
|
slashes := backslashes(delims)
|
||
|
|
s.B = append(s.B, slashes...)
|
||
|
|
}
|
||
|
|
|
||
|
|
// Write byte to buffer
|
||
|
|
s.B = append(s.B, str[i])
|
||
|
|
|
||
|
|
// Reset count
|
||
|
|
delims = 0
|
||
|
|
}
|
||
|
|
|
||
|
|
return -1
|
||
|
|
}
|
||
|
|
|
||
|
|
// asciiSpace is a 256-bit membership table of the ASCII whitespace bytes
// (tab, newline, vertical tab, form feed, carriage return and space), laid
// out like strings.asciiSet: byte b is a member when bit b%32 of word b/32
// is set.
var asciiSpace = func() (table [8]uint32) {
	for _, b := range []byte("\t\n\v\f\r ") {
		table[b/32] |= uint32(1) << (b % 32)
	}
	return table
}()
|
||
|
|
|
||
|
|
// trimLeadingSpace trims the leading space from a string.
|
||
|
|
func trimLeadingSpace(str string) string {
|
||
|
|
var start int
|
||
|
|
|
||
|
|
for ; start < len(str); start++ {
|
||
|
|
// If beyond ascii range, trim using slower rune check.
|
||
|
|
if str[start] >= utf8.RuneSelf {
|
||
|
|
return trimLeadingSpaceSlow(str[start:])
|
||
|
|
}
|
||
|
|
|
||
|
|
// Ascii character
|
||
|
|
char := str[start]
|
||
|
|
|
||
|
|
// This is first non-space ASCII, trim up to here
|
||
|
|
if (asciiSpace[char/32] & (1 << (char % 32))) == 0 {
|
||
|
|
break
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return str[start:]
|
||
|
|
}
|
||
|
|
|
||
|
|
// trimLeadingSpaceSlow returns str with its leading whitespace removed, using
// the rune-aware (and therefore slower) unicode.IsSpace check so that
// non-ASCII whitespace is trimmed as well.
//
// If str is empty or consists solely of whitespace, the empty string is returned.
func trimLeadingSpaceSlow(str string) string {
	for i, r := range str {
		if !unicode.IsSpace(r) {
			// First non-space rune: keep everything from here.
			return str[i:]
		}
	}

	// Every rune was whitespace, so nothing remains after trimming.
	// (Returning str here would hand back the untrimmed input.)
	return ""
}
|
||
|
|
|
||
|
|
// backslashes will return a string of backslashes of given length.
|
||
|
|
func backslashes(count int) string {
|
||
|
|
const backslashes = `\\\\\\\\\\\\\\\\\\\\`
|
||
|
|
|
||
|
|
// Fast-path, use string const
|
||
|
|
if count < len(backslashes) {
|
||
|
|
return backslashes[:count]
|
||
|
|
}
|
||
|
|
|
||
|
|
// Slow-path, build custom string
|
||
|
|
return backslashSlow(count)
|
||
|
|
}
|
||
|
|
|
||
|
|
// backslashSlow builds a string of count backslash characters of arbitrary
// length. A count of zero or less yields the empty string.
func backslashSlow(count int) string {
	// strings.Repeat panics on a negative count, so keep
	// returning an empty string for non-positive input.
	if count <= 0 {
		return ""
	}

	// Repeat sizes its result up-front, avoiding the repeated
	// buffer growth of appending one byte at a time.
	return strings.Repeat(`\`, count)
}
|