mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-11-02 16:52:25 -06:00 
			
		
		
		
	
		
			
	
	
		
			205 lines
		
	
	
	
		
			4.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			205 lines
		
	
	
	
		
			4.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| 
								 | 
							
								package split
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import (
							 | 
						||
| 
								 | 
							
									"errors"
							 | 
						||
| 
								 | 
							
									"strings"
							 | 
						||
| 
								 | 
							
									"unicode"
							 | 
						||
| 
								 | 
							
									"unicode/utf8"
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Splitter holds onto a byte buffer for use in minimising allocations during SplitFunc().
							 | 
						||
| 
								 | 
							
								type Splitter struct {
									// B is the scratch buffer each split element is assembled in;
									// SplitFunc truncates it to zero length before every element.
									B []byte
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// SplitFunc will split input string on commas, taking into account string quoting and
							 | 
						||
| 
								 | 
							
								// stripping extra whitespace, passing each split to the given function hook.
							 | 
						||
| 
								 | 
							
								func (s *Splitter) SplitFunc(str string, fn func(string) error) error {
							 | 
						||
| 
								 | 
							
									for {
							 | 
						||
| 
								 | 
							
										// Reset buffer
							 | 
						||
| 
								 | 
							
										s.B = s.B[0:0]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										// Trim leading space
							 | 
						||
| 
								 | 
							
										str = trimLeadingSpace(str)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										if len(str) < 1 {
							 | 
						||
| 
								 | 
							
											// Reached end
							 | 
						||
| 
								 | 
							
											return nil
							 | 
						||
| 
								 | 
							
										}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										switch {
							 | 
						||
| 
								 | 
							
										// Single / double quoted
							 | 
						||
| 
								 | 
							
										case str[0] == '\'', str[0] == '"':
							 | 
						||
| 
								 | 
							
											// Calculate next string elem
							 | 
						||
| 
								 | 
							
											i := 1 + s.next(str[1:], str[0])
							 | 
						||
| 
								 | 
							
											if i == 0 /* i.e. if .next() returned -1 */ {
							 | 
						||
| 
								 | 
							
												return errors.New("missing end quote")
							 | 
						||
| 
								 | 
							
											}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
											// Pass next element to callback func
							 | 
						||
| 
								 | 
							
											if err := fn(string(s.B)); err != nil {
							 | 
						||
| 
								 | 
							
												return err
							 | 
						||
| 
								 | 
							
											}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
											// Reslice + trim leading space
							 | 
						||
| 
								 | 
							
											str = trimLeadingSpace(str[i+1:])
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
											if len(str) < 1 {
							 | 
						||
| 
								 | 
							
												// reached end
							 | 
						||
| 
								 | 
							
												return nil
							 | 
						||
| 
								 | 
							
											}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
											if str[0] != ',' {
							 | 
						||
| 
								 | 
							
												// malformed element without comma after quote
							 | 
						||
| 
								 | 
							
												return errors.New("missing comma separator")
							 | 
						||
| 
								 | 
							
											}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
											// Skip comma
							 | 
						||
| 
								 | 
							
											str = str[1:]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										// Empty segment
							 | 
						||
| 
								 | 
							
										case str[0] == ',':
							 | 
						||
| 
								 | 
							
											str = str[1:]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										// No quoting
							 | 
						||
| 
								 | 
							
										default:
							 | 
						||
| 
								 | 
							
											// Calculate next string elem
							 | 
						||
| 
								 | 
							
											i := s.next(str, ',')
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
											switch i {
							 | 
						||
| 
								 | 
							
											// Reached end
							 | 
						||
| 
								 | 
							
											case -1:
							 | 
						||
| 
								 | 
							
												// we know len > 0
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
												// Pass to callback
							 | 
						||
| 
								 | 
							
												return fn(string(s.B))
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
											// Empty elem
							 | 
						||
| 
								 | 
							
											case 0:
							 | 
						||
| 
								 | 
							
												str = str[1:]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
											// Non-zero elem
							 | 
						||
| 
								 | 
							
											default:
							 | 
						||
| 
								 | 
							
												// Pass next element to callback
							 | 
						||
| 
								 | 
							
												if err := fn(string(s.B)); err != nil {
							 | 
						||
| 
								 | 
							
													return err
							 | 
						||
| 
								 | 
							
												}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
												// Skip past eleme
							 | 
						||
| 
								 | 
							
												str = str[i+1:]
							 | 
						||
| 
								 | 
							
											}
							 | 
						||
| 
								 | 
							
										}
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// next will build the next string element in s.B up to non-delimited instance of c,
							 | 
						||
| 
								 | 
							
								// returning number of characters iterated, or -1 if the end of the string was reached.
							 | 
						||
| 
								 | 
							
								func (s *Splitter) next(str string, c byte) int {
							 | 
						||
| 
								 | 
							
									var delims int
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// Guarantee buf large enough
							 | 
						||
| 
								 | 
							
									if len(str) > cap(s.B)-len(s.B) {
							 | 
						||
| 
								 | 
							
										nb := make([]byte, 2*cap(s.B)+len(str))
							 | 
						||
| 
								 | 
							
										_ = copy(nb, s.B)
							 | 
						||
| 
								 | 
							
										s.B = nb[:len(s.B)]
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									for i := 0; i < len(str); i++ {
							 | 
						||
| 
								 | 
							
										// Increment delims
							 | 
						||
| 
								 | 
							
										if str[i] == '\\' {
							 | 
						||
| 
								 | 
							
											delims++
							 | 
						||
| 
								 | 
							
											continue
							 | 
						||
| 
								 | 
							
										}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										if str[i] == c {
							 | 
						||
| 
								 | 
							
											var count int
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
											if count = delims / 2; count > 0 {
							 | 
						||
| 
								 | 
							
												// Add backslashes to buffer
							 | 
						||
| 
								 | 
							
												slashes := backslashes(count)
							 | 
						||
| 
								 | 
							
												s.B = append(s.B, slashes...)
							 | 
						||
| 
								 | 
							
											}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
											// Reached delim'd char
							 | 
						||
| 
								 | 
							
											if delims-count == 0 {
							 | 
						||
| 
								 | 
							
												return i
							 | 
						||
| 
								 | 
							
											}
							 | 
						||
| 
								 | 
							
										} else if delims > 0 {
							 | 
						||
| 
								 | 
							
											// Add backslashes to buffer
							 | 
						||
| 
								 | 
							
											slashes := backslashes(delims)
							 | 
						||
| 
								 | 
							
											s.B = append(s.B, slashes...)
							 | 
						||
| 
								 | 
							
										}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										// Write byte to buffer
							 | 
						||
| 
								 | 
							
										s.B = append(s.B, str[i])
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										// Reset count
							 | 
						||
| 
								 | 
							
										delims = 0
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									return -1
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// asciiSpace is a bitset of the ASCII whitespace bytes (tab, newline, vertical
								// tab, form feed, carriage return and space), stored as eight 32-bit words:
								// byte b is a member when bit b%32 of word b/32 is set (see: strings.asciiSet).
								var asciiSpace = func() [8]uint32 {
									var set [8]uint32
									for _, b := range [...]byte{'\t', '\n', '\v', '\f', '\r', ' '} {
										set[b/32] |= uint32(1) << (b % 32)
									}
									return set
								}()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// trimLeadingSpace trims the leading space from a string.
							 | 
						||
| 
								 | 
							
								func trimLeadingSpace(str string) string {
							 | 
						||
| 
								 | 
							
									var start int
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									for ; start < len(str); start++ {
							 | 
						||
| 
								 | 
							
										// If beyond ascii range, trim using slower rune check.
							 | 
						||
| 
								 | 
							
										if str[start] >= utf8.RuneSelf {
							 | 
						||
| 
								 | 
							
											return trimLeadingSpaceSlow(str[start:])
							 | 
						||
| 
								 | 
							
										}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										// Ascii character
							 | 
						||
| 
								 | 
							
										char := str[start]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										// This is first non-space ASCII, trim up to here
							 | 
						||
| 
								 | 
							
										if (asciiSpace[char/32] & (1 << (char % 32))) == 0 {
							 | 
						||
| 
								 | 
							
											break
							 | 
						||
| 
								 | 
							
										}
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									return str[start:]
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// trimLeadingSpaceSlow returns str without its leading whitespace, decoding
								// runes and testing each with unicode.IsSpace. A string made up entirely of
								// whitespace yields the empty string.
								func trimLeadingSpaceSlow(str string) string {
									return strings.TrimLeftFunc(str, unicode.IsSpace)
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// backslashes will return a string of backslashes of given length.
							 | 
						||
| 
								 | 
							
								func backslashes(count int) string {
							 | 
						||
| 
								 | 
							
									const backslashes = `\\\\\\\\\\\\\\\\\\\\`
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// Fast-path, use string const
							 | 
						||
| 
								 | 
							
									if count < len(backslashes) {
							 | 
						||
| 
								 | 
							
										return backslashes[:count]
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// Slow-path, build custom string
							 | 
						||
| 
								 | 
							
									return backslashSlow(count)
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// backslashSlow builds a string of count backslashes for lengths the caller
								// cannot serve from a prebuilt constant. A count of zero or less yields the
								// empty string.
								func backslashSlow(count int) string {
									// strings.Repeat panics on a negative count; keep the empty result.
									if count <= 0 {
										return ""
									}
									return strings.Repeat(`\`, count)
								}
							 |