mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-31 11:12:26 -05:00 
			
		
		
		
	
		
			
	
	
		
			205 lines
		
	
	
	
		
			4.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			205 lines
		
	
	
	
		
			4.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
|  | package split | ||
|  | 
 | ||
|  | import ( | ||
|  | 	"errors" | ||
|  | 	"strings" | ||
|  | 	"unicode" | ||
|  | 	"unicode/utf8" | ||
|  | ) | ||
|  | 
 | ||
// Splitter carries a reusable byte buffer so that repeated calls to
// SplitFunc() can build elements without allocating a fresh buffer each time.
type Splitter struct {
	// B is the scratch buffer that elements are assembled into. It is
	// truncated to zero length (capacity retained) before each element.
	B []byte
}
|  | 
 | ||
|  | // SplitFunc will split input string on commas, taking into account string quoting and | ||
|  | // stripping extra whitespace, passing each split to the given function hook. | ||
|  | func (s *Splitter) SplitFunc(str string, fn func(string) error) error { | ||
|  | 	for { | ||
|  | 		// Reset buffer | ||
|  | 		s.B = s.B[0:0] | ||
|  | 
 | ||
|  | 		// Trim leading space | ||
|  | 		str = trimLeadingSpace(str) | ||
|  | 
 | ||
|  | 		if len(str) < 1 { | ||
|  | 			// Reached end | ||
|  | 			return nil | ||
|  | 		} | ||
|  | 
 | ||
|  | 		switch { | ||
|  | 		// Single / double quoted | ||
|  | 		case str[0] == '\'', str[0] == '"': | ||
|  | 			// Calculate next string elem | ||
|  | 			i := 1 + s.next(str[1:], str[0]) | ||
|  | 			if i == 0 /* i.e. if .next() returned -1 */ { | ||
|  | 				return errors.New("missing end quote") | ||
|  | 			} | ||
|  | 
 | ||
|  | 			// Pass next element to callback func | ||
|  | 			if err := fn(string(s.B)); err != nil { | ||
|  | 				return err | ||
|  | 			} | ||
|  | 
 | ||
|  | 			// Reslice + trim leading space | ||
|  | 			str = trimLeadingSpace(str[i+1:]) | ||
|  | 
 | ||
|  | 			if len(str) < 1 { | ||
|  | 				// reached end | ||
|  | 				return nil | ||
|  | 			} | ||
|  | 
 | ||
|  | 			if str[0] != ',' { | ||
|  | 				// malformed element without comma after quote | ||
|  | 				return errors.New("missing comma separator") | ||
|  | 			} | ||
|  | 
 | ||
|  | 			// Skip comma | ||
|  | 			str = str[1:] | ||
|  | 
 | ||
|  | 		// Empty segment | ||
|  | 		case str[0] == ',': | ||
|  | 			str = str[1:] | ||
|  | 
 | ||
|  | 		// No quoting | ||
|  | 		default: | ||
|  | 			// Calculate next string elem | ||
|  | 			i := s.next(str, ',') | ||
|  | 
 | ||
|  | 			switch i { | ||
|  | 			// Reached end | ||
|  | 			case -1: | ||
|  | 				// we know len > 0 | ||
|  | 
 | ||
|  | 				// Pass to callback | ||
|  | 				return fn(string(s.B)) | ||
|  | 
 | ||
|  | 			// Empty elem | ||
|  | 			case 0: | ||
|  | 				str = str[1:] | ||
|  | 
 | ||
|  | 			// Non-zero elem | ||
|  | 			default: | ||
|  | 				// Pass next element to callback | ||
|  | 				if err := fn(string(s.B)); err != nil { | ||
|  | 					return err | ||
|  | 				} | ||
|  | 
 | ||
|  | 				// Skip past eleme | ||
|  | 				str = str[i+1:] | ||
|  | 			} | ||
|  | 		} | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | // next will build the next string element in s.B up to non-delimited instance of c, | ||
|  | // returning number of characters iterated, or -1 if the end of the string was reached. | ||
|  | func (s *Splitter) next(str string, c byte) int { | ||
|  | 	var delims int | ||
|  | 
 | ||
|  | 	// Guarantee buf large enough | ||
|  | 	if len(str) > cap(s.B)-len(s.B) { | ||
|  | 		nb := make([]byte, 2*cap(s.B)+len(str)) | ||
|  | 		_ = copy(nb, s.B) | ||
|  | 		s.B = nb[:len(s.B)] | ||
|  | 	} | ||
|  | 
 | ||
|  | 	for i := 0; i < len(str); i++ { | ||
|  | 		// Increment delims | ||
|  | 		if str[i] == '\\' { | ||
|  | 			delims++ | ||
|  | 			continue | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if str[i] == c { | ||
|  | 			var count int | ||
|  | 
 | ||
|  | 			if count = delims / 2; count > 0 { | ||
|  | 				// Add backslashes to buffer | ||
|  | 				slashes := backslashes(count) | ||
|  | 				s.B = append(s.B, slashes...) | ||
|  | 			} | ||
|  | 
 | ||
|  | 			// Reached delim'd char | ||
|  | 			if delims-count == 0 { | ||
|  | 				return i | ||
|  | 			} | ||
|  | 		} else if delims > 0 { | ||
|  | 			// Add backslashes to buffer | ||
|  | 			slashes := backslashes(delims) | ||
|  | 			s.B = append(s.B, slashes...) | ||
|  | 		} | ||
|  | 
 | ||
|  | 		// Write byte to buffer | ||
|  | 		s.B = append(s.B, str[i]) | ||
|  | 
 | ||
|  | 		// Reset count | ||
|  | 		delims = 0 | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return -1 | ||
|  | } | ||
|  | 
 | ||
// asciiSpace is a 256-bit lookup table (8 x uint32) with one bit set for each
// ASCII whitespace byte: '\t', '\n', '\v', '\f', '\r' and ' '. Byte b is
// whitespace when bit (b % 32) of word (b / 32) is set (see: strings.asciiSet).
var asciiSpace = func() [8]uint32 {
	var table [8]uint32
	for _, c := range []byte{'\t', '\n', '\v', '\f', '\r', ' '} {
		table[c/32] |= 1 << (c % 32)
	}
	return table
}()
|  | 
 | ||
|  | // trimLeadingSpace trims the leading space from a string. | ||
|  | func trimLeadingSpace(str string) string { | ||
|  | 	var start int | ||
|  | 
 | ||
|  | 	for ; start < len(str); start++ { | ||
|  | 		// If beyond ascii range, trim using slower rune check. | ||
|  | 		if str[start] >= utf8.RuneSelf { | ||
|  | 			return trimLeadingSpaceSlow(str[start:]) | ||
|  | 		} | ||
|  | 
 | ||
|  | 		// Ascii character | ||
|  | 		char := str[start] | ||
|  | 
 | ||
|  | 		// This is first non-space ASCII, trim up to here | ||
|  | 		if (asciiSpace[char/32] & (1 << (char % 32))) == 0 { | ||
|  | 			break | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return str[start:] | ||
|  | } | ||
|  | 
 | ||
// trimLeadingSpaceSlow returns str with its leading whitespace removed, using
// the slower rune-by-rune unicode.IsSpace check. If str is empty or consists
// entirely of whitespace, the empty string is returned.
func trimLeadingSpaceSlow(str string) string {
	for i, r := range str {
		if !unicode.IsSpace(r) {
			// First non-space rune: keep everything from here on.
			return str[i:]
		}
	}

	// Every rune was whitespace, so nothing remains after trimming.
	return ""
}
|  | 
 | ||
|  | // backslashes will return a string of backslashes of given length. | ||
|  | func backslashes(count int) string { | ||
|  | 	const backslashes = `\\\\\\\\\\\\\\\\\\\\` | ||
|  | 
 | ||
|  | 	// Fast-path, use string const | ||
|  | 	if count < len(backslashes) { | ||
|  | 		return backslashes[:count] | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Slow-path, build custom string | ||
|  | 	return backslashSlow(count) | ||
|  | } | ||
|  | 
 | ||
// backslashSlow builds a string of count backslash characters. A count of
// zero or less yields the empty string.
func backslashSlow(count int) string {
	// strings.Repeat panics on a negative count, so guard it to keep
	// returning "" for non-positive input.
	if count <= 0 {
		return ""
	}
	return strings.Repeat(`\`, count)
}