mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-31 06:52:26 -05:00 
			
		
		
		
	
		
			
	
	
		
			377 lines
		
	
	
	
		
			9.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			377 lines
		
	
	
	
		
			9.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
|  | // Copyright 2014 The Go Authors. All rights reserved. | ||
|  | // Use of this source code is governed by a BSD-style | ||
|  | // license that can be found in the LICENSE file. | ||
|  | 
 | ||
|  | package cases | ||
|  | 
 | ||
|  | import "golang.org/x/text/transform" | ||
|  | 
 | ||
|  | // A context is used for iterating over source bytes, fetching case info and | ||
|  | // writing to a destination buffer. | ||
|  | // | ||
|  | // Casing operations may need more than one rune of context to decide how a rune | ||
|  | // should be cased. Casing implementations should call checkpoint on context | ||
|  | // whenever it is known to be safe to return the runes processed so far. | ||
|  | // | ||
|  | // It is recommended for implementations to not allow for more than 30 case | ||
|  | // ignorables as lookahead (analogous to the limit in norm) and to use state if | ||
|  | // unbounded lookahead is needed for cased runes. | ||
|  | type context struct { | ||
|  | 	dst, src []byte | ||
|  | 	atEOF    bool | ||
|  | 
 | ||
|  | 	pDst int // pDst points past the last written rune in dst. | ||
|  | 	pSrc int // pSrc points to the start of the currently scanned rune. | ||
|  | 
 | ||
|  | 	// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc. | ||
|  | 	nDst, nSrc int | ||
|  | 	err        error | ||
|  | 
 | ||
|  | 	sz   int  // size of current rune | ||
|  | 	info info // case information of currently scanned rune | ||
|  | 
 | ||
|  | 	// State preserved across calls to Transform. | ||
|  | 	isMidWord bool // false if next cased letter needs to be title-cased. | ||
|  | } | ||
|  | 
 | ||
|  | func (c *context) Reset() { | ||
|  | 	c.isMidWord = false | ||
|  | } | ||
|  | 
 | ||
|  | // ret returns the return values for the Transform method. It checks whether | ||
|  | // there were insufficient bytes in src to complete and introduces an error | ||
|  | // accordingly, if necessary. | ||
|  | func (c *context) ret() (nDst, nSrc int, err error) { | ||
|  | 	if c.err != nil || c.nSrc == len(c.src) { | ||
|  | 		return c.nDst, c.nSrc, c.err | ||
|  | 	} | ||
|  | 	// This point is only reached by mappers if there was no short destination | ||
|  | 	// buffer. This means that the source buffer was exhausted and that c.sz was | ||
|  | 	// set to 0 by next. | ||
|  | 	if c.atEOF && c.pSrc == len(c.src) { | ||
|  | 		return c.pDst, c.pSrc, nil | ||
|  | 	} | ||
|  | 	return c.nDst, c.nSrc, transform.ErrShortSrc | ||
|  | } | ||
|  | 
 | ||
|  | // retSpan returns the return values for the Span method. It checks whether | ||
|  | // there were insufficient bytes in src to complete and introduces an error | ||
|  | // accordingly, if necessary. | ||
|  | func (c *context) retSpan() (n int, err error) { | ||
|  | 	_, nSrc, err := c.ret() | ||
|  | 	return nSrc, err | ||
|  | } | ||
|  | 
 | ||
|  | // checkpoint sets the return value buffer points for Transform to the current | ||
|  | // positions. | ||
|  | func (c *context) checkpoint() { | ||
|  | 	if c.err == nil { | ||
|  | 		c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | // unreadRune causes the last rune read by next to be reread on the next | ||
|  | // invocation of next. Only one unreadRune may be called after a call to next. | ||
|  | func (c *context) unreadRune() { | ||
|  | 	c.sz = 0 | ||
|  | } | ||
|  | 
 | ||
|  | func (c *context) next() bool { | ||
|  | 	c.pSrc += c.sz | ||
|  | 	if c.pSrc == len(c.src) || c.err != nil { | ||
|  | 		c.info, c.sz = 0, 0 | ||
|  | 		return false | ||
|  | 	} | ||
|  | 	v, sz := trie.lookup(c.src[c.pSrc:]) | ||
|  | 	c.info, c.sz = info(v), sz | ||
|  | 	if c.sz == 0 { | ||
|  | 		if c.atEOF { | ||
|  | 			// A zero size means we have an incomplete rune. If we are atEOF, | ||
|  | 			// this means it is an illegal rune, which we will consume one | ||
|  | 			// byte at a time. | ||
|  | 			c.sz = 1 | ||
|  | 		} else { | ||
|  | 			c.err = transform.ErrShortSrc | ||
|  | 			return false | ||
|  | 		} | ||
|  | 	} | ||
|  | 	return true | ||
|  | } | ||
|  | 
 | ||
|  | // writeBytes adds bytes to dst. | ||
|  | func (c *context) writeBytes(b []byte) bool { | ||
|  | 	if len(c.dst)-c.pDst < len(b) { | ||
|  | 		c.err = transform.ErrShortDst | ||
|  | 		return false | ||
|  | 	} | ||
|  | 	// This loop is faster than using copy. | ||
|  | 	for _, ch := range b { | ||
|  | 		c.dst[c.pDst] = ch | ||
|  | 		c.pDst++ | ||
|  | 	} | ||
|  | 	return true | ||
|  | } | ||
|  | 
 | ||
|  | // writeString writes the given string to dst. | ||
|  | func (c *context) writeString(s string) bool { | ||
|  | 	if len(c.dst)-c.pDst < len(s) { | ||
|  | 		c.err = transform.ErrShortDst | ||
|  | 		return false | ||
|  | 	} | ||
|  | 	// This loop is faster than using copy. | ||
|  | 	for i := 0; i < len(s); i++ { | ||
|  | 		c.dst[c.pDst] = s[i] | ||
|  | 		c.pDst++ | ||
|  | 	} | ||
|  | 	return true | ||
|  | } | ||
|  | 
 | ||
|  | // copy writes the current rune to dst. | ||
|  | func (c *context) copy() bool { | ||
|  | 	return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz]) | ||
|  | } | ||
|  | 
 | ||
|  | // copyXOR copies the current rune to dst and modifies it by applying the XOR | ||
|  | // pattern of the case info. It is the responsibility of the caller to ensure | ||
|  | // that this is a rune with a XOR pattern defined. | ||
|  | func (c *context) copyXOR() bool { | ||
|  | 	if !c.copy() { | ||
|  | 		return false | ||
|  | 	} | ||
|  | 	if c.info&xorIndexBit == 0 { | ||
|  | 		// Fast path for 6-bit XOR pattern, which covers most cases. | ||
|  | 		c.dst[c.pDst-1] ^= byte(c.info >> xorShift) | ||
|  | 	} else { | ||
|  | 		// Interpret XOR bits as an index. | ||
|  | 		// TODO: test performance for unrolling this loop. Verify that we have | ||
|  | 		// at least two bytes and at most three. | ||
|  | 		idx := c.info >> xorShift | ||
|  | 		for p := c.pDst - 1; ; p-- { | ||
|  | 			c.dst[p] ^= xorData[idx] | ||
|  | 			idx-- | ||
|  | 			if xorData[idx] == 0 { | ||
|  | 				break | ||
|  | 			} | ||
|  | 		} | ||
|  | 	} | ||
|  | 	return true | ||
|  | } | ||
|  | 
 | ||
|  | // hasPrefix returns true if src[pSrc:] starts with the given string. | ||
|  | func (c *context) hasPrefix(s string) bool { | ||
|  | 	b := c.src[c.pSrc:] | ||
|  | 	if len(b) < len(s) { | ||
|  | 		return false | ||
|  | 	} | ||
|  | 	for i, c := range b[:len(s)] { | ||
|  | 		if c != s[i] { | ||
|  | 			return false | ||
|  | 		} | ||
|  | 	} | ||
|  | 	return true | ||
|  | } | ||
|  | 
 | ||
|  | // caseType returns an info with only the case bits, normalized to either | ||
|  | // cLower, cUpper, cTitle or cUncased. | ||
|  | func (c *context) caseType() info { | ||
|  | 	cm := c.info & 0x7 | ||
|  | 	if cm < 4 { | ||
|  | 		return cm | ||
|  | 	} | ||
|  | 	if cm >= cXORCase { | ||
|  | 		// xor the last bit of the rune with the case type bits. | ||
|  | 		b := c.src[c.pSrc+c.sz-1] | ||
|  | 		return info(b&1) ^ cm&0x3 | ||
|  | 	} | ||
|  | 	if cm == cIgnorableCased { | ||
|  | 		return cLower | ||
|  | 	} | ||
|  | 	return cUncased | ||
|  | } | ||
|  | 
 | ||
|  | // lower writes the lowercase version of the current rune to dst. | ||
|  | func lower(c *context) bool { | ||
|  | 	ct := c.caseType() | ||
|  | 	if c.info&hasMappingMask == 0 || ct == cLower { | ||
|  | 		return c.copy() | ||
|  | 	} | ||
|  | 	if c.info&exceptionBit == 0 { | ||
|  | 		return c.copyXOR() | ||
|  | 	} | ||
|  | 	e := exceptions[c.info>>exceptionShift:] | ||
|  | 	offset := 2 + e[0]&lengthMask // size of header + fold string | ||
|  | 	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange { | ||
|  | 		return c.writeString(e[offset : offset+nLower]) | ||
|  | 	} | ||
|  | 	return c.copy() | ||
|  | } | ||
|  | 
 | ||
|  | func isLower(c *context) bool { | ||
|  | 	ct := c.caseType() | ||
|  | 	if c.info&hasMappingMask == 0 || ct == cLower { | ||
|  | 		return true | ||
|  | 	} | ||
|  | 	if c.info&exceptionBit == 0 { | ||
|  | 		c.err = transform.ErrEndOfSpan | ||
|  | 		return false | ||
|  | 	} | ||
|  | 	e := exceptions[c.info>>exceptionShift:] | ||
|  | 	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange { | ||
|  | 		c.err = transform.ErrEndOfSpan | ||
|  | 		return false | ||
|  | 	} | ||
|  | 	return true | ||
|  | } | ||
|  | 
 | ||
|  | // upper writes the uppercase version of the current rune to dst. | ||
|  | func upper(c *context) bool { | ||
|  | 	ct := c.caseType() | ||
|  | 	if c.info&hasMappingMask == 0 || ct == cUpper { | ||
|  | 		return c.copy() | ||
|  | 	} | ||
|  | 	if c.info&exceptionBit == 0 { | ||
|  | 		return c.copyXOR() | ||
|  | 	} | ||
|  | 	e := exceptions[c.info>>exceptionShift:] | ||
|  | 	offset := 2 + e[0]&lengthMask // size of header + fold string | ||
|  | 	// Get length of first special case mapping. | ||
|  | 	n := (e[1] >> lengthBits) & lengthMask | ||
|  | 	if ct == cTitle { | ||
|  | 		// The first special case mapping is for lower. Set n to the second. | ||
|  | 		if n == noChange { | ||
|  | 			n = 0 | ||
|  | 		} | ||
|  | 		n, e = e[1]&lengthMask, e[n:] | ||
|  | 	} | ||
|  | 	if n != noChange { | ||
|  | 		return c.writeString(e[offset : offset+n]) | ||
|  | 	} | ||
|  | 	return c.copy() | ||
|  | } | ||
|  | 
 | ||
|  | // isUpper writes the isUppercase version of the current rune to dst. | ||
|  | func isUpper(c *context) bool { | ||
|  | 	ct := c.caseType() | ||
|  | 	if c.info&hasMappingMask == 0 || ct == cUpper { | ||
|  | 		return true | ||
|  | 	} | ||
|  | 	if c.info&exceptionBit == 0 { | ||
|  | 		c.err = transform.ErrEndOfSpan | ||
|  | 		return false | ||
|  | 	} | ||
|  | 	e := exceptions[c.info>>exceptionShift:] | ||
|  | 	// Get length of first special case mapping. | ||
|  | 	n := (e[1] >> lengthBits) & lengthMask | ||
|  | 	if ct == cTitle { | ||
|  | 		n = e[1] & lengthMask | ||
|  | 	} | ||
|  | 	if n != noChange { | ||
|  | 		c.err = transform.ErrEndOfSpan | ||
|  | 		return false | ||
|  | 	} | ||
|  | 	return true | ||
|  | } | ||
|  | 
 | ||
|  | // title writes the title case version of the current rune to dst. | ||
|  | func title(c *context) bool { | ||
|  | 	ct := c.caseType() | ||
|  | 	if c.info&hasMappingMask == 0 || ct == cTitle { | ||
|  | 		return c.copy() | ||
|  | 	} | ||
|  | 	if c.info&exceptionBit == 0 { | ||
|  | 		if ct == cLower { | ||
|  | 			return c.copyXOR() | ||
|  | 		} | ||
|  | 		return c.copy() | ||
|  | 	} | ||
|  | 	// Get the exception data. | ||
|  | 	e := exceptions[c.info>>exceptionShift:] | ||
|  | 	offset := 2 + e[0]&lengthMask // size of header + fold string | ||
|  | 
 | ||
|  | 	nFirst := (e[1] >> lengthBits) & lengthMask | ||
|  | 	if nTitle := e[1] & lengthMask; nTitle != noChange { | ||
|  | 		if nFirst != noChange { | ||
|  | 			e = e[nFirst:] | ||
|  | 		} | ||
|  | 		return c.writeString(e[offset : offset+nTitle]) | ||
|  | 	} | ||
|  | 	if ct == cLower && nFirst != noChange { | ||
|  | 		// Use the uppercase version instead. | ||
|  | 		return c.writeString(e[offset : offset+nFirst]) | ||
|  | 	} | ||
|  | 	// Already in correct case. | ||
|  | 	return c.copy() | ||
|  | } | ||
|  | 
 | ||
|  | // isTitle reports whether the current rune is in title case. | ||
|  | func isTitle(c *context) bool { | ||
|  | 	ct := c.caseType() | ||
|  | 	if c.info&hasMappingMask == 0 || ct == cTitle { | ||
|  | 		return true | ||
|  | 	} | ||
|  | 	if c.info&exceptionBit == 0 { | ||
|  | 		if ct == cLower { | ||
|  | 			c.err = transform.ErrEndOfSpan | ||
|  | 			return false | ||
|  | 		} | ||
|  | 		return true | ||
|  | 	} | ||
|  | 	// Get the exception data. | ||
|  | 	e := exceptions[c.info>>exceptionShift:] | ||
|  | 	if nTitle := e[1] & lengthMask; nTitle != noChange { | ||
|  | 		c.err = transform.ErrEndOfSpan | ||
|  | 		return false | ||
|  | 	} | ||
|  | 	nFirst := (e[1] >> lengthBits) & lengthMask | ||
|  | 	if ct == cLower && nFirst != noChange { | ||
|  | 		c.err = transform.ErrEndOfSpan | ||
|  | 		return false | ||
|  | 	} | ||
|  | 	return true | ||
|  | } | ||
|  | 
 | ||
|  | // foldFull writes the foldFull version of the current rune to dst. | ||
|  | func foldFull(c *context) bool { | ||
|  | 	if c.info&hasMappingMask == 0 { | ||
|  | 		return c.copy() | ||
|  | 	} | ||
|  | 	ct := c.caseType() | ||
|  | 	if c.info&exceptionBit == 0 { | ||
|  | 		if ct != cLower || c.info&inverseFoldBit != 0 { | ||
|  | 			return c.copyXOR() | ||
|  | 		} | ||
|  | 		return c.copy() | ||
|  | 	} | ||
|  | 	e := exceptions[c.info>>exceptionShift:] | ||
|  | 	n := e[0] & lengthMask | ||
|  | 	if n == 0 { | ||
|  | 		if ct == cLower { | ||
|  | 			return c.copy() | ||
|  | 		} | ||
|  | 		n = (e[1] >> lengthBits) & lengthMask | ||
|  | 	} | ||
|  | 	return c.writeString(e[2 : 2+n]) | ||
|  | } | ||
|  | 
 | ||
|  | // isFoldFull reports whether the current run is mapped to foldFull | ||
|  | func isFoldFull(c *context) bool { | ||
|  | 	if c.info&hasMappingMask == 0 { | ||
|  | 		return true | ||
|  | 	} | ||
|  | 	ct := c.caseType() | ||
|  | 	if c.info&exceptionBit == 0 { | ||
|  | 		if ct != cLower || c.info&inverseFoldBit != 0 { | ||
|  | 			c.err = transform.ErrEndOfSpan | ||
|  | 			return false | ||
|  | 		} | ||
|  | 		return true | ||
|  | 	} | ||
|  | 	e := exceptions[c.info>>exceptionShift:] | ||
|  | 	n := e[0] & lengthMask | ||
|  | 	if n == 0 && ct == cLower { | ||
|  | 		return true | ||
|  | 	} | ||
|  | 	c.err = transform.ErrEndOfSpan | ||
|  | 	return false | ||
|  | } |