mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-31 18:12:24 -05:00 
			
		
		
		
	
		
			
	
	
		
			207 lines
		
	
	
	
		
			6.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			207 lines
		
	
	
	
		
			6.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
|  | // Copyright 2015 The Go Authors. All rights reserved. | ||
|  | // Use of this source code is governed by a BSD-style | ||
|  | // license that can be found in the LICENSE file. | ||
|  | 
 | ||
|  | //go:generate stringer -type=Kind | ||
|  | //go:generate go run gen.go gen_common.go gen_trieval.go | ||
|  | 
 | ||
|  | // Package width provides functionality for handling different widths in text. | ||
|  | // | ||
|  | // Wide characters behave like ideographs; they tend to allow line breaks after | ||
|  | // each character and remain upright in vertical text layout. Narrow characters | ||
|  | // are kept together in words or runs that are rotated sideways in vertical text | ||
|  | // layout. | ||
|  | // | ||
|  | // For more information, see https://unicode.org/reports/tr11/. | ||
|  | package width // import "golang.org/x/text/width" | ||
|  | 
 | ||
|  | import ( | ||
|  | 	"unicode/utf8" | ||
|  | 
 | ||
|  | 	"golang.org/x/text/transform" | ||
|  | ) | ||
|  | 
 | ||
|  | // TODO | ||
|  | // 1) Reduce table size by compressing blocks. | ||
|  | // 2) API proposition for computing display length | ||
|  | //    (approximation, fixed pitch only). | ||
|  | // 3) Implement display length. | ||
|  | 
 | ||
// Kind indicates the type of width property as defined in
// https://unicode.org/reports/tr11/.
//
// Kind is an integer enumeration; its values are the constants declared
// alongside it in this file, and the zero value is Neutral.
type Kind int
|  | 
 | ||
// The Kind values below are assigned sequentially with iota, so their order
// must not be changed.
const (
	// Neutral characters do not occur in legacy East Asian character sets.
	Neutral Kind = iota

	// EastAsianAmbiguous characters can be sometimes wide and sometimes
	// narrow and require additional information not contained in the character
	// code to further resolve their width.
	EastAsianAmbiguous

	// EastAsianWide characters are wide in their usual form. They occur only
	// in the context of East Asian typography. These runes may have explicit
	// halfwidth counterparts.
	EastAsianWide

	// EastAsianNarrow characters are narrow in their usual form. They often
	// have fullwidth counterparts.
	EastAsianNarrow

	// Note: there exist Narrow runes that do not have fullwidth or wide
	// counterparts, despite what the definition says (e.g. U+27E6).

	// EastAsianFullwidth characters have a compatibility decomposition of type
	// wide that maps to a narrow counterpart.
	EastAsianFullwidth

	// EastAsianHalfwidth characters have a compatibility decomposition of type
	// narrow that maps to a wide or ambiguous counterpart, plus U+20A9 ₩ WON
	// SIGN.
	EastAsianHalfwidth

	// Note: there exist runes that have halfwidth counterparts but that are
	// classified as Ambiguous, rather than wide (e.g. U+2190).
)
|  | 
 | ||
|  | // TODO: the generated tries need to return size 1 for invalid runes for the | ||
|  | // width to be computed correctly (each byte should render width 1) | ||
|  | 
 | ||
// trie is the package-level lookup structure queried by Lookup, LookupString
// and LookupRune; the values it returns are wrapped in Properties.
var trie = newWidthTrie(0)
|  | 
 | ||
|  | // Lookup reports the Properties of the first rune in b and the number of bytes | ||
|  | // of its UTF-8 encoding. | ||
|  | func Lookup(b []byte) (p Properties, size int) { | ||
|  | 	v, sz := trie.lookup(b) | ||
|  | 	return Properties{elem(v), b[sz-1]}, sz | ||
|  | } | ||
|  | 
 | ||
|  | // LookupString reports the Properties of the first rune in s and the number of | ||
|  | // bytes of its UTF-8 encoding. | ||
|  | func LookupString(s string) (p Properties, size int) { | ||
|  | 	v, sz := trie.lookupString(s) | ||
|  | 	return Properties{elem(v), s[sz-1]}, sz | ||
|  | } | ||
|  | 
 | ||
|  | // LookupRune reports the Properties of rune r. | ||
|  | func LookupRune(r rune) Properties { | ||
|  | 	var buf [4]byte | ||
|  | 	n := utf8.EncodeRune(buf[:], r) | ||
|  | 	v, _ := trie.lookup(buf[:n]) | ||
|  | 	last := byte(r) | ||
|  | 	if r >= utf8.RuneSelf { | ||
|  | 		last = 0x80 + byte(r&0x3f) | ||
|  | 	} | ||
|  | 	return Properties{elem(v), last} | ||
|  | } | ||
|  | 
 | ||
// Properties provides access to width properties of a rune.
type Properties struct {
	// elem is the value the trie returned for the rune.
	elem elem
	// last is the final byte of the rune's UTF-8 encoding. Folded, Narrow and
	// Wide XOR it into an inverseData entry to compute the mapped rune.
	last byte
}
|  | 
 | ||
|  | func (e elem) kind() Kind { | ||
|  | 	return Kind(e >> typeShift) | ||
|  | } | ||
|  | 
 | ||
|  | // Kind returns the Kind of a rune as defined in Unicode TR #11. | ||
|  | // See https://unicode.org/reports/tr11/ for more details. | ||
|  | func (p Properties) Kind() Kind { | ||
|  | 	return p.elem.kind() | ||
|  | } | ||
|  | 
 | ||
|  | // Folded returns the folded variant of a rune or 0 if the rune is canonical. | ||
|  | func (p Properties) Folded() rune { | ||
|  | 	if p.elem&tagNeedsFold != 0 { | ||
|  | 		buf := inverseData[byte(p.elem)] | ||
|  | 		buf[buf[0]] ^= p.last | ||
|  | 		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]]) | ||
|  | 		return r | ||
|  | 	} | ||
|  | 	return 0 | ||
|  | } | ||
|  | 
 | ||
|  | // Narrow returns the narrow variant of a rune or 0 if the rune is already | ||
|  | // narrow or doesn't have a narrow variant. | ||
|  | func (p Properties) Narrow() rune { | ||
|  | 	if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) { | ||
|  | 		buf := inverseData[byte(p.elem)] | ||
|  | 		buf[buf[0]] ^= p.last | ||
|  | 		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]]) | ||
|  | 		return r | ||
|  | 	} | ||
|  | 	return 0 | ||
|  | } | ||
|  | 
 | ||
|  | // Wide returns the wide variant of a rune or 0 if the rune is already | ||
|  | // wide or doesn't have a wide variant. | ||
|  | func (p Properties) Wide() rune { | ||
|  | 	if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) { | ||
|  | 		buf := inverseData[byte(p.elem)] | ||
|  | 		buf[buf[0]] ^= p.last | ||
|  | 		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]]) | ||
|  | 		return r | ||
|  | 	} | ||
|  | 	return 0 | ||
|  | } | ||
|  | 
 | ||
|  | // TODO for Properties: | ||
|  | // - Add Fullwidth/Halfwidth or Inverted methods for computing variants | ||
|  | // mapping. | ||
|  | // - Add width information (including information on non-spacing runes). | ||
|  | 
 | ||
// Transformer implements the transform.Transformer interface.
//
// It wraps a transform.SpanningTransformer and forwards Reset, Transform and
// Span to it, adding the Bytes and String convenience methods.
type Transformer struct {
	// t is the wrapped transformer that performs the actual work.
	t transform.SpanningTransformer
}
|  | 
 | ||
|  | // Reset implements the transform.Transformer interface. | ||
|  | func (t Transformer) Reset() { t.t.Reset() } | ||
|  | 
 | ||
|  | // Transform implements the transform.Transformer interface. | ||
|  | func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||
|  | 	return t.t.Transform(dst, src, atEOF) | ||
|  | } | ||
|  | 
 | ||
|  | // Span implements the transform.SpanningTransformer interface. | ||
|  | func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) { | ||
|  | 	return t.t.Span(src, atEOF) | ||
|  | } | ||
|  | 
 | ||
|  | // Bytes returns a new byte slice with the result of applying t to b. | ||
|  | func (t Transformer) Bytes(b []byte) []byte { | ||
|  | 	b, _, _ = transform.Bytes(t, b) | ||
|  | 	return b | ||
|  | } | ||
|  | 
 | ||
|  | // String returns a string with the result of applying t to s. | ||
|  | func (t Transformer) String(s string) string { | ||
|  | 	s, _, _ = transform.String(t, s) | ||
|  | 	return s | ||
|  | } | ||
|  | 
 | ||
|  | var ( | ||
|  | 	// Fold is a transform that maps all runes to their canonical width. | ||
|  | 	// | ||
|  | 	// Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm | ||
|  | 	// provide a more generic folding mechanism. | ||
|  | 	Fold Transformer = Transformer{foldTransform{}} | ||
|  | 
 | ||
|  | 	// Widen is a transform that maps runes to their wide variant, if | ||
|  | 	// available. | ||
|  | 	Widen Transformer = Transformer{wideTransform{}} | ||
|  | 
 | ||
|  | 	// Narrow is a transform that maps runes to their narrow variant, if | ||
|  | 	// available. | ||
|  | 	Narrow Transformer = Transformer{narrowTransform{}} | ||
|  | ) | ||
|  | 
 | ||
|  | // TODO: Consider the following options: | ||
|  | // - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some | ||
|  | //   generalized variant of this. | ||
|  | // - Consider a wide Won character to be the default width (or some generalized | ||
|  | //   variant of this). | ||
|  | // - Filter the set of characters that gets converted (the preferred approach is | ||
|  | //   to allow applying filters to transforms). |