mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-11-04 05:02:25 -06:00 
			
		
		
		
	
		
			
	
	
		
			207 lines
		
	
	
	
		
			6.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			207 lines
		
	
	
	
		
			6.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| 
								 | 
							
								// Copyright 2015 The Go Authors. All rights reserved.
							 | 
						||
| 
								 | 
							
								// Use of this source code is governed by a BSD-style
							 | 
						||
| 
								 | 
							
								// license that can be found in the LICENSE file.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								//go:generate stringer -type=Kind
							 | 
						||
| 
								 | 
							
								//go:generate go run gen.go gen_common.go gen_trieval.go
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Package width provides functionality for handling different widths in text.
							 | 
						||
| 
								 | 
							
								//
							 | 
						||
| 
								 | 
							
								// Wide characters behave like ideographs; they tend to allow line breaks after
							 | 
						||
| 
								 | 
							
								// each character and remain upright in vertical text layout. Narrow characters
							 | 
						||
| 
								 | 
							
								// are kept together in words or runs that are rotated sideways in vertical text
							 | 
						||
| 
								 | 
							
								// layout.
							 | 
						||
| 
								 | 
							
								//
							 | 
						||
| 
								 | 
							
								// For more information, see https://unicode.org/reports/tr11/.
							 | 
						||
| 
								 | 
							
								package width // import "golang.org/x/text/width"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import (
							 | 
						||
| 
								 | 
							
									"unicode/utf8"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									"golang.org/x/text/transform"
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// TODO
							 | 
						||
| 
								 | 
							
								// 1) Reduce table size by compressing blocks.
							 | 
						||
| 
								 | 
							
								// 2) API proposition for computing display length
							 | 
						||
| 
								 | 
							
								//    (approximation, fixed pitch only).
							 | 
						||
| 
								 | 
							
								// 3) Implement display length.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Kind indicates the type of width property as defined in https://unicode.org/reports/tr11/.
								//
								// The zero value of Kind is Neutral.
								type Kind int
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								const (
									// Neutral characters do not occur in legacy East Asian character sets.
									Neutral Kind = iota

									// EastAsianAmbiguous characters can be sometimes wide and sometimes
									// narrow and require additional information not contained in the character
									// code to further resolve their width.
									EastAsianAmbiguous

									// EastAsianWide characters are wide in their usual form. They occur only in
									// the context of East Asian typography. These runes may have explicit
									// halfwidth counterparts.
									EastAsianWide

									// EastAsianNarrow characters are narrow in their usual form. They often have
									// fullwidth counterparts.
									EastAsianNarrow

									// Note: there exist Narrow runes that do not have fullwidth or wide
									// counterparts, despite what the definition says (e.g. U+27E6).

									// EastAsianFullwidth characters have a compatibility decomposition of type
									// wide that maps to a narrow counterpart.
									EastAsianFullwidth

									// EastAsianHalfwidth characters have a compatibility decomposition of type
									// narrow that maps to a wide or ambiguous counterpart, plus U+20A9 ₩ WON
									// SIGN.
									EastAsianHalfwidth

									// Note: there exist runes that have halfwidth counterparts but that are
									// classified as Ambiguous, rather than wide (e.g. U+2190).
								)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// TODO: the generated tries need to return size 1 for invalid runes for the
								// width to be computed correctly (each byte should render width 1)

								// trie is the package-level lookup table consulted by Lookup, LookupString
								// and LookupRune. newWidthTrie is defined elsewhere in the package
								// (presumably in generated code — confirm).
								var trie = newWidthTrie(0)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Lookup reports the Properties of the first rune in b and the number of bytes
							 | 
						||
| 
								 | 
							
								// of its UTF-8 encoding.
							 | 
						||
| 
								 | 
							
								func Lookup(b []byte) (p Properties, size int) {
							 | 
						||
| 
								 | 
							
									v, sz := trie.lookup(b)
							 | 
						||
| 
								 | 
							
									return Properties{elem(v), b[sz-1]}, sz
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// LookupString reports the Properties of the first rune in s and the number of
							 | 
						||
| 
								 | 
							
								// bytes of its UTF-8 encoding.
							 | 
						||
| 
								 | 
							
								func LookupString(s string) (p Properties, size int) {
							 | 
						||
| 
								 | 
							
									v, sz := trie.lookupString(s)
							 | 
						||
| 
								 | 
							
									return Properties{elem(v), s[sz-1]}, sz
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// LookupRune reports the Properties of rune r.
							 | 
						||
| 
								 | 
							
								func LookupRune(r rune) Properties {
							 | 
						||
| 
								 | 
							
									var buf [4]byte
							 | 
						||
| 
								 | 
							
									n := utf8.EncodeRune(buf[:], r)
							 | 
						||
| 
								 | 
							
									v, _ := trie.lookup(buf[:n])
							 | 
						||
| 
								 | 
							
									last := byte(r)
							 | 
						||
| 
								 | 
							
									if r >= utf8.RuneSelf {
							 | 
						||
| 
								 | 
							
										last = 0x80 + byte(r&0x3f)
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
									return Properties{elem(v), last}
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Properties provides access to width properties of a rune.
								type Properties struct {
									// elem is the value the trie lookup returned for the rune. The Kind and
									// the data used by Folded, Narrow and Wide are derived from it.
									elem elem
									// last is the final byte of the rune's UTF-8 encoding. Folded, Narrow
									// and Wide XOR it into an inverseData entry to compute a variant.
									last byte
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func (e elem) kind() Kind {
							 | 
						||
| 
								 | 
							
									return Kind(e >> typeShift)
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Kind returns the Kind of a rune as defined in Unicode TR #11.
							 | 
						||
| 
								 | 
							
								// See https://unicode.org/reports/tr11/ for more details.
							 | 
						||
| 
								 | 
							
								func (p Properties) Kind() Kind {
							 | 
						||
| 
								 | 
							
									return p.elem.kind()
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Folded returns the folded variant of a rune or 0 if the rune is canonical.
							 | 
						||
| 
								 | 
							
								func (p Properties) Folded() rune {
							 | 
						||
| 
								 | 
							
									if p.elem&tagNeedsFold != 0 {
							 | 
						||
| 
								 | 
							
										buf := inverseData[byte(p.elem)]
							 | 
						||
| 
								 | 
							
										buf[buf[0]] ^= p.last
							 | 
						||
| 
								 | 
							
										r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
							 | 
						||
| 
								 | 
							
										return r
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
									return 0
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Narrow returns the narrow variant of a rune or 0 if the rune is already
							 | 
						||
| 
								 | 
							
								// narrow or doesn't have a narrow variant.
							 | 
						||
| 
								 | 
							
								func (p Properties) Narrow() rune {
							 | 
						||
| 
								 | 
							
									if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) {
							 | 
						||
| 
								 | 
							
										buf := inverseData[byte(p.elem)]
							 | 
						||
| 
								 | 
							
										buf[buf[0]] ^= p.last
							 | 
						||
| 
								 | 
							
										r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
							 | 
						||
| 
								 | 
							
										return r
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
									return 0
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Wide returns the wide variant of a rune or 0 if the rune is already
							 | 
						||
| 
								 | 
							
								// wide or doesn't have a wide variant.
							 | 
						||
| 
								 | 
							
								func (p Properties) Wide() rune {
							 | 
						||
| 
								 | 
							
									if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) {
							 | 
						||
| 
								 | 
							
										buf := inverseData[byte(p.elem)]
							 | 
						||
| 
								 | 
							
										buf[buf[0]] ^= p.last
							 | 
						||
| 
								 | 
							
										r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
							 | 
						||
| 
								 | 
							
										return r
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
									return 0
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// TODO for Properties:
							 | 
						||
| 
								 | 
							
								// - Add Fullwidth/Halfwidth or Inverted methods for computing variants
							 | 
						||
| 
								 | 
							
								// mapping.
							 | 
						||
| 
								 | 
							
								// - Add width information (including information on non-spacing runes).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Transformer implements the transform.Transformer interface.
								//
								// Its Transform, Span and Reset methods forward to the wrapped transformer.
								type Transformer struct {
									// t is the underlying transformer that the methods delegate to.
									t transform.SpanningTransformer
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Reset implements the transform.Transformer interface.
							 | 
						||
| 
								 | 
							
								func (t Transformer) Reset() { t.t.Reset() }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Transform implements the transform.Transformer interface.
							 | 
						||
| 
								 | 
							
								func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
							 | 
						||
| 
								 | 
							
									return t.t.Transform(dst, src, atEOF)
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Span implements the transform.SpanningTransformer interface.
							 | 
						||
| 
								 | 
							
								func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) {
							 | 
						||
| 
								 | 
							
									return t.t.Span(src, atEOF)
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Bytes returns a new byte slice with the result of applying t to b.
							 | 
						||
| 
								 | 
							
								func (t Transformer) Bytes(b []byte) []byte {
							 | 
						||
| 
								 | 
							
									b, _, _ = transform.Bytes(t, b)
							 | 
						||
| 
								 | 
							
									return b
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// String returns a string with the result of applying t to s.
							 | 
						||
| 
								 | 
							
								func (t Transformer) String(s string) string {
							 | 
						||
| 
								 | 
							
									s, _, _ = transform.String(t, s)
							 | 
						||
| 
								 | 
							
									return s
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								var (
							 | 
						||
| 
								 | 
							
									// Fold is a transform that maps all runes to their canonical width.
							 | 
						||
| 
								 | 
							
									//
							 | 
						||
| 
								 | 
							
									// Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm
							 | 
						||
| 
								 | 
							
									// provide a more generic folding mechanism.
							 | 
						||
| 
								 | 
							
									Fold Transformer = Transformer{foldTransform{}}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// Widen is a transform that maps runes to their wide variant, if
							 | 
						||
| 
								 | 
							
									// available.
							 | 
						||
| 
								 | 
							
									Widen Transformer = Transformer{wideTransform{}}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// Narrow is a transform that maps runes to their narrow variant, if
							 | 
						||
| 
								 | 
							
									// available.
							 | 
						||
| 
								 | 
							
									Narrow Transformer = Transformer{narrowTransform{}}
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// TODO: Consider the following options:
							 | 
						||
| 
								 | 
							
								// - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some
							 | 
						||
| 
								 | 
							
								//   generalized variant of this.
							 | 
						||
| 
								 | 
							
								// - Consider a wide Won character to be the default width (or some generalized
							 | 
						||
| 
								 | 
							
								//   variant of this).
							 | 
						||
| 
								 | 
							
								// - Filter the set of characters that gets converted (the preferred approach is
							 | 
						||
| 
								 | 
							
								//   to allow applying filters to transforms).
							 |