mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-11-04 09:12:24 -06:00 
			
		
		
		
	
		
			
	
	
		
			817 lines
		
	
	
	
		
			23 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			817 lines
		
	
	
	
		
			23 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| 
								 | 
							
								// Copyright 2014 The Go Authors. All rights reserved.
							 | 
						|||
| 
								 | 
							
								// Use of this source code is governed by a BSD-style
							 | 
						|||
| 
								 | 
							
								// license that can be found in the LICENSE file.
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								package cases
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// This file contains the definitions of case mappings for all supported
							 | 
						|||
| 
								 | 
							
								// languages. The rules for the language-specific tailorings were taken and
							 | 
						|||
| 
								 | 
							
								// modified from the CLDR transform definitions in common/transforms.
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								import (
							 | 
						|||
| 
								 | 
							
									"strings"
							 | 
						|||
| 
								 | 
							
									"unicode"
							 | 
						|||
| 
								 | 
							
									"unicode/utf8"
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									"golang.org/x/text/internal"
							 | 
						|||
| 
								 | 
							
									"golang.org/x/text/language"
							 | 
						|||
| 
								 | 
							
									"golang.org/x/text/transform"
							 | 
						|||
| 
								 | 
							
									"golang.org/x/text/unicode/norm"
							 | 
						|||
| 
								 | 
							
								)
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// A mapFunc takes a context set to the current rune and writes the mapped
							 | 
						|||
| 
								 | 
							
								// version to the same context. It may advance the context to the next rune. It
							 | 
						|||
| 
								 | 
							
								// returns whether a checkpoint is possible: whether the pDst bytes written to
							 | 
						|||
| 
								 | 
							
								// dst so far won't need changing as we see more source bytes.
							 | 
						|||
| 
								 | 
							
								type mapFunc func(*context) bool
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// A spanFunc takes a context set to the current rune and returns whether this
							 | 
						|||
| 
								 | 
							
								// rune would be altered when written to the output. It may advance the context
							 | 
						|||
| 
								 | 
							
								// to the next rune. It returns whether a checkpoint is possible.
							 | 
						|||
| 
								 | 
							
								type spanFunc func(*context) bool
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// maxIgnorable defines the maximum number of ignorables to consider for
							 | 
						|||
| 
								 | 
							
								// lookahead operations.
							 | 
						|||
| 
								 | 
							
								const maxIgnorable = 30
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// supported lists the language tags for which we have tailorings.
							 | 
						|||
| 
								 | 
							
								const supported = "und af az el lt nl tr"
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func init() {
							 | 
						|||
| 
								 | 
							
									tags := []language.Tag{}
							 | 
						|||
| 
								 | 
							
									for _, s := range strings.Split(supported, " ") {
							 | 
						|||
| 
								 | 
							
										tags = append(tags, language.MustParse(s))
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									matcher = internal.NewInheritanceMatcher(tags)
							 | 
						|||
| 
								 | 
							
									Supported = language.NewCoverage(tags)
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								var (
							 | 
						|||
| 
								 | 
							
									matcher *internal.InheritanceMatcher
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									Supported language.Coverage
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									// We keep the following lists separate, instead of having a single per-
							 | 
						|||
| 
								 | 
							
									// language struct, to give the compiler a chance to remove unused code.
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									// Some uppercase mappers are stateless, so we can precompute the
							 | 
						|||
| 
								 | 
							
									// Transformers and save a bit on runtime allocations.
							 | 
						|||
| 
								 | 
							
									upperFunc = []struct {
							 | 
						|||
| 
								 | 
							
										upper mapFunc
							 | 
						|||
| 
								 | 
							
										span  spanFunc
							 | 
						|||
| 
								 | 
							
									}{
							 | 
						|||
| 
								 | 
							
										{nil, nil},                  // und
							 | 
						|||
| 
								 | 
							
										{nil, nil},                  // af
							 | 
						|||
| 
								 | 
							
										{aztrUpper(upper), isUpper}, // az
							 | 
						|||
| 
								 | 
							
										{elUpper, noSpan},           // el
							 | 
						|||
| 
								 | 
							
										{ltUpper(upper), noSpan},    // lt
							 | 
						|||
| 
								 | 
							
										{nil, nil},                  // nl
							 | 
						|||
| 
								 | 
							
										{aztrUpper(upper), isUpper}, // tr
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									undUpper            transform.SpanningTransformer = &undUpperCaser{}
							 | 
						|||
| 
								 | 
							
									undLower            transform.SpanningTransformer = &undLowerCaser{}
							 | 
						|||
| 
								 | 
							
									undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									lowerFunc = []mapFunc{
							 | 
						|||
| 
								 | 
							
										nil,       // und
							 | 
						|||
| 
								 | 
							
										nil,       // af
							 | 
						|||
| 
								 | 
							
										aztrLower, // az
							 | 
						|||
| 
								 | 
							
										nil,       // el
							 | 
						|||
| 
								 | 
							
										ltLower,   // lt
							 | 
						|||
| 
								 | 
							
										nil,       // nl
							 | 
						|||
| 
								 | 
							
										aztrLower, // tr
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									titleInfos = []struct {
							 | 
						|||
| 
								 | 
							
										title     mapFunc
							 | 
						|||
| 
								 | 
							
										lower     mapFunc
							 | 
						|||
| 
								 | 
							
										titleSpan spanFunc
							 | 
						|||
| 
								 | 
							
										rewrite   func(*context)
							 | 
						|||
| 
								 | 
							
									}{
							 | 
						|||
| 
								 | 
							
										{title, lower, isTitle, nil},                // und
							 | 
						|||
| 
								 | 
							
										{title, lower, isTitle, afnlRewrite},        // af
							 | 
						|||
| 
								 | 
							
										{aztrUpper(title), aztrLower, isTitle, nil}, // az
							 | 
						|||
| 
								 | 
							
										{title, lower, isTitle, nil},                // el
							 | 
						|||
| 
								 | 
							
										{ltUpper(title), ltLower, noSpan, nil},      // lt
							 | 
						|||
| 
								 | 
							
										{nlTitle, lower, nlTitleSpan, afnlRewrite},  // nl
							 | 
						|||
| 
								 | 
							
										{aztrUpper(title), aztrLower, isTitle, nil}, // tr
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								)
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
							 | 
						|||
| 
								 | 
							
									_, i, _ := matcher.Match(t)
							 | 
						|||
| 
								 | 
							
									f := upperFunc[i].upper
							 | 
						|||
| 
								 | 
							
									if f == nil {
							 | 
						|||
| 
								 | 
							
										return undUpper
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return &simpleCaser{f: f, span: upperFunc[i].span}
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func makeLower(t language.Tag, o options) transform.SpanningTransformer {
							 | 
						|||
| 
								 | 
							
									_, i, _ := matcher.Match(t)
							 | 
						|||
| 
								 | 
							
									f := lowerFunc[i]
							 | 
						|||
| 
								 | 
							
									if f == nil {
							 | 
						|||
| 
								 | 
							
										if o.ignoreFinalSigma {
							 | 
						|||
| 
								 | 
							
											return undLowerIgnoreSigma
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										return undLower
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									if o.ignoreFinalSigma {
							 | 
						|||
| 
								 | 
							
										return &simpleCaser{f: f, span: isLower}
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return &lowerCaser{
							 | 
						|||
| 
								 | 
							
										first:   f,
							 | 
						|||
| 
								 | 
							
										midWord: finalSigma(f),
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
							 | 
						|||
| 
								 | 
							
									_, i, _ := matcher.Match(t)
							 | 
						|||
| 
								 | 
							
									x := &titleInfos[i]
							 | 
						|||
| 
								 | 
							
									lower := x.lower
							 | 
						|||
| 
								 | 
							
									if o.noLower {
							 | 
						|||
| 
								 | 
							
										lower = (*context).copy
							 | 
						|||
| 
								 | 
							
									} else if !o.ignoreFinalSigma {
							 | 
						|||
| 
								 | 
							
										lower = finalSigma(lower)
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return &titleCaser{
							 | 
						|||
| 
								 | 
							
										title:     x.title,
							 | 
						|||
| 
								 | 
							
										lower:     lower,
							 | 
						|||
| 
								 | 
							
										titleSpan: x.titleSpan,
							 | 
						|||
| 
								 | 
							
										rewrite:   x.rewrite,
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func noSpan(c *context) bool {
							 | 
						|||
| 
								 | 
							
									c.err = transform.ErrEndOfSpan
							 | 
						|||
| 
								 | 
							
									return false
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// TODO: consider a similar special case for the fast majority lower case. This
							 | 
						|||
| 
								 | 
							
								// is a bit more involved so will require some more precise benchmarking to
							 | 
						|||
| 
								 | 
							
								// justify it.
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								type undUpperCaser struct{ transform.NopResetter }
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// undUpperCaser implements the Transformer interface for doing an upper case
							 | 
						|||
| 
								 | 
							
								// mapping for the root locale (und). It eliminates the need for an allocation
							 | 
						|||
| 
								 | 
							
								// as it prevents escaping by not using function pointers.
							 | 
						|||
| 
								 | 
							
								func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
							 | 
						|||
| 
								 | 
							
									c := context{dst: dst, src: src, atEOF: atEOF}
							 | 
						|||
| 
								 | 
							
									for c.next() {
							 | 
						|||
| 
								 | 
							
										upper(&c)
							 | 
						|||
| 
								 | 
							
										c.checkpoint()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.ret()
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
							 | 
						|||
| 
								 | 
							
									c := context{src: src, atEOF: atEOF}
							 | 
						|||
| 
								 | 
							
									for c.next() && isUpper(&c) {
							 | 
						|||
| 
								 | 
							
										c.checkpoint()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.retSpan()
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
							 | 
						|||
| 
								 | 
							
								// a lower case mapping for the root locale (und) ignoring final sigma
							 | 
						|||
| 
								 | 
							
								// handling. This casing algorithm is used in some performance-critical packages
							 | 
						|||
| 
								 | 
							
								// like secure/precis and x/net/http/idna, which warrants its special-casing.
							 | 
						|||
| 
								 | 
							
								type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
							 | 
						|||
| 
								 | 
							
									c := context{dst: dst, src: src, atEOF: atEOF}
							 | 
						|||
| 
								 | 
							
									for c.next() && lower(&c) {
							 | 
						|||
| 
								 | 
							
										c.checkpoint()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.ret()
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// Span implements a generic lower-casing. This is possible as isLower works
							 | 
						|||
| 
								 | 
							
								// for all lowercasing variants. All lowercase variants only vary in how they
							 | 
						|||
| 
								 | 
							
								// transform a non-lowercase letter. They will never change an already lowercase
							 | 
						|||
| 
								 | 
							
								// letter. In addition, there is no state.
							 | 
						|||
| 
								 | 
							
								func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
							 | 
						|||
| 
								 | 
							
									c := context{src: src, atEOF: atEOF}
							 | 
						|||
| 
								 | 
							
									for c.next() && isLower(&c) {
							 | 
						|||
| 
								 | 
							
										c.checkpoint()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.retSpan()
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								type simpleCaser struct {
							 | 
						|||
| 
								 | 
							
									context
							 | 
						|||
| 
								 | 
							
									f    mapFunc
							 | 
						|||
| 
								 | 
							
									span spanFunc
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// simpleCaser implements the Transformer interface for doing a case operation
							 | 
						|||
| 
								 | 
							
								// on a rune-by-rune basis.
							 | 
						|||
| 
								 | 
							
								func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
							 | 
						|||
| 
								 | 
							
									c := context{dst: dst, src: src, atEOF: atEOF}
							 | 
						|||
| 
								 | 
							
									for c.next() && t.f(&c) {
							 | 
						|||
| 
								 | 
							
										c.checkpoint()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.ret()
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
							 | 
						|||
| 
								 | 
							
									c := context{src: src, atEOF: atEOF}
							 | 
						|||
| 
								 | 
							
									for c.next() && t.span(&c) {
							 | 
						|||
| 
								 | 
							
										c.checkpoint()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.retSpan()
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// undLowerCaser implements the Transformer interface for doing a lower case
							 | 
						|||
| 
								 | 
							
								// mapping for the root locale (und) ignoring final sigma handling. This casing
							 | 
						|||
| 
								 | 
							
								// algorithm is used in some performance-critical packages like secure/precis
							 | 
						|||
| 
								 | 
							
								// and x/net/http/idna, which warrants its special-casing.
							 | 
						|||
| 
								 | 
							
								type undLowerCaser struct{ transform.NopResetter }
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
							 | 
						|||
| 
								 | 
							
									c := context{dst: dst, src: src, atEOF: atEOF}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									for isInterWord := true; c.next(); {
							 | 
						|||
| 
								 | 
							
										if isInterWord {
							 | 
						|||
| 
								 | 
							
											if c.info.isCased() {
							 | 
						|||
| 
								 | 
							
												if !lower(&c) {
							 | 
						|||
| 
								 | 
							
													break
							 | 
						|||
| 
								 | 
							
												}
							 | 
						|||
| 
								 | 
							
												isInterWord = false
							 | 
						|||
| 
								 | 
							
											} else if !c.copy() {
							 | 
						|||
| 
								 | 
							
												break
							 | 
						|||
| 
								 | 
							
											}
							 | 
						|||
| 
								 | 
							
										} else {
							 | 
						|||
| 
								 | 
							
											if c.info.isNotCasedAndNotCaseIgnorable() {
							 | 
						|||
| 
								 | 
							
												if !c.copy() {
							 | 
						|||
| 
								 | 
							
													break
							 | 
						|||
| 
								 | 
							
												}
							 | 
						|||
| 
								 | 
							
												isInterWord = true
							 | 
						|||
| 
								 | 
							
											} else if !c.hasPrefix("Σ") {
							 | 
						|||
| 
								 | 
							
												if !lower(&c) {
							 | 
						|||
| 
								 | 
							
													break
							 | 
						|||
| 
								 | 
							
												}
							 | 
						|||
| 
								 | 
							
											} else if !finalSigmaBody(&c) {
							 | 
						|||
| 
								 | 
							
												break
							 | 
						|||
| 
								 | 
							
											}
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										c.checkpoint()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.ret()
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
							 | 
						|||
| 
								 | 
							
									c := context{src: src, atEOF: atEOF}
							 | 
						|||
| 
								 | 
							
									for c.next() && isLower(&c) {
							 | 
						|||
| 
								 | 
							
										c.checkpoint()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.retSpan()
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// lowerCaser implements the Transformer interface. The default Unicode lower
							 | 
						|||
| 
								 | 
							
								// casing requires different treatment for the first and subsequent characters
							 | 
						|||
| 
								 | 
							
								// of a word, most notably to handle the Greek final Sigma.
							 | 
						|||
| 
								 | 
							
								type lowerCaser struct {
							 | 
						|||
| 
								 | 
							
									undLowerIgnoreSigmaCaser
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									context
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									first, midWord mapFunc
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
							 | 
						|||
| 
								 | 
							
									t.context = context{dst: dst, src: src, atEOF: atEOF}
							 | 
						|||
| 
								 | 
							
									c := &t.context
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									for isInterWord := true; c.next(); {
							 | 
						|||
| 
								 | 
							
										if isInterWord {
							 | 
						|||
| 
								 | 
							
											if c.info.isCased() {
							 | 
						|||
| 
								 | 
							
												if !t.first(c) {
							 | 
						|||
| 
								 | 
							
													break
							 | 
						|||
| 
								 | 
							
												}
							 | 
						|||
| 
								 | 
							
												isInterWord = false
							 | 
						|||
| 
								 | 
							
											} else if !c.copy() {
							 | 
						|||
| 
								 | 
							
												break
							 | 
						|||
| 
								 | 
							
											}
							 | 
						|||
| 
								 | 
							
										} else {
							 | 
						|||
| 
								 | 
							
											if c.info.isNotCasedAndNotCaseIgnorable() {
							 | 
						|||
| 
								 | 
							
												if !c.copy() {
							 | 
						|||
| 
								 | 
							
													break
							 | 
						|||
| 
								 | 
							
												}
							 | 
						|||
| 
								 | 
							
												isInterWord = true
							 | 
						|||
| 
								 | 
							
											} else if !t.midWord(c) {
							 | 
						|||
| 
								 | 
							
												break
							 | 
						|||
| 
								 | 
							
											}
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										c.checkpoint()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.ret()
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// titleCaser implements the Transformer interface. Title casing algorithms
							 | 
						|||
| 
								 | 
							
								// distinguish between the first letter of a word and subsequent letters of the
							 | 
						|||
| 
								 | 
							
								// same word. It uses state to avoid requiring a potentially infinite lookahead.
							 | 
						|||
| 
								 | 
							
								type titleCaser struct {
							 | 
						|||
| 
								 | 
							
									context
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									// rune mappings used by the actual casing algorithms.
							 | 
						|||
| 
								 | 
							
									title     mapFunc
							 | 
						|||
| 
								 | 
							
									lower     mapFunc
							 | 
						|||
| 
								 | 
							
									titleSpan spanFunc
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									rewrite func(*context)
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// Transform implements the standard Unicode title case algorithm as defined in
							 | 
						|||
| 
								 | 
							
								// Chapter 3 of The Unicode Standard:
							 | 
						|||
| 
								 | 
							
								// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
							 | 
						|||
| 
								 | 
							
								// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
							 | 
						|||
| 
								 | 
							
								// first cased character F following the word boundary. If F exists, map F to
							 | 
						|||
| 
								 | 
							
								// Titlecase_Mapping(F); then map all characters C between F and the following
							 | 
						|||
| 
								 | 
							
								// word boundary to Lowercase_Mapping(C).
							 | 
						|||
| 
								 | 
							
								func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
							 | 
						|||
| 
								 | 
							
									t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
							 | 
						|||
| 
								 | 
							
									c := &t.context
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									if !c.next() {
							 | 
						|||
| 
								 | 
							
										return c.ret()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									for {
							 | 
						|||
| 
								 | 
							
										p := c.info
							 | 
						|||
| 
								 | 
							
										if t.rewrite != nil {
							 | 
						|||
| 
								 | 
							
											t.rewrite(c)
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
										wasMid := p.isMid()
							 | 
						|||
| 
								 | 
							
										// Break out of this loop on failure to ensure we do not modify the
							 | 
						|||
| 
								 | 
							
										// state incorrectly.
							 | 
						|||
| 
								 | 
							
										if p.isCased() {
							 | 
						|||
| 
								 | 
							
											if !c.isMidWord {
							 | 
						|||
| 
								 | 
							
												if !t.title(c) {
							 | 
						|||
| 
								 | 
							
													break
							 | 
						|||
| 
								 | 
							
												}
							 | 
						|||
| 
								 | 
							
												c.isMidWord = true
							 | 
						|||
| 
								 | 
							
											} else if !t.lower(c) {
							 | 
						|||
| 
								 | 
							
												break
							 | 
						|||
| 
								 | 
							
											}
							 | 
						|||
| 
								 | 
							
										} else if !c.copy() {
							 | 
						|||
| 
								 | 
							
											break
							 | 
						|||
| 
								 | 
							
										} else if p.isBreak() {
							 | 
						|||
| 
								 | 
							
											c.isMidWord = false
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
										// As we save the state of the transformer, it is safe to call
							 | 
						|||
| 
								 | 
							
										// checkpoint after any successful write.
							 | 
						|||
| 
								 | 
							
										if !(c.isMidWord && wasMid) {
							 | 
						|||
| 
								 | 
							
											c.checkpoint()
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
										if !c.next() {
							 | 
						|||
| 
								 | 
							
											break
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										if wasMid && c.info.isMid() {
							 | 
						|||
| 
								 | 
							
											c.isMidWord = false
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.ret()
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
							 | 
						|||
| 
								 | 
							
									t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
							 | 
						|||
| 
								 | 
							
									c := &t.context
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									if !c.next() {
							 | 
						|||
| 
								 | 
							
										return c.retSpan()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									for {
							 | 
						|||
| 
								 | 
							
										p := c.info
							 | 
						|||
| 
								 | 
							
										if t.rewrite != nil {
							 | 
						|||
| 
								 | 
							
											t.rewrite(c)
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
										wasMid := p.isMid()
							 | 
						|||
| 
								 | 
							
										// Break out of this loop on failure to ensure we do not modify the
							 | 
						|||
| 
								 | 
							
										// state incorrectly.
							 | 
						|||
| 
								 | 
							
										if p.isCased() {
							 | 
						|||
| 
								 | 
							
											if !c.isMidWord {
							 | 
						|||
| 
								 | 
							
												if !t.titleSpan(c) {
							 | 
						|||
| 
								 | 
							
													break
							 | 
						|||
| 
								 | 
							
												}
							 | 
						|||
| 
								 | 
							
												c.isMidWord = true
							 | 
						|||
| 
								 | 
							
											} else if !isLower(c) {
							 | 
						|||
| 
								 | 
							
												break
							 | 
						|||
| 
								 | 
							
											}
							 | 
						|||
| 
								 | 
							
										} else if p.isBreak() {
							 | 
						|||
| 
								 | 
							
											c.isMidWord = false
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										// As we save the state of the transformer, it is safe to call
							 | 
						|||
| 
								 | 
							
										// checkpoint after any successful write.
							 | 
						|||
| 
								 | 
							
										if !(c.isMidWord && wasMid) {
							 | 
						|||
| 
								 | 
							
											c.checkpoint()
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
										if !c.next() {
							 | 
						|||
| 
								 | 
							
											break
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										if wasMid && c.info.isMid() {
							 | 
						|||
| 
								 | 
							
											c.isMidWord = false
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.retSpan()
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// finalSigma adds Greek final Sigma handing to another casing function. It
							 | 
						|||
| 
								 | 
							
								// determines whether a lowercased sigma should be σ or ς, by looking ahead for
							 | 
						|||
| 
								 | 
							
								// case-ignorables and a cased letters.
							 | 
						|||
| 
								 | 
							
								func finalSigma(f mapFunc) mapFunc {
							 | 
						|||
| 
								 | 
							
									return func(c *context) bool {
							 | 
						|||
| 
								 | 
							
										if !c.hasPrefix("Σ") {
							 | 
						|||
| 
								 | 
							
											return f(c)
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										return finalSigmaBody(c)
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func finalSigmaBody(c *context) bool {
							 | 
						|||
| 
								 | 
							
									// Current rune must be ∑.
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									// ::NFD();
							 | 
						|||
| 
								 | 
							
									// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
							 | 
						|||
| 
								 | 
							
									// Σ } [:case-ignorable:]* [:cased:] → σ;
							 | 
						|||
| 
								 | 
							
									// [:cased:] [:case-ignorable:]* { Σ → ς;
							 | 
						|||
| 
								 | 
							
									// ::Any-Lower;
							 | 
						|||
| 
								 | 
							
									// ::NFC();
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									p := c.pDst
							 | 
						|||
| 
								 | 
							
									c.writeString("ς")
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									// TODO: we should do this here, but right now this will never have an
							 | 
						|||
| 
								 | 
							
									// effect as this is called when the prefix is Sigma, whereas Dutch and
							 | 
						|||
| 
								 | 
							
									// Afrikaans only test for an apostrophe.
							 | 
						|||
| 
								 | 
							
									//
							 | 
						|||
| 
								 | 
							
									// if t.rewrite != nil {
							 | 
						|||
| 
								 | 
							
									// 	t.rewrite(c)
							 | 
						|||
| 
								 | 
							
									// }
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									// We need to do one more iteration after maxIgnorable, as a cased
							 | 
						|||
| 
								 | 
							
									// letter is not an ignorable and may modify the result.
							 | 
						|||
| 
								 | 
							
									wasMid := false
							 | 
						|||
| 
								 | 
							
									for i := 0; i < maxIgnorable+1; i++ {
							 | 
						|||
| 
								 | 
							
										if !c.next() {
							 | 
						|||
| 
								 | 
							
											return false
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										if !c.info.isCaseIgnorable() {
							 | 
						|||
| 
								 | 
							
											// All Midword runes are also case ignorable, so we are
							 | 
						|||
| 
								 | 
							
											// guaranteed to have a letter or word break here. As we are
							 | 
						|||
| 
								 | 
							
											// unreading the run, there is no need to unset c.isMidWord;
							 | 
						|||
| 
								 | 
							
											// the title caser will handle this.
							 | 
						|||
| 
								 | 
							
											if c.info.isCased() {
							 | 
						|||
| 
								 | 
							
												// p+1 is guaranteed to be in bounds: if writing ς was
							 | 
						|||
| 
								 | 
							
												// successful, p+1 will contain the second byte of ς. If not,
							 | 
						|||
| 
								 | 
							
												// this function will have returned after c.next returned false.
							 | 
						|||
| 
								 | 
							
												c.dst[p+1]++ // ς → σ
							 | 
						|||
| 
								 | 
							
											}
							 | 
						|||
| 
								 | 
							
											c.unreadRune()
							 | 
						|||
| 
								 | 
							
											return true
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										// A case ignorable may also introduce a word break, so we may need
							 | 
						|||
| 
								 | 
							
										// to continue searching even after detecting a break.
							 | 
						|||
| 
								 | 
							
										isMid := c.info.isMid()
							 | 
						|||
| 
								 | 
							
										if (wasMid && isMid) || c.info.isBreak() {
							 | 
						|||
| 
								 | 
							
											c.isMidWord = false
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										wasMid = isMid
							 | 
						|||
| 
								 | 
							
										c.copy()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return true
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// finalSigmaSpan would be the same as isLower.
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// elUpper implements Greek upper casing, which entails removing a predefined
							 | 
						|||
| 
								 | 
							
								// set of non-blocked modifiers. Note that these accents should not be removed
							 | 
						|||
| 
								 | 
							
								// for title casing!
							 | 
						|||
| 
								 | 
							
								// Example: "Οδός" -> "ΟΔΟΣ".
							 | 
						|||
| 
								 | 
							
								func elUpper(c *context) bool {
							 | 
						|||
| 
								 | 
							
									// From CLDR:
							 | 
						|||
| 
								 | 
							
									// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
							 | 
						|||
| 
								 | 
							
									// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									r, _ := utf8.DecodeRune(c.src[c.pSrc:])
							 | 
						|||
| 
								 | 
							
									oldPDst := c.pDst
							 | 
						|||
| 
								 | 
							
									if !upper(c) {
							 | 
						|||
| 
								 | 
							
										return false
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									if !unicode.Is(unicode.Greek, r) {
							 | 
						|||
| 
								 | 
							
										return true
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									i := 0
							 | 
						|||
| 
								 | 
							
									// Take the properties of the uppercased rune that is already written to the
							 | 
						|||
| 
								 | 
							
									// destination. This saves us the trouble of having to uppercase the
							 | 
						|||
| 
								 | 
							
									// decomposed rune again.
							 | 
						|||
| 
								 | 
							
									if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
							 | 
						|||
| 
								 | 
							
										// Restore the destination position and process the decomposed rune.
							 | 
						|||
| 
								 | 
							
										r, sz := utf8.DecodeRune(b)
							 | 
						|||
| 
								 | 
							
										if r <= 0xFF { // See A.6.1
							 | 
						|||
| 
								 | 
							
											return true
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										c.pDst = oldPDst
							 | 
						|||
| 
								 | 
							
										// Insert the first rune and ignore the modifiers. See A.6.2.
							 | 
						|||
| 
								 | 
							
										c.writeBytes(b[:sz])
							 | 
						|||
| 
								 | 
							
										i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									for ; i < maxIgnorable && c.next(); i++ {
							 | 
						|||
| 
								 | 
							
										switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
							 | 
						|||
| 
								 | 
							
										// Above and Iota Subscript
							 | 
						|||
| 
								 | 
							
										case 0x0300, // U+0300 COMBINING GRAVE ACCENT
							 | 
						|||
| 
								 | 
							
											0x0301, // U+0301 COMBINING ACUTE ACCENT
							 | 
						|||
| 
								 | 
							
											0x0304, // U+0304 COMBINING MACRON
							 | 
						|||
| 
								 | 
							
											0x0306, // U+0306 COMBINING BREVE
							 | 
						|||
| 
								 | 
							
											0x0308, // U+0308 COMBINING DIAERESIS
							 | 
						|||
| 
								 | 
							
											0x0313, // U+0313 COMBINING COMMA ABOVE
							 | 
						|||
| 
								 | 
							
											0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
							 | 
						|||
| 
								 | 
							
											0x0342, // U+0342 COMBINING GREEK PERISPOMENI
							 | 
						|||
| 
								 | 
							
											0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
							 | 
						|||
| 
								 | 
							
											// No-op. Gobble the modifier.
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
										default:
							 | 
						|||
| 
								 | 
							
											switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
							 | 
						|||
| 
								 | 
							
											case cccZero:
							 | 
						|||
| 
								 | 
							
												c.unreadRune()
							 | 
						|||
| 
								 | 
							
												return true
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
											// We don't need to test for IotaSubscript as the only rune that
							 | 
						|||
| 
								 | 
							
											// qualifies (U+0345) was already excluded in the switch statement
							 | 
						|||
| 
								 | 
							
											// above. See A.4.
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
											case cccAbove:
							 | 
						|||
| 
								 | 
							
												return c.copy()
							 | 
						|||
| 
								 | 
							
											default:
							 | 
						|||
| 
								 | 
							
												// Some other modifier. We're still allowed to gobble Greek
							 | 
						|||
| 
								 | 
							
												// modifiers after this.
							 | 
						|||
| 
								 | 
							
												c.copy()
							 | 
						|||
| 
								 | 
							
											}
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return i == maxIgnorable
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// TODO: implement elUpperSpan (low-priority: complex and infrequent).
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func ltLower(c *context) bool {
							 | 
						|||
| 
								 | 
							
									// From CLDR:
							 | 
						|||
| 
								 | 
							
									// # Introduce an explicit dot above when lowercasing capital I's and J's
							 | 
						|||
| 
								 | 
							
									// # whenever there are more accents above.
							 | 
						|||
| 
								 | 
							
									// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
							 | 
						|||
| 
								 | 
							
									// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
							 | 
						|||
| 
								 | 
							
									// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
							 | 
						|||
| 
								 | 
							
									// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
							 | 
						|||
| 
								 | 
							
									// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
							 | 
						|||
| 
								 | 
							
									// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
							 | 
						|||
| 
								 | 
							
									// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
							 | 
						|||
| 
								 | 
							
									// ::NFD();
							 | 
						|||
| 
								 | 
							
									// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
							 | 
						|||
| 
								 | 
							
									// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
							 | 
						|||
| 
								 | 
							
									// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
							 | 
						|||
| 
								 | 
							
									// I \u0300 (Ì) → i \u0307 \u0300;
							 | 
						|||
| 
								 | 
							
									// I \u0301 (Í) → i \u0307 \u0301;
							 | 
						|||
| 
								 | 
							
									// I \u0303 (Ĩ) → i \u0307 \u0303;
							 | 
						|||
| 
								 | 
							
									// ::Any-Lower();
							 | 
						|||
| 
								 | 
							
									// ::NFC();
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									i := 0
							 | 
						|||
| 
								 | 
							
									if r := c.src[c.pSrc]; r < utf8.RuneSelf {
							 | 
						|||
| 
								 | 
							
										lower(c)
							 | 
						|||
| 
								 | 
							
										if r != 'I' && r != 'J' {
							 | 
						|||
| 
								 | 
							
											return true
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
									} else {
							 | 
						|||
| 
								 | 
							
										p := norm.NFD.Properties(c.src[c.pSrc:])
							 | 
						|||
| 
								 | 
							
										if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
							 | 
						|||
| 
								 | 
							
											// UTF-8 optimization: the decomposition will only have an above
							 | 
						|||
| 
								 | 
							
											// modifier if the last rune of the decomposition is in [U+300-U+311].
							 | 
						|||
| 
								 | 
							
											// In all other cases, a decomposition starting with I is always
							 | 
						|||
| 
								 | 
							
											// an I followed by modifiers that are not cased themselves. See A.2.
							 | 
						|||
| 
								 | 
							
											if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
							 | 
						|||
| 
								 | 
							
												if !c.writeBytes(d[:1]) {
							 | 
						|||
| 
								 | 
							
													return false
							 | 
						|||
| 
								 | 
							
												}
							 | 
						|||
| 
								 | 
							
												c.dst[c.pDst-1] += 'a' - 'A' // lower
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
												// Assumption: modifier never changes on lowercase. See A.1.
							 | 
						|||
| 
								 | 
							
												// Assumption: all modifiers added have CCC = Above. See A.2.3.
							 | 
						|||
| 
								 | 
							
												return c.writeString("\u0307") && c.writeBytes(d[1:])
							 | 
						|||
| 
								 | 
							
											}
							 | 
						|||
| 
								 | 
							
											// In all other cases the additional modifiers will have a CCC
							 | 
						|||
| 
								 | 
							
											// that is less than 230 (Above). We will insert the U+0307, if
							 | 
						|||
| 
								 | 
							
											// needed, after these modifiers so that a string in FCD form
							 | 
						|||
| 
								 | 
							
											// will remain so. See A.2.2.
							 | 
						|||
| 
								 | 
							
											lower(c)
							 | 
						|||
| 
								 | 
							
											i = 1
							 | 
						|||
| 
								 | 
							
										} else {
							 | 
						|||
| 
								 | 
							
											return lower(c)
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									for ; i < maxIgnorable && c.next(); i++ {
							 | 
						|||
| 
								 | 
							
										switch c.info.cccType() {
							 | 
						|||
| 
								 | 
							
										case cccZero:
							 | 
						|||
| 
								 | 
							
											c.unreadRune()
							 | 
						|||
| 
								 | 
							
											return true
							 | 
						|||
| 
								 | 
							
										case cccAbove:
							 | 
						|||
| 
								 | 
							
											return c.writeString("\u0307") && c.copy() // See A.1.
							 | 
						|||
| 
								 | 
							
										default:
							 | 
						|||
| 
								 | 
							
											c.copy() // See A.1.
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return i == maxIgnorable
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// ltLowerSpan would be the same as isLower.
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func ltUpper(f mapFunc) mapFunc {
							 | 
						|||
| 
								 | 
							
									return func(c *context) bool {
							 | 
						|||
| 
								 | 
							
										// Unicode:
							 | 
						|||
| 
								 | 
							
										// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
							 | 
						|||
| 
								 | 
							
										//
							 | 
						|||
| 
								 | 
							
										// From CLDR:
							 | 
						|||
| 
								 | 
							
										// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
							 | 
						|||
| 
								 | 
							
										// # intervening non-230 marks.
							 | 
						|||
| 
								 | 
							
										// ::NFD();
							 | 
						|||
| 
								 | 
							
										// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
							 | 
						|||
| 
								 | 
							
										// ::Any-Upper();
							 | 
						|||
| 
								 | 
							
										// ::NFC();
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
										// TODO: See A.5. A soft-dotted rune never has an exception. This would
							 | 
						|||
| 
								 | 
							
										// allow us to overload the exception bit and encode this property in
							 | 
						|||
| 
								 | 
							
										// info. Need to measure performance impact of this.
							 | 
						|||
| 
								 | 
							
										r, _ := utf8.DecodeRune(c.src[c.pSrc:])
							 | 
						|||
| 
								 | 
							
										oldPDst := c.pDst
							 | 
						|||
| 
								 | 
							
										if !f(c) {
							 | 
						|||
| 
								 | 
							
											return false
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										if !unicode.Is(unicode.Soft_Dotted, r) {
							 | 
						|||
| 
								 | 
							
											return true
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
										// We don't need to do an NFD normalization, as a soft-dotted rune never
							 | 
						|||
| 
								 | 
							
										// contains U+0307. See A.3.
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
										i := 0
							 | 
						|||
| 
								 | 
							
										for ; i < maxIgnorable && c.next(); i++ {
							 | 
						|||
| 
								 | 
							
											switch c.info.cccType() {
							 | 
						|||
| 
								 | 
							
											case cccZero:
							 | 
						|||
| 
								 | 
							
												c.unreadRune()
							 | 
						|||
| 
								 | 
							
												return true
							 | 
						|||
| 
								 | 
							
											case cccAbove:
							 | 
						|||
| 
								 | 
							
												if c.hasPrefix("\u0307") {
							 | 
						|||
| 
								 | 
							
													// We don't do a full NFC, but rather combine runes for
							 | 
						|||
| 
								 | 
							
													// some of the common cases. (Returning NFC or
							 | 
						|||
| 
								 | 
							
													// preserving normal form is neither a requirement nor
							 | 
						|||
| 
								 | 
							
													// a possibility anyway).
							 | 
						|||
| 
								 | 
							
													if !c.next() {
							 | 
						|||
| 
								 | 
							
														return false
							 | 
						|||
| 
								 | 
							
													}
							 | 
						|||
| 
								 | 
							
													if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
							 | 
						|||
| 
								 | 
							
														s := ""
							 | 
						|||
| 
								 | 
							
														switch c.src[c.pSrc+1] {
							 | 
						|||
| 
								 | 
							
														case 0x80: // U+0300 COMBINING GRAVE ACCENT
							 | 
						|||
| 
								 | 
							
															s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
							 | 
						|||
| 
								 | 
							
														case 0x81: // U+0301 COMBINING ACUTE ACCENT
							 | 
						|||
| 
								 | 
							
															s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
							 | 
						|||
| 
								 | 
							
														case 0x83: // U+0303 COMBINING TILDE
							 | 
						|||
| 
								 | 
							
															s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
							 | 
						|||
| 
								 | 
							
														case 0x88: // U+0308 COMBINING DIAERESIS
							 | 
						|||
| 
								 | 
							
															s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
							 | 
						|||
| 
								 | 
							
														default:
							 | 
						|||
| 
								 | 
							
														}
							 | 
						|||
| 
								 | 
							
														if s != "" {
							 | 
						|||
| 
								 | 
							
															c.pDst = oldPDst
							 | 
						|||
| 
								 | 
							
															return c.writeString(s)
							 | 
						|||
| 
								 | 
							
														}
							 | 
						|||
| 
								 | 
							
													}
							 | 
						|||
| 
								 | 
							
												}
							 | 
						|||
| 
								 | 
							
												return c.copy()
							 | 
						|||
| 
								 | 
							
											default:
							 | 
						|||
| 
								 | 
							
												c.copy()
							 | 
						|||
| 
								 | 
							
											}
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										return i == maxIgnorable
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// TODO: implement ltUpperSpan (low priority: complex and infrequent).
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func aztrUpper(f mapFunc) mapFunc {
							 | 
						|||
| 
								 | 
							
									return func(c *context) bool {
							 | 
						|||
| 
								 | 
							
										// i→İ;
							 | 
						|||
| 
								 | 
							
										if c.src[c.pSrc] == 'i' {
							 | 
						|||
| 
								 | 
							
											return c.writeString("İ")
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
										return f(c)
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func aztrLower(c *context) (done bool) {
							 | 
						|||
| 
								 | 
							
									// From CLDR:
							 | 
						|||
| 
								 | 
							
									// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
							 | 
						|||
| 
								 | 
							
									// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
							 | 
						|||
| 
								 | 
							
									// İ→i;
							 | 
						|||
| 
								 | 
							
									// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
							 | 
						|||
| 
								 | 
							
									// # This matches the behavior of the canonically equivalent I-dot_above
							 | 
						|||
| 
								 | 
							
									// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
							 | 
						|||
| 
								 | 
							
									// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
							 | 
						|||
| 
								 | 
							
									// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
							 | 
						|||
| 
								 | 
							
									// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
							 | 
						|||
| 
								 | 
							
									// I→ı ;
							 | 
						|||
| 
								 | 
							
									// ::Any-Lower();
							 | 
						|||
| 
								 | 
							
									if c.hasPrefix("\u0130") { // İ
							 | 
						|||
| 
								 | 
							
										return c.writeString("i")
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									if c.src[c.pSrc] != 'I' {
							 | 
						|||
| 
								 | 
							
										return lower(c)
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									// We ignore the lower-case I for now, but insert it later when we know
							 | 
						|||
| 
								 | 
							
									// which form we need.
							 | 
						|||
| 
								 | 
							
									start := c.pSrc + c.sz
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									i := 0
							 | 
						|||
| 
								 | 
							
								Loop:
							 | 
						|||
| 
								 | 
							
									// We check for up to n ignorables before \u0307. As \u0307 is an
							 | 
						|||
| 
								 | 
							
									// ignorable as well, n is maxIgnorable-1.
							 | 
						|||
| 
								 | 
							
									for ; i < maxIgnorable && c.next(); i++ {
							 | 
						|||
| 
								 | 
							
										switch c.info.cccType() {
							 | 
						|||
| 
								 | 
							
										case cccAbove:
							 | 
						|||
| 
								 | 
							
											if c.hasPrefix("\u0307") {
							 | 
						|||
| 
								 | 
							
												return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
							 | 
						|||
| 
								 | 
							
											}
							 | 
						|||
| 
								 | 
							
											done = true
							 | 
						|||
| 
								 | 
							
											break Loop
							 | 
						|||
| 
								 | 
							
										case cccZero:
							 | 
						|||
| 
								 | 
							
											c.unreadRune()
							 | 
						|||
| 
								 | 
							
											done = true
							 | 
						|||
| 
								 | 
							
											break Loop
							 | 
						|||
| 
								 | 
							
										default:
							 | 
						|||
| 
								 | 
							
											// We'll write this rune after we know which starter to use.
							 | 
						|||
| 
								 | 
							
										}
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									if i == maxIgnorable {
							 | 
						|||
| 
								 | 
							
										done = true
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// aztrLowerSpan would be the same as isLower.
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func nlTitle(c *context) bool {
							 | 
						|||
| 
								 | 
							
									// From CLDR:
							 | 
						|||
| 
								 | 
							
									// # Special titlecasing for Dutch initial "ij".
							 | 
						|||
| 
								 | 
							
									// ::Any-Title();
							 | 
						|||
| 
								 | 
							
									// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
							 | 
						|||
| 
								 | 
							
									// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
							 | 
						|||
| 
								 | 
							
									if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
							 | 
						|||
| 
								 | 
							
										return title(c)
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
									if !c.writeString("I") || !c.next() {
							 | 
						|||
| 
								 | 
							
										return false
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
							 | 
						|||
| 
								 | 
							
										return c.writeString("J")
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									c.unreadRune()
							 | 
						|||
| 
								 | 
							
									return true
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								func nlTitleSpan(c *context) bool {
							 | 
						|||
| 
								 | 
							
									// From CLDR:
							 | 
						|||
| 
								 | 
							
									// # Special titlecasing for Dutch initial "ij".
							 | 
						|||
| 
								 | 
							
									// ::Any-Title();
							 | 
						|||
| 
								 | 
							
									// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
							 | 
						|||
| 
								 | 
							
									// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
							 | 
						|||
| 
								 | 
							
									if c.src[c.pSrc] != 'I' {
							 | 
						|||
| 
								 | 
							
										return isTitle(c)
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									if !c.next() || c.src[c.pSrc] == 'j' {
							 | 
						|||
| 
								 | 
							
										return false
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									if c.src[c.pSrc] != 'J' {
							 | 
						|||
| 
								 | 
							
										c.unreadRune()
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
									return true
							 | 
						|||
| 
								 | 
							
								}
							 | 
						|||
| 
								 | 
							
								
							 | 
						|||
| 
								 | 
							
								// Not part of CLDR, but see https://unicode.org/cldr/trac/ticket/7078.
							 | 
						|||
| 
								 | 
							
								func afnlRewrite(c *context) {
							 | 
						|||
| 
								 | 
							
									if c.hasPrefix("'") || c.hasPrefix("’") {
							 | 
						|||
| 
								 | 
							
										c.isMidWord = true
							 | 
						|||
| 
								 | 
							
									}
							 | 
						|||
| 
								 | 
							
								}
							 |