mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-11-03 18:22:25 -06:00 
			
		
		
		
	
		
			
	
	
		
			163 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			163 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| 
								 | 
							
								// Copyright 2014 The Go Authors. All rights reserved.
							 | 
						||
| 
								 | 
							
								// Use of this source code is governed by a BSD-style
							 | 
						||
| 
								 | 
							
								// license that can be found in the LICENSE file.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								//go:generate go run gen.go gen_trieval.go
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Package cases provides general and language-specific case mappers.
							 | 
						||
| 
								 | 
							
								package cases // import "golang.org/x/text/cases"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import (
							 | 
						||
| 
								 | 
							
									"golang.org/x/text/language"
							 | 
						||
| 
								 | 
							
									"golang.org/x/text/transform"
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// References:
							 | 
						||
| 
								 | 
							
								// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
							 | 
						||
| 
								 | 
							
								// - https://www.unicode.org/reports/tr29/
							 | 
						||
| 
								 | 
							
								// - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
							 | 
						||
| 
								 | 
							
								// - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
							 | 
						||
| 
								 | 
							
								// - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
							 | 
						||
| 
								 | 
							
								// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
							 | 
						||
| 
								 | 
							
								// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
							 | 
						||
| 
								 | 
							
								// - http://userguide.icu-project.org/transforms/casemappings
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// TODO:
							 | 
						||
| 
								 | 
							
								// - Case folding
							 | 
						||
| 
								 | 
							
								// - Wide and Narrow?
							 | 
						||
| 
								 | 
							
								// - Segmenter option for title casing.
							 | 
						||
| 
								 | 
							
								// - ASCII fast paths
							 | 
						||
| 
								 | 
							
								// - Encode Soft-Dotted property within trie somehow.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// A Caser transforms given input to a certain case. It implements
							 | 
						||
| 
								 | 
							
								// transform.Transformer and, through its Span method,
								// transform.SpanningTransformer.
							 | 
						||
| 
								 | 
							
								//
							 | 
						||
| 
								 | 
							
								// A Caser may be stateful and should therefore not be shared between
							 | 
						||
| 
								 | 
							
								// goroutines.
							 | 
						||
| 
								 | 
							
								type Caser struct {
							 | 
						||
| 
								 | 
							
									// t is the wrapped transformer; the Caser methods defined in this
									// file (Bytes, String, Reset, Transform, Span) all delegate to it.
									t transform.SpanningTransformer
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Bytes returns a new byte slice with the result of converting b to the case
							 | 
						||
| 
								 | 
							
								// form implemented by c.
							 | 
						||
| 
								 | 
							
								func (c Caser) Bytes(b []byte) []byte {
							 | 
						||
| 
								 | 
							
									b, _, _ = transform.Bytes(c.t, b)
							 | 
						||
| 
								 | 
							
									return b
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// String returns a string with the result of transforming s to the case form
							 | 
						||
| 
								 | 
							
								// implemented by c.
							 | 
						||
| 
								 | 
							
								func (c Caser) String(s string) string {
							 | 
						||
| 
								 | 
							
									s, _, _ = transform.String(c.t, s)
							 | 
						||
| 
								 | 
							
									return s
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Reset resets the Caser to be reused for new input after a previous call to
							 | 
						||
| 
								 | 
							
								// Transform.
							 | 
						||
| 
								 | 
							
								func (c Caser) Reset() { c.t.Reset() }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Transform implements the transform.Transformer interface and transforms the
							 | 
						||
| 
								 | 
							
								// given input to the case form implemented by c.
							 | 
						||
| 
								 | 
							
								func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
							 | 
						||
| 
								 | 
							
									return c.t.Transform(dst, src, atEOF)
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Span implements the transform.SpanningTransformer interface.
							 | 
						||
| 
								 | 
							
								func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
							 | 
						||
| 
								 | 
							
									return c.t.Span(src, atEOF)
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Upper returns a Caser for language-specific uppercasing.
							 | 
						||
| 
								 | 
							
								func Upper(t language.Tag, opts ...Option) Caser {
							 | 
						||
| 
								 | 
							
									return Caser{makeUpper(t, getOpts(opts...))}
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Lower returns a Caser for language-specific lowercasing.
							 | 
						||
| 
								 | 
							
								func Lower(t language.Tag, opts ...Option) Caser {
							 | 
						||
| 
								 | 
							
									return Caser{makeLower(t, getOpts(opts...))}
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Title returns a Caser for language-specific title casing. It uses an
							 | 
						||
| 
								 | 
							
								// approximation of the default Unicode Word Break algorithm.
							 | 
						||
| 
								 | 
							
								func Title(t language.Tag, opts ...Option) Caser {
							 | 
						||
| 
								 | 
							
									return Caser{makeTitle(t, getOpts(opts...))}
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Fold returns a Caser that implements Unicode case folding. The returned Caser
							 | 
						||
| 
								 | 
							
								// is stateless and safe to use concurrently by multiple goroutines.
							 | 
						||
| 
								 | 
							
								//
							 | 
						||
| 
								 | 
							
								// Case folding does not normalize the input and may not preserve a normal form.
							 | 
						||
| 
								 | 
							
								// Use the collate or search package for more convenient and linguistically
							 | 
						||
| 
								 | 
							
								// sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
							 | 
						||
| 
								 | 
							
								// where security aspects are a concern.
							 | 
						||
| 
								 | 
							
								func Fold(opts ...Option) Caser {
							 | 
						||
| 
								 | 
							
									return Caser{makeFold(getOpts(opts...))}
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// An Option is used to modify the behavior of a Caser. It receives the
								// options accumulated so far and returns the updated value; getOpts
								// applies Options in the order in which they are passed.
							 | 
						||
| 
								 | 
							
								type Option func(o options) options
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// TODO: consider these options to take a boolean as well, like HandleFinalSigma.
							 | 
						||
| 
								 | 
							
								// The advantage of using this approach is that other providers of a lower-case
							 | 
						||
| 
								 | 
							
								// algorithm could set different defaults by prefixing a user-provided slice
							 | 
						||
| 
								 | 
							
								// of options with their own. This is handy, for instance, for the precis
							 | 
						||
| 
								 | 
							
								// package which would override the default to not handle the Greek final sigma.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								var (
							 | 
						||
| 
								 | 
							
									// NoLower disables the lowercasing of non-leading letters for a title
							 | 
						||
| 
								 | 
							
									// caser.
							 | 
						||
| 
								 | 
							
									NoLower Option = noLower
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									// Compact omits mappings in case folding for characters that would grow the
							 | 
						||
| 
								 | 
							
									// input. (Unimplemented.)
							 | 
						||
| 
								 | 
							
									Compact Option = compact
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// TODO: option to preserve a normal form, if applicable?
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// options holds the settings collected from the Option values passed to
								// the Caser constructors in this file.
								type options struct {
									// noLower is set by the NoLower option; simple is set by the Compact
									// option.
									noLower, simple bool

									// TODO: segmenter, max ignorable, alternative versions, etc.

									// ignoreFinalSigma is set by HandleFinalSigma(false) and cleared by
									// HandleFinalSigma(true).
									ignoreFinalSigma bool
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func getOpts(o ...Option) (res options) {
							 | 
						||
| 
								 | 
							
									for _, f := range o {
							 | 
						||
| 
								 | 
							
										res = f(res)
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
									return
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func noLower(o options) options {
							 | 
						||
| 
								 | 
							
									o.noLower = true
							 | 
						||
| 
								 | 
							
									return o
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func compact(o options) options {
							 | 
						||
| 
								 | 
							
									o.simple = true
							 | 
						||
| 
								 | 
							
									return o
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// HandleFinalSigma specifies whether the special handling of Greek final sigma
							 | 
						||
| 
								 | 
							
								// should be enabled. Unicode prescribes handling the Greek final sigma for all
							 | 
						||
| 
								 | 
							
								// locales, but standards like IDNA and PRECIS override this default.
							 | 
						||
| 
								 | 
							
								func HandleFinalSigma(enable bool) Option {
							 | 
						||
| 
								 | 
							
									if enable {
							 | 
						||
| 
								 | 
							
										return handleFinalSigma
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
									return ignoreFinalSigma
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func ignoreFinalSigma(o options) options {
							 | 
						||
| 
								 | 
							
									o.ignoreFinalSigma = true
							 | 
						||
| 
								 | 
							
									return o
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								func handleFinalSigma(o options) options {
							 | 
						||
| 
								 | 
							
									o.ignoreFinalSigma = false
							 | 
						||
| 
								 | 
							
									return o
							 | 
						||
| 
								 | 
							
								}
							 |