mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-10-29 13:32:25 -05:00
Pg to bun (#148)
* start moving to bun * changing more stuff * more * and yet more * tests passing * seems stable now * more big changes * small fix * little fixes
This commit is contained in:
parent
071eca20ce
commit
2dc9fc1626
713 changed files with 98694 additions and 22704 deletions
162
vendor/golang.org/x/text/cases/cases.go
generated
vendored
Normal file
162
vendor/golang.org/x/text/cases/cases.go
generated
vendored
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_trieval.go
|
||||
|
||||
// Package cases provides general and language-specific case mappers.
|
||||
package cases // import "golang.org/x/text/cases"
|
||||
|
||||
import (
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// References:
|
||||
// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
|
||||
// - https://www.unicode.org/reports/tr29/
|
||||
// - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
|
||||
// - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
|
||||
// - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
|
||||
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
|
||||
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
|
||||
// - http://userguide.icu-project.org/transforms/casemappings
|
||||
|
||||
// TODO:
|
||||
// - Case folding
|
||||
// - Wide and Narrow?
|
||||
// - Segmenter option for title casing.
|
||||
// - ASCII fast paths
|
||||
// - Encode Soft-Dotted property within trie somehow.
|
||||
|
||||
// A Caser transforms given input to a certain case. It implements
|
||||
// transform.Transformer.
|
||||
//
|
||||
// A Caser may be stateful and should therefore not be shared between
|
||||
// goroutines.
|
||||
type Caser struct {
|
||||
t transform.SpanningTransformer
|
||||
}
|
||||
|
||||
// Bytes returns a new byte slice with the result of converting b to the case
|
||||
// form implemented by c.
|
||||
func (c Caser) Bytes(b []byte) []byte {
|
||||
b, _, _ = transform.Bytes(c.t, b)
|
||||
return b
|
||||
}
|
||||
|
||||
// String returns a string with the result of transforming s to the case form
|
||||
// implemented by c.
|
||||
func (c Caser) String(s string) string {
|
||||
s, _, _ = transform.String(c.t, s)
|
||||
return s
|
||||
}
|
||||
|
||||
// Reset resets the Caser to be reused for new input after a previous call to
|
||||
// Transform.
|
||||
func (c Caser) Reset() { c.t.Reset() }
|
||||
|
||||
// Transform implements the transform.Transformer interface and transforms the
|
||||
// given input to the case form implemented by c.
|
||||
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
return c.t.Transform(dst, src, atEOF)
|
||||
}
|
||||
|
||||
// Span implements the transform.SpanningTransformer interface.
|
||||
func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
return c.t.Span(src, atEOF)
|
||||
}
|
||||
|
||||
// Upper returns a Caser for language-specific uppercasing.
|
||||
func Upper(t language.Tag, opts ...Option) Caser {
|
||||
return Caser{makeUpper(t, getOpts(opts...))}
|
||||
}
|
||||
|
||||
// Lower returns a Caser for language-specific lowercasing.
|
||||
func Lower(t language.Tag, opts ...Option) Caser {
|
||||
return Caser{makeLower(t, getOpts(opts...))}
|
||||
}
|
||||
|
||||
// Title returns a Caser for language-specific title casing. It uses an
|
||||
// approximation of the default Unicode Word Break algorithm.
|
||||
func Title(t language.Tag, opts ...Option) Caser {
|
||||
return Caser{makeTitle(t, getOpts(opts...))}
|
||||
}
|
||||
|
||||
// Fold returns a Caser that implements Unicode case folding. The returned Caser
|
||||
// is stateless and safe to use concurrently by multiple goroutines.
|
||||
//
|
||||
// Case folding does not normalize the input and may not preserve a normal form.
|
||||
// Use the collate or search package for more convenient and linguistically
|
||||
// sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
|
||||
// where security aspects are a concern.
|
||||
func Fold(opts ...Option) Caser {
|
||||
return Caser{makeFold(getOpts(opts...))}
|
||||
}
|
||||
|
||||
// An Option is used to modify the behavior of a Caser.
|
||||
type Option func(o options) options
|
||||
|
||||
// TODO: consider these options to take a boolean as well, like FinalSigma.
|
||||
// The advantage of using this approach is that other providers of a lower-case
|
||||
// algorithm could set different defaults by prefixing a user-provided slice
|
||||
// of options with their own. This is handy, for instance, for the precis
|
||||
// package which would override the default to not handle the Greek final sigma.
|
||||
|
||||
var (
|
||||
// NoLower disables the lowercasing of non-leading letters for a title
|
||||
// caser.
|
||||
NoLower Option = noLower
|
||||
|
||||
// Compact omits mappings in case folding for characters that would grow the
|
||||
// input. (Unimplemented.)
|
||||
Compact Option = compact
|
||||
)
|
||||
|
||||
// TODO: option to preserve a normal form, if applicable?
|
||||
|
||||
type options struct {
|
||||
noLower bool
|
||||
simple bool
|
||||
|
||||
// TODO: segmenter, max ignorable, alternative versions, etc.
|
||||
|
||||
ignoreFinalSigma bool
|
||||
}
|
||||
|
||||
func getOpts(o ...Option) (res options) {
|
||||
for _, f := range o {
|
||||
res = f(res)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func noLower(o options) options {
|
||||
o.noLower = true
|
||||
return o
|
||||
}
|
||||
|
||||
func compact(o options) options {
|
||||
o.simple = true
|
||||
return o
|
||||
}
|
||||
|
||||
// HandleFinalSigma specifies whether the special handling of Greek final sigma
|
||||
// should be enabled. Unicode prescribes handling the Greek final sigma for all
|
||||
// locales, but standards like IDNA and PRECIS override this default.
|
||||
func HandleFinalSigma(enable bool) Option {
|
||||
if enable {
|
||||
return handleFinalSigma
|
||||
}
|
||||
return ignoreFinalSigma
|
||||
}
|
||||
|
||||
func ignoreFinalSigma(o options) options {
|
||||
o.ignoreFinalSigma = true
|
||||
return o
|
||||
}
|
||||
|
||||
func handleFinalSigma(o options) options {
|
||||
o.ignoreFinalSigma = false
|
||||
return o
|
||||
}
|
||||
376
vendor/golang.org/x/text/cases/context.go
generated
vendored
Normal file
376
vendor/golang.org/x/text/cases/context.go
generated
vendored
Normal file
|
|
@ -0,0 +1,376 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
import "golang.org/x/text/transform"
|
||||
|
||||
// A context is used for iterating over source bytes, fetching case info and
|
||||
// writing to a destination buffer.
|
||||
//
|
||||
// Casing operations may need more than one rune of context to decide how a rune
|
||||
// should be cased. Casing implementations should call checkpoint on context
|
||||
// whenever it is known to be safe to return the runes processed so far.
|
||||
//
|
||||
// It is recommended for implementations to not allow for more than 30 case
|
||||
// ignorables as lookahead (analogous to the limit in norm) and to use state if
|
||||
// unbounded lookahead is needed for cased runes.
|
||||
type context struct {
|
||||
dst, src []byte
|
||||
atEOF bool
|
||||
|
||||
pDst int // pDst points past the last written rune in dst.
|
||||
pSrc int // pSrc points to the start of the currently scanned rune.
|
||||
|
||||
// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
|
||||
nDst, nSrc int
|
||||
err error
|
||||
|
||||
sz int // size of current rune
|
||||
info info // case information of currently scanned rune
|
||||
|
||||
// State preserved across calls to Transform.
|
||||
isMidWord bool // false if next cased letter needs to be title-cased.
|
||||
}
|
||||
|
||||
func (c *context) Reset() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
|
||||
// ret returns the return values for the Transform method. It checks whether
|
||||
// there were insufficient bytes in src to complete and introduces an error
|
||||
// accordingly, if necessary.
|
||||
func (c *context) ret() (nDst, nSrc int, err error) {
|
||||
if c.err != nil || c.nSrc == len(c.src) {
|
||||
return c.nDst, c.nSrc, c.err
|
||||
}
|
||||
// This point is only reached by mappers if there was no short destination
|
||||
// buffer. This means that the source buffer was exhausted and that c.sz was
|
||||
// set to 0 by next.
|
||||
if c.atEOF && c.pSrc == len(c.src) {
|
||||
return c.pDst, c.pSrc, nil
|
||||
}
|
||||
return c.nDst, c.nSrc, transform.ErrShortSrc
|
||||
}
|
||||
|
||||
// retSpan returns the return values for the Span method. It checks whether
|
||||
// there were insufficient bytes in src to complete and introduces an error
|
||||
// accordingly, if necessary.
|
||||
func (c *context) retSpan() (n int, err error) {
|
||||
_, nSrc, err := c.ret()
|
||||
return nSrc, err
|
||||
}
|
||||
|
||||
// checkpoint sets the return value buffer points for Transform to the current
|
||||
// positions.
|
||||
func (c *context) checkpoint() {
|
||||
if c.err == nil {
|
||||
c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
|
||||
}
|
||||
}
|
||||
|
||||
// unreadRune causes the last rune read by next to be reread on the next
|
||||
// invocation of next. Only one unreadRune may be called after a call to next.
|
||||
func (c *context) unreadRune() {
|
||||
c.sz = 0
|
||||
}
|
||||
|
||||
func (c *context) next() bool {
|
||||
c.pSrc += c.sz
|
||||
if c.pSrc == len(c.src) || c.err != nil {
|
||||
c.info, c.sz = 0, 0
|
||||
return false
|
||||
}
|
||||
v, sz := trie.lookup(c.src[c.pSrc:])
|
||||
c.info, c.sz = info(v), sz
|
||||
if c.sz == 0 {
|
||||
if c.atEOF {
|
||||
// A zero size means we have an incomplete rune. If we are atEOF,
|
||||
// this means it is an illegal rune, which we will consume one
|
||||
// byte at a time.
|
||||
c.sz = 1
|
||||
} else {
|
||||
c.err = transform.ErrShortSrc
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// writeBytes adds bytes to dst.
|
||||
func (c *context) writeBytes(b []byte) bool {
|
||||
if len(c.dst)-c.pDst < len(b) {
|
||||
c.err = transform.ErrShortDst
|
||||
return false
|
||||
}
|
||||
// This loop is faster than using copy.
|
||||
for _, ch := range b {
|
||||
c.dst[c.pDst] = ch
|
||||
c.pDst++
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// writeString writes the given string to dst.
|
||||
func (c *context) writeString(s string) bool {
|
||||
if len(c.dst)-c.pDst < len(s) {
|
||||
c.err = transform.ErrShortDst
|
||||
return false
|
||||
}
|
||||
// This loop is faster than using copy.
|
||||
for i := 0; i < len(s); i++ {
|
||||
c.dst[c.pDst] = s[i]
|
||||
c.pDst++
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// copy writes the current rune to dst.
|
||||
func (c *context) copy() bool {
|
||||
return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
|
||||
}
|
||||
|
||||
// copyXOR copies the current rune to dst and modifies it by applying the XOR
|
||||
// pattern of the case info. It is the responsibility of the caller to ensure
|
||||
// that this is a rune with a XOR pattern defined.
|
||||
func (c *context) copyXOR() bool {
|
||||
if !c.copy() {
|
||||
return false
|
||||
}
|
||||
if c.info&xorIndexBit == 0 {
|
||||
// Fast path for 6-bit XOR pattern, which covers most cases.
|
||||
c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
|
||||
} else {
|
||||
// Interpret XOR bits as an index.
|
||||
// TODO: test performance for unrolling this loop. Verify that we have
|
||||
// at least two bytes and at most three.
|
||||
idx := c.info >> xorShift
|
||||
for p := c.pDst - 1; ; p-- {
|
||||
c.dst[p] ^= xorData[idx]
|
||||
idx--
|
||||
if xorData[idx] == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// hasPrefix returns true if src[pSrc:] starts with the given string.
|
||||
func (c *context) hasPrefix(s string) bool {
|
||||
b := c.src[c.pSrc:]
|
||||
if len(b) < len(s) {
|
||||
return false
|
||||
}
|
||||
for i, c := range b[:len(s)] {
|
||||
if c != s[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// caseType returns an info with only the case bits, normalized to either
|
||||
// cLower, cUpper, cTitle or cUncased.
|
||||
func (c *context) caseType() info {
|
||||
cm := c.info & 0x7
|
||||
if cm < 4 {
|
||||
return cm
|
||||
}
|
||||
if cm >= cXORCase {
|
||||
// xor the last bit of the rune with the case type bits.
|
||||
b := c.src[c.pSrc+c.sz-1]
|
||||
return info(b&1) ^ cm&0x3
|
||||
}
|
||||
if cm == cIgnorableCased {
|
||||
return cLower
|
||||
}
|
||||
return cUncased
|
||||
}
|
||||
|
||||
// lower writes the lowercase version of the current rune to dst.
|
||||
func lower(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cLower {
|
||||
return c.copy()
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
return c.copyXOR()
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
|
||||
return c.writeString(e[offset : offset+nLower])
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
|
||||
func isLower(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cLower {
|
||||
return true
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// upper writes the uppercase version of the current rune to dst.
|
||||
func upper(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cUpper {
|
||||
return c.copy()
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
return c.copyXOR()
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||
// Get length of first special case mapping.
|
||||
n := (e[1] >> lengthBits) & lengthMask
|
||||
if ct == cTitle {
|
||||
// The first special case mapping is for lower. Set n to the second.
|
||||
if n == noChange {
|
||||
n = 0
|
||||
}
|
||||
n, e = e[1]&lengthMask, e[n:]
|
||||
}
|
||||
if n != noChange {
|
||||
return c.writeString(e[offset : offset+n])
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
|
||||
// isUpper writes the isUppercase version of the current rune to dst.
|
||||
func isUpper(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cUpper {
|
||||
return true
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
// Get length of first special case mapping.
|
||||
n := (e[1] >> lengthBits) & lengthMask
|
||||
if ct == cTitle {
|
||||
n = e[1] & lengthMask
|
||||
}
|
||||
if n != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// title writes the title case version of the current rune to dst.
|
||||
func title(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cTitle {
|
||||
return c.copy()
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct == cLower {
|
||||
return c.copyXOR()
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
// Get the exception data.
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||
|
||||
nFirst := (e[1] >> lengthBits) & lengthMask
|
||||
if nTitle := e[1] & lengthMask; nTitle != noChange {
|
||||
if nFirst != noChange {
|
||||
e = e[nFirst:]
|
||||
}
|
||||
return c.writeString(e[offset : offset+nTitle])
|
||||
}
|
||||
if ct == cLower && nFirst != noChange {
|
||||
// Use the uppercase version instead.
|
||||
return c.writeString(e[offset : offset+nFirst])
|
||||
}
|
||||
// Already in correct case.
|
||||
return c.copy()
|
||||
}
|
||||
|
||||
// isTitle reports whether the current rune is in title case.
|
||||
func isTitle(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cTitle {
|
||||
return true
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct == cLower {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
// Get the exception data.
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
if nTitle := e[1] & lengthMask; nTitle != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
nFirst := (e[1] >> lengthBits) & lengthMask
|
||||
if ct == cLower && nFirst != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// foldFull writes the foldFull version of the current rune to dst.
|
||||
func foldFull(c *context) bool {
|
||||
if c.info&hasMappingMask == 0 {
|
||||
return c.copy()
|
||||
}
|
||||
ct := c.caseType()
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct != cLower || c.info&inverseFoldBit != 0 {
|
||||
return c.copyXOR()
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
n := e[0] & lengthMask
|
||||
if n == 0 {
|
||||
if ct == cLower {
|
||||
return c.copy()
|
||||
}
|
||||
n = (e[1] >> lengthBits) & lengthMask
|
||||
}
|
||||
return c.writeString(e[2 : 2+n])
|
||||
}
|
||||
|
||||
// isFoldFull reports whether the current run is mapped to foldFull
|
||||
func isFoldFull(c *context) bool {
|
||||
if c.info&hasMappingMask == 0 {
|
||||
return true
|
||||
}
|
||||
ct := c.caseType()
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct != cLower || c.info&inverseFoldBit != 0 {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
n := e[0] & lengthMask
|
||||
if n == 0 && ct == cLower {
|
||||
return true
|
||||
}
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
34
vendor/golang.org/x/text/cases/fold.go
generated
vendored
Normal file
34
vendor/golang.org/x/text/cases/fold.go
generated
vendored
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
import "golang.org/x/text/transform"
|
||||
|
||||
type caseFolder struct{ transform.NopResetter }
|
||||
|
||||
// caseFolder implements the Transformer interface for doing case folding.
|
||||
func (t *caseFolder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() {
|
||||
foldFull(&c)
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t *caseFolder) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isFoldFull(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
func makeFold(o options) transform.SpanningTransformer {
|
||||
// TODO: Special case folding, through option Language, Special/Turkic, or
|
||||
// both.
|
||||
// TODO: Implement Compact options.
|
||||
return &caseFolder{}
|
||||
}
|
||||
62
vendor/golang.org/x/text/cases/icu.go
generated
vendored
Normal file
62
vendor/golang.org/x/text/cases/icu.go
generated
vendored
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build icu
|
||||
// +build icu
|
||||
|
||||
package cases
|
||||
|
||||
// Ideally these functions would be defined in a test file, but go test doesn't
|
||||
// allow CGO in tests. The build tag should ensure either way that these
|
||||
// functions will not end up in the package.
|
||||
|
||||
// TODO: Ensure that the correct ICU version is set.
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -licui18n.57 -licuuc.57
|
||||
#include <stdlib.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utypes.h>
|
||||
#include <unicode/localpointer.h>
|
||||
#include <unicode/ucasemap.h>
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import "unsafe"
|
||||
|
||||
func doICU(tag, caser, input string) string {
|
||||
err := C.UErrorCode(0)
|
||||
loc := C.CString(tag)
|
||||
cm := C.ucasemap_open(loc, C.uint32_t(0), &err)
|
||||
|
||||
buf := make([]byte, len(input)*4)
|
||||
dst := (*C.char)(unsafe.Pointer(&buf[0]))
|
||||
src := C.CString(input)
|
||||
|
||||
cn := C.int32_t(0)
|
||||
|
||||
switch caser {
|
||||
case "fold":
|
||||
cn = C.ucasemap_utf8FoldCase(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
case "lower":
|
||||
cn = C.ucasemap_utf8ToLower(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
case "upper":
|
||||
cn = C.ucasemap_utf8ToUpper(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
case "title":
|
||||
cn = C.ucasemap_utf8ToTitle(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
}
|
||||
return string(buf[:cn])
|
||||
}
|
||||
82
vendor/golang.org/x/text/cases/info.go
generated
vendored
Normal file
82
vendor/golang.org/x/text/cases/info.go
generated
vendored
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
func (c info) cccVal() info {
|
||||
if c&exceptionBit != 0 {
|
||||
return info(exceptions[c>>exceptionShift]) & cccMask
|
||||
}
|
||||
return c & cccMask
|
||||
}
|
||||
|
||||
func (c info) cccType() info {
|
||||
ccc := c.cccVal()
|
||||
if ccc <= cccZero {
|
||||
return cccZero
|
||||
}
|
||||
return ccc
|
||||
}
|
||||
|
||||
// TODO: Implement full Unicode breaking algorithm:
|
||||
// 1) Implement breaking in separate package.
|
||||
// 2) Use the breaker here.
|
||||
// 3) Compare table size and performance of using the more generic breaker.
|
||||
//
|
||||
// Note that we can extend the current algorithm to be much more accurate. This
|
||||
// only makes sense, though, if the performance and/or space penalty of using
|
||||
// the generic breaker is big. Extra data will only be needed for non-cased
|
||||
// runes, which means there are sufficient bits left in the caseType.
|
||||
// ICU prohibits breaking in such cases as well.
|
||||
|
||||
// For the purpose of title casing we use an approximation of the Unicode Word
|
||||
// Breaking algorithm defined in Annex #29:
|
||||
// https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table.
|
||||
//
|
||||
// For our approximation, we group the Word Break types into the following
|
||||
// categories, with associated rules:
|
||||
//
|
||||
// 1) Letter:
|
||||
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend, Format_FE, ZWJ.
|
||||
// Rule: Never break between consecutive runes of this category.
|
||||
//
|
||||
// 2) Mid:
|
||||
// MidLetter, MidNumLet, Single_Quote.
|
||||
// (Cf. case-ignorable: MidLetter, MidNumLet, Single_Quote or cat is Mn,
|
||||
// Me, Cf, Lm or Sk).
|
||||
// Rule: Don't break between Letter and Mid, but break between two Mids.
|
||||
//
|
||||
// 3) Break:
|
||||
// Any other category: NewLine, MidNum, CR, LF, Double_Quote, Katakana, and
|
||||
// Other.
|
||||
// These categories should always result in a break between two cased letters.
|
||||
// Rule: Always break.
|
||||
//
|
||||
// Note 1: the Katakana and MidNum categories can, in esoteric cases, result in
|
||||
// preventing a break between two cased letters. For now we will ignore this
|
||||
// (e.g. [ALetter] [ExtendNumLet] [Katakana] [ExtendNumLet] [ALetter] and
|
||||
// [ALetter] [Numeric] [MidNum] [Numeric] [ALetter].)
|
||||
//
|
||||
// Note 2: the rule for Mid is very approximate, but works in most cases. To
|
||||
// improve, we could store the categories in the trie value and use a FA to
|
||||
// manage breaks. See TODO comment above.
|
||||
//
|
||||
// Note 3: according to the spec, it is possible for the Extend category to
|
||||
// introduce breaks between other categories grouped in Letter. However, this
|
||||
// is undesirable for our purposes. ICU prevents breaks in such cases as well.
|
||||
|
||||
// isBreak returns whether this rune should introduce a break.
|
||||
func (c info) isBreak() bool {
|
||||
return c.cccVal() == cccBreak
|
||||
}
|
||||
|
||||
// isLetter returns whether the rune is of break type ALetter, Hebrew_Letter,
|
||||
// Numeric, ExtendNumLet, or Extend.
|
||||
func (c info) isLetter() bool {
|
||||
ccc := c.cccVal()
|
||||
if ccc == cccZero {
|
||||
return !c.isCaseIgnorable()
|
||||
}
|
||||
return ccc != cccBreak
|
||||
}
|
||||
816
vendor/golang.org/x/text/cases/map.go
generated
vendored
Normal file
816
vendor/golang.org/x/text/cases/map.go
generated
vendored
Normal file
|
|
@ -0,0 +1,816 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
// This file contains the definitions of case mappings for all supported
|
||||
// languages. The rules for the language-specific tailorings were taken and
|
||||
// modified from the CLDR transform definitions in common/transforms.
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// A mapFunc takes a context set to the current rune and writes the mapped
|
||||
// version to the same context. It may advance the context to the next rune. It
|
||||
// returns whether a checkpoint is possible: whether the pDst bytes written to
|
||||
// dst so far won't need changing as we see more source bytes.
|
||||
type mapFunc func(*context) bool
|
||||
|
||||
// A spanFunc takes a context set to the current rune and returns whether this
|
||||
// rune would be altered when written to the output. It may advance the context
|
||||
// to the next rune. It returns whether a checkpoint is possible.
|
||||
type spanFunc func(*context) bool
|
||||
|
||||
// maxIgnorable defines the maximum number of ignorables to consider for
|
||||
// lookahead operations.
|
||||
const maxIgnorable = 30
|
||||
|
||||
// supported lists the language tags for which we have tailorings.
|
||||
const supported = "und af az el lt nl tr"
|
||||
|
||||
func init() {
|
||||
tags := []language.Tag{}
|
||||
for _, s := range strings.Split(supported, " ") {
|
||||
tags = append(tags, language.MustParse(s))
|
||||
}
|
||||
matcher = internal.NewInheritanceMatcher(tags)
|
||||
Supported = language.NewCoverage(tags)
|
||||
}
|
||||
|
||||
var (
|
||||
matcher *internal.InheritanceMatcher
|
||||
|
||||
Supported language.Coverage
|
||||
|
||||
// We keep the following lists separate, instead of having a single per-
|
||||
// language struct, to give the compiler a chance to remove unused code.
|
||||
|
||||
// Some uppercase mappers are stateless, so we can precompute the
|
||||
// Transformers and save a bit on runtime allocations.
|
||||
upperFunc = []struct {
|
||||
upper mapFunc
|
||||
span spanFunc
|
||||
}{
|
||||
{nil, nil}, // und
|
||||
{nil, nil}, // af
|
||||
{aztrUpper(upper), isUpper}, // az
|
||||
{elUpper, noSpan}, // el
|
||||
{ltUpper(upper), noSpan}, // lt
|
||||
{nil, nil}, // nl
|
||||
{aztrUpper(upper), isUpper}, // tr
|
||||
}
|
||||
|
||||
undUpper transform.SpanningTransformer = &undUpperCaser{}
|
||||
undLower transform.SpanningTransformer = &undLowerCaser{}
|
||||
undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}
|
||||
|
||||
lowerFunc = []mapFunc{
|
||||
nil, // und
|
||||
nil, // af
|
||||
aztrLower, // az
|
||||
nil, // el
|
||||
ltLower, // lt
|
||||
nil, // nl
|
||||
aztrLower, // tr
|
||||
}
|
||||
|
||||
titleInfos = []struct {
|
||||
title mapFunc
|
||||
lower mapFunc
|
||||
titleSpan spanFunc
|
||||
rewrite func(*context)
|
||||
}{
|
||||
{title, lower, isTitle, nil}, // und
|
||||
{title, lower, isTitle, afnlRewrite}, // af
|
||||
{aztrUpper(title), aztrLower, isTitle, nil}, // az
|
||||
{title, lower, isTitle, nil}, // el
|
||||
{ltUpper(title), ltLower, noSpan, nil}, // lt
|
||||
{nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl
|
||||
{aztrUpper(title), aztrLower, isTitle, nil}, // tr
|
||||
}
|
||||
)
|
||||
|
||||
func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
|
||||
_, i, _ := matcher.Match(t)
|
||||
f := upperFunc[i].upper
|
||||
if f == nil {
|
||||
return undUpper
|
||||
}
|
||||
return &simpleCaser{f: f, span: upperFunc[i].span}
|
||||
}
|
||||
|
||||
func makeLower(t language.Tag, o options) transform.SpanningTransformer {
|
||||
_, i, _ := matcher.Match(t)
|
||||
f := lowerFunc[i]
|
||||
if f == nil {
|
||||
if o.ignoreFinalSigma {
|
||||
return undLowerIgnoreSigma
|
||||
}
|
||||
return undLower
|
||||
}
|
||||
if o.ignoreFinalSigma {
|
||||
return &simpleCaser{f: f, span: isLower}
|
||||
}
|
||||
return &lowerCaser{
|
||||
first: f,
|
||||
midWord: finalSigma(f),
|
||||
}
|
||||
}
|
||||
|
||||
func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
|
||||
_, i, _ := matcher.Match(t)
|
||||
x := &titleInfos[i]
|
||||
lower := x.lower
|
||||
if o.noLower {
|
||||
lower = (*context).copy
|
||||
} else if !o.ignoreFinalSigma {
|
||||
lower = finalSigma(lower)
|
||||
}
|
||||
return &titleCaser{
|
||||
title: x.title,
|
||||
lower: lower,
|
||||
titleSpan: x.titleSpan,
|
||||
rewrite: x.rewrite,
|
||||
}
|
||||
}
|
||||
|
||||
func noSpan(c *context) bool {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
|
||||
// TODO: consider a similar special case for the fast majority lower case. This
|
||||
// is a bit more involved so will require some more precise benchmarking to
|
||||
// justify it.
|
||||
|
||||
type undUpperCaser struct{ transform.NopResetter }
|
||||
|
||||
// undUpperCaser implements the Transformer interface for doing an upper case
|
||||
// mapping for the root locale (und). It eliminates the need for an allocation
|
||||
// as it prevents escaping by not using function pointers.
|
||||
func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() {
|
||||
upper(&c)
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isUpper(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
|
||||
// a lower case mapping for the root locale (und) ignoring final sigma
|
||||
// handling. This casing algorithm is used in some performance-critical packages
|
||||
// like secure/precis and x/net/http/idna, which warrants its special-casing.
|
||||
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
|
||||
|
||||
func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() && lower(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
|
||||
}
|
||||
|
||||
// Span implements a generic lower-casing. This is possible as isLower works
|
||||
// for all lowercasing variants. All lowercase variants only vary in how they
|
||||
// transform a non-lowercase letter. They will never change an already lowercase
|
||||
// letter. In addition, there is no state.
|
||||
func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isLower(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
type simpleCaser struct {
|
||||
context
|
||||
f mapFunc
|
||||
span spanFunc
|
||||
}
|
||||
|
||||
// simpleCaser implements the Transformer interface for doing a case operation
|
||||
// on a rune-by-rune basis.
|
||||
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() && t.f(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && t.span(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
// undLowerCaser implements the Transformer interface for doing a lower case
|
||||
// mapping for the root locale (und) ignoring final sigma handling. This casing
|
||||
// algorithm is used in some performance-critical packages like secure/precis
|
||||
// and x/net/http/idna, which warrants its special-casing.
|
||||
type undLowerCaser struct{ transform.NopResetter }
|
||||
|
||||
func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
|
||||
for isInterWord := true; c.next(); {
|
||||
if isInterWord {
|
||||
if c.info.isCased() {
|
||||
if !lower(&c) {
|
||||
break
|
||||
}
|
||||
isInterWord = false
|
||||
} else if !c.copy() {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
if c.info.isNotCasedAndNotCaseIgnorable() {
|
||||
if !c.copy() {
|
||||
break
|
||||
}
|
||||
isInterWord = true
|
||||
} else if !c.hasPrefix("Σ") {
|
||||
if !lower(&c) {
|
||||
break
|
||||
}
|
||||
} else if !finalSigmaBody(&c) {
|
||||
break
|
||||
}
|
||||
}
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isLower(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
// lowerCaser implements the Transformer interface. The default Unicode lower
|
||||
// casing requires different treatment for the first and subsequent characters
|
||||
// of a word, most notably to handle the Greek final Sigma.
|
||||
type lowerCaser struct {
|
||||
undLowerIgnoreSigmaCaser
|
||||
|
||||
context
|
||||
|
||||
first, midWord mapFunc
|
||||
}
|
||||
|
||||
func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
t.context = context{dst: dst, src: src, atEOF: atEOF}
|
||||
c := &t.context
|
||||
|
||||
for isInterWord := true; c.next(); {
|
||||
if isInterWord {
|
||||
if c.info.isCased() {
|
||||
if !t.first(c) {
|
||||
break
|
||||
}
|
||||
isInterWord = false
|
||||
} else if !c.copy() {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
if c.info.isNotCasedAndNotCaseIgnorable() {
|
||||
if !c.copy() {
|
||||
break
|
||||
}
|
||||
isInterWord = true
|
||||
} else if !t.midWord(c) {
|
||||
break
|
||||
}
|
||||
}
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
// titleCaser implements the Transformer interface. Title casing algorithms
|
||||
// distinguish between the first letter of a word and subsequent letters of the
|
||||
// same word. It uses state to avoid requiring a potentially infinite lookahead.
|
||||
type titleCaser struct {
|
||||
context
|
||||
|
||||
// rune mappings used by the actual casing algorithms.
|
||||
title mapFunc
|
||||
lower mapFunc
|
||||
titleSpan spanFunc
|
||||
|
||||
rewrite func(*context)
|
||||
}
|
||||
|
||||
// Transform implements the standard Unicode title case algorithm as defined in
|
||||
// Chapter 3 of The Unicode Standard:
|
||||
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
|
||||
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
|
||||
// first cased character F following the word boundary. If F exists, map F to
|
||||
// Titlecase_Mapping(F); then map all characters C between F and the following
|
||||
// word boundary to Lowercase_Mapping(C).
|
||||
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
|
||||
c := &t.context
|
||||
|
||||
if !c.next() {
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
for {
|
||||
p := c.info
|
||||
if t.rewrite != nil {
|
||||
t.rewrite(c)
|
||||
}
|
||||
|
||||
wasMid := p.isMid()
|
||||
// Break out of this loop on failure to ensure we do not modify the
|
||||
// state incorrectly.
|
||||
if p.isCased() {
|
||||
if !c.isMidWord {
|
||||
if !t.title(c) {
|
||||
break
|
||||
}
|
||||
c.isMidWord = true
|
||||
} else if !t.lower(c) {
|
||||
break
|
||||
}
|
||||
} else if !c.copy() {
|
||||
break
|
||||
} else if p.isBreak() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
|
||||
// As we save the state of the transformer, it is safe to call
|
||||
// checkpoint after any successful write.
|
||||
if !(c.isMidWord && wasMid) {
|
||||
c.checkpoint()
|
||||
}
|
||||
|
||||
if !c.next() {
|
||||
break
|
||||
}
|
||||
if wasMid && c.info.isMid() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
|
||||
c := &t.context
|
||||
|
||||
if !c.next() {
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
for {
|
||||
p := c.info
|
||||
if t.rewrite != nil {
|
||||
t.rewrite(c)
|
||||
}
|
||||
|
||||
wasMid := p.isMid()
|
||||
// Break out of this loop on failure to ensure we do not modify the
|
||||
// state incorrectly.
|
||||
if p.isCased() {
|
||||
if !c.isMidWord {
|
||||
if !t.titleSpan(c) {
|
||||
break
|
||||
}
|
||||
c.isMidWord = true
|
||||
} else if !isLower(c) {
|
||||
break
|
||||
}
|
||||
} else if p.isBreak() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
// As we save the state of the transformer, it is safe to call
|
||||
// checkpoint after any successful write.
|
||||
if !(c.isMidWord && wasMid) {
|
||||
c.checkpoint()
|
||||
}
|
||||
|
||||
if !c.next() {
|
||||
break
|
||||
}
|
||||
if wasMid && c.info.isMid() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
// finalSigma adds Greek final Sigma handing to another casing function. It
|
||||
// determines whether a lowercased sigma should be σ or ς, by looking ahead for
|
||||
// case-ignorables and a cased letters.
|
||||
func finalSigma(f mapFunc) mapFunc {
|
||||
return func(c *context) bool {
|
||||
if !c.hasPrefix("Σ") {
|
||||
return f(c)
|
||||
}
|
||||
return finalSigmaBody(c)
|
||||
}
|
||||
}
|
||||
|
||||
func finalSigmaBody(c *context) bool {
|
||||
// Current rune must be ∑.
|
||||
|
||||
// ::NFD();
|
||||
// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
|
||||
// Σ } [:case-ignorable:]* [:cased:] → σ;
|
||||
// [:cased:] [:case-ignorable:]* { Σ → ς;
|
||||
// ::Any-Lower;
|
||||
// ::NFC();
|
||||
|
||||
p := c.pDst
|
||||
c.writeString("ς")
|
||||
|
||||
// TODO: we should do this here, but right now this will never have an
|
||||
// effect as this is called when the prefix is Sigma, whereas Dutch and
|
||||
// Afrikaans only test for an apostrophe.
|
||||
//
|
||||
// if t.rewrite != nil {
|
||||
// t.rewrite(c)
|
||||
// }
|
||||
|
||||
// We need to do one more iteration after maxIgnorable, as a cased
|
||||
// letter is not an ignorable and may modify the result.
|
||||
wasMid := false
|
||||
for i := 0; i < maxIgnorable+1; i++ {
|
||||
if !c.next() {
|
||||
return false
|
||||
}
|
||||
if !c.info.isCaseIgnorable() {
|
||||
// All Midword runes are also case ignorable, so we are
|
||||
// guaranteed to have a letter or word break here. As we are
|
||||
// unreading the run, there is no need to unset c.isMidWord;
|
||||
// the title caser will handle this.
|
||||
if c.info.isCased() {
|
||||
// p+1 is guaranteed to be in bounds: if writing ς was
|
||||
// successful, p+1 will contain the second byte of ς. If not,
|
||||
// this function will have returned after c.next returned false.
|
||||
c.dst[p+1]++ // ς → σ
|
||||
}
|
||||
c.unreadRune()
|
||||
return true
|
||||
}
|
||||
// A case ignorable may also introduce a word break, so we may need
|
||||
// to continue searching even after detecting a break.
|
||||
isMid := c.info.isMid()
|
||||
if (wasMid && isMid) || c.info.isBreak() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
wasMid = isMid
|
||||
c.copy()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// finalSigmaSpan would be the same as isLower.
|
||||
|
||||
// elUpper implements Greek upper casing, which entails removing a predefined
|
||||
// set of non-blocked modifiers. Note that these accents should not be removed
|
||||
// for title casing!
|
||||
// Example: "Οδός" -> "ΟΔΟΣ".
|
||||
func elUpper(c *context) bool {
|
||||
// From CLDR:
|
||||
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
|
||||
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;
|
||||
|
||||
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
|
||||
oldPDst := c.pDst
|
||||
if !upper(c) {
|
||||
return false
|
||||
}
|
||||
if !unicode.Is(unicode.Greek, r) {
|
||||
return true
|
||||
}
|
||||
i := 0
|
||||
// Take the properties of the uppercased rune that is already written to the
|
||||
// destination. This saves us the trouble of having to uppercase the
|
||||
// decomposed rune again.
|
||||
if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
|
||||
// Restore the destination position and process the decomposed rune.
|
||||
r, sz := utf8.DecodeRune(b)
|
||||
if r <= 0xFF { // See A.6.1
|
||||
return true
|
||||
}
|
||||
c.pDst = oldPDst
|
||||
// Insert the first rune and ignore the modifiers. See A.6.2.
|
||||
c.writeBytes(b[:sz])
|
||||
i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
|
||||
}
|
||||
|
||||
for ; i < maxIgnorable && c.next(); i++ {
|
||||
switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
|
||||
// Above and Iota Subscript
|
||||
case 0x0300, // U+0300 COMBINING GRAVE ACCENT
|
||||
0x0301, // U+0301 COMBINING ACUTE ACCENT
|
||||
0x0304, // U+0304 COMBINING MACRON
|
||||
0x0306, // U+0306 COMBINING BREVE
|
||||
0x0308, // U+0308 COMBINING DIAERESIS
|
||||
0x0313, // U+0313 COMBINING COMMA ABOVE
|
||||
0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
|
||||
0x0342, // U+0342 COMBINING GREEK PERISPOMENI
|
||||
0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
|
||||
// No-op. Gobble the modifier.
|
||||
|
||||
default:
|
||||
switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
|
||||
case cccZero:
|
||||
c.unreadRune()
|
||||
return true
|
||||
|
||||
// We don't need to test for IotaSubscript as the only rune that
|
||||
// qualifies (U+0345) was already excluded in the switch statement
|
||||
// above. See A.4.
|
||||
|
||||
case cccAbove:
|
||||
return c.copy()
|
||||
default:
|
||||
// Some other modifier. We're still allowed to gobble Greek
|
||||
// modifiers after this.
|
||||
c.copy()
|
||||
}
|
||||
}
|
||||
}
|
||||
return i == maxIgnorable
|
||||
}
|
||||
|
||||
// TODO: implement elUpperSpan (low-priority: complex and infrequent).
|
||||
|
||||
func ltLower(c *context) bool {
|
||||
// From CLDR:
|
||||
// # Introduce an explicit dot above when lowercasing capital I's and J's
|
||||
// # whenever there are more accents above.
|
||||
// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
|
||||
// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
|
||||
// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
|
||||
// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
|
||||
// ::NFD();
|
||||
// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
|
||||
// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
|
||||
// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
|
||||
// I \u0300 (Ì) → i \u0307 \u0300;
|
||||
// I \u0301 (Í) → i \u0307 \u0301;
|
||||
// I \u0303 (Ĩ) → i \u0307 \u0303;
|
||||
// ::Any-Lower();
|
||||
// ::NFC();
|
||||
|
||||
i := 0
|
||||
if r := c.src[c.pSrc]; r < utf8.RuneSelf {
|
||||
lower(c)
|
||||
if r != 'I' && r != 'J' {
|
||||
return true
|
||||
}
|
||||
} else {
|
||||
p := norm.NFD.Properties(c.src[c.pSrc:])
|
||||
if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
|
||||
// UTF-8 optimization: the decomposition will only have an above
|
||||
// modifier if the last rune of the decomposition is in [U+300-U+311].
|
||||
// In all other cases, a decomposition starting with I is always
|
||||
// an I followed by modifiers that are not cased themselves. See A.2.
|
||||
if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
|
||||
if !c.writeBytes(d[:1]) {
|
||||
return false
|
||||
}
|
||||
c.dst[c.pDst-1] += 'a' - 'A' // lower
|
||||
|
||||
// Assumption: modifier never changes on lowercase. See A.1.
|
||||
// Assumption: all modifiers added have CCC = Above. See A.2.3.
|
||||
return c.writeString("\u0307") && c.writeBytes(d[1:])
|
||||
}
|
||||
// In all other cases the additional modifiers will have a CCC
|
||||
// that is less than 230 (Above). We will insert the U+0307, if
|
||||
// needed, after these modifiers so that a string in FCD form
|
||||
// will remain so. See A.2.2.
|
||||
lower(c)
|
||||
i = 1
|
||||
} else {
|
||||
return lower(c)
|
||||
}
|
||||
}
|
||||
|
||||
for ; i < maxIgnorable && c.next(); i++ {
|
||||
switch c.info.cccType() {
|
||||
case cccZero:
|
||||
c.unreadRune()
|
||||
return true
|
||||
case cccAbove:
|
||||
return c.writeString("\u0307") && c.copy() // See A.1.
|
||||
default:
|
||||
c.copy() // See A.1.
|
||||
}
|
||||
}
|
||||
return i == maxIgnorable
|
||||
}
|
||||
|
||||
// ltLowerSpan would be the same as isLower.
|
||||
|
||||
func ltUpper(f mapFunc) mapFunc {
|
||||
return func(c *context) bool {
|
||||
// Unicode:
|
||||
// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
|
||||
//
|
||||
// From CLDR:
|
||||
// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
|
||||
// # intervening non-230 marks.
|
||||
// ::NFD();
|
||||
// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
|
||||
// ::Any-Upper();
|
||||
// ::NFC();
|
||||
|
||||
// TODO: See A.5. A soft-dotted rune never has an exception. This would
|
||||
// allow us to overload the exception bit and encode this property in
|
||||
// info. Need to measure performance impact of this.
|
||||
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
|
||||
oldPDst := c.pDst
|
||||
if !f(c) {
|
||||
return false
|
||||
}
|
||||
if !unicode.Is(unicode.Soft_Dotted, r) {
|
||||
return true
|
||||
}
|
||||
|
||||
// We don't need to do an NFD normalization, as a soft-dotted rune never
|
||||
// contains U+0307. See A.3.
|
||||
|
||||
i := 0
|
||||
for ; i < maxIgnorable && c.next(); i++ {
|
||||
switch c.info.cccType() {
|
||||
case cccZero:
|
||||
c.unreadRune()
|
||||
return true
|
||||
case cccAbove:
|
||||
if c.hasPrefix("\u0307") {
|
||||
// We don't do a full NFC, but rather combine runes for
|
||||
// some of the common cases. (Returning NFC or
|
||||
// preserving normal form is neither a requirement nor
|
||||
// a possibility anyway).
|
||||
if !c.next() {
|
||||
return false
|
||||
}
|
||||
if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
|
||||
s := ""
|
||||
switch c.src[c.pSrc+1] {
|
||||
case 0x80: // U+0300 COMBINING GRAVE ACCENT
|
||||
s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
|
||||
case 0x81: // U+0301 COMBINING ACUTE ACCENT
|
||||
s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
|
||||
case 0x83: // U+0303 COMBINING TILDE
|
||||
s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
|
||||
case 0x88: // U+0308 COMBINING DIAERESIS
|
||||
s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
default:
|
||||
}
|
||||
if s != "" {
|
||||
c.pDst = oldPDst
|
||||
return c.writeString(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
return c.copy()
|
||||
default:
|
||||
c.copy()
|
||||
}
|
||||
}
|
||||
return i == maxIgnorable
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: implement ltUpperSpan (low priority: complex and infrequent).
|
||||
|
||||
func aztrUpper(f mapFunc) mapFunc {
|
||||
return func(c *context) bool {
|
||||
// i→İ;
|
||||
if c.src[c.pSrc] == 'i' {
|
||||
return c.writeString("İ")
|
||||
}
|
||||
return f(c)
|
||||
}
|
||||
}
|
||||
|
||||
func aztrLower(c *context) (done bool) {
|
||||
// From CLDR:
|
||||
// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
|
||||
// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
// İ→i;
|
||||
// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
|
||||
// # This matches the behavior of the canonically equivalent I-dot_above
|
||||
// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
|
||||
// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
|
||||
// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
|
||||
// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
|
||||
// I→ı ;
|
||||
// ::Any-Lower();
|
||||
if c.hasPrefix("\u0130") { // İ
|
||||
return c.writeString("i")
|
||||
}
|
||||
if c.src[c.pSrc] != 'I' {
|
||||
return lower(c)
|
||||
}
|
||||
|
||||
// We ignore the lower-case I for now, but insert it later when we know
|
||||
// which form we need.
|
||||
start := c.pSrc + c.sz
|
||||
|
||||
i := 0
|
||||
Loop:
|
||||
// We check for up to n ignorables before \u0307. As \u0307 is an
|
||||
// ignorable as well, n is maxIgnorable-1.
|
||||
for ; i < maxIgnorable && c.next(); i++ {
|
||||
switch c.info.cccType() {
|
||||
case cccAbove:
|
||||
if c.hasPrefix("\u0307") {
|
||||
return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
|
||||
}
|
||||
done = true
|
||||
break Loop
|
||||
case cccZero:
|
||||
c.unreadRune()
|
||||
done = true
|
||||
break Loop
|
||||
default:
|
||||
// We'll write this rune after we know which starter to use.
|
||||
}
|
||||
}
|
||||
if i == maxIgnorable {
|
||||
done = true
|
||||
}
|
||||
return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
|
||||
}
|
||||
|
||||
// aztrLowerSpan would be the same as isLower.
|
||||
|
||||
func nlTitle(c *context) bool {
|
||||
// From CLDR:
|
||||
// # Special titlecasing for Dutch initial "ij".
|
||||
// ::Any-Title();
|
||||
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
|
||||
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
|
||||
if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
|
||||
return title(c)
|
||||
}
|
||||
|
||||
if !c.writeString("I") || !c.next() {
|
||||
return false
|
||||
}
|
||||
if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
|
||||
return c.writeString("J")
|
||||
}
|
||||
c.unreadRune()
|
||||
return true
|
||||
}
|
||||
|
||||
func nlTitleSpan(c *context) bool {
|
||||
// From CLDR:
|
||||
// # Special titlecasing for Dutch initial "ij".
|
||||
// ::Any-Title();
|
||||
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
|
||||
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
|
||||
if c.src[c.pSrc] != 'I' {
|
||||
return isTitle(c)
|
||||
}
|
||||
if !c.next() || c.src[c.pSrc] == 'j' {
|
||||
return false
|
||||
}
|
||||
if c.src[c.pSrc] != 'J' {
|
||||
c.unreadRune()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Not part of CLDR, but see https://unicode.org/cldr/trac/ticket/7078.
|
||||
func afnlRewrite(c *context) {
|
||||
if c.hasPrefix("'") || c.hasPrefix("’") {
|
||||
c.isMidWord = true
|
||||
}
|
||||
}
|
||||
2256
vendor/golang.org/x/text/cases/tables10.0.0.go
generated
vendored
Normal file
2256
vendor/golang.org/x/text/cases/tables10.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
2317
vendor/golang.org/x/text/cases/tables11.0.0.go
generated
vendored
Normal file
2317
vendor/golang.org/x/text/cases/tables11.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
2360
vendor/golang.org/x/text/cases/tables12.0.0.go
generated
vendored
Normal file
2360
vendor/golang.org/x/text/cases/tables12.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
2400
vendor/golang.org/x/text/cases/tables13.0.0.go
generated
vendored
Normal file
2400
vendor/golang.org/x/text/cases/tables13.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
2216
vendor/golang.org/x/text/cases/tables9.0.0.go
generated
vendored
Normal file
2216
vendor/golang.org/x/text/cases/tables9.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
214
vendor/golang.org/x/text/cases/trieval.go
generated
vendored
Normal file
214
vendor/golang.org/x/text/cases/trieval.go
generated
vendored
Normal file
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.

package cases

// This file contains definitions for interpreting the trie value of the case
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.

// info holds case information for a single rune. It is the value returned
// by a trie lookup. Most mapping information can be stored in a single 16-bit
// value. If not, for example when a rune is mapped to multiple runes, the value
// stores some basic case data and an index into an array with additional data.
//
// The per-rune values have the following format:
//
//	if (exception) {
//	  15..4  unsigned exception index
//	} else {
//	  15..8  XOR pattern or index to XOR pattern for case mapping
//	         Only 13..8 are used for XOR patterns.
//	     7   inverseFold (fold to upper, not to lower)
//	     6   index: interpret the XOR pattern as an index
//	         or isMid if case mode is cIgnorableUncased.
//	  5..4   CCC: zero (normal or break), above or other
//	}
//	   3     exception: interpret this value as an exception index
//	         (TODO: is this bit necessary? Probably implied from case mode.)
//	2..0     case mode
//
// For the non-exceptional cases, a rune must be either uncased, lowercase or
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
// least-significant bits of the rune).
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16

const (
	casedMask      = 0x0003
	fullCasedMask  = 0x0007
	ignorableMask  = 0x0006
	ignorableValue = 0x0004

	inverseFoldBit = 1 << 7
	isMidBit       = 1 << 6

	exceptionBit     = 1 << 3
	exceptionShift   = 4
	numExceptionBits = 12

	xorIndexBit = 1 << 6
	xorShift    = 8

	// There is no mapping if all xor bits and the exception bit are zero.
	hasMappingMask = 0xff80 | exceptionBit
)

// The case mode bits encode the case type of a rune. This includes uncased,
// title, upper and lower case and case ignorable. (For a definition of these
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
// cases, a rune can be both cased and case-ignorable. This is encoded by
// cIgnorableCased. A rune of this type is always lower case. Some runes are
// cased while not having a mapping.
//
// A common pattern for scripts in the Unicode standard is for upper and lower
// case runes to alternate for increasing rune values (e.g. the accented Latin
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
// characters). We use this property by defining a cXORCase mode, where the case
// mode (always upper or lower case) is derived from the rune value. As the XOR
// pattern for case mappings is often identical for successive runes, using
// cXORCase can result in large series of identical trie values. This, in turn,
// allows us to better compress the trie blocks.
const (
	cUncased          info = iota // 000
	cTitle                        // 001
	cLower                        // 010
	cUpper                        // 011
	cIgnorableUncased             // 100
	cIgnorableCased               // 101 // lower case if mappings exist
	cXORCase                      // 11x // case is cLower | ((rune&1) ^ x)

	maxCaseMode = cUpper
)

func (c info) isCased() bool {
	return c&casedMask != 0
}

func (c info) isCaseIgnorable() bool {
	return c&ignorableMask == ignorableValue
}

func (c info) isNotCasedAndNotCaseIgnorable() bool {
	return c&fullCasedMask == 0
}

func (c info) isCaseIgnorableAndNotCased() bool {
	return c&fullCasedMask == cIgnorableUncased
}

func (c info) isMid() bool {
	return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
}

// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
// the rune also has the break category Break (see below).
const (
	cccBreak info = iota << 4
	cccZero
	cccAbove
	cccOther

	cccMask = cccBreak | cccZero | cccAbove | cccOther
)

const (
	starter       = 0
	above         = 230
	iotaSubscript = 240
)

// The exceptions slice holds data that does not fit in a normal info entry.
// The entry is pointed to by the exception index in an entry. It has the
// following format:
//
// Header
//
//	byte 0:
//	 7..6  unused
//	 5..4  CCC type (same bits as entry)
//	    3  unused
//	 2..0  length of fold
//
//	byte 1:
//	 7..6  unused
//	 5..3  length of 1st mapping of case type
//	 2..0  length of 2nd mapping of case type
//
//	  case     1st    2nd
//	  lower -> upper, title
//	  upper -> lower, title
//	  title -> lower, upper
//
// Lengths with the value 0x7 indicate no value and imply no change.
// A length of 0 indicates a mapping to zero-length string.
//
// Body bytes:
//
//	case folding bytes
//	lowercase mapping bytes
//	uppercase mapping bytes
//	titlecase mapping bytes
//	closure mapping bytes (for NFKC_Casefold). (TODO)
//
// Fallbacks:
//
//	missing fold  -> lower
//	missing title -> upper
//	all missing   -> original rune
//
// exceptions starts with a dummy byte to enforce that there is no zero index
// value.
const (
	lengthMask = 0x07
	lengthBits = 3
	noChange   = 0
)

// References to generated trie.

var trie = newCaseTrie(0)

var sparse = sparseBlocks{
	values:  sparseValues[:],
	offsets: sparseOffsets[:],
}

// Sparse block lookup code.

// valueRange is an entry in a sparse block.
type valueRange struct {
	value  uint16
	lo, hi byte
}

type sparseBlocks struct {
	values  []valueRange
	offsets []uint16
}

// lookup returns the value from values block n for byte b using binary search.
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
	lo := s.offsets[n]
	hi := s.offsets[n+1]
	for lo < hi {
		m := lo + (hi-lo)/2
		r := s.values[m]
		if r.lo <= b && b <= r.hi {
			return r.value
		}
		if b < r.lo {
			hi = m
		} else {
			lo = m + 1
		}
	}
	return 0
}

// lastRuneForTesting is the last rune used for testing. Everything after this
// is boring.
const lastRuneForTesting = rune(0x1FFFF)
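A minimal sketch of the cXORCase idea described in the comments above, assuming nothing beyond the standard library: in the accented Latin ranges, upper- and lowercase code points alternate, so toggling the least-significant bit flips the case. The real mapper reads the XOR pattern from the trie rather than hard-coding it.

package main

import "fmt"

func main() {
	// Hypothetical demo for a rune in the U+0100 range, where adjacent code
	// points alternate case; toggling bit 0 maps lower to upper and back.
	lower := rune(0x0101) // 'ā'
	upper := lower ^ 1    // 'Ā'
	fmt.Printf("%c -> %c\n", lower, upper)
}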
49
vendor/golang.org/x/text/internal/internal.go
generated
vendored
Normal file
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package internal contains non-exported functionality that is used by
// packages in the text repository.
package internal // import "golang.org/x/text/internal"

import (
	"sort"

	"golang.org/x/text/language"
)

// SortTags sorts tags in place.
func SortTags(tags []language.Tag) {
	sort.Sort(sorter(tags))
}

type sorter []language.Tag

func (s sorter) Len() int {
	return len(s)
}

func (s sorter) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

func (s sorter) Less(i, j int) bool {
	return s[i].String() < s[j].String()
}

// UniqueTags sorts and filters duplicate tags in place and returns a slice with
// only unique tags.
func UniqueTags(tags []language.Tag) []language.Tag {
	if len(tags) <= 1 {
		return tags
	}
	SortTags(tags)
	k := 0
	for i := 1; i < len(tags); i++ {
		if tags[k].String() < tags[i].String() {
			k++
			tags[k] = tags[i]
		}
	}
	return tags[:k+1]
}
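A short usage sketch for UniqueTags. Since this is an internal package it can only be imported from within the golang.org/x/text module, so the test package shown here is hypothetical; language.MustParse is assumed from the language package.

package internal_test

import (
	"fmt"

	"golang.org/x/text/internal"
	"golang.org/x/text/language"
)

func ExampleUniqueTags() {
	tags := []language.Tag{
		language.MustParse("en"),
		language.MustParse("de"),
		language.MustParse("en"),
	}
	// Sorts by Tag.String and drops the duplicate "en" in place.
	fmt.Println(internal.UniqueTags(tags))
}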
67
vendor/golang.org/x/text/internal/match.go
generated
vendored
Normal file
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package internal

// This file contains matchers that implement CLDR inheritance.
//
// See https://unicode.org/reports/tr35/#Locale_Inheritance.
//
// Some of the inheritance described in this document is already handled by
// the cldr package.

import (
	"golang.org/x/text/language"
)

// TODO: consider whether (some of) the matching algorithm needs to be public
// after getting a feel for what is generic and what is specific.

// NewInheritanceMatcher returns a matcher that matches based on the inheritance
// chain.
//
// The matcher uses canonicalization and the parent relationship to find a
// match. The resulting match will always be either Und or a language with the
// same language and script as the requested language. It will not match
// languages for which there is understood to be mutual or one-directional
// intelligibility.
//
// A Match will indicate an Exact match if the language matches after
// canonicalization and High if the matched tag is a parent.
func NewInheritanceMatcher(t []language.Tag) *InheritanceMatcher {
	tags := &InheritanceMatcher{make(map[language.Tag]int)}
	for i, tag := range t {
		ct, err := language.All.Canonicalize(tag)
		if err != nil {
			ct = tag
		}
		tags.index[ct] = i
	}
	return tags
}

type InheritanceMatcher struct {
	index map[language.Tag]int
}

func (m InheritanceMatcher) Match(want ...language.Tag) (language.Tag, int, language.Confidence) {
	for _, t := range want {
		ct, err := language.All.Canonicalize(t)
		if err != nil {
			ct = t
		}
		conf := language.Exact
		for {
			if index, ok := m.index[ct]; ok {
				return ct, index, conf
			}
			if ct == language.Und {
				break
			}
			ct = ct.Parent()
			conf = language.High
		}
	}
	return language.Und, 0, language.No
}
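A sketch of how the matcher walks the parent chain, again from a hypothetical test file inside the text repository and assuming language.MustParse: "en-GB" is not in the supported list, so it falls back to its parent "en" with High confidence.

package internal_test

import (
	"fmt"

	"golang.org/x/text/internal"
	"golang.org/x/text/language"
)

func ExampleNewInheritanceMatcher() {
	m := internal.NewInheritanceMatcher([]language.Tag{
		language.MustParse("en"),
		language.MustParse("pt-PT"),
	})
	// "en-GB" is not supported directly; its parent "en" matches at index 0.
	tag, index, conf := m.Match(language.MustParse("en-GB"))
	fmt.Println(tag, index, conf)
}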
187
vendor/golang.org/x/text/runes/cond.go
generated
vendored
Normal file
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runes

import (
	"unicode/utf8"

	"golang.org/x/text/transform"
)

// Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is.
// This is done for various reasons:
// - To retain the semantics of the Nop transformer: if input is passed to a Nop
//   one would expect it to be unchanged.
// - It would be very expensive to pass a converted RuneError to a transformer:
//   a transformer might need more source bytes after RuneError, meaning that
//   the only way to pass it safely is to create a new buffer and manage the
//   intermingling of RuneErrors and normal input.
// - Many transformers leave ill-formed UTF-8 as is, so this is not
//   inconsistent. Generally ill-formed UTF-8 is only replaced if it is a
//   logical consequence of the operation (as for Map) or if it otherwise would
//   pose security concerns (as for Remove).
// - An alternative would be to return an error on ill-formed UTF-8, but this
//   would be inconsistent with other operations.

// If returns a transformer that applies tIn to consecutive runes for which
// s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset
// is called on tIn and tNotIn at the start of each run. A Nop transformer will
// substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated
// to RuneError to determine which transformer to apply, but is passed as is to
// the respective transformer.
func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
	if tIn == nil && tNotIn == nil {
		return Transformer{transform.Nop}
	}
	if tIn == nil {
		tIn = transform.Nop
	}
	if tNotIn == nil {
		tNotIn = transform.Nop
	}
	sIn, ok := tIn.(transform.SpanningTransformer)
	if !ok {
		sIn = dummySpan{tIn}
	}
	sNotIn, ok := tNotIn.(transform.SpanningTransformer)
	if !ok {
		sNotIn = dummySpan{tNotIn}
	}

	a := &cond{
		tIn:    sIn,
		tNotIn: sNotIn,
		f:      s.Contains,
	}
	a.Reset()
	return Transformer{a}
}

type dummySpan struct{ transform.Transformer }

func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) {
	return 0, transform.ErrEndOfSpan
}

type cond struct {
	tIn, tNotIn transform.SpanningTransformer
	f           func(rune) bool
	check       func(rune) bool               // current check to perform
	t           transform.SpanningTransformer // current transformer to use
}

// Reset implements transform.Transformer.
func (t *cond) Reset() {
	t.check = t.is
	t.t = t.tIn
	t.t.Reset() // notIn will be reset on first usage.
}

func (t *cond) is(r rune) bool {
	if t.f(r) {
		return true
	}
	t.check = t.isNot
	t.t = t.tNotIn
	t.tNotIn.Reset()
	return false
}

func (t *cond) isNot(r rune) bool {
	if !t.f(r) {
		return true
	}
	t.check = t.is
	t.t = t.tIn
	t.tIn.Reset()
	return false
}

// This implementation of Span doesn't help all too much, but it needs to be
// there to satisfy this package's Transformer interface.
// TODO: there is certainly room for improvement, though. For example, if
// t.t == transform.Nop (which will be a common occurrence) it will save a
// bundle to special-case that loop.
func (t *cond) Span(src []byte, atEOF bool) (n int, err error) {
	p := 0
	for n < len(src) && err == nil {
		// Don't process too much at a time as the Spanner that will be
		// called on this block may terminate early.
		const maxChunk = 4096
		max := len(src)
		if v := n + maxChunk; v < max {
			max = v
		}
		atEnd := false
		size := 0
		current := t.t
		for ; p < max; p += size {
			r := rune(src[p])
			if r < utf8.RuneSelf {
				size = 1
			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
				if !atEOF && !utf8.FullRune(src[p:]) {
					err = transform.ErrShortSrc
					break
				}
			}
			if !t.check(r) {
				// The next rune will be the start of a new run.
				atEnd = true
				break
			}
		}
		n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src)))
		n += n2
		if err2 != nil {
			return n, err2
		}
		// At this point either err != nil or t.check will pass for the rune at p.
		p = n + size
	}
	return n, err
}

func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	p := 0
	for nSrc < len(src) && err == nil {
		// Don't process too much at a time, as the work might be wasted if the
		// destination buffer isn't large enough to hold the result or a
		// transform returns an error early.
		const maxChunk = 4096
		max := len(src)
		if n := nSrc + maxChunk; n < len(src) {
			max = n
		}
		atEnd := false
		size := 0
		current := t.t
		for ; p < max; p += size {
			r := rune(src[p])
			if r < utf8.RuneSelf {
				size = 1
			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
				if !atEOF && !utf8.FullRune(src[p:]) {
					err = transform.ErrShortSrc
					break
				}
			}
			if !t.check(r) {
				// The next rune will be the start of a new run.
				atEnd = true
				break
			}
		}
		nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src)))
		nDst += nDst2
		nSrc += nSrc2
		if err2 != nil {
			return nDst, nSrc, err2
		}
		// At this point either err != nil or t.check will pass for the rune at p.
		p = nSrc + size
	}
	return nDst, nSrc, err
}
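A small usage sketch of If, built only from pieces defined in this vendored package: the mapping transformer runs over Latin runes only, and the nil tNotIn falls back to the implicit Nop.

package main

import (
	"fmt"
	"unicode"

	"golang.org/x/text/runes"
)

func main() {
	// Redact Latin letters only; runes outside unicode.Latin are left alone.
	redact := runes.If(runes.In(unicode.Latin), runes.Map(func(rune) rune { return '*' }), nil)
	fmt.Println(redact.String("abc δύο 123")) // *** δύο 123
}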
355
vendor/golang.org/x/text/runes/runes.go
generated
vendored
Normal file
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package runes provides transforms for UTF-8 encoded text.
package runes // import "golang.org/x/text/runes"

import (
	"unicode"
	"unicode/utf8"

	"golang.org/x/text/transform"
)

// A Set is a collection of runes.
type Set interface {
	// Contains returns true if r is contained in the set.
	Contains(r rune) bool
}

type setFunc func(rune) bool

func (s setFunc) Contains(r rune) bool {
	return s(r)
}

// Note: using funcs here instead of wrapping types results in cleaner
// documentation and a smaller API.

// In creates a Set with a Contains method that returns true for all runes in
// the given RangeTable.
func In(rt *unicode.RangeTable) Set {
	return setFunc(func(r rune) bool { return unicode.Is(rt, r) })
}

// NotIn creates a Set with a Contains method that returns true for all runes
// not in the given RangeTable.
func NotIn(rt *unicode.RangeTable) Set {
	return setFunc(func(r rune) bool { return !unicode.Is(rt, r) })
}

// Predicate creates a Set with a Contains method that returns f(r).
func Predicate(f func(rune) bool) Set {
	return setFunc(f)
}

// Transformer implements the transform.Transformer interface.
type Transformer struct {
	t transform.SpanningTransformer
}

func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	return t.t.Transform(dst, src, atEOF)
}

func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) {
	return t.t.Span(b, atEOF)
}

func (t Transformer) Reset() { t.t.Reset() }

// Bytes returns a new byte slice with the result of converting b using t. It
// calls Reset on t. It returns nil if any error was found. This can only happen
// if an error-producing Transformer is passed to If.
func (t Transformer) Bytes(b []byte) []byte {
	b, _, err := transform.Bytes(t, b)
	if err != nil {
		return nil
	}
	return b
}

// String returns a string with the result of converting s using t. It calls
// Reset on t. It returns the empty string if any error was found. This can only
// happen if an error-producing Transformer is passed to If.
func (t Transformer) String(s string) string {
	s, _, err := transform.String(t, s)
	if err != nil {
		return ""
	}
	return s
}

// TODO:
// - Copy: copying strings and bytes in whole-rune units.
// - Validation (maybe)
// - Well-formed-ness (maybe)

const runeErrorString = string(utf8.RuneError)

// Remove returns a Transformer that removes runes r for which s.Contains(r).
// Illegal input bytes are replaced by RuneError before being checked against s.
func Remove(s Set) Transformer {
	if f, ok := s.(setFunc); ok {
		// This little trick cuts the running time of BenchmarkRemove for sets
		// created by Predicate roughly in half.
		// TODO: special-case RangeTables as well.
		return Transformer{remove(f)}
	}
	return Transformer{remove(s.Contains)}
}

// TODO: remove transform.RemoveFunc.

type remove func(r rune) bool

func (remove) Reset() {}

// Span implements transform.Spanner.
func (t remove) Span(src []byte, atEOF bool) (n int, err error) {
	for r, size := rune(0), 0; n < len(src); {
		if r = rune(src[n]); r < utf8.RuneSelf {
			size = 1
		} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
			// Invalid rune.
			if !atEOF && !utf8.FullRune(src[n:]) {
				err = transform.ErrShortSrc
			} else {
				err = transform.ErrEndOfSpan
			}
			break
		}
		if t(r) {
			err = transform.ErrEndOfSpan
			break
		}
		n += size
	}
	return
}

// Transform implements transform.Transformer.
func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for r, size := rune(0), 0; nSrc < len(src); {
		if r = rune(src[nSrc]); r < utf8.RuneSelf {
			size = 1
		} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
			// Invalid rune.
			if !atEOF && !utf8.FullRune(src[nSrc:]) {
				err = transform.ErrShortSrc
				break
			}
			// We replace illegal bytes with RuneError. Not doing so might
			// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
			// The resulting byte sequence may subsequently contain runes
			// for which t(r) is true that were passed unnoticed.
			if !t(utf8.RuneError) {
				if nDst+3 > len(dst) {
					err = transform.ErrShortDst
					break
				}
				dst[nDst+0] = runeErrorString[0]
				dst[nDst+1] = runeErrorString[1]
				dst[nDst+2] = runeErrorString[2]
				nDst += 3
			}
			nSrc++
			continue
		}
		if t(r) {
			nSrc += size
			continue
		}
		if nDst+size > len(dst) {
			err = transform.ErrShortDst
			break
		}
		for i := 0; i < size; i++ {
			dst[nDst] = src[nSrc]
			nDst++
			nSrc++
		}
	}
	return
}

// Map returns a Transformer that maps the runes in the input using the given
// mapping. Illegal bytes in the input are converted to utf8.RuneError before
// being passed to the mapping func.
func Map(mapping func(rune) rune) Transformer {
	return Transformer{mapper(mapping)}
}

type mapper func(rune) rune

func (mapper) Reset() {}

// Span implements transform.Spanner.
func (t mapper) Span(src []byte, atEOF bool) (n int, err error) {
	for r, size := rune(0), 0; n < len(src); n += size {
		if r = rune(src[n]); r < utf8.RuneSelf {
			size = 1
		} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
			// Invalid rune.
			if !atEOF && !utf8.FullRune(src[n:]) {
				err = transform.ErrShortSrc
			} else {
				err = transform.ErrEndOfSpan
			}
			break
		}
		if t(r) != r {
			err = transform.ErrEndOfSpan
			break
		}
	}
	return n, err
}

// Transform implements transform.Transformer.
func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	var replacement rune
	var b [utf8.UTFMax]byte

	for r, size := rune(0), 0; nSrc < len(src); {
		if r = rune(src[nSrc]); r < utf8.RuneSelf {
			if replacement = t(r); replacement < utf8.RuneSelf {
				if nDst == len(dst) {
					err = transform.ErrShortDst
					break
				}
				dst[nDst] = byte(replacement)
				nDst++
				nSrc++
				continue
			}
			size = 1
		} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
			// Invalid rune.
			if !atEOF && !utf8.FullRune(src[nSrc:]) {
				err = transform.ErrShortSrc
				break
			}

			if replacement = t(utf8.RuneError); replacement == utf8.RuneError {
				if nDst+3 > len(dst) {
					err = transform.ErrShortDst
					break
				}
				dst[nDst+0] = runeErrorString[0]
				dst[nDst+1] = runeErrorString[1]
				dst[nDst+2] = runeErrorString[2]
				nDst += 3
				nSrc++
				continue
			}
		} else if replacement = t(r); replacement == r {
			if nDst+size > len(dst) {
				err = transform.ErrShortDst
				break
			}
			for i := 0; i < size; i++ {
				dst[nDst] = src[nSrc]
				nDst++
				nSrc++
			}
			continue
		}

		n := utf8.EncodeRune(b[:], replacement)

		if nDst+n > len(dst) {
			err = transform.ErrShortDst
			break
		}
		for i := 0; i < n; i++ {
			dst[nDst] = b[i]
			nDst++
		}
		nSrc += size
	}
	return
}

// ReplaceIllFormed returns a transformer that replaces all input bytes that are
// not part of a well-formed UTF-8 code sequence with utf8.RuneError.
func ReplaceIllFormed() Transformer {
	return Transformer{&replaceIllFormed{}}
}

type replaceIllFormed struct{ transform.NopResetter }

func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) {
	for n < len(src) {
		// ASCII fast path.
		if src[n] < utf8.RuneSelf {
			n++
			continue
		}

		r, size := utf8.DecodeRune(src[n:])

		// Look for a valid non-ASCII rune.
		if r != utf8.RuneError || size != 1 {
			n += size
			continue
		}

		// Look for short source data.
		if !atEOF && !utf8.FullRune(src[n:]) {
			err = transform.ErrShortSrc
			break
		}

		// We have an invalid rune.
		err = transform.ErrEndOfSpan
		break
	}
	return n, err
}

func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for nSrc < len(src) {
		// ASCII fast path.
		if r := src[nSrc]; r < utf8.RuneSelf {
			if nDst == len(dst) {
				err = transform.ErrShortDst
				break
			}
			dst[nDst] = r
			nDst++
			nSrc++
			continue
		}

		// Look for a valid non-ASCII rune.
		if _, size := utf8.DecodeRune(src[nSrc:]); size != 1 {
			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
				err = transform.ErrShortDst
				break
			}
			nDst += size
			nSrc += size
			continue
		}

		// Look for short source data.
		if !atEOF && !utf8.FullRune(src[nSrc:]) {
			err = transform.ErrShortSrc
			break
		}

		// We have an invalid rune.
		if nDst+3 > len(dst) {
			err = transform.ErrShortDst
			break
		}
		dst[nDst+0] = runeErrorString[0]
		dst[nDst+1] = runeErrorString[1]
		dst[nDst+2] = runeErrorString[2]
		nDst += 3
		nSrc++
	}
	return nDst, nSrc, err
}
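Two short usage sketches of the public API above. The diacritic example assumes the input is already decomposed (NFD), since Remove only strips the combining marks themselves.

package main

import (
	"fmt"
	"unicode"

	"golang.org/x/text/runes"
)

func main() {
	// Remove nonspacing marks, e.g. the combining acute in "cafe\u0301" (NFD).
	strip := runes.Remove(runes.In(unicode.Mn))
	fmt.Println(strip.String("cafe\u0301")) // cafe

	// Map every Unicode space separator to a plain ASCII space.
	spaces := runes.Map(func(r rune) rune {
		if unicode.Is(unicode.Zs, r) {
			return ' '
		}
		return r
	})
	fmt.Println(spaces.String("a\u00A0b")) // a b
}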
36
vendor/golang.org/x/text/secure/precis/class.go
generated
vendored
Normal file
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package precis

import (
	"unicode/utf8"
)

// TODO: Add contextual character rules from Appendix A of RFC5892.

// A class is a set of characters that match certain derived properties. The
// PRECIS framework defines two classes: The Freeform class and the Identifier
// class. The freeform class should be used for profiles where expressiveness is
// prioritized over safety such as nicknames or passwords. The identifier class
// should be used for profiles where safety is the first priority such as
// addressable network labels and usernames.
type class struct {
	validFrom property
}

// Contains satisfies the runes.Set interface and returns whether the given rune
// is a member of the class.
func (c class) Contains(r rune) bool {
	b := make([]byte, 4)
	n := utf8.EncodeRune(b, r)

	trieval, _ := dpTrie.lookup(b[:n])
	return c.validFrom <= property(trieval)
}

var (
	identifier = &class{validFrom: pValid}
	freeform   = &class{validFrom: idDisOrFreePVal}
)
139
vendor/golang.org/x/text/secure/precis/context.go
generated
vendored
Normal file
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package precis

import "errors"

// This file contains tables and code related to context rules.

type catBitmap uint16

const (
	// These bits, once set depending on the current value, are never unset.
	bJapanese catBitmap = 1 << iota
	bArabicIndicDigit
	bExtendedArabicIndicDigit

	// These bits are set on each iteration depending on the current value.
	bJoinStart
	bJoinMid
	bJoinEnd
	bVirama
	bLatinSmallL
	bGreek
	bHebrew

	// These bits indicate which of the permanent bits need to be set at the
	// end of the checks.
	bMustHaveJapn

	permanent = bJapanese | bArabicIndicDigit | bExtendedArabicIndicDigit | bMustHaveJapn
)

const finalShift = 10

var errContext = errors.New("precis: contextual rule violated")

func init() {
	// Programmatically set these required bits, as setting them manually seems
	// too error-prone.
	for i, ct := range categoryTransitions {
		categoryTransitions[i].keep |= permanent
		categoryTransitions[i].accept |= ct.term
	}
}

var categoryTransitions = []struct {
	keep catBitmap // mask selecting which bits to keep from the previous state
	set  catBitmap // mask for which bits to set for this transition

	// These bitmaps are used for rules that require lookahead.
	// term&accept == term must be true, which is enforced programmatically.
	term   catBitmap // bits accepted as termination condition
	accept catBitmap // bits that pass, but not sufficient as termination

	// The rule function cannot take a *context as an argument, as it would
	// cause the context to escape, adding significant overhead.
	rule func(beforeBits catBitmap) (doLookahead bool, err error)
}{
	joiningL:          {set: bJoinStart},
	joiningD:          {set: bJoinStart | bJoinEnd},
	joiningT:          {keep: bJoinStart, set: bJoinMid},
	joiningR:          {set: bJoinEnd},
	viramaModifier:    {set: bVirama},
	viramaJoinT:       {set: bVirama | bJoinMid},
	latinSmallL:       {set: bLatinSmallL},
	greek:             {set: bGreek},
	greekJoinT:        {set: bGreek | bJoinMid},
	hebrew:            {set: bHebrew},
	hebrewJoinT:       {set: bHebrew | bJoinMid},
	japanese:          {set: bJapanese},
	katakanaMiddleDot: {set: bMustHaveJapn},

	zeroWidthNonJoiner: {
		term:   bJoinEnd,
		accept: bJoinMid,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bVirama != 0 {
				return false, nil
			}
			if before&bJoinStart == 0 {
				return false, errContext
			}
			return true, nil
		},
	},
	zeroWidthJoiner: {
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bVirama == 0 {
				err = errContext
			}
			return false, err
		},
	},
	middleDot: {
		term: bLatinSmallL,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bLatinSmallL == 0 {
				return false, errContext
			}
			return true, nil
		},
	},
	greekLowerNumeralSign: {
		set:  bGreek,
		term: bGreek,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			return true, nil
		},
	},
	hebrewPreceding: {
		set: bHebrew,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bHebrew == 0 {
				err = errContext
			}
			return false, err
		},
	},
	arabicIndicDigit: {
		set: bArabicIndicDigit,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bExtendedArabicIndicDigit != 0 {
				err = errContext
			}
			return false, err
		},
	},
	extendedArabicIndicDigit: {
		set: bExtendedArabicIndicDigit,
		rule: func(before catBitmap) (doLookAhead bool, err error) {
			if before&bArabicIndicDigit != 0 {
				err = errContext
			}
			return false, err
		},
	},
}
14
vendor/golang.org/x/text/secure/precis/doc.go
generated
vendored
Normal file
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package precis contains types and functions for the preparation,
// enforcement, and comparison of internationalized strings ("PRECIS") as
// defined in RFC 8264. It also contains several pre-defined profiles for
// passwords, nicknames, and usernames as defined in RFC 8265 and RFC 8266.
//
// BE ADVISED: This package is under construction and the API may change in
// backwards incompatible ways and without notice.
package precis // import "golang.org/x/text/secure/precis"

//go:generate go run gen.go gen_trieval.go
72
vendor/golang.org/x/text/secure/precis/nickname.go
generated
vendored
Normal file
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package precis

import (
	"unicode"
	"unicode/utf8"

	"golang.org/x/text/transform"
)

type nickAdditionalMapping struct {
	// TODO: This transformer needs to be stateless somehow…
	notStart  bool
	prevSpace bool
}

func (t *nickAdditionalMapping) Reset() {
	t.prevSpace = false
	t.notStart = false
}

func (t *nickAdditionalMapping) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	// RFC 8266 §2.1. Rules
	//
	// 2. Additional Mapping Rule: The additional mapping rule consists of
	//    the following sub-rules.
	//
	//    a. Map any instances of non-ASCII space to SPACE (U+0020); a
	//       non-ASCII space is any Unicode code point having a general
	//       category of "Zs", naturally with the exception of SPACE
	//       (U+0020). (The inclusion of only ASCII space prevents
	//       confusion with various non-ASCII space code points, many of
	//       which are difficult to reproduce across different input
	//       methods.)
	//
	//    b. Remove any instances of the ASCII space character at the
	//       beginning or end of a nickname (e.g., "stpeter " is mapped to
	//       "stpeter").
	//
	//    c. Map interior sequences of more than one ASCII space character
	//       to a single ASCII space character (e.g., "St  Peter" is
	//       mapped to "St Peter").
	for nSrc < len(src) {
		r, size := utf8.DecodeRune(src[nSrc:])
		if size == 0 { // Incomplete UTF-8 encoding
			if !atEOF {
				return nDst, nSrc, transform.ErrShortSrc
			}
			size = 1
		}
		if unicode.Is(unicode.Zs, r) {
			t.prevSpace = true
		} else {
			if t.prevSpace && t.notStart {
				dst[nDst] = ' '
				nDst += 1
			}
			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
				nDst += size
				return nDst, nSrc, transform.ErrShortDst
			}
			nDst += size
			t.prevSpace = false
			t.notStart = true
		}
		nSrc += size
	}
	return nDst, nSrc, nil
}
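The effect of these sub-rules is easiest to see through the Nickname profile defined in profiles.go further down, which wires this transformer in as its additional mapping. A hedged sketch:

package main

import (
	"fmt"

	"golang.org/x/text/secure/precis"
)

func main() {
	// Leading/trailing spaces are dropped and interior runs are collapsed.
	nick, err := precis.Nickname.String("  St    Peter  ")
	fmt.Println(nick, err) // "St Peter" <nil>
}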
157
vendor/golang.org/x/text/secure/precis/options.go
generated
vendored
Normal file
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package precis

import (
	"golang.org/x/text/cases"
	"golang.org/x/text/language"
	"golang.org/x/text/runes"
	"golang.org/x/text/transform"
	"golang.org/x/text/unicode/norm"
)

// An Option is used to define the behavior and rules of a Profile.
type Option func(*options)

type options struct {
	// Preparation options
	foldWidth bool

	// Enforcement options
	asciiLower    bool
	cases         transform.SpanningTransformer
	disallow      runes.Set
	norm          transform.SpanningTransformer
	additional    []func() transform.SpanningTransformer
	width         transform.SpanningTransformer
	disallowEmpty bool
	bidiRule      bool
	repeat        bool

	// Comparison options
	ignorecase bool
}

func getOpts(o ...Option) (res options) {
	for _, f := range o {
		f(&res)
	}
	// Using a SpanningTransformer instead of norm.Form prevents an allocation
	// down the road.
	if res.norm == nil {
		res.norm = norm.NFC
	}
	return
}

var (
	// The IgnoreCase option causes the profile to perform a case insensitive
	// comparison during the PRECIS comparison step.
	IgnoreCase Option = ignoreCase

	// The FoldWidth option causes the profile to map non-canonical wide and
	// narrow variants to their decomposition mapping. This is useful for
	// profiles that are based on the identifier class which would otherwise
	// disallow such characters.
	FoldWidth Option = foldWidth

	// The DisallowEmpty option causes the enforcement step to return an error if
	// the resulting string would be empty.
	DisallowEmpty Option = disallowEmpty

	// The BidiRule option causes the Bidi Rule defined in RFC 5893 to be
	// applied.
	BidiRule Option = bidiRule
)

var (
	ignoreCase = func(o *options) {
		o.ignorecase = true
	}
	foldWidth = func(o *options) {
		o.foldWidth = true
	}
	disallowEmpty = func(o *options) {
		o.disallowEmpty = true
	}
	bidiRule = func(o *options) {
		o.bidiRule = true
	}
	repeat = func(o *options) {
		o.repeat = true
	}
)

// TODO: move this logic to package transform

type spanWrap struct{ transform.Transformer }

func (s spanWrap) Span(src []byte, atEOF bool) (n int, err error) {
	return 0, transform.ErrEndOfSpan
}

// TODO: allow different types? For instance:
//	func() transform.Transformer
//	func() transform.SpanningTransformer
//	func([]byte) bool  // validation only
//
// Also, would be great if we could detect if a transformer is reentrant.

// The AdditionalMapping option defines the additional mapping rule for the
// Profile by applying Transformers in sequence.
func AdditionalMapping(t ...func() transform.Transformer) Option {
	return func(o *options) {
		for _, f := range t {
			sf := func() transform.SpanningTransformer {
				return f().(transform.SpanningTransformer)
			}
			if _, ok := f().(transform.SpanningTransformer); !ok {
				sf = func() transform.SpanningTransformer {
					return spanWrap{f()}
				}
			}
			o.additional = append(o.additional, sf)
		}
	}
}

// The Norm option defines a Profile's normalization rule. Defaults to NFC.
func Norm(f norm.Form) Option {
	return func(o *options) {
		o.norm = f
	}
}

// The FoldCase option defines a Profile's case mapping rule. Options can be
// provided to determine the type of case folding used.
func FoldCase(opts ...cases.Option) Option {
	return func(o *options) {
		o.asciiLower = true
		o.cases = cases.Fold(opts...)
	}
}

// The LowerCase option defines a Profile's case mapping rule. Options can be
// provided to determine the type of case folding used.
func LowerCase(opts ...cases.Option) Option {
	return func(o *options) {
		o.asciiLower = true
		if len(opts) == 0 {
			o.cases = cases.Lower(language.Und, cases.HandleFinalSigma(false))
			return
		}

		opts = append([]cases.Option{cases.HandleFinalSigma(false)}, opts...)
		o.cases = cases.Lower(language.Und, opts...)
	}
}

// The Disallow option further restricts a Profile's allowed characters beyond
// what is disallowed by the underlying string class.
func Disallow(set runes.Set) Option {
	return func(o *options) {
		o.disallow = set
	}
}
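A sketch of combining these options into a custom profile via NewIdentifier, which is defined in profile.go below; the option set shown simply mirrors the UsernameCaseMapped profile.

package main

import (
	"fmt"

	"golang.org/x/text/secure/precis"
	"golang.org/x/text/unicode/norm"
)

func main() {
	p := precis.NewIdentifier(
		precis.FoldWidth,
		precis.LowerCase(),
		precis.Norm(norm.NFC),
		precis.BidiRule,
	)
	s, err := p.String("ShopKeeper")
	fmt.Println(s, err) // shopkeeper <nil>
}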
412
vendor/golang.org/x/text/secure/precis/profile.go
generated
vendored
Normal file
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package precis

import (
	"bytes"
	"errors"
	"unicode/utf8"

	"golang.org/x/text/cases"
	"golang.org/x/text/language"
	"golang.org/x/text/runes"
	"golang.org/x/text/secure/bidirule"
	"golang.org/x/text/transform"
	"golang.org/x/text/width"
)

var (
	errDisallowedRune = errors.New("precis: disallowed rune encountered")
)

var dpTrie = newDerivedPropertiesTrie(0)

// A Profile represents a set of rules for normalizing and validating strings in
// the PRECIS framework.
type Profile struct {
	options
	class *class
}

// NewIdentifier creates a new PRECIS profile based on the Identifier string
// class. Profiles created from this class are suitable for use where safety is
// prioritized over expressiveness like network identifiers, user accounts, chat
// rooms, and file names.
func NewIdentifier(opts ...Option) *Profile {
	return &Profile{
		options: getOpts(opts...),
		class:   identifier,
	}
}

// NewFreeform creates a new PRECIS profile based on the Freeform string class.
// Profiles created from this class are suitable for use where expressiveness is
// prioritized over safety like passwords, and display-elements such as
// nicknames in a chat room.
func NewFreeform(opts ...Option) *Profile {
	return &Profile{
		options: getOpts(opts...),
		class:   freeform,
	}
}

// NewRestrictedProfile creates a new PRECIS profile based on an existing
// profile.
// If the parent profile already had the Disallow option set, the new rule
// overrides the parent's rule.
func NewRestrictedProfile(parent *Profile, disallow runes.Set) *Profile {
	p := *parent
	Disallow(disallow)(&p.options)
	return &p
}

// NewTransformer creates a new transform.Transformer that performs the PRECIS
// preparation and enforcement steps on the given UTF-8 encoded bytes.
func (p *Profile) NewTransformer() *Transformer {
	var ts []transform.Transformer

	// These transforms are applied in the order defined in
	// https://tools.ietf.org/html/rfc7564#section-7

	// RFC 8266 §2.1:
	//
	//     Implementation experience has shown that applying the rules for the
	//     Nickname profile is not an idempotent procedure for all code points.
	//     Therefore, an implementation SHOULD apply the rules repeatedly until
	//     the output string is stable; if the output string does not stabilize
	//     after reapplying the rules three (3) additional times after the first
	//     application, the implementation SHOULD terminate application of the
	//     rules and reject the input string as invalid.
	//
	// There is no known string that will change indefinitely, so repeat 4 times
	// and rely on the Span method to keep things relatively performant.
	r := 1
	if p.options.repeat {
		r = 4
	}
	for ; r > 0; r-- {
		if p.options.foldWidth {
			ts = append(ts, width.Fold)
		}

		for _, f := range p.options.additional {
			ts = append(ts, f())
		}

		if p.options.cases != nil {
			ts = append(ts, p.options.cases)
		}

		ts = append(ts, p.options.norm)

		if p.options.bidiRule {
			ts = append(ts, bidirule.New())
		}

		ts = append(ts, &checker{p: p, allowed: p.Allowed()})
	}

	// TODO: Add the disallow empty rule with a dummy transformer?

	return &Transformer{transform.Chain(ts...)}
}

var errEmptyString = errors.New("precis: transformation resulted in empty string")

type buffers struct {
	src  []byte
	buf  [2][]byte
	next int
}

func (b *buffers) apply(t transform.SpanningTransformer) (err error) {
	n, err := t.Span(b.src, true)
	if err != transform.ErrEndOfSpan {
		return err
	}
	x := b.next & 1
	if b.buf[x] == nil {
		b.buf[x] = make([]byte, 0, 8+len(b.src)+len(b.src)>>2)
	}
	span := append(b.buf[x][:0], b.src[:n]...)
	b.src, _, err = transform.Append(t, span, b.src[n:])
	b.buf[x] = b.src
	b.next++
	return err
}

// Pre-allocate transformers when possible. In some cases this avoids allocation.
var (
	foldWidthT transform.SpanningTransformer = width.Fold
	lowerCaseT transform.SpanningTransformer = cases.Lower(language.Und, cases.HandleFinalSigma(false))
)

// TODO: make this a method on profile.

func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, err error) {
	b.src = src

	ascii := true
	for _, c := range src {
		if c >= utf8.RuneSelf {
			ascii = false
			break
		}
	}
	// ASCII fast path.
	if ascii {
		for _, f := range p.options.additional {
			if err = b.apply(f()); err != nil {
				return nil, err
			}
		}
		switch {
		case p.options.asciiLower || (comparing && p.options.ignorecase):
			for i, c := range b.src {
				if 'A' <= c && c <= 'Z' {
					b.src[i] = c ^ 1<<5
				}
			}
		case p.options.cases != nil:
			b.apply(p.options.cases)
		}
		c := checker{p: p}
		if _, err := c.span(b.src, true); err != nil {
			return nil, err
		}
		if p.disallow != nil {
			for _, c := range b.src {
				if p.disallow.Contains(rune(c)) {
					return nil, errDisallowedRune
				}
			}
		}
		if p.options.disallowEmpty && len(b.src) == 0 {
			return nil, errEmptyString
		}
		return b.src, nil
	}

	// These transforms are applied in the order defined in
	// https://tools.ietf.org/html/rfc8264#section-7

	r := 1
	if p.options.repeat {
		r = 4
	}
	for ; r > 0; r-- {
		// TODO: allow different width transforms options.
		if p.options.foldWidth || (p.options.ignorecase && comparing) {
			b.apply(foldWidthT)
		}
		for _, f := range p.options.additional {
			if err = b.apply(f()); err != nil {
				return nil, err
			}
		}
		if p.options.cases != nil {
			b.apply(p.options.cases)
		}
		if comparing && p.options.ignorecase {
			b.apply(lowerCaseT)
		}
		b.apply(p.norm)
		if p.options.bidiRule && !bidirule.Valid(b.src) {
			return nil, bidirule.ErrInvalid
		}
		c := checker{p: p}
		if _, err := c.span(b.src, true); err != nil {
			return nil, err
		}
		if p.disallow != nil {
			for i := 0; i < len(b.src); {
				r, size := utf8.DecodeRune(b.src[i:])
				if p.disallow.Contains(r) {
					return nil, errDisallowedRune
				}
				i += size
			}
		}
		if p.options.disallowEmpty && len(b.src) == 0 {
			return nil, errEmptyString
		}
	}
	return b.src, nil
}

// Append appends the result of applying p to src writing the result to dst.
// It returns an error if the input string is invalid.
func (p *Profile) Append(dst, src []byte) ([]byte, error) {
	var buf buffers
	b, err := buf.enforce(p, src, false)
	if err != nil {
		return nil, err
	}
	return append(dst, b...), nil
}

func processBytes(p *Profile, b []byte, key bool) ([]byte, error) {
	var buf buffers
	b, err := buf.enforce(p, b, key)
	if err != nil {
		return nil, err
	}
	if buf.next == 0 {
		c := make([]byte, len(b))
		copy(c, b)
		return c, nil
	}
	return b, nil
}

// Bytes returns a new byte slice with the result of applying the profile to b.
func (p *Profile) Bytes(b []byte) ([]byte, error) {
	return processBytes(p, b, false)
}

// AppendCompareKey appends the result of applying p to src (including any
// optional rules to make strings comparable or useful in a map key such as
// applying lowercasing) writing the result to dst. It returns an error if the
// input string is invalid.
func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error) {
	var buf buffers
	b, err := buf.enforce(p, src, true)
	if err != nil {
		return nil, err
	}
	return append(dst, b...), nil
}

func processString(p *Profile, s string, key bool) (string, error) {
	var buf buffers
	b, err := buf.enforce(p, []byte(s), key)
	if err != nil {
		return "", err
	}
	return string(b), nil
}

// String returns a string with the result of applying the profile to s.
func (p *Profile) String(s string) (string, error) {
	return processString(p, s, false)
}

// CompareKey returns a string that can be used for comparison, hashing, or
// collation.
func (p *Profile) CompareKey(s string) (string, error) {
	return processString(p, s, true)
}

// Compare enforces both strings, and then compares them for bit-string identity
// (byte-for-byte equality). If either string cannot be enforced, the comparison
// is false.
func (p *Profile) Compare(a, b string) bool {
	var buf buffers

	akey, err := buf.enforce(p, []byte(a), true)
	if err != nil {
		return false
	}

	buf = buffers{}
	bkey, err := buf.enforce(p, []byte(b), true)
	if err != nil {
		return false
	}

	return bytes.Compare(akey, bkey) == 0
}

// Allowed returns a runes.Set containing every rune that is a member of the
// underlying profile's string class and not disallowed by any profile specific
// rules.
func (p *Profile) Allowed() runes.Set {
	if p.options.disallow != nil {
		return runes.Predicate(func(r rune) bool {
			return p.class.Contains(r) && !p.options.disallow.Contains(r)
		})
	}
	return p.class
}

type checker struct {
	p       *Profile
	allowed runes.Set

	beforeBits catBitmap
	termBits   catBitmap
	acceptBits catBitmap
}

func (c *checker) Reset() {
	c.beforeBits = 0
	c.termBits = 0
	c.acceptBits = 0
}

func (c *checker) span(src []byte, atEOF bool) (n int, err error) {
	for n < len(src) {
		e, sz := dpTrie.lookup(src[n:])
		d := categoryTransitions[category(e&catMask)]
		if sz == 0 {
			if !atEOF {
				return n, transform.ErrShortSrc
			}
			return n, errDisallowedRune
		}
		doLookAhead := false
		if property(e) < c.p.class.validFrom {
			if d.rule == nil {
				return n, errDisallowedRune
			}
			doLookAhead, err = d.rule(c.beforeBits)
			if err != nil {
				return n, err
			}
		}
		c.beforeBits &= d.keep
		c.beforeBits |= d.set
		if c.termBits != 0 {
			// We are currently in an unterminated lookahead.
			if c.beforeBits&c.termBits != 0 {
				c.termBits = 0
				c.acceptBits = 0
			} else if c.beforeBits&c.acceptBits == 0 {
				// Invalid continuation of the unterminated lookahead sequence.
				return n, errContext
			}
		}
		if doLookAhead {
			if c.termBits != 0 {
				// A previous lookahead run has not been terminated yet.
				return n, errContext
			}
			c.termBits = d.term
			c.acceptBits = d.accept
		}
		n += sz
	}
	if m := c.beforeBits >> finalShift; c.beforeBits&m != m || c.termBits != 0 {
		err = errContext
	}
	return n, err
}

// TODO: we may get rid of this transform if transform.Chain understands
// something like a Spanner interface.
func (c checker) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	short := false
	if len(dst) < len(src) {
		src = src[:len(dst)]
		atEOF = false
		short = true
	}
	nSrc, err = c.span(src, atEOF)
	nDst = copy(dst, src[:nSrc])
	if short && (err == transform.ErrShortSrc || err == nil) {
		err = transform.ErrShortDst
	}
	return nDst, nSrc, err
}
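A usage sketch of the enforcement and comparison entry points, using the UsernameCaseMapped profile declared in profiles.go below:

package main

import (
	"fmt"

	"golang.org/x/text/secure/precis"
)

func main() {
	// Enforcement maps the username to its canonical, lowercased form.
	s, err := precis.UsernameCaseMapped.String("Ренат")
	fmt.Println(s, err)

	// Compare enforces both sides with the comparison rules applied.
	fmt.Println(precis.UsernameCaseMapped.Compare("Alice", "alice")) // true
}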
78
vendor/golang.org/x/text/secure/precis/profiles.go
generated
vendored
Normal file
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package precis

import (
	"unicode"

	"golang.org/x/text/runes"
	"golang.org/x/text/transform"
	"golang.org/x/text/unicode/norm"
)

var (
	// Implements the Nickname profile specified in RFC 8266.
	Nickname *Profile = nickname

	// Implements the UsernameCaseMapped profile specified in RFC 8265.
	UsernameCaseMapped *Profile = usernameCaseMap

	// Implements the UsernameCasePreserved profile specified in RFC 8265.
	UsernameCasePreserved *Profile = usernameNoCaseMap

	// Implements the OpaqueString profile defined in RFC 8265 for passwords and
	// other secure labels.
	OpaqueString *Profile = opaquestring
)

var (
	nickname = &Profile{
		options: getOpts(
			AdditionalMapping(func() transform.Transformer {
				return &nickAdditionalMapping{}
			}),
			IgnoreCase,
			Norm(norm.NFKC),
			DisallowEmpty,
			repeat,
		),
		class: freeform,
	}
	usernameCaseMap = &Profile{
		options: getOpts(
			FoldWidth,
			LowerCase(),
			Norm(norm.NFC),
			BidiRule,
		),
		class: identifier,
	}
	usernameNoCaseMap = &Profile{
		options: getOpts(
			FoldWidth,
			Norm(norm.NFC),
			BidiRule,
		),
		class: identifier,
	}
	opaquestring = &Profile{
		options: getOpts(
			AdditionalMapping(func() transform.Transformer {
				return mapSpaces
			}),
			Norm(norm.NFC),
			DisallowEmpty,
		),
		class: freeform,
	}
)

// mapSpaces is a shared value of a runes.Map transformer.
var mapSpaces transform.Transformer = runes.Map(func(r rune) rune {
	if unicode.Is(unicode.Zs, r) {
		return ' '
	}
	return r
})
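And the remaining predefined profiles in use; OpaqueString keeps case but maps non-ASCII spaces to U+0020 and rejects the empty string, while UsernameCasePreserved folds width but leaves case alone.

package main

import (
	"fmt"

	"golang.org/x/text/secure/precis"
)

func main() {
	// OpaqueString (passwords): the no-break space becomes a plain space.
	pw, err := precis.OpaqueString.String("correct\u00A0horse battery staple")
	fmt.Println(pw, err)

	// UsernameCasePreserved keeps the original casing.
	u, err := precis.UsernameCasePreserved.String("ShopKeeper")
	fmt.Println(u, err) // ShopKeeper <nil>
}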
3890
vendor/golang.org/x/text/secure/precis/tables10.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
4017
vendor/golang.org/x/text/secure/precis/tables11.0.0.go
generated
vendored
Normal file
4017
vendor/golang.org/x/text/secure/precis/tables11.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
4119
vendor/golang.org/x/text/secure/precis/tables12.0.0.go
generated
vendored
Normal file
4119
vendor/golang.org/x/text/secure/precis/tables12.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
4153
vendor/golang.org/x/text/secure/precis/tables13.0.0.go
generated
vendored
Normal file
4153
vendor/golang.org/x/text/secure/precis/tables13.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
3791
vendor/golang.org/x/text/secure/precis/tables9.0.0.go
generated
vendored
Normal file
3791
vendor/golang.org/x/text/secure/precis/tables9.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
32
vendor/golang.org/x/text/secure/precis/transformer.go
generated
vendored
Normal file
@@ -0,0 +1,32 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package precis

import "golang.org/x/text/transform"

// Transformer implements the transform.Transformer interface.
type Transformer struct {
	t transform.Transformer
}

// Reset implements the transform.Transformer interface.
func (t Transformer) Reset() { t.t.Reset() }

// Transform implements the transform.Transformer interface.
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	return t.t.Transform(dst, src, atEOF)
}

// Bytes returns a new byte slice with the result of applying t to b.
func (t Transformer) Bytes(b []byte) []byte {
	b, _, _ = transform.Bytes(t, b)
	return b
}

// String returns a string with the result of applying t to s.
func (t Transformer) String(s string) string {
	s, _, _ = transform.String(t, s)
	return s
}
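Aside (illustrative, not part of the vendored file above): this wrapper is normally obtained via Profile.NewTransformer and can then be plugged into the streaming helpers of golang.org/x/text/transform. A rough sketch, assuming that API and an arbitrary sample input:

package main

import (
	"fmt"
	"io"
	"strings"

	"golang.org/x/text/secure/precis"
	"golang.org/x/text/transform"
)

func main() {
	// NewTransformer returns the *precis.Transformer defined above, so it can
	// be used anywhere a transform.Transformer is expected, e.g. wrapping a
	// reader that applies the OpaqueString (password) profile on the fly.
	t := precis.OpaqueString.NewTransformer()
	r := transform.NewReader(strings.NewReader("correct horse battery staple"), t)
	out, err := io.ReadAll(r)
	fmt.Println(string(out), err)
}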
64
vendor/golang.org/x/text/secure/precis/trieval.go
generated
vendored
Normal file
@@ -0,0 +1,64 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.

package precis

// entry is the entry of a trie table
// 7..6 property (unassigned, disallowed, maybe, valid)
// 5..0 category
type entry uint8

const (
	propShift = 6
	propMask  = 0xc0
	catMask   = 0x3f
)

func (e entry) property() property { return property(e & propMask) }
func (e entry) category() category { return category(e & catMask) }

type property uint8

// The order of these constants matters. A Profile may consider runes to be
// allowed either from pValid or idDisOrFreePVal.
const (
	unassigned property = iota << propShift
	disallowed
	idDisOrFreePVal // disallowed for Identifier, pValid for FreeForm
	pValid
)

// compute permutations of all properties and specialCategories.
type category uint8

const (
	other category = iota

	// Special rune types
	joiningL
	joiningD
	joiningT
	joiningR
	viramaModifier
	viramaJoinT // Virama + JoiningT
	latinSmallL // U+006c
	greek
	greekJoinT // Greek + JoiningT
	hebrew
	hebrewJoinT // Hebrew + JoiningT
	japanese    // hiragana, katakana, han

	// Special rune types associated with contextual rules defined in
	// https://tools.ietf.org/html/rfc5892#appendix-A.
	// ContextO
	zeroWidthNonJoiner // rule 1
	zeroWidthJoiner    // rule 2
	// ContextJ
	middleDot                // rule 3
	greekLowerNumeralSign    // rule 4
	hebrewPreceding          // rule 5 and 6
	katakanaMiddleDot        // rule 7
	arabicIndicDigit         // rule 8
	extendedArabicIndicDigit // rule 9

	numCategories
)
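Aside (illustrative, not part of the generated file above): the bit layout is easiest to see with a concrete value. The constants below mirror the ones defined above, and the entry value is invented for the example.

package main

import "fmt"

// Mirrors the trie entry layout above, for illustration only: the top two bits
// carry the property, the low six bits the category.
const (
	propShift = 6
	propMask  = 0xc0
	catMask   = 0x3f
)

func main() {
	// A hypothetical entry: property 3 (pValid) in bits 7..6, category 5 in bits 5..0.
	e := uint8(3<<propShift | 5)
	fmt.Printf("property bits: %#02x, category: %d\n", e&propMask, e&catMask)
	// Output: property bits: 0xc0, category: 5
}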
28
vendor/golang.org/x/text/width/kind_string.go
generated
vendored
Normal file
@@ -0,0 +1,28 @@
// Code generated by "stringer -type=Kind"; DO NOT EDIT.
|
||||
|
||||
package width
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[Neutral-0]
|
||||
_ = x[EastAsianAmbiguous-1]
|
||||
_ = x[EastAsianWide-2]
|
||||
_ = x[EastAsianNarrow-3]
|
||||
_ = x[EastAsianFullwidth-4]
|
||||
_ = x[EastAsianHalfwidth-5]
|
||||
}
|
||||
|
||||
const _Kind_name = "NeutralEastAsianAmbiguousEastAsianWideEastAsianNarrowEastAsianFullwidthEastAsianHalfwidth"
|
||||
|
||||
var _Kind_index = [...]uint8{0, 7, 25, 38, 53, 71, 89}
|
||||
|
||||
func (i Kind) String() string {
|
||||
if i < 0 || i >= Kind(len(_Kind_index)-1) {
|
||||
return "Kind(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _Kind_name[_Kind_index[i]:_Kind_index[i+1]]
|
||||
}
|
||||
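Aside (illustrative, not part of the generated file above): the generated String method in use; out-of-range values fall back to the numeric form.

package main

import (
	"fmt"

	"golang.org/x/text/width"
)

func main() {
	fmt.Println(width.EastAsianWide) // "EastAsianWide"
	fmt.Println(width.Kind(42))      // "Kind(42)" for out-of-range values
}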
1319
vendor/golang.org/x/text/width/tables10.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
1331
vendor/golang.org/x/text/width/tables11.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
1351
vendor/golang.org/x/text/width/tables12.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
1352
vendor/golang.org/x/text/width/tables13.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
1287
vendor/golang.org/x/text/width/tables9.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
239
vendor/golang.org/x/text/width/transform.go
generated
vendored
Normal file
@@ -0,0 +1,239 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package width

import (
	"unicode/utf8"

	"golang.org/x/text/transform"
)

type foldTransform struct {
	transform.NopResetter
}

func (foldTransform) Span(src []byte, atEOF bool) (n int, err error) {
	for n < len(src) {
		if src[n] < utf8.RuneSelf {
			// ASCII fast path.
			for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
			}
			continue
		}
		v, size := trie.lookup(src[n:])
		if size == 0 { // incomplete UTF-8 encoding
			if !atEOF {
				err = transform.ErrShortSrc
			} else {
				n = len(src)
			}
			break
		}
		if elem(v)&tagNeedsFold != 0 {
			err = transform.ErrEndOfSpan
			break
		}
		n += size
	}
	return n, err
}

func (foldTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for nSrc < len(src) {
		if src[nSrc] < utf8.RuneSelf {
			// ASCII fast path.
			start, end := nSrc, len(src)
			if d := len(dst) - nDst; d < end-start {
				end = nSrc + d
			}
			for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
			}
			n := copy(dst[nDst:], src[start:nSrc])
			if nDst += n; nDst == len(dst) {
				nSrc = start + n
				if nSrc == len(src) {
					return nDst, nSrc, nil
				}
				if src[nSrc] < utf8.RuneSelf {
					return nDst, nSrc, transform.ErrShortDst
				}
			}
			continue
		}
		v, size := trie.lookup(src[nSrc:])
		if size == 0 { // incomplete UTF-8 encoding
			if !atEOF {
				return nDst, nSrc, transform.ErrShortSrc
			}
			size = 1 // gobble 1 byte
		}
		if elem(v)&tagNeedsFold == 0 {
			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
				return nDst, nSrc, transform.ErrShortDst
			}
			nDst += size
		} else {
			data := inverseData[byte(v)]
			if len(dst)-nDst < int(data[0]) {
				return nDst, nSrc, transform.ErrShortDst
			}
			i := 1
			for end := int(data[0]); i < end; i++ {
				dst[nDst] = data[i]
				nDst++
			}
			dst[nDst] = data[i] ^ src[nSrc+size-1]
			nDst++
		}
		nSrc += size
	}
	return nDst, nSrc, nil
}

type narrowTransform struct {
	transform.NopResetter
}

func (narrowTransform) Span(src []byte, atEOF bool) (n int, err error) {
	for n < len(src) {
		if src[n] < utf8.RuneSelf {
			// ASCII fast path.
			for n++; n < len(src) && src[n] < utf8.RuneSelf; n++ {
			}
			continue
		}
		v, size := trie.lookup(src[n:])
		if size == 0 { // incomplete UTF-8 encoding
			if !atEOF {
				err = transform.ErrShortSrc
			} else {
				n = len(src)
			}
			break
		}
		if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
		} else {
			err = transform.ErrEndOfSpan
			break
		}
		n += size
	}
	return n, err
}

func (narrowTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for nSrc < len(src) {
		if src[nSrc] < utf8.RuneSelf {
			// ASCII fast path.
			start, end := nSrc, len(src)
			if d := len(dst) - nDst; d < end-start {
				end = nSrc + d
			}
			for nSrc++; nSrc < end && src[nSrc] < utf8.RuneSelf; nSrc++ {
			}
			n := copy(dst[nDst:], src[start:nSrc])
			if nDst += n; nDst == len(dst) {
				nSrc = start + n
				if nSrc == len(src) {
					return nDst, nSrc, nil
				}
				if src[nSrc] < utf8.RuneSelf {
					return nDst, nSrc, transform.ErrShortDst
				}
			}
			continue
		}
		v, size := trie.lookup(src[nSrc:])
		if size == 0 { // incomplete UTF-8 encoding
			if !atEOF {
				return nDst, nSrc, transform.ErrShortSrc
			}
			size = 1 // gobble 1 byte
		}
		if k := elem(v).kind(); byte(v) == 0 || k != EastAsianFullwidth && k != EastAsianWide && k != EastAsianAmbiguous {
			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
				return nDst, nSrc, transform.ErrShortDst
			}
			nDst += size
		} else {
			data := inverseData[byte(v)]
			if len(dst)-nDst < int(data[0]) {
				return nDst, nSrc, transform.ErrShortDst
			}
			i := 1
			for end := int(data[0]); i < end; i++ {
				dst[nDst] = data[i]
				nDst++
			}
			dst[nDst] = data[i] ^ src[nSrc+size-1]
			nDst++
		}
		nSrc += size
	}
	return nDst, nSrc, nil
}

type wideTransform struct {
	transform.NopResetter
}

func (wideTransform) Span(src []byte, atEOF bool) (n int, err error) {
	for n < len(src) {
		// TODO: Consider ASCII fast path. Special-casing ASCII handling can
		// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
		// not enough to warrant the extra code and complexity.
		v, size := trie.lookup(src[n:])
		if size == 0 { // incomplete UTF-8 encoding
			if !atEOF {
				err = transform.ErrShortSrc
			} else {
				n = len(src)
			}
			break
		}
		if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
		} else {
			err = transform.ErrEndOfSpan
			break
		}
		n += size
	}
	return n, err
}

func (wideTransform) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for nSrc < len(src) {
		// TODO: Consider ASCII fast path. Special-casing ASCII handling can
		// reduce the ns/op of BenchmarkWideASCII by about 30%. This is probably
		// not enough to warrant the extra code and complexity.
		v, size := trie.lookup(src[nSrc:])
		if size == 0 { // incomplete UTF-8 encoding
			if !atEOF {
				return nDst, nSrc, transform.ErrShortSrc
			}
			size = 1 // gobble 1 byte
		}
		if k := elem(v).kind(); byte(v) == 0 || k != EastAsianHalfwidth && k != EastAsianNarrow {
			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
				return nDst, nSrc, transform.ErrShortDst
			}
			nDst += size
		} else {
			data := inverseData[byte(v)]
			if len(dst)-nDst < int(data[0]) {
				return nDst, nSrc, transform.ErrShortDst
			}
			i := 1
			for end := int(data[0]); i < end; i++ {
				dst[nDst] = data[i]
				nDst++
			}
			dst[nDst] = data[i] ^ src[nSrc+size-1]
			nDst++
		}
		nSrc += size
	}
	return nDst, nSrc, nil
}
30
vendor/golang.org/x/text/width/trieval.go
generated
vendored
Normal file
@@ -0,0 +1,30 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.

package width

// elem is an entry of the width trie. The high byte is used to encode the type
// of the rune. The low byte is used to store the index to a mapping entry in
// the inverseData array.
type elem uint16

const (
	tagNeutral elem = iota << typeShift
	tagAmbiguous
	tagWide
	tagNarrow
	tagFullwidth
	tagHalfwidth
)

const (
	numTypeBits = 3
	typeShift   = 16 - numTypeBits

	// tagNeedsFold is true for all fullwidth and halfwidth runes except for
	// the Won sign U+20A9.
	tagNeedsFold = 0x1000

	// The Korean Won sign is halfwidth, but SHOULD NOT be mapped to a wide
	// variant.
	wonSign rune = 0x20A9
)
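Aside (illustrative, not part of the generated file above): a concrete decoding of the elem layout described in the comment, with locally mirrored constants and an invented value.

package main

import "fmt"

const (
	numTypeBits = 3
	typeShift   = 16 - numTypeBits // mirrors the constants above
)

func main() {
	// A hypothetical elem: type tag 4 (fullwidth) in the high bits, mapping
	// index 0x2a into inverseData in the low byte.
	e := uint16(4<<typeShift | 0x2a)
	fmt.Printf("kind: %d, inverseData index: %#02x\n", e>>typeShift, byte(e))
	// Output: kind: 4, inverseData index: 0x2a
}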
206
vendor/golang.org/x/text/width/width.go
generated
vendored
Normal file
@@ -0,0 +1,206 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate stringer -type=Kind
//go:generate go run gen.go gen_common.go gen_trieval.go

// Package width provides functionality for handling different widths in text.
//
// Wide characters behave like ideographs; they tend to allow line breaks after
// each character and remain upright in vertical text layout. Narrow characters
// are kept together in words or runs that are rotated sideways in vertical text
// layout.
//
// For more information, see https://unicode.org/reports/tr11/.
package width // import "golang.org/x/text/width"

import (
	"unicode/utf8"

	"golang.org/x/text/transform"
)

// TODO
// 1) Reduce table size by compressing blocks.
// 2) API proposition for computing display length
//    (approximation, fixed pitch only).
// 3) Implement display length.

// Kind indicates the type of width property as defined in https://unicode.org/reports/tr11/.
type Kind int

const (
	// Neutral characters do not occur in legacy East Asian character sets.
	Neutral Kind = iota

	// EastAsianAmbiguous characters can be sometimes wide and sometimes
	// narrow and require additional information not contained in the character
	// code to further resolve their width.
	EastAsianAmbiguous

	// EastAsianWide characters are wide in their usual form. They occur only in
	// the context of East Asian typography. These runes may have explicit
	// halfwidth counterparts.
	EastAsianWide

	// EastAsianNarrow characters are narrow in their usual form. They often have
	// fullwidth counterparts.
	EastAsianNarrow

	// Note: there exist Narrow runes that do not have fullwidth or wide
	// counterparts, despite what the definition says (e.g. U+27E6).

	// EastAsianFullwidth characters have a compatibility decomposition of type
	// wide that maps to a narrow counterpart.
	EastAsianFullwidth

	// EastAsianHalfwidth characters have a compatibility decomposition of type
	// narrow that maps to a wide or ambiguous counterpart, plus U+20A9 ₩ WON
	// SIGN.
	EastAsianHalfwidth

	// Note: there exist runes that have a halfwidth counterpart but that are
	// classified as Ambiguous, rather than wide (e.g. U+2190).
)

// TODO: the generated tries need to return size 1 for invalid runes for the
// width to be computed correctly (each byte should render width 1)

var trie = newWidthTrie(0)

// Lookup reports the Properties of the first rune in b and the number of bytes
// of its UTF-8 encoding.
func Lookup(b []byte) (p Properties, size int) {
	v, sz := trie.lookup(b)
	return Properties{elem(v), b[sz-1]}, sz
}

// LookupString reports the Properties of the first rune in s and the number of
// bytes of its UTF-8 encoding.
func LookupString(s string) (p Properties, size int) {
	v, sz := trie.lookupString(s)
	return Properties{elem(v), s[sz-1]}, sz
}

// LookupRune reports the Properties of rune r.
func LookupRune(r rune) Properties {
	var buf [4]byte
	n := utf8.EncodeRune(buf[:], r)
	v, _ := trie.lookup(buf[:n])
	last := byte(r)
	if r >= utf8.RuneSelf {
		last = 0x80 + byte(r&0x3f)
	}
	return Properties{elem(v), last}
}

// Properties provides access to width properties of a rune.
type Properties struct {
	elem elem
	last byte
}

func (e elem) kind() Kind {
	return Kind(e >> typeShift)
}

// Kind returns the Kind of a rune as defined in Unicode TR #11.
// See https://unicode.org/reports/tr11/ for more details.
func (p Properties) Kind() Kind {
	return p.elem.kind()
}

// Folded returns the folded variant of a rune or 0 if the rune is canonical.
func (p Properties) Folded() rune {
	if p.elem&tagNeedsFold != 0 {
		buf := inverseData[byte(p.elem)]
		buf[buf[0]] ^= p.last
		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
		return r
	}
	return 0
}

// Narrow returns the narrow variant of a rune or 0 if the rune is already
// narrow or doesn't have a narrow variant.
func (p Properties) Narrow() rune {
	if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) {
		buf := inverseData[byte(p.elem)]
		buf[buf[0]] ^= p.last
		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
		return r
	}
	return 0
}

// Wide returns the wide variant of a rune or 0 if the rune is already
// wide or doesn't have a wide variant.
func (p Properties) Wide() rune {
	if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) {
		buf := inverseData[byte(p.elem)]
		buf[buf[0]] ^= p.last
		r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
		return r
	}
	return 0
}

// TODO for Properties:
// - Add Fullwidth/Halfwidth or Inverted methods for computing variants
//   mapping.
// - Add width information (including information on non-spacing runes).

// Transformer implements the transform.Transformer interface.
type Transformer struct {
	t transform.SpanningTransformer
}

// Reset implements the transform.Transformer interface.
func (t Transformer) Reset() { t.t.Reset() }

// Transform implements the transform.Transformer interface.
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	return t.t.Transform(dst, src, atEOF)
}

// Span implements the transform.SpanningTransformer interface.
func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) {
	return t.t.Span(src, atEOF)
}

// Bytes returns a new byte slice with the result of applying t to b.
func (t Transformer) Bytes(b []byte) []byte {
	b, _, _ = transform.Bytes(t, b)
	return b
}

// String returns a string with the result of applying t to s.
func (t Transformer) String(s string) string {
	s, _, _ = transform.String(t, s)
	return s
}

var (
	// Fold is a transform that maps all runes to their canonical width.
	//
	// Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm
	// provide a more generic folding mechanism.
	Fold Transformer = Transformer{foldTransform{}}

	// Widen is a transform that maps runes to their wide variant, if
	// available.
	Widen Transformer = Transformer{wideTransform{}}

	// Narrow is a transform that maps runes to their narrow variant, if
	// available.
	Narrow Transformer = Transformer{narrowTransform{}}
)

// TODO: Consider the following options:
// - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some
//   generalized variant of this.
// - Consider a wide Won character to be the default width (or some generalized
//   variant of this).
// - Filter the set of characters that gets converted (the preferred approach is
//   to allow applying filters to transforms).
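Aside (illustrative, not part of the vendored file above): a short usage sketch of the public width API defined in this file; the sample runes and expected outputs are the editor's assumptions.

package main

import (
	"fmt"

	"golang.org/x/text/width"
)

func main() {
	// Per-rune properties.
	p := width.LookupRune('Ａ') // U+FF21 FULLWIDTH LATIN CAPITAL LETTER A
	fmt.Println(p.Kind(), string(p.Narrow())) // should print: EastAsianFullwidth A

	// Whole-string transforms built on the fold/wide/narrow transformers above.
	fmt.Println(width.Narrow.String("ＡＢＣ")) // should print: ABC
	fmt.Println(width.Widen.String("AB"))     // should print: ＡＢ
	fmt.Println(width.Fold.String("ＡB"))     // should print: AB
}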