mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-31 10:02:26 -05:00 
			
		
		
		
	
		
			
	
	
		
			229 lines
		
	
	
	
		
			7.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			229 lines
		
	
	
	
		
			7.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
|  | // Copyright 2013 The Go Authors. All rights reserved. | ||
|  | // Use of this source code is governed by a BSD-style | ||
|  | // license that can be found in the LICENSE file. | ||
|  | 
 | ||
|  | package language | ||
|  | 
 | ||
|  | import ( | ||
|  | 	"errors" | ||
|  | 	"strconv" | ||
|  | 	"strings" | ||
|  | 
 | ||
|  | 	"golang.org/x/text/internal/language" | ||
|  | ) | ||
|  | 
 | ||
// A ValueError is the error reported by the parsing functions when the input
// is syntactically well-formed but one of its subtags is not a recognized
// value.
type ValueError interface {
	// Subtag returns the subtag for which the error occurred.
	Subtag() string

	error
}
|  | 
 | ||
|  | // Parse parses the given BCP 47 string and returns a valid Tag. If parsing | ||
|  | // failed it returns an error and any part of the tag that could be parsed. | ||
|  | // If parsing succeeded but an unknown value was found, it returns | ||
|  | // ValueError. The Tag returned in this case is just stripped of the unknown | ||
|  | // value. All other values are preserved. It accepts tags in the BCP 47 format | ||
|  | // and extensions to this standard defined in | ||
|  | // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. | ||
|  | // The resulting tag is canonicalized using the default canonicalization type. | ||
|  | func Parse(s string) (t Tag, err error) { | ||
|  | 	return Default.Parse(s) | ||
|  | } | ||
|  | 
 | ||
|  | // Parse parses the given BCP 47 string and returns a valid Tag. If parsing | ||
|  | // failed it returns an error and any part of the tag that could be parsed. | ||
|  | // If parsing succeeded but an unknown value was found, it returns | ||
|  | // ValueError. The Tag returned in this case is just stripped of the unknown | ||
|  | // value. All other values are preserved. It accepts tags in the BCP 47 format | ||
|  | // and extensions to this standard defined in | ||
|  | // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. | ||
|  | // The resulting tag is canonicalized using the canonicalization type c. | ||
|  | func (c CanonType) Parse(s string) (t Tag, err error) { | ||
|  | 	tt, err := language.Parse(s) | ||
|  | 	if err != nil { | ||
|  | 		return makeTag(tt), err | ||
|  | 	} | ||
|  | 	tt, changed := canonicalize(c, tt) | ||
|  | 	if changed { | ||
|  | 		tt.RemakeString() | ||
|  | 	} | ||
|  | 	return makeTag(tt), err | ||
|  | } | ||
|  | 
 | ||
|  | // Compose creates a Tag from individual parts, which may be of type Tag, Base, | ||
|  | // Script, Region, Variant, []Variant, Extension, []Extension or error. If a | ||
|  | // Base, Script or Region or slice of type Variant or Extension is passed more | ||
|  | // than once, the latter will overwrite the former. Variants and Extensions are | ||
|  | // accumulated, but if two extensions of the same type are passed, the latter | ||
|  | // will replace the former. For -u extensions, though, the key-type pairs are | ||
|  | // added, where later values overwrite older ones. A Tag overwrites all former | ||
|  | // values and typically only makes sense as the first argument. The resulting | ||
|  | // tag is returned after canonicalizing using the Default CanonType. If one or | ||
|  | // more errors are encountered, one of the errors is returned. | ||
|  | func Compose(part ...interface{}) (t Tag, err error) { | ||
|  | 	return Default.Compose(part...) | ||
|  | } | ||
|  | 
 | ||
|  | // Compose creates a Tag from individual parts, which may be of type Tag, Base, | ||
|  | // Script, Region, Variant, []Variant, Extension, []Extension or error. If a | ||
|  | // Base, Script or Region or slice of type Variant or Extension is passed more | ||
|  | // than once, the latter will overwrite the former. Variants and Extensions are | ||
|  | // accumulated, but if two extensions of the same type are passed, the latter | ||
|  | // will replace the former. For -u extensions, though, the key-type pairs are | ||
|  | // added, where later values overwrite older ones. A Tag overwrites all former | ||
|  | // values and typically only makes sense as the first argument. The resulting | ||
|  | // tag is returned after canonicalizing using CanonType c. If one or more errors | ||
|  | // are encountered, one of the errors is returned. | ||
|  | func (c CanonType) Compose(part ...interface{}) (t Tag, err error) { | ||
|  | 	var b language.Builder | ||
|  | 	if err = update(&b, part...); err != nil { | ||
|  | 		return und, err | ||
|  | 	} | ||
|  | 	b.Tag, _ = canonicalize(c, b.Tag) | ||
|  | 	return makeTag(b.Make()), err | ||
|  | } | ||
|  | 
 | ||
// errInvalidArgument is the error update reports when it is handed a Variant
// or Extension whose underlying string is empty.
var errInvalidArgument = errors.New("invalid Extension or Variant")
|  | 
 | ||
|  | func update(b *language.Builder, part ...interface{}) (err error) { | ||
|  | 	for _, x := range part { | ||
|  | 		switch v := x.(type) { | ||
|  | 		case Tag: | ||
|  | 			b.SetTag(v.tag()) | ||
|  | 		case Base: | ||
|  | 			b.Tag.LangID = v.langID | ||
|  | 		case Script: | ||
|  | 			b.Tag.ScriptID = v.scriptID | ||
|  | 		case Region: | ||
|  | 			b.Tag.RegionID = v.regionID | ||
|  | 		case Variant: | ||
|  | 			if v.variant == "" { | ||
|  | 				err = errInvalidArgument | ||
|  | 				break | ||
|  | 			} | ||
|  | 			b.AddVariant(v.variant) | ||
|  | 		case Extension: | ||
|  | 			if v.s == "" { | ||
|  | 				err = errInvalidArgument | ||
|  | 				break | ||
|  | 			} | ||
|  | 			b.SetExt(v.s) | ||
|  | 		case []Variant: | ||
|  | 			b.ClearVariants() | ||
|  | 			for _, v := range v { | ||
|  | 				b.AddVariant(v.variant) | ||
|  | 			} | ||
|  | 		case []Extension: | ||
|  | 			b.ClearExtensions() | ||
|  | 			for _, e := range v { | ||
|  | 				b.SetExt(e.s) | ||
|  | 			} | ||
|  | 		// TODO: support parsing of raw strings based on morphology or just extensions? | ||
|  | 		case error: | ||
|  | 			if v != nil { | ||
|  | 				err = v | ||
|  | 			} | ||
|  | 		} | ||
|  | 	} | ||
|  | 	return | ||
|  | } | ||
|  | 
 | ||
// errInvalidWeight is returned by ParseAcceptLanguage when the quality weight
// attached to an entry cannot be parsed as a number.
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
|  | 
 | ||
|  | // ParseAcceptLanguage parses the contents of an Accept-Language header as | ||
|  | // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and | ||
|  | // a list of corresponding quality weights. It is more permissive than RFC 2616 | ||
|  | // and may return non-nil slices even if the input is not valid. | ||
|  | // The Tags will be sorted by highest weight first and then by first occurrence. | ||
|  | // Tags with a weight of zero will be dropped. An error will be returned if the | ||
|  | // input could not be parsed. | ||
|  | func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) { | ||
|  | 	var entry string | ||
|  | 	for s != "" { | ||
|  | 		if entry, s = split(s, ','); entry == "" { | ||
|  | 			continue | ||
|  | 		} | ||
|  | 
 | ||
|  | 		entry, weight := split(entry, ';') | ||
|  | 
 | ||
|  | 		// Scan the language. | ||
|  | 		t, err := Parse(entry) | ||
|  | 		if err != nil { | ||
|  | 			id, ok := acceptFallback[entry] | ||
|  | 			if !ok { | ||
|  | 				return nil, nil, err | ||
|  | 			} | ||
|  | 			t = makeTag(language.Tag{LangID: id}) | ||
|  | 		} | ||
|  | 
 | ||
|  | 		// Scan the optional weight. | ||
|  | 		w := 1.0 | ||
|  | 		if weight != "" { | ||
|  | 			weight = consume(weight, 'q') | ||
|  | 			weight = consume(weight, '=') | ||
|  | 			// consume returns the empty string when a token could not be | ||
|  | 			// consumed, resulting in an error for ParseFloat. | ||
|  | 			if w, err = strconv.ParseFloat(weight, 32); err != nil { | ||
|  | 				return nil, nil, errInvalidWeight | ||
|  | 			} | ||
|  | 			// Drop tags with a quality weight of 0. | ||
|  | 			if w <= 0 { | ||
|  | 				continue | ||
|  | 			} | ||
|  | 		} | ||
|  | 
 | ||
|  | 		tag = append(tag, t) | ||
|  | 		q = append(q, float32(w)) | ||
|  | 	} | ||
|  | 	sortStable(&tagSort{tag, q}) | ||
|  | 	return tag, q, nil | ||
|  | } | ||
|  | 
 | ||
// consume strips the leading token c from s and returns what follows, with
// surrounding white space trimmed. If s does not start with c (which includes
// the case that s is empty), the empty string is returned.
func consume(s string, c byte) string {
	if len(s) > 0 && s[0] == c {
		return strings.TrimSpace(s[1:])
	}
	return ""
}
|  | 
 | ||
// split cuts s at the first occurrence of c and returns the text before it as
// head and the text after it as tail, both with surrounding white space
// trimmed. If c does not occur in s, head is the trimmed s and tail is empty.
func split(s string, c byte) (head, tail string) {
	head = s
	if i := strings.IndexByte(s, c); i >= 0 {
		head, tail = s[:i], s[i+1:]
	}
	return strings.TrimSpace(head), strings.TrimSpace(tail)
}
|  | 
 | ||
|  | // Add hack mapping to deal with a small number of cases that occur | ||
|  | // in Accept-Language (with reasonable frequency). | ||
|  | var acceptFallback = map[string]language.Language{ | ||
|  | 	"english": _en, | ||
|  | 	"deutsch": _de, | ||
|  | 	"italian": _it, | ||
|  | 	"french":  _fr, | ||
|  | 	"*":       _mul, // defined in the spec to match all languages. | ||
|  | } | ||
|  | 
 | ||
|  | type tagSort struct { | ||
|  | 	tag []Tag | ||
|  | 	q   []float32 | ||
|  | } | ||
|  | 
 | ||
|  | func (s *tagSort) Len() int { | ||
|  | 	return len(s.q) | ||
|  | } | ||
|  | 
 | ||
|  | func (s *tagSort) Less(i, j int) bool { | ||
|  | 	return s.q[i] > s.q[j] | ||
|  | } | ||
|  | 
 | ||
|  | func (s *tagSort) Swap(i, j int) { | ||
|  | 	s.tag[i], s.tag[j] = s.tag[j], s.tag[i] | ||
|  | 	s.q[i], s.q[j] = s.q[j], s.q[i] | ||
|  | } |