| 
									
										
										
										
											2021-08-12 21:03:24 +02:00
										 |  |  | // Copyright 2013 The Go Authors. All rights reserved. | 
					
						
							|  |  |  | // Use of this source code is governed by a BSD-style | 
					
						
							|  |  |  | // license that can be found in the LICENSE file. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package language | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"errors" | 
					
						
							|  |  |  | 	"strconv" | 
					
						
							|  |  |  | 	"strings" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	"golang.org/x/text/internal/language" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // ValueError is returned by any of the parsing functions when the | 
					
						
							|  |  |  | // input is well-formed but the respective subtag is not recognized | 
					
						
							|  |  |  | // as a valid value. | 
					
						
							|  |  |  | type ValueError interface { | 
					
						
							|  |  |  | 	error | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Subtag returns the subtag for which the error occurred. | 
					
						
							|  |  |  | 	Subtag() string | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Parse parses the given BCP 47 string and returns a valid Tag. If parsing | 
					
						
							|  |  |  | // failed it returns an error and any part of the tag that could be parsed. | 
					
						
							|  |  |  | // If parsing succeeded but an unknown value was found, it returns | 
					
						
							|  |  |  | // ValueError. The Tag returned in this case is just stripped of the unknown | 
					
						
							|  |  |  | // value. All other values are preserved. It accepts tags in the BCP 47 format | 
					
						
							|  |  |  | // and extensions to this standard defined in | 
					
						
							|  |  |  | // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. | 
					
						
							|  |  |  | // The resulting tag is canonicalized using the default canonicalization type. | 
					
						
							|  |  |  | func Parse(s string) (t Tag, err error) { | 
					
						
							|  |  |  | 	return Default.Parse(s) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Parse parses the given BCP 47 string and returns a valid Tag. If parsing | 
					
						
							|  |  |  | // failed it returns an error and any part of the tag that could be parsed. | 
					
						
							|  |  |  | // If parsing succeeded but an unknown value was found, it returns | 
					
						
							|  |  |  | // ValueError. The Tag returned in this case is just stripped of the unknown | 
					
						
							|  |  |  | // value. All other values are preserved. It accepts tags in the BCP 47 format | 
					
						
							|  |  |  | // and extensions to this standard defined in | 
					
						
							|  |  |  | // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. | 
					
						
							|  |  |  | // The resulting tag is canonicalized using the canonicalization type c. | 
					
						
							|  |  |  | func (c CanonType) Parse(s string) (t Tag, err error) { | 
					
						
							| 
									
										
										
										
											2021-09-10 14:42:14 +02:00
										 |  |  | 	defer func() { | 
					
						
							|  |  |  | 		if recover() != nil { | 
					
						
							|  |  |  | 			t = Tag{} | 
					
						
							|  |  |  | 			err = language.ErrSyntax | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	}() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-12 21:03:24 +02:00
										 |  |  | 	tt, err := language.Parse(s) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return makeTag(tt), err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	tt, changed := canonicalize(c, tt) | 
					
						
							|  |  |  | 	if changed { | 
					
						
							|  |  |  | 		tt.RemakeString() | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return makeTag(tt), err | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Compose creates a Tag from individual parts, which may be of type Tag, Base, | 
					
						
							|  |  |  | // Script, Region, Variant, []Variant, Extension, []Extension or error. If a | 
					
						
							|  |  |  | // Base, Script or Region or slice of type Variant or Extension is passed more | 
					
						
							|  |  |  | // than once, the latter will overwrite the former. Variants and Extensions are | 
					
						
							|  |  |  | // accumulated, but if two extensions of the same type are passed, the latter | 
					
						
							|  |  |  | // will replace the former. For -u extensions, though, the key-type pairs are | 
					
						
							|  |  |  | // added, where later values overwrite older ones. A Tag overwrites all former | 
					
						
							|  |  |  | // values and typically only makes sense as the first argument. The resulting | 
					
						
							|  |  |  | // tag is returned after canonicalizing using the Default CanonType. If one or | 
					
						
							|  |  |  | // more errors are encountered, one of the errors is returned. | 
					
						
							|  |  |  | func Compose(part ...interface{}) (t Tag, err error) { | 
					
						
							|  |  |  | 	return Default.Compose(part...) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Compose creates a Tag from individual parts, which may be of type Tag, Base, | 
					
						
							|  |  |  | // Script, Region, Variant, []Variant, Extension, []Extension or error. If a | 
					
						
							|  |  |  | // Base, Script or Region or slice of type Variant or Extension is passed more | 
					
						
							|  |  |  | // than once, the latter will overwrite the former. Variants and Extensions are | 
					
						
							|  |  |  | // accumulated, but if two extensions of the same type are passed, the latter | 
					
						
							|  |  |  | // will replace the former. For -u extensions, though, the key-type pairs are | 
					
						
							|  |  |  | // added, where later values overwrite older ones. A Tag overwrites all former | 
					
						
							|  |  |  | // values and typically only makes sense as the first argument. The resulting | 
					
						
							|  |  |  | // tag is returned after canonicalizing using CanonType c. If one or more errors | 
					
						
							|  |  |  | // are encountered, one of the errors is returned. | 
					
						
							|  |  |  | func (c CanonType) Compose(part ...interface{}) (t Tag, err error) { | 
					
						
							| 
									
										
										
										
											2021-09-10 14:42:14 +02:00
										 |  |  | 	defer func() { | 
					
						
							|  |  |  | 		if recover() != nil { | 
					
						
							|  |  |  | 			t = Tag{} | 
					
						
							|  |  |  | 			err = language.ErrSyntax | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	}() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-12 21:03:24 +02:00
										 |  |  | 	var b language.Builder | 
					
						
							|  |  |  | 	if err = update(&b, part...); err != nil { | 
					
						
							|  |  |  | 		return und, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.Tag, _ = canonicalize(c, b.Tag) | 
					
						
							|  |  |  | 	return makeTag(b.Make()), err | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var errInvalidArgument = errors.New("invalid Extension or Variant") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func update(b *language.Builder, part ...interface{}) (err error) { | 
					
						
							|  |  |  | 	for _, x := range part { | 
					
						
							|  |  |  | 		switch v := x.(type) { | 
					
						
							|  |  |  | 		case Tag: | 
					
						
							|  |  |  | 			b.SetTag(v.tag()) | 
					
						
							|  |  |  | 		case Base: | 
					
						
							|  |  |  | 			b.Tag.LangID = v.langID | 
					
						
							|  |  |  | 		case Script: | 
					
						
							|  |  |  | 			b.Tag.ScriptID = v.scriptID | 
					
						
							|  |  |  | 		case Region: | 
					
						
							|  |  |  | 			b.Tag.RegionID = v.regionID | 
					
						
							|  |  |  | 		case Variant: | 
					
						
							|  |  |  | 			if v.variant == "" { | 
					
						
							|  |  |  | 				err = errInvalidArgument | 
					
						
							|  |  |  | 				break | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			b.AddVariant(v.variant) | 
					
						
							|  |  |  | 		case Extension: | 
					
						
							|  |  |  | 			if v.s == "" { | 
					
						
							|  |  |  | 				err = errInvalidArgument | 
					
						
							|  |  |  | 				break | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			b.SetExt(v.s) | 
					
						
							|  |  |  | 		case []Variant: | 
					
						
							|  |  |  | 			b.ClearVariants() | 
					
						
							|  |  |  | 			for _, v := range v { | 
					
						
							|  |  |  | 				b.AddVariant(v.variant) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		case []Extension: | 
					
						
							|  |  |  | 			b.ClearExtensions() | 
					
						
							|  |  |  | 			for _, e := range v { | 
					
						
							|  |  |  | 				b.SetExt(e.s) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		// TODO: support parsing of raw strings based on morphology or just extensions? | 
					
						
							|  |  |  | 		case error: | 
					
						
							|  |  |  | 			if v != nil { | 
					
						
							|  |  |  | 				err = v | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // ParseAcceptLanguage parses the contents of an Accept-Language header as | 
					
						
							|  |  |  | // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and | 
					
						
							|  |  |  | // a list of corresponding quality weights. It is more permissive than RFC 2616 | 
					
						
							|  |  |  | // and may return non-nil slices even if the input is not valid. | 
					
						
							|  |  |  | // The Tags will be sorted by highest weight first and then by first occurrence. | 
					
						
							|  |  |  | // Tags with a weight of zero will be dropped. An error will be returned if the | 
					
						
							|  |  |  | // input could not be parsed. | 
					
						
							|  |  |  | func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) { | 
					
						
							| 
									
										
										
										
											2021-09-10 14:42:14 +02:00
										 |  |  | 	defer func() { | 
					
						
							|  |  |  | 		if recover() != nil { | 
					
						
							|  |  |  | 			tag = nil | 
					
						
							|  |  |  | 			q = nil | 
					
						
							|  |  |  | 			err = language.ErrSyntax | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	}() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-12 21:03:24 +02:00
										 |  |  | 	var entry string | 
					
						
							|  |  |  | 	for s != "" { | 
					
						
							|  |  |  | 		if entry, s = split(s, ','); entry == "" { | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		entry, weight := split(entry, ';') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Scan the language. | 
					
						
							|  |  |  | 		t, err := Parse(entry) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			id, ok := acceptFallback[entry] | 
					
						
							|  |  |  | 			if !ok { | 
					
						
							|  |  |  | 				return nil, nil, err | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			t = makeTag(language.Tag{LangID: id}) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Scan the optional weight. | 
					
						
							|  |  |  | 		w := 1.0 | 
					
						
							|  |  |  | 		if weight != "" { | 
					
						
							|  |  |  | 			weight = consume(weight, 'q') | 
					
						
							|  |  |  | 			weight = consume(weight, '=') | 
					
						
							|  |  |  | 			// consume returns the empty string when a token could not be | 
					
						
							|  |  |  | 			// consumed, resulting in an error for ParseFloat. | 
					
						
							|  |  |  | 			if w, err = strconv.ParseFloat(weight, 32); err != nil { | 
					
						
							|  |  |  | 				return nil, nil, errInvalidWeight | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			// Drop tags with a quality weight of 0. | 
					
						
							|  |  |  | 			if w <= 0 { | 
					
						
							|  |  |  | 				continue | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		tag = append(tag, t) | 
					
						
							|  |  |  | 		q = append(q, float32(w)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	sortStable(&tagSort{tag, q}) | 
					
						
							|  |  |  | 	return tag, q, nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // consume removes a leading token c from s and returns the result or the empty | 
					
						
							|  |  |  | // string if there is no such token. | 
					
						
							|  |  |  | func consume(s string, c byte) string { | 
					
						
							|  |  |  | 	if s == "" || s[0] != c { | 
					
						
							|  |  |  | 		return "" | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return strings.TrimSpace(s[1:]) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func split(s string, c byte) (head, tail string) { | 
					
						
							|  |  |  | 	if i := strings.IndexByte(s, c); i >= 0 { | 
					
						
							|  |  |  | 		return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:]) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return strings.TrimSpace(s), "" | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Add hack mapping to deal with a small number of cases that occur | 
					
						
							|  |  |  | // in Accept-Language (with reasonable frequency). | 
					
						
							|  |  |  | var acceptFallback = map[string]language.Language{ | 
					
						
							|  |  |  | 	"english": _en, | 
					
						
							|  |  |  | 	"deutsch": _de, | 
					
						
							|  |  |  | 	"italian": _it, | 
					
						
							|  |  |  | 	"french":  _fr, | 
					
						
							|  |  |  | 	"*":       _mul, // defined in the spec to match all languages. | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type tagSort struct { | 
					
						
							|  |  |  | 	tag []Tag | 
					
						
							|  |  |  | 	q   []float32 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (s *tagSort) Len() int { | 
					
						
							|  |  |  | 	return len(s.q) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (s *tagSort) Less(i, j int) bool { | 
					
						
							|  |  |  | 	return s.q[i] > s.q[j] | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (s *tagSort) Swap(i, j int) { | 
					
						
							|  |  |  | 	s.tag[i], s.tag[j] = s.tag[j], s.tag[i] | 
					
						
							|  |  |  | 	s.q[i], s.q[j] = s.q[j], s.q[i] | 
					
						
							|  |  |  | } |