mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-31 06:52:26 -05:00 
			
		
		
		
	Text/status parsing fixes (#141)
* aaaaaa * vendor minify * update + test markdown parsing
This commit is contained in:
		
					parent
					
						
							
								ff406be68f
							
						
					
				
			
			
				commit
				
					
						ce190d867c
					
				
			
		
					 56 changed files with 7390 additions and 45 deletions
				
			
		
							
								
								
									
										1
									
								
								vendor/github.com/tdewolff/parse/v2/.gitattributes
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								vendor/github.com/tdewolff/parse/v2/.gitattributes
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1 @@ | |||
| tests/*/corpus/* linguist-generated | ||||
							
								
								
									
										5
									
								
								vendor/github.com/tdewolff/parse/v2/.gitignore
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								vendor/github.com/tdewolff/parse/v2/.gitignore
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,5 @@ | |||
| tests/*/fuzz-fuzz.zip | ||||
| tests/*/crashers | ||||
| tests/*/suppressions | ||||
| tests/*/corpus/* | ||||
| !tests/*/corpus/*.* | ||||
							
								
								
									
										16
									
								
								vendor/github.com/tdewolff/parse/v2/.golangci.yml
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								vendor/github.com/tdewolff/parse/v2/.golangci.yml
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,16 @@ | |||
| linters: | ||||
|   enable: | ||||
|   - depguard | ||||
|   - dogsled | ||||
|   - gofmt | ||||
|   - goimports | ||||
|   - golint | ||||
|   - gosec | ||||
|   - govet | ||||
|   - megacheck | ||||
|   - misspell | ||||
|   - nakedret | ||||
|   - prealloc | ||||
|   - unconvert | ||||
|   - unparam | ||||
|   - wastedassign | ||||
							
								
								
									
										22
									
								
								vendor/github.com/tdewolff/parse/v2/LICENSE.md
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								vendor/github.com/tdewolff/parse/v2/LICENSE.md
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,22 @@ | |||
| Copyright (c) 2015 Taco de Wolff | ||||
| 
 | ||||
|  Permission is hereby granted, free of charge, to any person | ||||
|  obtaining a copy of this software and associated documentation | ||||
|  files (the "Software"), to deal in the Software without | ||||
|  restriction, including without limitation the rights to use, | ||||
|  copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
|  copies of the Software, and to permit persons to whom the | ||||
|  Software is furnished to do so, subject to the following | ||||
|  conditions: | ||||
| 
 | ||||
|  The above copyright notice and this permission notice shall be | ||||
|  included in all copies or substantial portions of the Software. | ||||
| 
 | ||||
|  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||||
|  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | ||||
|  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||||
|  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | ||||
|  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||||
|  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||||
|  OTHER DEALINGS IN THE SOFTWARE. | ||||
							
								
								
									
										64
									
								
								vendor/github.com/tdewolff/parse/v2/README.md
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								vendor/github.com/tdewolff/parse/v2/README.md
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,64 @@ | |||
| # Parse [](https://pkg.go.dev/github.com/tdewolff/parse/v2?tab=doc) [](https://goreportcard.com/report/github.com/tdewolff/parse) [](https://coveralls.io/github/tdewolff/parse?branch=master) [](https://www.patreon.com/tdewolff) | ||||
| 
 | ||||
| This package contains several lexers and parsers written in [Go][1]. All subpackages are built to be streaming, high performance and to be in accordance with the official (latest) specifications. | ||||
| 
 | ||||
| The lexers are implemented using `buffer.Lexer` in https://github.com/tdewolff/parse/buffer and the parsers work on top of the lexers. Some subpackages have hashes defined (using [Hasher](https://github.com/tdewolff/hasher)) that speed up common byte-slice comparisons. | ||||
| 
 | ||||
| ## Buffer | ||||
| ### Reader | ||||
| Reader is a wrapper around a `[]byte` that implements the `io.Reader` interface. It is comparable to `bytes.Reader` but has slightly different semantics (and a slightly smaller memory footprint). | ||||
| 
 | ||||
| ### Writer | ||||
| Writer is a buffer that implements the `io.Writer` interface and expands the buffer as needed. The reset functionality allows for better memory reuse. After calling `Reset`, it will overwrite the current buffer and thus reduce allocations. | ||||
| 
 | ||||
| ### Lexer | ||||
| Lexer is a read buffer specifically designed for building lexers. It keeps track of two positions: a start and end position. The start position is the beginning of the current token being parsed, the end position is being moved forward until a valid token is found. Calling `Shift` will collapse the positions to the end and return the parsed `[]byte`. | ||||
| 
 | ||||
| Moving the end position can go through `Move(int)` which also accepts negative integers. One can also use `Pos() int` to try and parse a token, and if it fails rewind with `Rewind(int)`, passing the previously saved position. | ||||
| 
 | ||||
| `Peek(int) byte` will peek forward (relative to the end position) and return the byte at that location. `PeekRune(int) (rune, int)` returns UTF-8 runes and its length at the given **byte** position. Upon an error `Peek` will return `0`, the **user must peek at every character** and not skip any, otherwise it may skip a `0` and panic on out-of-bounds indexing. | ||||
| 
 | ||||
| `Lexeme() []byte` will return the currently selected bytes, `Skip()` will collapse the selection. `Shift() []byte` is a combination of `Lexeme() []byte` and `Skip()`. | ||||
| 
 | ||||
| When the passed `io.Reader` returned an error, `Err() error` will return that error even if not at the end of the buffer. | ||||
| 
 | ||||
| ### StreamLexer | ||||
| StreamLexer behaves like Lexer but uses a buffer pool to read in chunks from `io.Reader`, retaining old buffers in memory that are still in use, and re-using old buffers otherwise. Calling `Free(n int)` frees up `n` bytes from the internal buffer(s). It holds an array of buffers to accommodate for keeping everything in-memory. Calling `ShiftLen() int` returns the number of bytes that have been shifted since the previous call to `ShiftLen`, which can be used to specify how many bytes need to be freed up from the buffer. If you don't need to keep returned byte slices around, call `Free(ShiftLen())` after every `Shift` call. | ||||
| 
 | ||||
| ## Strconv | ||||
| This package contains string conversion function much like the standard library's `strconv` package, but it is specifically tailored for the performance needs within the `minify` package. | ||||
| 
 | ||||
| For example, the floating-point to string conversion function is approximately twice as fast as the standard library, but it is not as precise. | ||||
| 
 | ||||
| ## CSS | ||||
| This package is a CSS3 lexer and parser. Both follow the specification at [CSS Syntax Module Level 3](http://www.w3.org/TR/css-syntax-3/). The lexer takes an io.Reader and converts it into tokens until the EOF. The parser returns a parse tree of the full io.Reader input stream, but the low-level `Next` function can be used for stream parsing to returns grammar units until the EOF. | ||||
| 
 | ||||
| [See README here](https://github.com/tdewolff/parse/tree/master/css). | ||||
| 
 | ||||
| ## HTML | ||||
| This package is an HTML5 lexer. It follows the specification at [The HTML syntax](http://www.w3.org/TR/html5/syntax.html). The lexer takes an io.Reader and converts it into tokens until the EOF. | ||||
| 
 | ||||
| [See README here](https://github.com/tdewolff/parse/tree/master/html). | ||||
| 
 | ||||
| ## JS | ||||
| This package is a JS lexer (ECMA-262, edition 6.0). It follows the specification at [ECMAScript Language Specification](http://www.ecma-international.org/ecma-262/6.0/). The lexer takes an io.Reader and converts it into tokens until the EOF. | ||||
| 
 | ||||
| [See README here](https://github.com/tdewolff/parse/tree/master/js). | ||||
| 
 | ||||
| ## JSON | ||||
| This package is a JSON parser (ECMA-404). It follows the specification at [JSON](http://json.org/). The parser takes an io.Reader and converts it into tokens until the EOF. | ||||
| 
 | ||||
| [See README here](https://github.com/tdewolff/parse/tree/master/json). | ||||
| 
 | ||||
| ## SVG | ||||
| This package contains common hashes for SVG1.1 tags and attributes. | ||||
| 
 | ||||
| ## XML | ||||
| This package is an XML1.0 lexer. It follows the specification at [Extensible Markup Language (XML) 1.0 (Fifth Edition)](http://www.w3.org/TR/xml/). The lexer takes an io.Reader and converts it into tokens until the EOF. | ||||
| 
 | ||||
| [See README here](https://github.com/tdewolff/parse/tree/master/xml). | ||||
| 
 | ||||
| ## License | ||||
| Released under the [MIT license](LICENSE.md). | ||||
| 
 | ||||
| [1]: http://golang.org/ "Go Language" | ||||
							
								
								
									
										12
									
								
								vendor/github.com/tdewolff/parse/v2/buffer/buffer.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								vendor/github.com/tdewolff/parse/v2/buffer/buffer.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,12 @@ | |||
// Package buffer contains buffer and wrapper types for byte slices. It is useful for writing lexers or other high-performance byte slice handling.
// The `Reader` and `Writer` types implement the `io.Reader` and `io.Writer` respectively and provide a thinner and faster interface than `bytes.Buffer`.
// The `Lexer` type is useful for building lexers because it keeps track of the start and end position of a byte selection, and shifts the bytes whenever a valid token is found.
// The `StreamLexer` does the same, but keeps a buffer pool so that it reads a limited amount at a time, allowing to parse from streaming sources.
package buffer

// defaultBufSize specifies the default initial length of internal buffers.
var defaultBufSize = 4096

// MinBuf specifies the default initial length of internal buffers.
// It is an alias of defaultBufSize kept solely to support old versions of
// parse that referenced it directly.
var MinBuf = defaultBufSize
							
								
								
									
										164
									
								
								vendor/github.com/tdewolff/parse/v2/buffer/lexer.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										164
									
								
								vendor/github.com/tdewolff/parse/v2/buffer/lexer.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,164 @@ | |||
| package buffer | ||||
| 
 | ||||
| import ( | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| ) | ||||
| 
 | ||||
var nullBuffer = []byte{0}

// Lexer is a buffered reader over an in-memory byte slice, taking an io.Reader.
// It supports peeking forward and shifting a byte selection; all data is kept
// in memory, with a NULL byte appended past the end as an end-of-input sentinel.
type Lexer struct {
	buf   []byte
	pos   int // index in buf
	start int // index in buf
	err   error

	restore func()
}

// NewLexer returns a new Lexer for a given io.Reader, and uses ioutil.ReadAll to read it into a byte slice.
// If the io.Reader implements Bytes, that is used instead.
// It will append a NULL at the end of the buffer.
func NewLexer(r io.Reader) *Lexer {
	if r == nil {
		return NewLexerBytes(nil)
	}
	// Fast path: the reader already holds its bytes in memory.
	if br, ok := r.(interface{ Bytes() []byte }); ok {
		return NewLexerBytes(br.Bytes())
	}
	data, err := ioutil.ReadAll(r)
	if err != nil {
		return &Lexer{buf: nullBuffer, err: err}
	}
	return NewLexerBytes(data)
}

// NewLexerBytes returns a new Lexer for a given byte slice, and appends NULL at the end.
// To avoid reallocation, make sure the capacity has room for one more byte.
func NewLexerBytes(b []byte) *Lexer {
	z := &Lexer{buf: b}

	length := len(b)
	switch {
	case length == 0:
		z.buf = nullBuffer
	case cap(b) > length:
		// Spare capacity: temporarily overwrite the byte just past the end
		// with NULL, remembering the old value so Restore can put it back.
		b = b[:length+1]
		saved := b[length]
		b[length] = 0

		z.buf = b
		z.restore = func() {
			b[length] = saved
		}
	default:
		z.buf = append(b, 0)
	}
	return z
}

// Restore restores the replaced byte past the end of the buffer by NULL.
func (z *Lexer) Restore() {
	if z.restore == nil {
		return
	}
	z.restore()
	z.restore = nil
}

// Err returns the error returned from io.Reader or io.EOF when the end has been reached.
func (z *Lexer) Err() error {
	return z.PeekErr(0)
}

// PeekErr returns the error at position pos. When pos is zero, this is the same as calling Err().
func (z *Lexer) PeekErr(pos int) error {
	if z.err != nil {
		return z.err
	}
	if z.pos+pos >= len(z.buf)-1 {
		// Past the last real byte; the trailing NULL sentinel is not data.
		return io.EOF
	}
	return nil
}

// Peek returns the ith byte relative to the end position.
// Peek returns 0 when an error has occurred, Err returns the error.
func (z *Lexer) Peek(pos int) byte {
	return z.buf[z.pos+pos]
}

// PeekRune returns the rune and rune length of the ith byte relative to the end position.
func (z *Lexer) PeekRune(pos int) (rune, int) {
	// from unicode/utf8; a NULL continuation byte means we ran off the end,
	// so the lead byte is returned as a single-byte rune instead of panicking.
	c := z.Peek(pos)
	switch {
	case c < 0xC0 || z.Peek(pos+1) == 0:
		return rune(c), 1
	case c < 0xE0 || z.Peek(pos+2) == 0:
		return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
	case c < 0xF0 || z.Peek(pos+3) == 0:
		return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
	}
	return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
}

// Move advances the position.
func (z *Lexer) Move(n int) {
	z.pos += n
}

// Pos returns a mark to which can be rewinded.
func (z *Lexer) Pos() int {
	return z.pos - z.start
}

// Rewind rewinds the position to the given position.
func (z *Lexer) Rewind(pos int) {
	z.pos = z.start + pos
}

// Lexeme returns the bytes of the current selection.
func (z *Lexer) Lexeme() []byte {
	// Full slice expression caps the result so a caller's append cannot
	// clobber the unread portion of buf.
	return z.buf[z.start:z.pos:z.pos]
}

// Skip collapses the position to the end of the selection.
func (z *Lexer) Skip() {
	z.start = z.pos
}

// Shift returns the bytes of the current selection and collapses the position to the end of the selection.
func (z *Lexer) Shift() []byte {
	b := z.Lexeme()
	z.Skip()
	return b
}

// Offset returns the character position in the buffer.
func (z *Lexer) Offset() int {
	return z.pos
}

// Bytes returns the underlying buffer without the trailing NULL sentinel.
func (z *Lexer) Bytes() []byte {
	end := len(z.buf) - 1
	return z.buf[:end:end]
}

// Reset resets position to the underlying buffer.
func (z *Lexer) Reset() {
	z.start = 0
	z.pos = 0
}
							
								
								
									
										44
									
								
								vendor/github.com/tdewolff/parse/v2/buffer/reader.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								vendor/github.com/tdewolff/parse/v2/buffer/reader.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,44 @@ | |||
| package buffer | ||||
| 
 | ||||
| import "io" | ||||
| 
 | ||||
// Reader implements an io.Reader over a byte slice.
type Reader struct {
	buf []byte
	pos int
}

// NewReader returns a new Reader for a given byte slice.
func NewReader(buf []byte) *Reader {
	return &Reader{buf: buf}
}

// Read copies bytes into b and reports how many bytes were copied.
// It returns io.EOF once the read position has passed the end of the slice;
// a zero-length b always yields (0, nil).
func (r *Reader) Read(b []byte) (int, error) {
	switch {
	case len(b) == 0:
		return 0, nil
	case r.pos >= len(r.buf):
		return 0, io.EOF
	}
	copied := copy(b, r.buf[r.pos:])
	r.pos += copied
	return copied, nil
}

// Bytes returns the underlying byte slice.
func (r *Reader) Bytes() []byte {
	return r.buf
}

// Reset resets the position of the read pointer to the beginning of the underlying byte slice.
func (r *Reader) Reset() {
	r.pos = 0
}

// Len returns the length of the buffer.
func (r *Reader) Len() int {
	return len(r.buf)
}
							
								
								
									
										223
									
								
								vendor/github.com/tdewolff/parse/v2/buffer/streamlexer.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										223
									
								
								vendor/github.com/tdewolff/parse/v2/buffer/streamlexer.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,223 @@ | |||
| package buffer | ||||
| 
 | ||||
| import ( | ||||
| 	"io" | ||||
| ) | ||||
| 
 | ||||
// block is one buffer tracked by bufferPool. next links blocks in the order
// they were handed out; active marks blocks whose bytes may still be
// referenced by callers and therefore cannot be reused yet.
type block struct {
	buf    []byte
	next   int // index in pool plus one
	active bool
}

// bufferPool recycles byte buffers for StreamLexer. head and tail delimit the
// linked chain of in-use buffers (both are pool indices plus one, 0 meaning
// "none"); pos counts freed bytes at the front of the tail buffer.
type bufferPool struct {
	pool []block
	head int // index in pool plus one
	tail int // index in pool plus one

	pos int // byte pos in tail
}

// swap stores oldBuf into the pool (keeping its bytes alive for any
// outstanding references) and returns an empty buffer with capacity of at
// least size, reusing an inactive pooled buffer when one is large enough.
func (z *bufferPool) swap(oldBuf []byte, size int) []byte {
	// find new buffer that can be reused
	swap := -1
	for i := 0; i < len(z.pool); i++ {
		if !z.pool[i].active && size <= cap(z.pool[i].buf) {
			swap = i
			break
		}
	}
	if swap == -1 { // no free buffer found for reuse
		if z.tail == 0 && z.pos >= len(oldBuf) && size <= cap(oldBuf) { // but we can reuse the current buffer!
			z.pos -= len(oldBuf)
			return oldBuf[:0]
		}
		// allocate new
		z.pool = append(z.pool, block{make([]byte, 0, size), 0, true})
		swap = len(z.pool) - 1
	}

	newBuf := z.pool[swap].buf

	// put current buffer into pool
	z.pool[swap] = block{oldBuf, 0, true}
	if z.head != 0 {
		z.pool[z.head-1].next = swap + 1
	}
	z.head = swap + 1
	if z.tail == 0 {
		z.tail = swap + 1
	}

	return newBuf[:0]
}

// free marks n more bytes as consumed and deactivates any pooled buffers
// that now lie entirely behind the consumed position, making them reusable.
func (z *bufferPool) free(n int) {
	z.pos += n
	// move the tail over to next buffers
	for z.tail != 0 && z.pos >= len(z.pool[z.tail-1].buf) {
		z.pos -= len(z.pool[z.tail-1].buf)
		newTail := z.pool[z.tail-1].next
		z.pool[z.tail-1].active = false // after this, any thread may pick up the inactive buffer, so it can't be used anymore
		z.tail = newTail
	}
	if z.tail == 0 {
		z.head = 0
	}
}

// StreamLexer is a buffered reader that allows peeking forward and shifting, taking an io.Reader.
// It keeps data in-memory until Free, taking a byte length, is called to move beyond the data.
type StreamLexer struct {
	r   io.Reader
	err error

	pool bufferPool

	buf       []byte
	start     int // index in buf
	pos       int // index in buf
	prevStart int // value of start at the previous ShiftLen call

	free int // bytes queued to be released from the pool on the next read
}

// NewStreamLexer returns a new StreamLexer for a given io.Reader with a 4kB estimated buffer size.
// If the io.Reader implements Bytes, that buffer is used instead.
func NewStreamLexer(r io.Reader) *StreamLexer {
	return NewStreamLexerSize(r, defaultBufSize)
}

// NewStreamLexerSize returns a new StreamLexer for a given io.Reader and estimated required buffer size.
// If the io.Reader implements Bytes, that buffer is used instead.
func NewStreamLexerSize(r io.Reader, size int) *StreamLexer {
	// if reader has the bytes in memory already, use that instead
	if buffer, ok := r.(interface {
		Bytes() []byte
	}); ok {
		return &StreamLexer{
			err: io.EOF,
			buf: buffer.Bytes(),
		}
	}
	return &StreamLexer{
		r:   r,
		buf: make([]byte, 0, size),
	}
}

// read refills/grows the buffer so that index pos becomes readable and
// returns the byte there, or 0 when the reader errored or is exhausted.
// Note that it rebases pos, z.pos and z.start against the new buffer.
func (z *StreamLexer) read(pos int) byte {
	if z.err != nil {
		return 0
	}

	// free unused bytes
	z.pool.free(z.free)
	z.free = 0

	// get new buffer
	c := cap(z.buf)
	p := pos - z.start + 1
	if 2*p > c { // if the token is larger than half the buffer, increase buffer size
		c = 2*c + p
	}
	d := len(z.buf) - z.start
	buf := z.pool.swap(z.buf[:z.start], c)
	copy(buf[:d], z.buf[z.start:]) // copy the left-overs (unfinished token) from the old buffer

	// read in new data for the rest of the buffer
	var n int
	for pos-z.start >= d && z.err == nil {
		n, z.err = z.r.Read(buf[d:cap(buf)])
		d += n
	}
	pos -= z.start
	z.pos -= z.start
	z.start, z.buf = 0, buf[:d]
	if pos >= d {
		return 0
	}
	return z.buf[pos]
}

// Err returns the error returned from io.Reader. It may still return valid bytes for a while though.
func (z *StreamLexer) Err() error {
	if z.err == io.EOF && z.pos < len(z.buf) {
		return nil
	}
	return z.err
}

// Free frees up bytes of length n from previously shifted tokens.
// Each call to Shift should at one point be followed by a call to Free with a length returned by ShiftLen.
func (z *StreamLexer) Free(n int) {
	z.free += n
}

// Peek returns the ith byte relative to the end position and possibly does an allocation.
// Peek returns zero when an error has occurred, Err returns the error.
// TODO: inline function
func (z *StreamLexer) Peek(pos int) byte {
	pos += z.pos
	if uint(pos) < uint(len(z.buf)) { // uint for BCE
		return z.buf[pos]
	}
	return z.read(pos)
}

// PeekRune returns the rune and rune length of the ith byte relative to the end position.
func (z *StreamLexer) PeekRune(pos int) (rune, int) {
	// from unicode/utf8
	c := z.Peek(pos)
	if c < 0xC0 {
		return rune(c), 1
	} else if c < 0xE0 {
		return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
	} else if c < 0xF0 {
		return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
	}
	return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
}

// Move advances the position.
func (z *StreamLexer) Move(n int) {
	z.pos += n
}

// Pos returns a mark to which can be rewinded.
func (z *StreamLexer) Pos() int {
	return z.pos - z.start
}

// Rewind rewinds the position to the given position.
func (z *StreamLexer) Rewind(pos int) {
	z.pos = z.start + pos
}

// Lexeme returns the bytes of the current selection.
func (z *StreamLexer) Lexeme() []byte {
	return z.buf[z.start:z.pos]
}

// Skip collapses the position to the end of the selection.
func (z *StreamLexer) Skip() {
	z.start = z.pos
}

// Shift returns the bytes of the current selection and collapses the position to the end of the selection.
// It also returns the number of bytes we moved since the last call to Shift. This can be used in calls to Free.
func (z *StreamLexer) Shift() []byte {
	if z.pos > len(z.buf) { // make sure we peeked at least as much as we shift
		z.read(z.pos - 1)
	}
	b := z.buf[z.start:z.pos]
	z.start = z.pos
	return b
}

// ShiftLen returns the number of bytes moved since the last call to ShiftLen. This can be used in calls to Free because it takes into account multiple Shifts or Skips.
func (z *StreamLexer) ShiftLen() int {
	n := z.start - z.prevStart
	z.prevStart = z.start
	return n
}
							
								
								
									
										41
									
								
								vendor/github.com/tdewolff/parse/v2/buffer/writer.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								vendor/github.com/tdewolff/parse/v2/buffer/writer.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,41 @@ | |||
| package buffer | ||||
| 
 | ||||
// Writer implements an io.Writer over a byte slice.
type Writer struct {
	buf []byte
}

// NewWriter returns a new Writer for a given byte slice.
func NewWriter(buf []byte) *Writer {
	return &Writer{buf: buf}
}

// Write appends b to the buffer, growing the backing array when it would
// overflow, and returns the number of bytes written with a nil error.
func (w *Writer) Write(b []byte) (int, error) {
	n := len(b)
	end := len(w.buf)
	if end+n > cap(w.buf) {
		// Grow geometrically so repeated writes stay amortized O(1).
		grown := make([]byte, end, 2*cap(w.buf)+n)
		copy(grown, w.buf)
		w.buf = grown
	}
	w.buf = w.buf[:end+n]
	return copy(w.buf[end:], b), nil
}

// Len returns the length of the underlying byte slice.
func (w *Writer) Len() int {
	return len(w.buf)
}

// Bytes returns the underlying byte slice.
func (w *Writer) Bytes() []byte {
	return w.buf
}

// Reset empties and reuses the current buffer. Subsequent writes will overwrite the buffer, so any reference to the underlying slice is invalidated after this call.
func (w *Writer) Reset() {
	w.buf = w.buf[:0]
}
							
								
								
									
										237
									
								
								vendor/github.com/tdewolff/parse/v2/common.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										237
									
								
								vendor/github.com/tdewolff/parse/v2/common.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,237 @@ | |||
| // Package parse contains a collection of parsers for various formats in its subpackages. | ||||
| package parse | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"encoding/base64" | ||||
| 	"errors" | ||||
| ) | ||||
| 
 | ||||
// Byte-slice literals allocated once at package init so hot comparisons do
// not re-allocate; by their names they back data-URI parsing (presumably used
// by the DataURI function referenced below — confirm against the full file).
var (
	dataSchemeBytes = []byte("data:")
	base64Bytes     = []byte("base64")
	textMimeBytes   = []byte("text/plain")
)

// ErrBadDataURI is returned by DataURI when the byte slice does not start with 'data:' or is too short.
var ErrBadDataURI = errors.New("not a data URI")
| 
 | ||||
// Number returns the number of leading bytes of b that parse as a number of
// the regex format (+|-)?([0-9]+(\.[0-9]+)?|\.[0-9]+)((e|E)(+|-)?[0-9]+)?.
// It returns 0 when b does not start with a number. A trailing '.' or 'e'
// that is not followed by digits is left for the next token.
func Number(b []byte) int {
	if len(b) == 0 {
		return 0
	}
	isDigit := func(c byte) bool { return '0' <= c && c <= '9' }

	i := 0
	// Optional sign; a bare sign is not a number.
	if b[i] == '+' || b[i] == '-' {
		i++
		if i == len(b) {
			return 0
		}
	}

	// Integer part.
	hasInt := isDigit(b[i])
	for i < len(b) && isDigit(b[i]) {
		i++
	}

	// Fractional part; requires at least one digit after the dot.
	if i < len(b) && b[i] == '.' {
		if i+1 < len(b) && isDigit(b[i+1]) {
			i += 2
			for i < len(b) && isDigit(b[i]) {
				i++
			}
		} else if hasInt {
			return i // the '.' may belong to the next token
		} else {
			return 0
		}
	} else if !hasInt {
		return 0
	}

	// Exponent; backtrack when 'e' is not followed by (signed) digits.
	mark := i
	if i < len(b) && (b[i] == 'e' || b[i] == 'E') {
		i++
		if i < len(b) && (b[i] == '+' || b[i] == '-') {
			i++
		}
		if i == len(b) || !isDigit(b[i]) {
			return mark // the 'e' may belong to the next token
		}
		for i < len(b) && isDigit(b[i]) {
			i++
		}
	}
	return i
}
| 
 | ||||
| // Dimension parses a byte-slice and returns the length of the number and its unit. | ||||
| func Dimension(b []byte) (int, int) { | ||||
| 	num := Number(b) | ||||
| 	if num == 0 || num == len(b) { | ||||
| 		return num, 0 | ||||
| 	} else if b[num] == '%' { | ||||
| 		return num, 1 | ||||
| 	} else if b[num] >= 'a' && b[num] <= 'z' || b[num] >= 'A' && b[num] <= 'Z' { | ||||
| 		i := num + 1 | ||||
| 		for i < len(b) && (b[i] >= 'a' && b[i] <= 'z' || b[i] >= 'A' && b[i] <= 'Z') { | ||||
| 			i++ | ||||
| 		} | ||||
| 		return num, i - num | ||||
| 	} | ||||
| 	return num, 0 | ||||
| } | ||||
| 
 | ||||
// Mediatype parses a given mediatype and splits the mimetype from the parameters.
// It works similar to mime.ParseMediaType but is faster.
// The returned parameter map is nil when no parameters follow the mimetype.
func Mediatype(b []byte) ([]byte, map[string]string) {
	// Strip leading spaces.
	i := 0
	for i < len(b) && b[i] == ' ' {
		i++
	}
	b = b[i:]
	n := len(b)
	mimetype := b
	var params map[string]string
	for i := 3; i < n; i++ { // mimetype is at least three characters long
		if b[i] == ';' || b[i] == ' ' {
			mimetype = b[:i]
			if b[i] == ' ' {
				i++ // space
				for i < n && b[i] == ' ' {
					i++
				}
				if n <= i || b[i] != ';' {
					// Trailing spaces without a parameter list.
					break
				}
			}
			params = map[string]string{}
			// Convert once to string so that map keys/values do not alias b.
			s := string(b)
		PARAM:
			i++ // semicolon
			for i < n && s[i] == ' ' {
				i++
			}
			start := i
			// Parameter name runs until '=', ';', or space.
			for i < n && s[i] != '=' && s[i] != ';' && s[i] != ' ' {
				i++
			}
			key := s[start:i]
			for i < n && s[i] == ' ' {
				i++
			}
			if i < n && s[i] == '=' {
				i++
				for i < n && s[i] == ' ' {
					i++
				}
				start = i
				for i < n && s[i] != ';' && s[i] != ' ' {
					i++
				}
			} else {
				// No '=': parameter without a value maps to the empty string.
				start = i
			}
			params[key] = s[start:i]
			for i < n && s[i] == ' ' {
				i++
			}
			if i < n && s[i] == ';' {
				goto PARAM // more parameters follow
			}
			break
		}
	}
	return mimetype, params
}
| 
 | ||||
// DataURI parses the given data URI and returns the mediatype, the decoded
// data, and an error. Base64-encoded payloads are decoded; otherwise the
// payload is URL-decoded. A missing mediatype defaults to text/plain.
// ErrBadDataURI is returned when the input does not start with "data:" or
// has no comma separating the header from the payload.
func DataURI(dataURI []byte) ([]byte, []byte, error) {
	if len(dataURI) > 5 && bytes.Equal(dataURI[:5], dataSchemeBytes) {
		dataURI = dataURI[5:]
		inBase64 := false
		var mediatype []byte
		i := 0
		// Walk the header, splitting on '=', ';' and ',' to rebuild the
		// mediatype without whitespace and to detect a "base64" marker.
		for j := 0; j < len(dataURI); j++ {
			c := dataURI[j]
			if c == '=' || c == ';' || c == ',' {
				if c != '=' && bytes.Equal(TrimWhitespace(dataURI[i:j]), base64Bytes) {
					// Drop the ';' that preceded "base64" from the mediatype.
					if len(mediatype) > 0 {
						mediatype = mediatype[:len(mediatype)-1]
					}
					inBase64 = true
					i = j
				} else if c != ',' {
					mediatype = append(append(mediatype, TrimWhitespace(dataURI[i:j])...), c)
					i = j + 1
				} else {
					mediatype = append(mediatype, TrimWhitespace(dataURI[i:j])...)
				}
				if c == ',' {
					// Header finished; everything after the comma is payload.
					if len(mediatype) == 0 || mediatype[0] == ';' {
						mediatype = textMimeBytes
					}
					data := dataURI[j+1:]
					if inBase64 {
						decoded := make([]byte, base64.StdEncoding.DecodedLen(len(data)))
						n, err := base64.StdEncoding.Decode(decoded, data)
						if err != nil {
							return nil, nil, err
						}
						data = decoded[:n]
					} else {
						data = DecodeURL(data)
					}
					return mediatype, data, nil
				}
			}
		}
	}
	return nil, nil, ErrBadDataURI
}
| 
 | ||||
// QuoteEntity parses the given byte slice and returns the quote that got
// matched (' or ") and its entity length. It recognizes &quot;/&apos;, the
// decimal references &#34;/&#39;, and the hexadecimal references
// &#x22;/&#x27;, each with any number of leading zeros in the numeric forms.
// It returns (0, 0) when b does not start with such an entity.
// TODO: deprecated
func QuoteEntity(b []byte) (quote byte, n int) {
	if len(b) < 5 || b[0] != '&' {
		return 0, 0
	}
	if b[1] == '#' {
		// Numeric character reference, decimal or hexadecimal.
		i := 2
		hex := b[2] == 'x'
		if hex {
			i = 3
		}
		for i < len(b) && b[i] == '0' { // skip leading zeros
			i++
		}
		// Need two significant digits plus the terminating ';'.
		if i+2 >= len(b) || b[i+2] != ';' {
			return 0, 0
		}
		if hex {
			if b[i] == '2' && b[i+1] == '2' {
				return '"', i + 3 // &#x22;
			}
			if b[i] == '2' && b[i+1] == '7' {
				return '\'', i + 3 // &#x27;
			}
		} else {
			if b[i] == '3' && b[i+1] == '4' {
				return '"', i + 3 // &#34;
			}
			if b[i] == '3' && b[i+1] == '9' {
				return '\'', i + 3 // &#39;
			}
		}
	} else if len(b) >= 6 && b[5] == ';' {
		// Named character reference.
		switch string(b[1:5]) {
		case "quot":
			return '"', 6
		case "apos":
			return '\'', 6
		}
	}
	return 0, 0
}
							
								
								
									
										47
									
								
								vendor/github.com/tdewolff/parse/v2/error.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								vendor/github.com/tdewolff/parse/v2/error.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,47 @@ | |||
| package parse | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| ) | ||||
| 
 | ||||
// Error is a parsing error returned by parser. It contains a message and an offset at which the error occurred.
type Error struct {
	Message string // description of what went wrong
	Line    int    // line number as computed by Position
	Column  int    // column number as computed by Position
	Context string // the entire source line at which the error occurred
}
| 
 | ||||
| // NewError creates a new error | ||||
| func NewError(r io.Reader, offset int, message string, a ...interface{}) *Error { | ||||
| 	line, column, context := Position(r, offset) | ||||
| 	if 0 < len(a) { | ||||
| 		message = fmt.Sprintf(message, a...) | ||||
| 	} | ||||
| 	return &Error{ | ||||
| 		Message: message, | ||||
| 		Line:    line, | ||||
| 		Column:  column, | ||||
| 		Context: context, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // NewErrorLexer creates a new error from an active Lexer. | ||||
| func NewErrorLexer(l *Input, message string, a ...interface{}) *Error { | ||||
| 	r := bytes.NewBuffer(l.Bytes()) | ||||
| 	offset := l.Offset() | ||||
| 	return NewError(r, offset, message, a...) | ||||
| } | ||||
| 
 | ||||
// Position returns the line, column, and context of the error.
// Context is the entire line at which the error occurred.
func (e *Error) Position() (int, int, string) {
	return e.Line, e.Column, e.Context
}
| 
 | ||||
// Error returns the error string, containing the context and line + column number.
// It satisfies the built-in error interface.
func (e *Error) Error() string {
	return fmt.Sprintf("%s on line %d and column %d\n%s", e.Message, e.Line, e.Column, e.Context)
}
							
								
								
									
										5
									
								
								vendor/github.com/tdewolff/parse/v2/go.mod
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								vendor/github.com/tdewolff/parse/v2/go.mod
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,5 @@ | |||
| module github.com/tdewolff/parse/v2 | ||||
| 
 | ||||
| go 1.13 | ||||
| 
 | ||||
| require github.com/tdewolff/test v1.0.6 | ||||
							
								
								
									
										2
									
								
								vendor/github.com/tdewolff/parse/v2/go.sum
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								vendor/github.com/tdewolff/parse/v2/go.sum
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,2 @@ | |||
| github.com/tdewolff/test v1.0.6 h1:76mzYJQ83Op284kMT+63iCNCI7NEERsIN8dLM+RiKr4= | ||||
| github.com/tdewolff/test v1.0.6/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= | ||||
							
								
								
									
										98
									
								
								vendor/github.com/tdewolff/parse/v2/html/README.md
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								vendor/github.com/tdewolff/parse/v2/html/README.md
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,98 @@ | |||
| # HTML [](https://pkg.go.dev/github.com/tdewolff/parse/v2/html?tab=doc) | ||||
| 
 | ||||
| This package is an HTML5 lexer written in [Go][1]. It follows the specification at [The HTML syntax](http://www.w3.org/TR/html5/syntax.html). The lexer takes an io.Reader and converts it into tokens until the EOF. | ||||
| 
 | ||||
| ## Installation | ||||
| Run the following command | ||||
| 
 | ||||
| 	go get -u github.com/tdewolff/parse/v2/html | ||||
| 
 | ||||
| or add the following import and run project with `go get` | ||||
| 
 | ||||
| 	import "github.com/tdewolff/parse/v2/html" | ||||
| 
 | ||||
| ## Lexer | ||||
| ### Usage | ||||
| The following initializes a new Lexer with io.Reader `r`: | ||||
| ``` go | ||||
| l := html.NewLexer(parse.NewInput(r)) | ||||
| ``` | ||||
| 
 | ||||
To tokenize until EOF or an error occurs, use:
| ``` go | ||||
| for { | ||||
| 	tt, data := l.Next() | ||||
| 	switch tt { | ||||
| 	case html.ErrorToken: | ||||
| 		// error or EOF set in l.Err() | ||||
| 		return | ||||
| 	case html.StartTagToken: | ||||
| 		// ... | ||||
| 		for { | ||||
| 			ttAttr, dataAttr := l.Next() | ||||
| 			if ttAttr != html.AttributeToken { | ||||
| 				break | ||||
| 			} | ||||
| 			// ... | ||||
| 		} | ||||
| 	// ... | ||||
| 	} | ||||
| } | ||||
| ``` | ||||
| 
 | ||||
| All tokens: | ||||
| ``` go | ||||
| ErrorToken TokenType = iota // extra token when errors occur | ||||
| CommentToken | ||||
| DoctypeToken | ||||
| StartTagToken | ||||
| StartTagCloseToken | ||||
| StartTagVoidToken | ||||
| EndTagToken | ||||
| AttributeToken | ||||
| TextToken | ||||
| ``` | ||||
| 
 | ||||
| ### Examples | ||||
| ``` go | ||||
| package main | ||||
| 
 | ||||
import (
	"fmt"
	"io"
	"os"

	"github.com/tdewolff/parse/v2"
	"github.com/tdewolff/parse/v2/html"
)
| 
 | ||||
| // Tokenize HTML from stdin. | ||||
| func main() { | ||||
| 	l := html.NewLexer(parse.NewInput(os.Stdin)) | ||||
| 	for { | ||||
| 		tt, data := l.Next() | ||||
| 		switch tt { | ||||
| 		case html.ErrorToken: | ||||
| 			if l.Err() != io.EOF { | ||||
| 				fmt.Println("Error on line", l.Line(), ":", l.Err()) | ||||
| 			} | ||||
| 			return | ||||
| 		case html.StartTagToken: | ||||
| 			fmt.Println("Tag", string(data)) | ||||
| 			for { | ||||
| 				ttAttr, dataAttr := l.Next() | ||||
| 				if ttAttr != html.AttributeToken { | ||||
| 					break | ||||
| 				} | ||||
| 
 | ||||
| 				key := dataAttr | ||||
| 				val := l.AttrVal() | ||||
| 				fmt.Println("Attribute", string(key), "=", string(val)) | ||||
| 			} | ||||
| 		// ... | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| ``` | ||||
| 
 | ||||
| ## License | ||||
| Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md). | ||||
| 
 | ||||
| [1]: http://golang.org/ "Go Language" | ||||
							
								
								
									
										81
									
								
								vendor/github.com/tdewolff/parse/v2/html/hash.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								vendor/github.com/tdewolff/parse/v2/html/hash.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,81 @@ | |||
| package html | ||||
| 
 | ||||
| // generated by hasher -type=Hash -file=hash.go; DO NOT EDIT, except for adding more constants to the list and rerun go generate | ||||
| 
 | ||||
| // uses github.com/tdewolff/hasher | ||||
| //go:generate hasher -type=Hash -file=hash.go | ||||
| 
 | ||||
// Hash defines perfect hashes for a predefined list of strings.
// Each value encodes a span into _Hash_text: the upper bits hold the byte
// offset and the low 8 bits hold the length (value == offset<<8 | length),
// as decoded by Hash.String.
type Hash uint32

// Unique hash definitions to be used instead of strings
const (
	Iframe    Hash = 0x6    // iframe
	Math      Hash = 0x604  // math
	Plaintext Hash = 0x1e09 // plaintext
	Script    Hash = 0xa06  // script
	Style     Hash = 0x1405 // style
	Svg       Hash = 0x1903 // svg
	Textarea  Hash = 0x2308 // textarea
	Title     Hash = 0xf05  // title
	Xmp       Hash = 0x1c03 // xmp
)
| 
 | ||||
// String returns the hash' name by decoding the offset (upper bits) and
// length (low 8 bits) into _Hash_text; it returns "" for an out-of-range value.
func (i Hash) String() string {
	start := uint32(i >> 8)
	n := uint32(i & 0xff)
	if start+n > uint32(len(_Hash_text)) {
		return ""
	}
	return _Hash_text[start : start+n]
}
| 
 | ||||
// ToHash returns the hash whose name is s. It returns zero if there is no
// such hash. It is case sensitive.
// It probes the perfect-hash table twice (with the low and high halves of
// an FNV-style hash) and verifies the candidate byte-by-byte.
func ToHash(s []byte) Hash {
	if len(s) == 0 || len(s) > _Hash_maxLen {
		return 0
	}
	// FNV-1a style hash over s.
	h := uint32(_Hash_hash0)
	for i := 0; i < len(s); i++ {
		h ^= uint32(s[i])
		h *= 16777619
	}
	// First probe: low half of the hash.
	if i := _Hash_table[h&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) {
		t := _Hash_text[i>>8 : i>>8+i&0xff]
		for i := 0; i < len(s); i++ {
			if t[i] != s[i] {
				goto NEXT
			}
		}
		return i
	}
NEXT:
	// Second probe: high half of the hash.
	if i := _Hash_table[(h>>16)&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) {
		t := _Hash_text[i>>8 : i>>8+i&0xff]
		for i := 0; i < len(s); i++ {
			if t[i] != s[i] {
				return 0
			}
		}
		return i
	}
	return 0
}
| 
 | ||||
// Generated perfect-hash data: seed, maximum key length, the packed string
// table, and the 16-slot probe table indexed by hash bits.
const _Hash_hash0 = 0x9acb0442
const _Hash_maxLen = 9
const _Hash_text = "iframemathscriptitlestylesvgxmplaintextarea"

var _Hash_table = [1 << 4]Hash{
	0x0: 0x2308, // textarea
	0x2: 0x6,    // iframe
	0x4: 0xf05,  // title
	0x5: 0x1e09, // plaintext
	0x7: 0x1405, // style
	0x8: 0x604,  // math
	0x9: 0xa06,  // script
	0xa: 0x1903, // svg
	0xb: 0x1c03, // xmp
}
							
								
								
									
										493
									
								
								vendor/github.com/tdewolff/parse/v2/html/lex.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										493
									
								
								vendor/github.com/tdewolff/parse/v2/html/lex.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,493 @@ | |||
| // Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html. | ||||
| package html | ||||
| 
 | ||||
| import ( | ||||
| 	"strconv" | ||||
| 
 | ||||
| 	"github.com/tdewolff/parse/v2" | ||||
| ) | ||||
| 
 | ||||
// TokenType determines the type of token, eg. a number or a semicolon.
type TokenType uint32

// TokenType values.
const (
	ErrorToken TokenType = iota // extra token when errors occur
	CommentToken
	DoctypeToken
	StartTagToken
	StartTagCloseToken
	StartTagVoidToken
	EndTagToken
	AttributeToken
	TextToken
	SvgToken
	MathToken
)

// String returns the string representation of a TokenType.
func (tt TokenType) String() string {
	// Indexed by token value; order mirrors the const block above.
	names := []string{
		"Error", "Comment", "Doctype", "StartTag", "StartTagClose",
		"StartTagVoid", "EndTag", "Attribute", "Text", "Svg", "Math",
	}
	if int(tt) < len(names) {
		return names[tt]
	}
	return "Invalid(" + strconv.Itoa(int(tt)) + ")"
}
| 
 | ||||
| //////////////////////////////////////////////////////////////// | ||||
| 
 | ||||
// Lexer is the state for the lexer.
type Lexer struct {
	r   *parse.Input // buffered input the lexer reads from
	err error        // sticky error set by shiftXML on bad input

	rawTag Hash // non-zero while inside a raw-text element (script, style, ...)
	inTag  bool // true between a start-tag name and its closing '>'

	text    []byte // textual content of the most recent token
	attrVal []byte // raw value (including quotes) of the most recent attribute
}
| 
 | ||||
// NewLexer returns a new Lexer for a given io.Reader wrapped in a parse.Input.
func NewLexer(r *parse.Input) *Lexer {
	return &Lexer{
		r: r,
	}
}
| 
 | ||||
// Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned.
// A lexer-level error (from shiftXML) takes precedence over the input's error.
func (l *Lexer) Err() error {
	if l.err != nil {
		return l.err
	}
	return l.r.Err()
}
| 
 | ||||
// Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters.
func (l *Lexer) Text() []byte {
	return l.text
}
| 
 | ||||
// AttrVal returns the attribute value when an AttributeToken was returned from Next.
// The value includes surrounding quotes when present; it is nil for valueless attributes.
func (l *Lexer) AttrVal() []byte {
	return l.attrVal
}
| 
 | ||||
// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
func (l *Lexer) Next() (TokenType, []byte) {
	l.text = nil
	var c byte
	if l.inTag {
		// Between a tag name and its '>': emit attribute tokens until the
		// tag is closed with '>' or '/>'.
		l.attrVal = nil
		for { // before attribute name state
			if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
				l.r.Move(1)
				continue
			}
			break
		}
		if c == 0 && l.r.Err() != nil {
			return ErrorToken, nil
		} else if c != '>' && (c != '/' || l.r.Peek(1) != '>') {
			return AttributeToken, l.shiftAttribute()
		}
		// '>' or '/>' terminates the start tag.
		l.r.Skip()
		l.inTag = false
		if c == '/' {
			l.r.Move(2)
			return StartTagVoidToken, l.r.Shift()
		}
		l.r.Move(1)
		return StartTagCloseToken, l.r.Shift()
	}

	if l.rawTag != 0 {
		// Inside a raw-text element (title, script, ...): its whole content
		// is emitted as one text token.
		if rawText := l.shiftRawText(); len(rawText) > 0 {
			l.rawTag = 0
			return TextToken, rawText
		}
		l.rawTag = 0
	}

	for {
		c = l.r.Peek(0)
		if c == '<' {
			c = l.r.Peek(1)
			// '</' only starts an end tag when not immediately followed by
			// '>' or EOF.
			isEndTag := c == '/' && l.r.Peek(2) != '>' && (l.r.Peek(2) != 0 || l.r.PeekErr(2) == nil)
			if l.r.Pos() > 0 {
				if isEndTag || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' {
					// return currently buffered texttoken so that we can return tag next iteration
					l.text = l.r.Shift()
					return TextToken, l.text
				}
			} else if isEndTag {
				l.r.Move(2)
				// only endtags that are not followed by > or EOF arrive here
				if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
					return CommentToken, l.shiftBogusComment()
				}
				return EndTagToken, l.shiftEndTag()
			} else if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
				l.r.Move(1)
				l.inTag = true
				return l.shiftStartTag()
			} else if c == '!' {
				l.r.Move(2)
				return l.readMarkup()
			} else if c == '?' {
				l.r.Move(1)
				return CommentToken, l.shiftBogusComment()
			}
		} else if c == 0 && l.r.Err() != nil {
			// EOF or read error: flush any buffered text before reporting it.
			if l.r.Pos() > 0 {
				l.text = l.r.Shift()
				return TextToken, l.text
			}
			return ErrorToken, nil
		}
		l.r.Move(1)
	}
}
| 
 | ||||
| //////////////////////////////////////////////////////////////// | ||||
| 
 | ||||
| // The following functions follow the specifications at https://html.spec.whatwg.org/multipage/parsing.html | ||||
| 
 | ||||
// shiftRawText consumes the content of the raw-text element recorded in
// l.rawTag up to (but not including) its matching end tag, and returns the
// consumed bytes. Plaintext runs to EOF; script additionally honors
// <!-- ... --> sections that may contain nested <script> tags.
func (l *Lexer) shiftRawText() []byte {
	if l.rawTag == Plaintext {
		// Plaintext has no end tag: consume everything until EOF.
		for {
			if l.r.Peek(0) == 0 && l.r.Err() != nil {
				return l.r.Shift()
			}
			l.r.Move(1)
		}
	} else { // RCDATA, RAWTEXT and SCRIPT
		for {
			c := l.r.Peek(0)
			if c == '<' {
				if l.r.Peek(1) == '/' {
					// Candidate end tag: read the tag name and compare it
					// (case-insensitively) against the raw element's name.
					mark := l.r.Pos()
					l.r.Move(2)
					for {
						if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
							break
						}
						l.r.Move(1)
					}
					if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == l.rawTag { // copy so that ToLower doesn't change the case of the underlying slice
						l.r.Rewind(mark)
						return l.r.Shift()
					}
				} else if l.rawTag == Script && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' {
					// <!-- inside script: scan to --> while tracking nested
					// <script>...</script> pairs so a </script> inside the
					// comment does not prematurely end the element.
					l.r.Move(4)
					inScript := false
					for {
						c := l.r.Peek(0)
						if c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' {
							l.r.Move(3)
							break
						} else if c == '<' {
							isEnd := l.r.Peek(1) == '/'
							if isEnd {
								l.r.Move(2)
							} else {
								l.r.Move(1)
							}
							mark := l.r.Pos()
							for {
								if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
									break
								}
								l.r.Move(1)
							}
							if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark:]))); h == Script { // copy so that ToLower doesn't change the case of the underlying slice
								if !isEnd {
									inScript = true
								} else {
									if !inScript {
										// Unbalanced </script>: end the element here.
										l.r.Rewind(mark - 2)
										return l.r.Shift()
									}
									inScript = false
								}
							}
						} else if c == 0 && l.r.Err() != nil {
							return l.r.Shift()
						} else {
							l.r.Move(1)
						}
					}
				} else {
					l.r.Move(1)
				}
			} else if c == 0 && l.r.Err() != nil {
				return l.r.Shift()
			} else {
				l.r.Move(1)
			}
		}
	}
}
| 
 | ||||
// readMarkup lexes a markup declaration after '<!' has been consumed:
// comments (<!--...-->), CDATA sections (emitted as text), and doctypes.
// Anything else is treated as a bogus comment.
func (l *Lexer) readMarkup() (TokenType, []byte) {
	if l.at('-', '-') {
		// Comment: runs to '-->' or '--!>' or EOF; l.text excludes '<!--'.
		l.r.Move(2)
		for {
			if l.r.Peek(0) == 0 && l.r.Err() != nil {
				l.text = l.r.Lexeme()[4:]
				return CommentToken, l.r.Shift()
			} else if l.at('-', '-', '>') {
				l.text = l.r.Lexeme()[4:]
				l.r.Move(3)
				return CommentToken, l.r.Shift()
			} else if l.at('-', '-', '!', '>') {
				l.text = l.r.Lexeme()[4:]
				l.r.Move(4)
				return CommentToken, l.r.Shift()
			}
			l.r.Move(1)
		}
	} else if l.at('[', 'C', 'D', 'A', 'T', 'A', '[') {
		// CDATA section: runs to ']]>' or EOF; l.text excludes '<![CDATA['.
		l.r.Move(7)
		for {
			if l.r.Peek(0) == 0 && l.r.Err() != nil {
				l.text = l.r.Lexeme()[9:]
				return TextToken, l.r.Shift()
			} else if l.at(']', ']', '>') {
				l.text = l.r.Lexeme()[9:]
				l.r.Move(3)
				return TextToken, l.r.Shift()
			}
			l.r.Move(1)
		}
	} else {
		if l.atCaseInsensitive('d', 'o', 'c', 't', 'y', 'p', 'e') {
			// Doctype: runs to '>' or EOF.
			l.r.Move(7)
			if l.r.Peek(0) == ' ' {
				l.r.Move(1)
			}
			for {
				if c := l.r.Peek(0); c == '>' || c == 0 && l.r.Err() != nil {
					l.text = l.r.Lexeme()[9:]
					if c == '>' {
						l.r.Move(1)
					}
					return DoctypeToken, l.r.Shift()
				}
				l.r.Move(1)
			}
		}
	}
	return CommentToken, l.shiftBogusComment()
}
| 
 | ||||
// shiftBogusComment consumes input up to and including the next '>' (or to
// EOF) and returns it; l.text is set to the content after the two-byte
// opener ('<?' or '</').
func (l *Lexer) shiftBogusComment() []byte {
	for {
		c := l.r.Peek(0)
		if c == '>' {
			l.text = l.r.Lexeme()[2:]
			l.r.Move(1)
			return l.r.Shift()
		} else if c == 0 && l.r.Err() != nil {
			l.text = l.r.Lexeme()[2:]
			return l.r.Shift()
		}
		l.r.Move(1)
	}
}
| 
 | ||||
// shiftStartTag consumes the rest of a start-tag name ('<' and the first
// letter were already consumed) and returns the token. Raw-text elements
// set l.rawTag for the next call; svg/math content is consumed wholesale
// as embedded XML.
func (l *Lexer) shiftStartTag() (TokenType, []byte) {
	for {
		if c := l.r.Peek(0); c == ' ' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
			break
		}
		l.r.Move(1)
	}
	l.text = parse.ToLower(l.r.Lexeme()[1:]) // tag name without the '<'
	if h := ToHash(l.text); h == Textarea || h == Title || h == Style || h == Xmp || h == Iframe || h == Script || h == Plaintext || h == Svg || h == Math {
		if h == Svg || h == Math {
			// Embedded XML: return everything up to and including the
			// matching close tag as one token.
			data := l.shiftXML(h)
			if l.err != nil {
				return ErrorToken, nil
			}

			l.inTag = false
			if h == Svg {
				return SvgToken, data
			}
			return MathToken, data
		}
		l.rawTag = h // raw-text content is handled by the next Next call
	}
	return StartTagToken, l.r.Shift()
}
| 
 | ||||
// shiftAttribute consumes one attribute (name plus optional value) and
// returns its raw bytes. The lowercased name is stored in l.text and the
// value, including any surrounding quotes, in l.attrVal (nil when absent).
func (l *Lexer) shiftAttribute() []byte {
	nameStart := l.r.Pos()
	var c byte
	for { // attribute name state
		if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
			break
		}
		l.r.Move(1)
	}
	nameEnd := l.r.Pos()
	for { // after attribute name state
		if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
			l.r.Move(1)
			continue
		}
		break
	}
	if c == '=' {
		l.r.Move(1)
		for { // before attribute value state
			if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
				l.r.Move(1)
				continue
			}
			break
		}
		attrPos := l.r.Pos()
		delim := c
		if delim == '"' || delim == '\'' { // attribute value single- and double-quoted state
			l.r.Move(1)
			for {
				c := l.r.Peek(0)
				if c == delim {
					l.r.Move(1)
					break
				} else if c == 0 && l.r.Err() != nil {
					break
				}
				l.r.Move(1)
			}
		} else { // attribute value unquoted state
			for {
				if c := l.r.Peek(0); c == ' ' || c == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
					break
				}
				l.r.Move(1)
			}
		}
		l.attrVal = l.r.Lexeme()[attrPos:]
	} else {
		// No '=': valueless attribute; rewind past the whitespace we skipped.
		l.r.Rewind(nameEnd)
		l.attrVal = nil
	}
	l.text = parse.ToLower(l.r.Lexeme()[nameStart:nameEnd])
	return l.r.Shift()
}
| 
 | ||||
// shiftEndTag consumes an end tag up to and including '>' (or to EOF),
// stores the whitespace-trimmed tag name in l.text, and returns the
// lowercased tag bytes.
func (l *Lexer) shiftEndTag() []byte {
	for {
		c := l.r.Peek(0)
		if c == '>' {
			l.text = l.r.Lexeme()[2:] // name without the '</'
			l.r.Move(1)
			break
		} else if c == 0 && l.r.Err() != nil {
			l.text = l.r.Lexeme()[2:]
			break
		}
		l.r.Move(1)
	}

	// Trim trailing whitespace from the stored tag name.
	end := len(l.text)
	for end > 0 {
		if c := l.text[end-1]; c == ' ' || c == '\t' || c == '\n' || c == '\r' {
			end--
			continue
		}
		break
	}
	l.text = l.text[:end]
	return parse.ToLower(l.r.Shift())
}
| 
 | ||||
// shiftXML parses the content of a svg or math tag according to the XML 1.1 specifications, including the tag itself.
// So far we have already parsed `<svg` or `<math`. A NULL byte before EOF
// sets l.err; the bytes consumed so far are still returned.
func (l *Lexer) shiftXML(rawTag Hash) []byte {
	inQuote := false
	for {
		c := l.r.Peek(0)
		if c == '"' {
			// Track double-quoted sections so '<' inside them is ignored.
			inQuote = !inQuote
			l.r.Move(1)
		} else if c == '<' && !inQuote && l.r.Peek(1) == '/' {
			// Candidate close tag: read the name and compare it against rawTag.
			mark := l.r.Pos()
			l.r.Move(2)
			for {
				if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
					break
				}
				l.r.Move(1)
			}
			if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == rawTag { // copy so that ToLower doesn't change the case of the underlying slice
				break
			}
		} else if c == 0 {
			if l.r.Err() == nil {
				l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
			}
			return l.r.Shift()
		} else {
			l.r.Move(1)
		}
	}

	// Consume the remainder of the close tag through its '>'.
	for {
		c := l.r.Peek(0)
		if c == '>' {
			l.r.Move(1)
			break
		} else if c == 0 {
			if l.r.Err() == nil {
				l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
			}
			return l.r.Shift()
		}
		l.r.Move(1)
	}
	return l.r.Shift()
}
| 
 | ||||
| //////////////////////////////////////////////////////////////// | ||||
| 
 | ||||
| func (l *Lexer) at(b ...byte) bool { | ||||
| 	for i, c := range b { | ||||
| 		if l.r.Peek(i) != c { | ||||
| 			return false | ||||
| 		} | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
| 
 | ||||
| func (l *Lexer) atCaseInsensitive(b ...byte) bool { | ||||
| 	for i, c := range b { | ||||
| 		if l.r.Peek(i) != c && (l.r.Peek(i)+('a'-'A')) != c { | ||||
| 			return false | ||||
| 		} | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
							
								
								
									
										103
									
								
								vendor/github.com/tdewolff/parse/v2/html/util.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								vendor/github.com/tdewolff/parse/v2/html/util.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,103 @@ | |||
| package html | ||||
| 
 | ||||
// Escaped replacements for the quote characters inside attribute values.
// Each is five bytes, which matches the `n += count * 4` growth computation
// in EscapeAttrVal (1 byte grows to 5). The previous literals were the raw
// quote characters (un-escaped extraction artifact); []byte(""") does not
// even compile.
var (
	singleQuoteEntityBytes = []byte("&#39;")
	doubleQuoteEntityBytes = []byte("&#34;")
)
| 
 | ||||
| // EscapeAttrVal returns the escaped attribute value bytes without quotes. | ||||
| func EscapeAttrVal(buf *[]byte, orig, b []byte, isXML bool) []byte { | ||||
| 	singles := 0 | ||||
| 	doubles := 0 | ||||
| 	unquoted := true | ||||
| 	entities := false | ||||
| 	for _, c := range b { | ||||
| 		if charTable[c] { | ||||
| 			unquoted = false | ||||
| 			if c == '"' { | ||||
| 				doubles++ | ||||
| 			} else if c == '\'' { | ||||
| 				singles++ | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	if unquoted && !isXML { | ||||
| 		return b | ||||
| 	} else if !entities && len(orig) == len(b)+2 && (singles == 0 && orig[0] == '\'' || doubles == 0 && orig[0] == '"') { | ||||
| 		return orig | ||||
| 	} | ||||
| 
 | ||||
| 	n := len(b) + 2 | ||||
| 	var quote byte | ||||
| 	var escapedQuote []byte | ||||
| 	if singles >= doubles || isXML { | ||||
| 		n += doubles * 4 | ||||
| 		quote = '"' | ||||
| 		escapedQuote = doubleQuoteEntityBytes | ||||
| 	} else { | ||||
| 		n += singles * 4 | ||||
| 		quote = '\'' | ||||
| 		escapedQuote = singleQuoteEntityBytes | ||||
| 	} | ||||
| 	if n > cap(*buf) { | ||||
| 		*buf = make([]byte, 0, n) // maximum size, not actual size | ||||
| 	} | ||||
| 	t := (*buf)[:n] // maximum size, not actual size | ||||
| 	t[0] = quote | ||||
| 	j := 1 | ||||
| 	start := 0 | ||||
| 	for i, c := range b { | ||||
| 		if c == quote { | ||||
| 			j += copy(t[j:], b[start:i]) | ||||
| 			j += copy(t[j:], escapedQuote) | ||||
| 			start = i + 1 | ||||
| 		} | ||||
| 	} | ||||
| 	j += copy(t[j:], b[start:]) | ||||
| 	t[j] = quote | ||||
| 	return t[:j+1] | ||||
| } | ||||
| 
 | ||||
// charTable flags the bytes that force an attribute value to be quoted:
// whitespace, the quote characters, backtick, and <, =, >.
var charTable = [256]bool{
	// ASCII
	false, false, false, false, false, false, false, false,
	false, true, true, false, true, true, false, false, // tab, line feed, form feed, carriage return
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	true, false, true, false, false, false, false, true, // space, ", '
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, true, true, true, false, // <, =, >

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	true, false, false, false, false, false, false, false, // `
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	// non-ASCII
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
}
							
								
								
									
										173
									
								
								vendor/github.com/tdewolff/parse/v2/input.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										173
									
								
								vendor/github.com/tdewolff/parse/v2/input.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,173 @@ | |||
| package parse | ||||
| 
 | ||||
| import ( | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| ) | ||||
| 
 | ||||
// nullBuffer is the buffer used for empty input: a single NULL sentinel byte.
var nullBuffer = []byte{0}
| 
 | ||||
// Input is a buffered reader that allows peeking forward and shifting, taking an io.Reader.
// It keeps data in-memory until Free, taking a byte length, is called to move beyond the data.
type Input struct {
	buf   []byte
	pos   int // index in buf of the read position
	start int // index in buf where the current selection begins
	err   error

	// restore undoes the NULL byte written past the end of the caller's
	// slice by NewInputBytes, if that in-place trick was used.
	restore func()
}
| 
 | ||||
// NewInput returns a new Input for a given io.Reader and uses ioutil.ReadAll to read it into a byte slice.
// If the reader implements Bytes, that is used instead. It will append a NULL at the end of the buffer.
func NewInput(r io.Reader) *Input {
	var b []byte
	if r != nil {
		if buffer, ok := r.(interface {
			Bytes() []byte
		}); ok {
			// Zero-copy path for bytes.Buffer-like readers.
			b = buffer.Bytes()
		} else {
			var err error
			b, err = ioutil.ReadAll(r)
			if err != nil {
				// Reading failed: return an Input that only reports err.
				return &Input{
					buf: nullBuffer,
					err: err,
				}
			}
		}
	}
	return NewInputBytes(b)
}
| 
 | ||||
// NewInputString returns a new Input for a given string and appends NULL at the end.
func NewInputString(s string) *Input {
	return NewInputBytes([]byte(s))
}
| 
 | ||||
// NewInputBytes returns a new Input for a given byte slice and appends NULL at the end.
// To avoid reallocation, make sure the capacity has room for one more byte.
// The NULL acts as a sentinel so Peek never needs an explicit bounds check.
func NewInputBytes(b []byte) *Input {
	z := &Input{
		buf: b,
	}

	n := len(b)
	if n == 0 {
		z.buf = nullBuffer
	} else {
		// Append NULL to buffer, but try to avoid reallocation
		if cap(b) > n {
			// Overwrite next byte but restore when done
			b = b[:n+1]
			c := b[n]
			b[n] = 0

			z.buf = b
			z.restore = func() {
				b[n] = c
			}
		} else {
			z.buf = append(b, 0)
		}
	}
	return z
}
| 
 | ||||
| // Restore restores the replaced byte past the end of the buffer by NULL. | ||||
| func (z *Input) Restore() { | ||||
| 	if z.restore != nil { | ||||
| 		z.restore() | ||||
| 		z.restore = nil | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
// Err returns the error returned from the io.Reader, or io.EOF when the end has been reached.
func (z *Input) Err() error {
	return z.PeekErr(0)
}
| 
 | ||||
| // PeekErr returns the error at position pos. When pos is zero, this is the same as calling Err(). | ||||
| func (z *Input) PeekErr(pos int) error { | ||||
| 	if z.err != nil { | ||||
| 		return z.err | ||||
| 	} else if z.pos+pos >= len(z.buf)-1 { | ||||
| 		return io.EOF | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
// Peek returns the ith byte relative to the current position.
// Peek returns 0 when an error has occurred; Err returns the error.
func (z *Input) Peek(pos int) byte {
	pos += z.pos
	return z.buf[pos]
}
| 
 | ||||
// PeekRune returns the rune and rune length of the ith byte relative to the current position.
func (z *Input) PeekRune(pos int) (rune, int) {
	// from unicode/utf8; a zero continuation byte means the buffer ended,
	// in which case the lead byte is returned as a single-byte rune.
	c := z.Peek(pos)
	if c < 0xC0 || z.Peek(pos+1) == 0 {
		return rune(c), 1
	} else if c < 0xE0 || z.Peek(pos+2) == 0 {
		return rune(c&0x1F)<<6 | rune(z.Peek(pos+1)&0x3F), 2
	} else if c < 0xF0 || z.Peek(pos+3) == 0 {
		return rune(c&0x0F)<<12 | rune(z.Peek(pos+1)&0x3F)<<6 | rune(z.Peek(pos+2)&0x3F), 3
	}
	return rune(c&0x07)<<18 | rune(z.Peek(pos+1)&0x3F)<<12 | rune(z.Peek(pos+2)&0x3F)<<6 | rune(z.Peek(pos+3)&0x3F), 4
}
| 
 | ||||
// Move advances the position by n bytes.
func (z *Input) Move(n int) {
	z.pos += n
}
| 
 | ||||
// Pos returns a mark to which the position can be rewound with Rewind.
func (z *Input) Pos() int {
	return z.pos - z.start
}
| 
 | ||||
// Rewind rewinds the position to the given mark obtained from Pos.
func (z *Input) Rewind(pos int) {
	z.pos = z.start + pos
}
| 
 | ||||
// Lexeme returns the bytes of the current selection; the three-index slice
// caps capacity so appends cannot clobber the rest of the buffer.
func (z *Input) Lexeme() []byte {
	return z.buf[z.start:z.pos:z.pos]
}
| 
 | ||||
// Skip collapses the position to the end of the selection, discarding it.
func (z *Input) Skip() {
	z.start = z.pos
}
| 
 | ||||
// Shift returns the bytes of the current selection and collapses the position to the end of the selection.
func (z *Input) Shift() []byte {
	b := z.buf[z.start:z.pos:z.pos]
	z.start = z.pos
	return b
}
| 
 | ||||
// Offset returns the character position in the buffer.
func (z *Input) Offset() int {
	return z.pos
}
| 
 | ||||
// Bytes returns the underlying buffer, excluding the trailing NULL sentinel.
func (z *Input) Bytes() []byte {
	return z.buf[: len(z.buf)-1 : len(z.buf)-1]
}
| 
 | ||||
// Len returns the length of the underlying buffer, excluding the trailing NULL sentinel.
func (z *Input) Len() int {
	return len(z.buf) - 1
}
| 
 | ||||
// Reset resets the position and selection start to the beginning of the buffer.
func (z *Input) Reset() {
	z.start = 0
	z.pos = 0
}
							
								
								
									
										95
									
								
								vendor/github.com/tdewolff/parse/v2/position.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										95
									
								
								vendor/github.com/tdewolff/parse/v2/position.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,95 @@ | |||
| package parse | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"strings" | ||||
| 	"unicode" | ||||
| ) | ||||
| 
 | ||||
// Position returns the line and column number for a certain position in a file. It is useful for recovering the position in a file that caused an error.
// It only treats \n, \r, and \r\n as newlines, which might be different from some languages also recognizing \f, \u2028, and \u2029 to be newlines.
func Position(r io.Reader, offset int) (line, col int, context string) {
	l := NewInput(r)
	line = 1
	for l.Pos() < offset {
		c := l.Peek(0)
		n := 1
		newline := false
		if c == '\n' {
			newline = true
		} else if c == '\r' {
			if l.Peek(1) == '\n' {
				newline = true
				n = 2
			} else {
				newline = true
			}
		} else if c >= 0xC0 {
			// Multi-byte rune; \u2028 and \u2029 also count as newlines here.
			// NOTE(review): this contradicts the doc comment above — confirm intent.
			var r rune
			if r, n = l.PeekRune(0); r == '\u2028' || r == '\u2029' {
				newline = true
			}
		} else if c == 0 && l.Err() != nil {
			break
		}

		// offset points inside a multi-byte rune; stop before it.
		if 1 < n && offset < l.Pos()+n {
			break
		}
		l.Move(n)

		if newline {
			line++
			offset -= l.Pos()
			l.Skip()
		}
	}

	col = len([]rune(string(l.Lexeme()))) + 1
	context = positionContext(l, line, col)
	return
}
| 
 | ||||
// positionContext renders the source line followed by a caret pointing at
// col, trimming long lines to roughly 60 characters with ellipses.
func positionContext(l *Input, line, col int) (context string) {
	// Extend the selection to the end of the current line.
	for {
		c := l.Peek(0)
		if c == 0 && l.Err() != nil || c == '\n' || c == '\r' {
			break
		}
		l.Move(1)
	}
	rs := []rune(string(l.Lexeme()))

	// cut off front or rear of context to stay between 60 characters
	// (offsets chosen to keep the caret column visible — magic constants
	// presumably tuned by hand; verify before changing)
	limit := 60
	offset := 20
	ellipsisFront := ""
	ellipsisRear := ""
	if limit < len(rs) {
		if col <= limit-offset {
			ellipsisRear = "..."
			rs = rs[:limit-3]
		} else if col >= len(rs)-offset-3 {
			ellipsisFront = "..."
			col -= len(rs) - offset - offset - 7
			rs = rs[len(rs)-offset-offset-4:]
		} else {
			ellipsisFront = "..."
			ellipsisRear = "..."
			rs = rs[col-offset-1 : col+offset]
			col = offset + 4
		}
	}

	// replace unprintable characters by a middle dot
	for i, r := range rs {
		if !unicode.IsGraphic(r) {
			rs[i] = '·'
		}
	}

	context += fmt.Sprintf("%5d: %s%s%s\n", line, ellipsisFront, string(rs), ellipsisRear)
	context += fmt.Sprintf("%s^", strings.Repeat(" ", 6+col))
	return
}
							
								
								
									
										257
									
								
								vendor/github.com/tdewolff/parse/v2/strconv/float.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										257
									
								
								vendor/github.com/tdewolff/parse/v2/strconv/float.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,257 @@ | |||
| package strconv | ||||
| 
 | ||||
| import ( | ||||
| 	"math" | ||||
| ) | ||||
| 
 | ||||
// float64pow10 holds the powers of ten that are exactly representable as
// float64 (10^0 through 10^22).
var float64pow10 = []float64{
	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
	1e20, 1e21, 1e22,
}
| 
 | ||||
// ParseFloat parses a byte-slice and returns the float it represents.
// If an invalid character is encountered, it will stop there.
func ParseFloat(b []byte) (float64, int) {
	i := 0
	neg := false
	if i < len(b) && (b[i] == '+' || b[i] == '-') {
		neg = b[i] == '-'
		i++
	}
	start := i
	dot := -1   // index of the decimal point, if any
	trunk := -1 // index where mantissa digits stopped fitting in a uint64
	n := uint64(0)
	for ; i < len(b); i++ {
		c := b[i]
		if c >= '0' && c <= '9' {
			if trunk == -1 {
				if n > math.MaxUint64/10 {
					trunk = i
				} else {
					n *= 10
					n += uint64(c - '0')
				}
			}
		} else if dot == -1 && c == '.' {
			dot = i
		} else {
			break
		}
	}
	// No digits consumed (empty input, or a lone dot).
	if i == start || i == start+1 && dot == start {
		return 0.0, 0
	}

	f := float64(n)
	if neg {
		f = -f
	}

	// mantExp is the power-of-ten shift implied by the dot position and/or
	// the digits dropped past the uint64 capacity.
	mantExp := int64(0)
	if dot != -1 {
		if trunk == -1 {
			trunk = i
		}
		mantExp = int64(trunk - dot - 1)
	} else if trunk != -1 {
		mantExp = int64(trunk - i)
	}
	expExp := int64(0)
	if i < len(b) && (b[i] == 'e' || b[i] == 'E') {
		startExp := i
		i++
		if e, expLen := ParseInt(b[i:]); expLen > 0 {
			expExp = e
			i += expLen
		} else {
			// 'e' not followed by a valid integer; back out.
			i = startExp
		}
	}
	exp := expExp - mantExp

	// copied from strconv/atof.go
	if exp == 0 {
		return f, i
	} else if exp > 0 && exp <= 15+22 { // int * 10^k
		// If exponent is big but number of digits is not,
		// can move a few zeros into the integer part.
		if exp > 22 {
			f *= float64pow10[exp-22]
			exp = 22
		}
		if f <= 1e15 && f >= -1e15 {
			return f * float64pow10[exp], i
		}
	} else if exp < 0 && exp >= -22 { // int / 10^k
		return f / float64pow10[-exp], i
	}
	f *= math.Pow10(int(-mantExp))
	return f * math.Pow10(int(expExp)), i
}
| 
 | ||||
// log2 approximates log10(2), used to convert a base-2 exponent to base-10.
const log2 = 0.3010299956639812

// float64exp estimates the decimal exponent of f (roughly floor(log10(|f|)))
// from its binary exponent bits; it returns 0 for f == 0.
func float64exp(f float64) int {
	var exp2 int
	if f != 0.0 {
		bits := math.Float64bits(f)
		exp2 = int(bits>>(64-11-1))&0x7FF - 1023 + 1
	}

	e := float64(exp2) * log2
	if e < 0 {
		e -= 1.0
	}
	return int(e)
}
| 
 | ||||
// AppendFloat appends a float to `b` with precision `prec`. It returns the new slice and whether successful or not. Precision is the number of decimals to display, thus prec + 1 == number of significant digits.
func AppendFloat(b []byte, f float64, prec int) ([]byte, bool) {
	// NaN and infinities cannot be rendered; report failure.
	if math.IsNaN(f) || math.IsInf(f, 0) {
		return b, false
	}

	neg := false
	if f < 0.0 {
		f = -f
		neg = true
	}
	if prec < 0 || 17 < prec {
		prec = 17 // maximum number of significant digits in double
	}
	prec -= float64exp(f) // number of digits in front of the dot
	f *= math.Pow10(prec)

	// calculate mantissa and exponent
	mant := int64(f)
	mantLen := LenInt(mant)
	mantExp := mantLen - prec - 1
	if mant == 0 {
		return append(b, '0'), true
	}

	// expLen is zero for positive exponents, because positive exponents are determined later on in the big conversion loop
	exp := 0
	expLen := 0
	if mantExp > 0 {
		// positive exponent is determined in the loop below
		// but if we initially decreased the exponent to fit in an integer, we can't set the new exponent in the loop alone,
		// since the number of zeros at the end determines the positive exponent in the loop, and we just artificially lost zeros
		if prec < 0 {
			exp = mantExp
		}
		expLen = 1 + LenInt(int64(exp)) // e + digits
	} else if mantExp < -3 {
		exp = mantExp
		expLen = 2 + LenInt(int64(exp)) // e + minus + digits
	} else if mantExp < -1 {
		mantLen += -mantExp - 1 // extra zero between dot and first digit
	}

	// reserve space in b
	i := len(b)
	maxLen := 1 + mantLen + expLen // dot + mantissa digits + exponent
	if neg {
		maxLen++
	}
	if i+maxLen > cap(b) {
		b = append(b, make([]byte, maxLen)...)
	} else {
		b = b[:i+maxLen]
	}

	// write to string representation
	if neg {
		b[i] = '-'
		i++
	}

	// big conversion loop, start at the end and move to the front
	// initially print trailing zeros and remove them later on
	// for example if the first non-zero digit is three positions in front of the dot, it will overwrite the zeros with a positive exponent
	zero := true
	last := i + mantLen      // right-most position of digit that is non-zero + dot
	dot := last - prec - exp // position of dot
	j := last
	for mant > 0 {
		if j == dot {
			b[j] = '.'
			j--
		}
		newMant := mant / 10
		digit := mant - 10*newMant
		if zero && digit > 0 {
			// first non-zero digit, if we are still behind the dot we can trim the end to this position
			// otherwise trim to the dot (including the dot)
			if j > dot {
				i = j + 1
				// decrease negative exponent further to get rid of dot
				if exp < 0 {
					newExp := exp - (j - dot)
					// getting rid of the dot shouldn't lower the exponent to more digits (e.g. -9 -> -10)
					if LenInt(int64(newExp)) == LenInt(int64(exp)) {
						exp = newExp
						dot = j
						j--
						i--
					}
				}
			} else {
				i = dot
			}
			last = j
			zero = false
		}
		b[j] = '0' + byte(digit)
		j--
		mant = newMant
	}

	if j > dot {
		// extra zeros behind the dot
		for j > dot {
			b[j] = '0'
			j--
		}
		b[j] = '.'
	} else if last+3 < dot {
		// add positive exponent because we have 3 or more zeros in front of the dot
		i = last + 1
		exp = dot - last - 1
	} else if j == dot {
		// handle 0.1
		b[j] = '.'
	}

	// exponent
	if exp != 0 {
		if exp == 1 {
			// exponent of 1: write the trailing zero instead of e1
			b[i] = '0'
			i++
		} else if exp == 2 {
			// exponent of 2: write two trailing zeros instead of e2
			b[i] = '0'
			b[i+1] = '0'
			i += 2
		} else {
			b[i] = 'e'
			i++
			if exp < 0 {
				b[i] = '-'
				i++
				exp = -exp
			}
			// write exponent digits right-to-left
			i += LenInt(int64(exp))
			j := i
			for exp > 0 {
				newExp := exp / 10
				digit := exp - 10*newExp
				j--
				b[j] = '0' + byte(digit)
				exp = newExp
			}
		}
	}
	return b[:i], true
}
							
								
								
									
										88
									
								
								vendor/github.com/tdewolff/parse/v2/strconv/int.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								vendor/github.com/tdewolff/parse/v2/strconv/int.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,88 @@ | |||
| package strconv | ||||
| 
 | ||||
| import ( | ||||
| 	"math" | ||||
| ) | ||||
| 
 | ||||
// ParseInt parses a byte-slice and returns the integer it represents along
// with the number of bytes consumed. If an invalid character is encountered,
// it stops there; on overflow or no digits it returns (0, 0).
func ParseInt(b []byte) (int64, int) {
	pos := 0
	neg := false
	if 0 < len(b) && (b[0] == '+' || b[0] == '-') {
		neg = b[0] == '-'
		pos++
	}
	digitsStart := pos
	var n uint64
	for pos < len(b) {
		// Overflow is checked before the digit test, matching the original
		// accumulation order exactly.
		if n > math.MaxUint64/10 {
			return 0, 0
		}
		c := b[pos]
		if c < '0' || '9' < c {
			break
		}
		n = n*10 + uint64(c-'0')
		pos++
	}
	if pos == digitsStart {
		// No digits at all (empty input or a bare sign).
		return 0, 0
	}
	if !neg && n > uint64(math.MaxInt64) || n > uint64(math.MaxInt64)+1 {
		// Out of int64 range (the negative range extends one further).
		return 0, 0
	}
	if neg {
		return -int64(n), pos
	}
	return int64(n), pos
}
| 
 | ||||
// LenInt returns the written length of an integer, i.e. its number of
// decimal digits; the sign is not counted.
func LenInt(i int64) int {
	if i < 0 {
		if i == -9223372036854775808 {
			// -i would overflow; the magnitude has 19 digits.
			return 19
		}
		i = -i
	}
	n := 1
	for 10 <= i {
		i /= 10
		n++
	}
	return n
}
							
								
								
									
										83
									
								
								vendor/github.com/tdewolff/parse/v2/strconv/price.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								vendor/github.com/tdewolff/parse/v2/strconv/price.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,83 @@ | |||
| package strconv | ||||
| 
 | ||||
// AppendPrice will append an int64 formatted as a price, where the int64 is the price in cents.
// It does not display whether a price is negative or not.
// dec selects whether two decimals are printed (otherwise the value is
// rounded to whole units); milSeparator and decSeparator are the thousands
// and decimal separator bytes.
func AppendPrice(b []byte, price int64, dec bool, milSeparator byte, decSeparator byte) []byte {
	if price < 0 {
		if price == -9223372036854775808 {
			// -price would overflow; emit the pre-formatted magnitude and
			// patch in the separators.
			x := []byte("92 233 720 368 547 758 08")
			x[2] = milSeparator
			x[6] = milSeparator
			x[10] = milSeparator
			x[14] = milSeparator
			x[18] = milSeparator
			x[22] = decSeparator
			return append(b, x...)
		}
		price = -price
	}

	// rounding
	if !dec {
		firstDec := (price / 10) % 10
		if firstDec >= 5 {
			price += 100
		}
	}

	// calculate size
	n := LenInt(price) - 2
	if n > 0 {
		n += (n - 1) / 3 // mil separator
	} else {
		n = 1
	}
	if dec {
		n += 2 + 1 // decimals + dec separator
	}

	// resize byte slice
	i := len(b)
	if i+n > cap(b) {
		b = append(b, make([]byte, n)...)
	} else {
		b = b[:i+n]
	}

	// print fractional-part, writing digits right-to-left
	i += n - 1
	if dec {
		for j := 0; j < 2; j++ {
			c := byte(price%10) + '0'
			price /= 10
			b[i] = c
			i--
		}
		b[i] = decSeparator
		i--
	} else {
		price /= 100
	}

	if price == 0 {
		b[i] = '0'
		return b
	}

	// print integer-part
	j := 0
	for price > 0 {
		// insert a thousands separator after every third digit
		if j == 3 {
			b[i] = milSeparator
			i--
			j = 0
		}

		c := byte(price%10) + '0'
		price /= 10
		b[i] = c
		i--
		j++
	}
	return b
}
							
								
								
									
										489
									
								
								vendor/github.com/tdewolff/parse/v2/util.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										489
									
								
								vendor/github.com/tdewolff/parse/v2/util.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,489 @@ | |||
| package parse | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"strconv" | ||||
| 	"unicode" | ||||
| ) | ||||
| 
 | ||||
// Copy returns a newly allocated copy of the given byte slice.
func Copy(src []byte) []byte {
	dst := make([]byte, len(src))
	copy(dst, src)
	return dst
}
| 
 | ||||
// ToLower converts all ASCII characters A-Z in src to lowercase, mutating
// src in place, and returns src.
func ToLower(src []byte) []byte {
	for i := 0; i < len(src); i++ {
		if 'A' <= src[i] && src[i] <= 'Z' {
			src[i] += 'a' - 'A'
		}
	}
	return src
}
| 
 | ||||
// EqualFold returns true when s matches targetLower case-insensitively;
// targetLower must already be all-lowercase. Only ASCII A-Z is folded.
func EqualFold(s, targetLower []byte) bool {
	if len(s) != len(targetLower) {
		return false
	}
	for i, want := range targetLower {
		have := s[i]
		if have == want {
			continue
		}
		// mismatch unless have is an uppercase ASCII letter whose
		// lowercase form equals the expected byte
		if have < 'A' || 'Z' < have || have+('a'-'A') != want {
			return false
		}
	}
	return true
}
| 
 | ||||
// Printable returns a human-readable representation of r: the character
// itself when it is graphic, 0xNN byte notation for non-graphic ASCII,
// and the U+NNNN form for everything else.
func Printable(r rune) string {
	switch {
	case unicode.IsGraphic(r):
		return fmt.Sprintf("%c", r)
	case r < 128:
		return fmt.Sprintf("0x%02X", r)
	default:
		return fmt.Sprintf("%U", r)
	}
}
| 
 | ||||
// whitespaceTable marks the byte values treated as whitespace:
// '\t' (0x09), '\n' (0x0A), '\f' (0x0C), '\r' (0x0D) and ' ' (0x20).
// A 256-entry lookup table lets IsWhitespace avoid per-call comparisons.
var whitespaceTable = [256]bool{
	// ASCII
	false, false, false, false, false, false, false, false,
	false, true, true, false, true, true, false, false, // tab, new line, form feed, carriage return
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	true, false, false, false, false, false, false, false, // space
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	// non-ASCII
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
}
| 
 | ||||
// IsWhitespace returns true for space, \n, \r, \t, \f.
// It is a single table lookup; whitespaceTable is true only at those
// five ASCII byte values.
func IsWhitespace(c byte) bool {
	return whitespaceTable[c]
}
| 
 | ||||
// newlineTable marks the byte values treated as line breaks:
// '\n' (0x0A) and '\r' (0x0D). Used by IsNewline as a branch-free lookup.
var newlineTable = [256]bool{
	// ASCII
	false, false, false, false, false, false, false, false,
	false, false, true, false, false, true, false, false, // new line, carriage return
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	// non-ASCII
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
}
| 
 | ||||
// IsNewline returns true for \n, \r.
// Single table lookup against newlineTable.
func IsNewline(c byte) bool {
	return newlineTable[c]
}
| 
 | ||||
| // IsAllWhitespace returns true when the entire byte slice consists of space, \n, \r, \t, \f. | ||||
| func IsAllWhitespace(b []byte) bool { | ||||
| 	for _, c := range b { | ||||
| 		if !IsWhitespace(c) { | ||||
| 			return false | ||||
| 		} | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
| 
 | ||||
| // TrimWhitespace removes any leading and trailing whitespace characters. | ||||
| func TrimWhitespace(b []byte) []byte { | ||||
| 	n := len(b) | ||||
| 	start := n | ||||
| 	for i := 0; i < n; i++ { | ||||
| 		if !IsWhitespace(b[i]) { | ||||
| 			start = i | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	end := n | ||||
| 	for i := n - 1; i >= start; i-- { | ||||
| 		if !IsWhitespace(b[i]) { | ||||
| 			end = i + 1 | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	return b[start:end] | ||||
| } | ||||
| 
 | ||||
// ReplaceMultipleWhitespace replaces each run of whitespace characters
// (space, \n, \t, \f, \r) by a single byte: '\n' when the run contained a
// newline (\n or \r), ' ' otherwise. The collapse happens in place and the
// (possibly shorter) slice is returned; text sections between runs are
// compacted left with copy.
func ReplaceMultipleWhitespace(b []byte) []byte {
	j, k := 0, 0 // j is write position, k is start of next text section
	for i := 0; i < len(b); i++ {
		if IsWhitespace(b[i]) {
			start := i
			newline := IsNewline(b[i])
			i++
			// consume the rest of this whitespace run, tracking whether
			// it contains any newline character
			for ; i < len(b) && IsWhitespace(b[i]); i++ {
				if IsNewline(b[i]) {
					newline = true
				}
			}
			// collapse the run into a single character at its start
			if newline {
				b[start] = '\n'
			} else {
				b[start] = ' '
			}
			if 1 < i-start { // more than one whitespace
				if j == 0 {
					// first gap found: everything before it is already in place
					j = start + 1
				} else {
					// shift the preceding text section (plus the collapsed
					// whitespace char) down to the write position
					j += copy(b[j:], b[k:start+1])
				}
				k = i
			}
		}
	}
	if j == 0 {
		// no run longer than one char: b is unchanged
		return b
	} else if j == 1 { // only if starts with whitespace
		b[k-1] = b[0]
		return b[k-1:]
	} else if k < len(b) {
		// move the final text section into place
		j += copy(b[j:], b[k:])
	}
	return b[:j]
}
| 
 | ||||
// replaceEntities decodes the entity starting at b[i] in place, assuming that
// b[i] == '&' and that i+3 < len(b). Numeric entities with values below 128
// are replaced by the raw ASCII byte; named entities are looked up in
// entitiesMap, and single-byte results may be re-encoded via revEntitiesMap
// into a preferred canonical entity. The returned int is the index of the
// last character of the processed entity, so that the next iteration can
// safely do i++ to continue and not miss any entities.
func replaceEntities(b []byte, i int, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) ([]byte, int) {
	const MaxEntityLength = 31 // longest HTML entity: CounterClockwiseContourIntegral
	var r []byte
	j := i + 1
	if b[j] == '#' {
		j++
		if b[j] == 'x' {
			// hexadecimal numeric entity
			j++
			c := 0
			for ; j < len(b) && (b[j] >= '0' && b[j] <= '9' || b[j] >= 'a' && b[j] <= 'f' || b[j] >= 'A' && b[j] <= 'F'); j++ {
				if b[j] <= '9' {
					c = c<<4 + int(b[j]-'0')
				} else if b[j] <= 'F' {
					c = c<<4 + int(b[j]-'A') + 10
				} else if b[j] <= 'f' {
					c = c<<4 + int(b[j]-'a') + 10
				}
			}
			// bail out when there were no hex digits at all, or the value
			// is large enough that a decimal rewrite would not be shorter
			if j <= i+3 || 10000 <= c {
				return b, j - 1
			}
			if c < 128 {
				// ASCII code point: emit the raw byte
				r = []byte{byte(c)}
			} else {
				// non-ASCII: rewrite as a decimal entity
				r = append(r, '&', '#')
				r = strconv.AppendInt(r, int64(c), 10)
				r = append(r, ';')
			}
		} else {
			// decimal numeric entity; stop accumulating once c reaches 128
			c := 0
			for ; j < len(b) && c < 128 && b[j] >= '0' && b[j] <= '9'; j++ {
				c = c*10 + int(b[j]-'0')
			}
			// bail out when there were no digits, or the value is non-ASCII
			if j <= i+2 || 128 <= c {
				return b, j - 1
			}
			r = []byte{byte(c)}
		}
	} else {
		// named entity: scan forward to the terminating semicolon
		for ; j < len(b) && j-i-1 <= MaxEntityLength && b[j] != ';'; j++ {
		}
		if j <= i+1 || len(b) <= j {
			return b, j - 1
		}

		var ok bool
		r, ok = entitiesMap[string(b[i+1:j])]
		if !ok {
			return b, j
		}
	}

	// j is at semicolon
	n := j + 1 - i
	if j < len(b) && b[j] == ';' && 2 < n {
		if len(r) == 1 {
			if q, ok := revEntitiesMap[r[0]]; ok {
				// prefer the canonical entity for this byte, unless the
				// input already is exactly that entity
				if len(q) == len(b[i:j+1]) && bytes.Equal(q, b[i:j+1]) {
					return b, j
				}
				r = q
			} else if r[0] == '&' {
				// check whether the decoded '&' is followed by something that
				// could potentially form a new entity; if so, keep it encoded
				k := j + 1
				if k < len(b) && b[k] == '#' {
					k++
				}
				for ; k < len(b) && k-j <= MaxEntityLength && (b[k] >= '0' && b[k] <= '9' || b[k] >= 'a' && b[k] <= 'z' || b[k] >= 'A' && b[k] <= 'Z'); k++ {
				}
				if k < len(b) && b[k] == ';' {
					return b, k
				}
			}
		}

		// splice the replacement over the entity and shrink b accordingly
		copy(b[i:], r)
		copy(b[i+len(r):], b[j+1:])
		b = b[:len(b)-n+len(r)]
		return b, i + len(r) - 1
	}
	return b, i
}
| 
 | ||||
| // ReplaceEntities replaces all occurrences of entites (such as ") to their respective unencoded bytes. | ||||
| func ReplaceEntities(b []byte, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) []byte { | ||||
| 	for i := 0; i < len(b); i++ { | ||||
| 		if b[i] == '&' && i+3 < len(b) { | ||||
| 			b, i = replaceEntities(b, i, entitiesMap, revEntitiesMap) | ||||
| 		} | ||||
| 	} | ||||
| 	return b | ||||
| } | ||||
| 
 | ||||
// ReplaceMultipleWhitespaceAndEntities is a combination of
// ReplaceMultipleWhitespace and ReplaceEntities. It is faster than executing
// both sequentially because it walks b only once, interleaving whitespace
// collapsing with entity replacement.
func ReplaceMultipleWhitespaceAndEntities(b []byte, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) []byte {
	j, k := 0, 0 // j is write position, k is start of next text section
	for i := 0; i < len(b); i++ {
		if IsWhitespace(b[i]) {
			start := i
			newline := IsNewline(b[i])
			i++
			// consume the whole whitespace run, tracking newlines
			for ; i < len(b) && IsWhitespace(b[i]); i++ {
				if IsNewline(b[i]) {
					newline = true
				}
			}
			// collapse the run to a single '\n' or ' ' at its start
			if newline {
				b[start] = '\n'
			} else {
				b[start] = ' '
			}
			if 1 < i-start { // more than one whitespace
				if j == 0 {
					j = start + 1
				} else {
					j += copy(b[j:], b[k:start+1])
				}
				k = i
			}
		}
		// after the whitespace run i sits on a non-whitespace byte; try
		// decoding an entity at that position as well
		if i+3 < len(b) && b[i] == '&' {
			b, i = replaceEntities(b, i, entitiesMap, revEntitiesMap)
		}
	}
	if j == 0 {
		// nothing was collapsed; b already compacted by replaceEntities
		return b
	} else if j == 1 { // only if starts with whitespace
		b[k-1] = b[0]
		return b[k-1:]
	} else if k < len(b) {
		j += copy(b[j:], b[k:])
	}
	return b[:j]
}
| 
 | ||||
// URLEncodingTable is a charmap for which characters need escaping in the URL
// encoding scheme. Control characters, reserved/unsafe ASCII punctuation and
// all non-ASCII bytes are marked; EncodeURL turns marked bytes into %XX
// (space becomes '+').
var URLEncodingTable = [256]bool{
	// ASCII
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,

	true, false, true, true, true, true, true, false, // space, ", #, $, %, &
	false, false, false, true, true, false, false, true, // +, comma, /
	false, false, false, false, false, false, false, false,
	false, false, true, true, true, true, true, true, // :, ;, <, =, >, ?

	true, false, false, false, false, false, false, false, // @
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, true, true, true, true, false, // [, \, ], ^

	true, false, false, false, false, false, false, false, // `
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, true, true, true, false, true, // {, |, }, DEL

	// non-ASCII
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,

	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,

	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,

	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
}
| 
 | ||||
// DataURIEncodingTable is a charmap for which characters need escaping in the Data URI encoding scheme.
// Escape only non-printable characters, unicode and %, #, &. IE11 additionally requires encoding of
// \, [, ], ", <, >, `, {, }, |, ^ which is not required by Chrome, Firefox, Opera, Edge, Safari, Yandex.
// Marked bytes are rendered as %XX by EncodeURL when passed this table.
var DataURIEncodingTable = [256]bool{
	// ASCII
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,

	false, false, true, true, false, true, true, false, // ", #, %, &
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, true, false, true, false, // <, >

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, true, true, true, true, false, // [, \, ], ^

	true, false, false, false, false, false, false, false, // `
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,
	false, false, false, true, true, true, false, true, // {, |, }, DEL

	// non-ASCII
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,

	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,

	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,

	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
	true, true, true, true, true, true, true, true,
}
| 
 | ||||
// EncodeURL encodes bytes using the URL encoding scheme: every byte marked
// in table is replaced in place, space by '+' and all others by %XX with
// uppercase hex digits. The slice may be reallocated by the insertions.
func EncodeURL(b []byte, table [256]bool) []byte {
	const hexUpper = "0123456789ABCDEF"
	for i := 0; i < len(b); i++ {
		c := b[i]
		if !table[c] {
			continue
		}
		if c == ' ' {
			b[i] = '+'
			continue
		}
		// grow by two bytes and shift the tail right to make room for %XX
		b = append(b, 0, 0)
		copy(b[i+3:], b[i+1:])
		b[i], b[i+1], b[i+2] = '%', hexUpper[c>>4], hexUpper[c&0x0F]
	}
	return b
}
| 
 | ||||
| // DecodeURL decodes an URL encoded using the URL encoding scheme | ||||
| func DecodeURL(b []byte) []byte { | ||||
| 	for i := 0; i < len(b); i++ { | ||||
| 		if b[i] == '%' && i+2 < len(b) { | ||||
| 			j := i + 1 | ||||
| 			c := 0 | ||||
| 			for ; j < i+3 && (b[j] >= '0' && b[j] <= '9' || b[j] >= 'a' && b[j] <= 'f' || b[j] >= 'A' && b[j] <= 'F'); j++ { | ||||
| 				if b[j] <= '9' { | ||||
| 					c = c<<4 + int(b[j]-'0') | ||||
| 				} else if b[j] <= 'F' { | ||||
| 					c = c<<4 + int(b[j]-'A') + 10 | ||||
| 				} else if b[j] <= 'f' { | ||||
| 					c = c<<4 + int(b[j]-'a') + 10 | ||||
| 				} | ||||
| 			} | ||||
| 			if j == i+3 && c < 128 { | ||||
| 				b[i] = byte(c) | ||||
| 				b = append(b[:i+1], b[i+3:]...) | ||||
| 			} | ||||
| 		} else if b[i] == '+' { | ||||
| 			b[i] = ' ' | ||||
| 		} | ||||
| 	} | ||||
| 	return b | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue