mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-30 17:22:26 -05:00 
			
		
		
		
	add git.iim.gay/grufwub/go-store for storage backend, replacing blob.Storage
Signed-off-by: kim (grufwub) <grufwub@gmail.com>
This commit is contained in:
		
					parent
					
						
							
								ab32ce642b
							
						
					
				
			
			
				commit
				
					
						e43a46e982
					
				
			
		
					 89 changed files with 9372 additions and 240 deletions
				
			
		
							
								
								
									
										16
									
								
								vendor/github.com/golang/snappy/.gitignore
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								vendor/github.com/golang/snappy/.gitignore
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,16 @@ | |||
| cmd/snappytool/snappytool | ||||
| testdata/bench | ||||
| 
 | ||||
| # These explicitly listed benchmark data files are for an obsolete version of | ||||
| # snappy_test.go. | ||||
| testdata/alice29.txt | ||||
| testdata/asyoulik.txt | ||||
| testdata/fireworks.jpeg | ||||
| testdata/geo.protodata | ||||
| testdata/html | ||||
| testdata/html_x_4 | ||||
| testdata/kppkn.gtb | ||||
| testdata/lcet10.txt | ||||
| testdata/paper-100k.pdf | ||||
| testdata/plrabn12.txt | ||||
| testdata/urls.10K | ||||
							
								
								
									
										17
									
								
								vendor/github.com/golang/snappy/AUTHORS
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								vendor/github.com/golang/snappy/AUTHORS
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,17 @@ | |||
| # This is the official list of Snappy-Go authors for copyright purposes. | ||||
| # This file is distinct from the CONTRIBUTORS files. | ||||
| # See the latter for an explanation. | ||||
| 
 | ||||
| # Names should be added to this file as | ||||
| #	Name or Organization <email address> | ||||
| # The email address is not required for organizations. | ||||
| 
 | ||||
| # Please keep the list sorted. | ||||
| 
 | ||||
| Amazon.com, Inc | ||||
| Damian Gryski <dgryski@gmail.com> | ||||
| Google Inc. | ||||
| Jan Mercl <0xjnml@gmail.com> | ||||
| Klaus Post <klauspost@gmail.com> | ||||
| Rodolfo Carvalho <rhcarvalho@gmail.com> | ||||
| Sebastien Binet <seb.binet@gmail.com> | ||||
							
								
								
									
										39
									
								
								vendor/github.com/golang/snappy/CONTRIBUTORS
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								vendor/github.com/golang/snappy/CONTRIBUTORS
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,39 @@ | |||
| # This is the official list of people who can contribute | ||||
| # (and typically have contributed) code to the Snappy-Go repository. | ||||
| # The AUTHORS file lists the copyright holders; this file | ||||
| # lists people.  For example, Google employees are listed here | ||||
| # but not in AUTHORS, because Google holds the copyright. | ||||
| # | ||||
| # The submission process automatically checks to make sure | ||||
| # that people submitting code are listed in this file (by email address). | ||||
| # | ||||
| # Names should be added to this file only after verifying that | ||||
| # the individual or the individual's organization has agreed to | ||||
| # the appropriate Contributor License Agreement, found here: | ||||
| # | ||||
| #     http://code.google.com/legal/individual-cla-v1.0.html | ||||
| #     http://code.google.com/legal/corporate-cla-v1.0.html | ||||
| # | ||||
| # The agreement for individuals can be filled out on the web. | ||||
| # | ||||
| # When adding J Random Contributor's name to this file, | ||||
| # either J's name or J's organization's name should be | ||||
| # added to the AUTHORS file, depending on whether the | ||||
| # individual or corporate CLA was used. | ||||
| 
 | ||||
| # Names should be added to this file like so: | ||||
| #     Name <email address> | ||||
| 
 | ||||
| # Please keep the list sorted. | ||||
| 
 | ||||
| Damian Gryski <dgryski@gmail.com> | ||||
| Jan Mercl <0xjnml@gmail.com> | ||||
| Jonathan Swinney <jswinney@amazon.com> | ||||
| Kai Backman <kaib@golang.org> | ||||
| Klaus Post <klauspost@gmail.com> | ||||
| Marc-Antoine Ruel <maruel@chromium.org> | ||||
| Nigel Tao <nigeltao@golang.org> | ||||
| Rob Pike <r@golang.org> | ||||
| Rodolfo Carvalho <rhcarvalho@gmail.com> | ||||
| Russ Cox <rsc@golang.org> | ||||
| Sebastien Binet <seb.binet@gmail.com> | ||||
							
								
								
									
										27
									
								
								vendor/github.com/golang/snappy/LICENSE
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								vendor/github.com/golang/snappy/LICENSE
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,27 @@ | |||
| Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. | ||||
| 
 | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| modification, are permitted provided that the following conditions are | ||||
| met: | ||||
| 
 | ||||
|    * Redistributions of source code must retain the above copyright | ||||
| notice, this list of conditions and the following disclaimer. | ||||
|    * Redistributions in binary form must reproduce the above | ||||
| copyright notice, this list of conditions and the following disclaimer | ||||
| in the documentation and/or other materials provided with the | ||||
| distribution. | ||||
|    * Neither the name of Google Inc. nor the names of its | ||||
| contributors may be used to endorse or promote products derived from | ||||
| this software without specific prior written permission. | ||||
| 
 | ||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||
| OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
							
								
								
									
										107
									
								
								vendor/github.com/golang/snappy/README
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								vendor/github.com/golang/snappy/README
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,107 @@ | |||
| The Snappy compression format in the Go programming language. | ||||
| 
 | ||||
| To download and install from source: | ||||
| $ go get github.com/golang/snappy | ||||
| 
 | ||||
| Unless otherwise noted, the Snappy-Go source files are distributed | ||||
| under the BSD-style license found in the LICENSE file. | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| Benchmarks. | ||||
| 
 | ||||
| The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten | ||||
| or so files, the same set used by the C++ Snappy code (github.com/google/snappy | ||||
| and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @ | ||||
| 3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29: | ||||
| 
 | ||||
| "go test -test.bench=." | ||||
| 
 | ||||
| _UFlat0-8         2.19GB/s ± 0%  html | ||||
| _UFlat1-8         1.41GB/s ± 0%  urls | ||||
| _UFlat2-8         23.5GB/s ± 2%  jpg | ||||
| _UFlat3-8         1.91GB/s ± 0%  jpg_200 | ||||
| _UFlat4-8         14.0GB/s ± 1%  pdf | ||||
| _UFlat5-8         1.97GB/s ± 0%  html4 | ||||
| _UFlat6-8          814MB/s ± 0%  txt1 | ||||
| _UFlat7-8          785MB/s ± 0%  txt2 | ||||
| _UFlat8-8          857MB/s ± 0%  txt3 | ||||
| _UFlat9-8          719MB/s ± 1%  txt4 | ||||
| _UFlat10-8        2.84GB/s ± 0%  pb | ||||
| _UFlat11-8        1.05GB/s ± 0%  gaviota | ||||
| 
 | ||||
| _ZFlat0-8         1.04GB/s ± 0%  html | ||||
| _ZFlat1-8          534MB/s ± 0%  urls | ||||
| _ZFlat2-8         15.7GB/s ± 1%  jpg | ||||
| _ZFlat3-8          740MB/s ± 3%  jpg_200 | ||||
| _ZFlat4-8         9.20GB/s ± 1%  pdf | ||||
| _ZFlat5-8          991MB/s ± 0%  html4 | ||||
| _ZFlat6-8          379MB/s ± 0%  txt1 | ||||
| _ZFlat7-8          352MB/s ± 0%  txt2 | ||||
| _ZFlat8-8          396MB/s ± 1%  txt3 | ||||
| _ZFlat9-8          327MB/s ± 1%  txt4 | ||||
| _ZFlat10-8        1.33GB/s ± 1%  pb | ||||
| _ZFlat11-8         605MB/s ± 1%  gaviota | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| "go test -test.bench=. -tags=noasm" | ||||
| 
 | ||||
| _UFlat0-8          621MB/s ± 2%  html | ||||
| _UFlat1-8          494MB/s ± 1%  urls | ||||
| _UFlat2-8         23.2GB/s ± 1%  jpg | ||||
| _UFlat3-8         1.12GB/s ± 1%  jpg_200 | ||||
| _UFlat4-8         4.35GB/s ± 1%  pdf | ||||
| _UFlat5-8          609MB/s ± 0%  html4 | ||||
| _UFlat6-8          296MB/s ± 0%  txt1 | ||||
| _UFlat7-8          288MB/s ± 0%  txt2 | ||||
| _UFlat8-8          309MB/s ± 1%  txt3 | ||||
| _UFlat9-8          280MB/s ± 1%  txt4 | ||||
| _UFlat10-8         753MB/s ± 0%  pb | ||||
| _UFlat11-8         400MB/s ± 0%  gaviota | ||||
| 
 | ||||
| _ZFlat0-8          409MB/s ± 1%  html | ||||
| _ZFlat1-8          250MB/s ± 1%  urls | ||||
| _ZFlat2-8         12.3GB/s ± 1%  jpg | ||||
| _ZFlat3-8          132MB/s ± 0%  jpg_200 | ||||
| _ZFlat4-8         2.92GB/s ± 0%  pdf | ||||
| _ZFlat5-8          405MB/s ± 1%  html4 | ||||
| _ZFlat6-8          179MB/s ± 1%  txt1 | ||||
| _ZFlat7-8          170MB/s ± 1%  txt2 | ||||
| _ZFlat8-8          189MB/s ± 1%  txt3 | ||||
| _ZFlat9-8          164MB/s ± 1%  txt4 | ||||
| _ZFlat10-8         479MB/s ± 1%  pb | ||||
| _ZFlat11-8         270MB/s ± 1%  gaviota | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| For comparison (Go's encoded output is byte-for-byte identical to C++'s), here | ||||
| are the numbers from C++ Snappy's | ||||
| 
 | ||||
| make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log | ||||
| 
 | ||||
| BM_UFlat/0     2.4GB/s  html | ||||
| BM_UFlat/1     1.4GB/s  urls | ||||
| BM_UFlat/2    21.8GB/s  jpg | ||||
| BM_UFlat/3     1.5GB/s  jpg_200 | ||||
| BM_UFlat/4    13.3GB/s  pdf | ||||
| BM_UFlat/5     2.1GB/s  html4 | ||||
| BM_UFlat/6     1.0GB/s  txt1 | ||||
| BM_UFlat/7   959.4MB/s  txt2 | ||||
| BM_UFlat/8     1.0GB/s  txt3 | ||||
| BM_UFlat/9   864.5MB/s  txt4 | ||||
| BM_UFlat/10    2.9GB/s  pb | ||||
| BM_UFlat/11    1.2GB/s  gaviota | ||||
| 
 | ||||
| BM_ZFlat/0   944.3MB/s  html (22.31 %) | ||||
| BM_ZFlat/1   501.6MB/s  urls (47.78 %) | ||||
| BM_ZFlat/2    14.3GB/s  jpg (99.95 %) | ||||
| BM_ZFlat/3   538.3MB/s  jpg_200 (73.00 %) | ||||
| BM_ZFlat/4     8.3GB/s  pdf (83.30 %) | ||||
| BM_ZFlat/5   903.5MB/s  html4 (22.52 %) | ||||
| BM_ZFlat/6   336.0MB/s  txt1 (57.88 %) | ||||
| BM_ZFlat/7   312.3MB/s  txt2 (61.91 %) | ||||
| BM_ZFlat/8   353.1MB/s  txt3 (54.99 %) | ||||
| BM_ZFlat/9   289.9MB/s  txt4 (66.26 %) | ||||
| BM_ZFlat/10    1.2GB/s  pb (19.68 %) | ||||
| BM_ZFlat/11  527.4MB/s  gaviota (37.72 %) | ||||
							
								
								
									
										241
									
								
								vendor/github.com/golang/snappy/decode.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										241
									
								
								vendor/github.com/golang/snappy/decode.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,241 @@ | |||
| // Copyright 2011 The Snappy-Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| package snappy | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| 	"errors" | ||||
| 	"io" | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	// ErrCorrupt reports that the input is invalid. | ||||
| 	ErrCorrupt = errors.New("snappy: corrupt input") | ||||
| 	// ErrTooLarge reports that the uncompressed length is too large. | ||||
| 	ErrTooLarge = errors.New("snappy: decoded block is too large") | ||||
| 	// ErrUnsupported reports that the input isn't supported. | ||||
| 	ErrUnsupported = errors.New("snappy: unsupported input") | ||||
| 
 | ||||
| 	errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") | ||||
| ) | ||||
| 
 | ||||
| // DecodedLen returns the length of the decoded block. | ||||
| func DecodedLen(src []byte) (int, error) { | ||||
| 	v, _, err := decodedLen(src) | ||||
| 	return v, err | ||||
| } | ||||
| 
 | ||||
| // decodedLen returns the length of the decoded block and the number of bytes | ||||
| // that the length header occupied. | ||||
| func decodedLen(src []byte) (blockLen, headerLen int, err error) { | ||||
| 	v, n := binary.Uvarint(src) | ||||
| 	if n <= 0 || v > 0xffffffff { | ||||
| 		return 0, 0, ErrCorrupt | ||||
| 	} | ||||
| 
 | ||||
| 	const wordSize = 32 << (^uint(0) >> 32 & 1) | ||||
| 	if wordSize == 32 && v > 0x7fffffff { | ||||
| 		return 0, 0, ErrTooLarge | ||||
| 	} | ||||
| 	return int(v), n, nil | ||||
| } | ||||
| 
 | ||||
| const ( | ||||
| 	decodeErrCodeCorrupt                  = 1 | ||||
| 	decodeErrCodeUnsupportedLiteralLength = 2 | ||||
| ) | ||||
| 
 | ||||
| // Decode returns the decoded form of src. The returned slice may be a sub- | ||||
| // slice of dst if dst was large enough to hold the entire decoded block. | ||||
| // Otherwise, a newly allocated slice will be returned. | ||||
| // | ||||
| // The dst and src must not overlap. It is valid to pass a nil dst. | ||||
| // | ||||
| // Decode handles the Snappy block format, not the Snappy stream format. | ||||
| func Decode(dst, src []byte) ([]byte, error) { | ||||
| 	dLen, s, err := decodedLen(src) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	if dLen <= len(dst) { | ||||
| 		dst = dst[:dLen] | ||||
| 	} else { | ||||
| 		dst = make([]byte, dLen) | ||||
| 	} | ||||
| 	switch decode(dst, src[s:]) { | ||||
| 	case 0: | ||||
| 		return dst, nil | ||||
| 	case decodeErrCodeUnsupportedLiteralLength: | ||||
| 		return nil, errUnsupportedLiteralLength | ||||
| 	} | ||||
| 	return nil, ErrCorrupt | ||||
| } | ||||
| 
 | ||||
| // NewReader returns a new Reader that decompresses from r, using the framing | ||||
| // format described at | ||||
| // https://github.com/google/snappy/blob/master/framing_format.txt | ||||
| func NewReader(r io.Reader) *Reader { | ||||
| 	return &Reader{ | ||||
| 		r:       r, | ||||
| 		decoded: make([]byte, maxBlockSize), | ||||
| 		buf:     make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // Reader is an io.Reader that can read Snappy-compressed bytes. | ||||
| // | ||||
| // Reader handles the Snappy stream format, not the Snappy block format. | ||||
| type Reader struct { | ||||
| 	r       io.Reader | ||||
| 	err     error | ||||
| 	decoded []byte | ||||
| 	buf     []byte | ||||
| 	// decoded[i:j] contains decoded bytes that have not yet been passed on. | ||||
| 	i, j       int | ||||
| 	readHeader bool | ||||
| } | ||||
| 
 | ||||
| // Reset discards any buffered data, resets all state, and switches the Snappy | ||||
| // reader to read from r. This permits reusing a Reader rather than allocating | ||||
| // a new one. | ||||
| func (r *Reader) Reset(reader io.Reader) { | ||||
| 	r.r = reader | ||||
| 	r.err = nil | ||||
| 	r.i = 0 | ||||
| 	r.j = 0 | ||||
| 	r.readHeader = false | ||||
| } | ||||
| 
 | ||||
| func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { | ||||
| 	if _, r.err = io.ReadFull(r.r, p); r.err != nil { | ||||
| 		if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { | ||||
| 			r.err = ErrCorrupt | ||||
| 		} | ||||
| 		return false | ||||
| 	} | ||||
| 	return true | ||||
| } | ||||
| 
 | ||||
| // Read satisfies the io.Reader interface. | ||||
| func (r *Reader) Read(p []byte) (int, error) { | ||||
| 	if r.err != nil { | ||||
| 		return 0, r.err | ||||
| 	} | ||||
| 	for { | ||||
| 		if r.i < r.j { | ||||
| 			n := copy(p, r.decoded[r.i:r.j]) | ||||
| 			r.i += n | ||||
| 			return n, nil | ||||
| 		} | ||||
| 		if !r.readFull(r.buf[:4], true) { | ||||
| 			return 0, r.err | ||||
| 		} | ||||
| 		chunkType := r.buf[0] | ||||
| 		if !r.readHeader { | ||||
| 			if chunkType != chunkTypeStreamIdentifier { | ||||
| 				r.err = ErrCorrupt | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			r.readHeader = true | ||||
| 		} | ||||
| 		chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 | ||||
| 		if chunkLen > len(r.buf) { | ||||
| 			r.err = ErrUnsupported | ||||
| 			return 0, r.err | ||||
| 		} | ||||
| 
 | ||||
| 		// The chunk types are specified at | ||||
| 		// https://github.com/google/snappy/blob/master/framing_format.txt | ||||
| 		switch chunkType { | ||||
| 		case chunkTypeCompressedData: | ||||
| 			// Section 4.2. Compressed data (chunk type 0x00). | ||||
| 			if chunkLen < checksumSize { | ||||
| 				r.err = ErrCorrupt | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			buf := r.buf[:chunkLen] | ||||
| 			if !r.readFull(buf, false) { | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 | ||||
| 			buf = buf[checksumSize:] | ||||
| 
 | ||||
| 			n, err := DecodedLen(buf) | ||||
| 			if err != nil { | ||||
| 				r.err = err | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			if n > len(r.decoded) { | ||||
| 				r.err = ErrCorrupt | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			if _, err := Decode(r.decoded, buf); err != nil { | ||||
| 				r.err = err | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			if crc(r.decoded[:n]) != checksum { | ||||
| 				r.err = ErrCorrupt | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			r.i, r.j = 0, n | ||||
| 			continue | ||||
| 
 | ||||
| 		case chunkTypeUncompressedData: | ||||
| 			// Section 4.3. Uncompressed data (chunk type 0x01). | ||||
| 			if chunkLen < checksumSize { | ||||
| 				r.err = ErrCorrupt | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			buf := r.buf[:checksumSize] | ||||
| 			if !r.readFull(buf, false) { | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 | ||||
| 			// Read directly into r.decoded instead of via r.buf. | ||||
| 			n := chunkLen - checksumSize | ||||
| 			if n > len(r.decoded) { | ||||
| 				r.err = ErrCorrupt | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			if !r.readFull(r.decoded[:n], false) { | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			if crc(r.decoded[:n]) != checksum { | ||||
| 				r.err = ErrCorrupt | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			r.i, r.j = 0, n | ||||
| 			continue | ||||
| 
 | ||||
| 		case chunkTypeStreamIdentifier: | ||||
| 			// Section 4.1. Stream identifier (chunk type 0xff). | ||||
| 			if chunkLen != len(magicBody) { | ||||
| 				r.err = ErrCorrupt | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			if !r.readFull(r.buf[:len(magicBody)], false) { | ||||
| 				return 0, r.err | ||||
| 			} | ||||
| 			for i := 0; i < len(magicBody); i++ { | ||||
| 				if r.buf[i] != magicBody[i] { | ||||
| 					r.err = ErrCorrupt | ||||
| 					return 0, r.err | ||||
| 				} | ||||
| 			} | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		if chunkType <= 0x7f { | ||||
| 			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). | ||||
| 			r.err = ErrUnsupported | ||||
| 			return 0, r.err | ||||
| 		} | ||||
| 		// Section 4.4 Padding (chunk type 0xfe). | ||||
| 		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). | ||||
| 		if !r.readFull(r.buf[:chunkLen], false) { | ||||
| 			return 0, r.err | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										490
									
								
								vendor/github.com/golang/snappy/decode_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										490
									
								
								vendor/github.com/golang/snappy/decode_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,490 @@ | |||
| // Copyright 2016 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !noasm | ||||
| 
 | ||||
| #include "textflag.h" | ||||
| 
 | ||||
| // The asm code generally follows the pure Go code in decode_other.go, except | ||||
| // where marked with a "!!!". | ||||
| 
 | ||||
| // func decode(dst, src []byte) int | ||||
| // | ||||
| // All local variables fit into registers. The non-zero stack size is only to | ||||
| // spill registers and push args when issuing a CALL. The register allocation: | ||||
| //	- AX	scratch | ||||
| //	- BX	scratch | ||||
| //	- CX	length or x | ||||
| //	- DX	offset | ||||
| //	- SI	&src[s] | ||||
| //	- DI	&dst[d] | ||||
| //	+ R8	dst_base | ||||
| //	+ R9	dst_len | ||||
| //	+ R10	dst_base + dst_len | ||||
| //	+ R11	src_base | ||||
| //	+ R12	src_len | ||||
| //	+ R13	src_base + src_len | ||||
| //	- R14	used by doCopy | ||||
| //	- R15	used by doCopy | ||||
| // | ||||
| // The registers R8-R13 (marked with a "+") are set at the start of the | ||||
| // function, and after a CALL returns, and are not otherwise modified. | ||||
| // | ||||
| // The d variable is implicitly DI - R8,  and len(dst)-d is R10 - DI. | ||||
| // The s variable is implicitly SI - R11, and len(src)-s is R13 - SI. | ||||
| TEXT ·decode(SB), NOSPLIT, $48-56 | ||||
| 	// Initialize SI, DI and R8-R13. | ||||
| 	MOVQ dst_base+0(FP), R8 | ||||
| 	MOVQ dst_len+8(FP), R9 | ||||
| 	MOVQ R8, DI | ||||
| 	MOVQ R8, R10 | ||||
| 	ADDQ R9, R10 | ||||
| 	MOVQ src_base+24(FP), R11 | ||||
| 	MOVQ src_len+32(FP), R12 | ||||
| 	MOVQ R11, SI | ||||
| 	MOVQ R11, R13 | ||||
| 	ADDQ R12, R13 | ||||
| 
 | ||||
| loop: | ||||
| 	// for s < len(src) | ||||
| 	CMPQ SI, R13 | ||||
| 	JEQ  end | ||||
| 
 | ||||
| 	// CX = uint32(src[s]) | ||||
| 	// | ||||
| 	// switch src[s] & 0x03 | ||||
| 	MOVBLZX (SI), CX | ||||
| 	MOVL    CX, BX | ||||
| 	ANDL    $3, BX | ||||
| 	CMPL    BX, $1 | ||||
| 	JAE     tagCopy | ||||
| 
 | ||||
| 	// ---------------------------------------- | ||||
| 	// The code below handles literal tags. | ||||
| 
 | ||||
| 	// case tagLiteral: | ||||
| 	// x := uint32(src[s] >> 2) | ||||
| 	// switch | ||||
| 	SHRL $2, CX | ||||
| 	CMPL CX, $60 | ||||
| 	JAE  tagLit60Plus | ||||
| 
 | ||||
| 	// case x < 60: | ||||
| 	// s++ | ||||
| 	INCQ SI | ||||
| 
 | ||||
| doLit: | ||||
| 	// This is the end of the inner "switch", when we have a literal tag. | ||||
| 	// | ||||
| 	// We assume that CX == x and x fits in a uint32, where x is the variable | ||||
| 	// used in the pure Go decode_other.go code. | ||||
| 
 | ||||
| 	// length = int(x) + 1 | ||||
| 	// | ||||
| 	// Unlike the pure Go code, we don't need to check if length <= 0 because | ||||
| 	// CX can hold 64 bits, so the increment cannot overflow. | ||||
| 	INCQ CX | ||||
| 
 | ||||
| 	// Prepare to check if copying length bytes will run past the end of dst or | ||||
| 	// src. | ||||
| 	// | ||||
| 	// AX = len(dst) - d | ||||
| 	// BX = len(src) - s | ||||
| 	MOVQ R10, AX | ||||
| 	SUBQ DI, AX | ||||
| 	MOVQ R13, BX | ||||
| 	SUBQ SI, BX | ||||
| 
 | ||||
| 	// !!! Try a faster technique for short (16 or fewer bytes) copies. | ||||
| 	// | ||||
| 	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { | ||||
| 	//   goto callMemmove // Fall back on calling runtime·memmove. | ||||
| 	// } | ||||
| 	// | ||||
| 	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s | ||||
| 	// against 21 instead of 16, because it cannot assume that all of its input | ||||
| 	// is contiguous in memory and so it needs to leave enough source bytes to | ||||
| 	// read the next tag without refilling buffers, but Go's Decode assumes | ||||
| 	// contiguousness (the src argument is a []byte). | ||||
| 	CMPQ CX, $16 | ||||
| 	JGT  callMemmove | ||||
| 	CMPQ AX, $16 | ||||
| 	JLT  callMemmove | ||||
| 	CMPQ BX, $16 | ||||
| 	JLT  callMemmove | ||||
| 
 | ||||
| 	// !!! Implement the copy from src to dst as a 16-byte load and store. | ||||
| 	// (Decode's documentation says that dst and src must not overlap.) | ||||
| 	// | ||||
| 	// This always copies 16 bytes, instead of only length bytes, but that's | ||||
| 	// OK. If the input is a valid Snappy encoding then subsequent iterations | ||||
| 	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a | ||||
| 	// non-nil error), so the overrun will be ignored. | ||||
| 	// | ||||
| 	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or | ||||
| 	// 16-byte loads and stores. This technique probably wouldn't be as | ||||
| 	// effective on architectures that are fussier about alignment. | ||||
| 	MOVOU 0(SI), X0 | ||||
| 	MOVOU X0, 0(DI) | ||||
| 
 | ||||
| 	// d += length | ||||
| 	// s += length | ||||
| 	ADDQ CX, DI | ||||
| 	ADDQ CX, SI | ||||
| 	JMP  loop | ||||
| 
 | ||||
| callMemmove: | ||||
| 	// if length > len(dst)-d || length > len(src)-s { etc } | ||||
| 	CMPQ CX, AX | ||||
| 	JGT  errCorrupt | ||||
| 	CMPQ CX, BX | ||||
| 	JGT  errCorrupt | ||||
| 
 | ||||
| 	// copy(dst[d:], src[s:s+length]) | ||||
| 	// | ||||
| 	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push | ||||
| 	// DI, SI and CX as arguments. Coincidentally, we also need to spill those | ||||
| 	// three registers to the stack, to save local variables across the CALL. | ||||
| 	MOVQ DI, 0(SP) | ||||
| 	MOVQ SI, 8(SP) | ||||
| 	MOVQ CX, 16(SP) | ||||
| 	MOVQ DI, 24(SP) | ||||
| 	MOVQ SI, 32(SP) | ||||
| 	MOVQ CX, 40(SP) | ||||
| 	CALL runtime·memmove(SB) | ||||
| 
 | ||||
| 	// Restore local variables: unspill registers from the stack and | ||||
| 	// re-calculate R8-R13. | ||||
| 	MOVQ 24(SP), DI | ||||
| 	MOVQ 32(SP), SI | ||||
| 	MOVQ 40(SP), CX | ||||
| 	MOVQ dst_base+0(FP), R8 | ||||
| 	MOVQ dst_len+8(FP), R9 | ||||
| 	MOVQ R8, R10 | ||||
| 	ADDQ R9, R10 | ||||
| 	MOVQ src_base+24(FP), R11 | ||||
| 	MOVQ src_len+32(FP), R12 | ||||
| 	MOVQ R11, R13 | ||||
| 	ADDQ R12, R13 | ||||
| 
 | ||||
| 	// d += length | ||||
| 	// s += length | ||||
| 	ADDQ CX, DI | ||||
| 	ADDQ CX, SI | ||||
| 	JMP  loop | ||||
| 
 | ||||
| tagLit60Plus: | ||||
| 	// !!! This fragment does the | ||||
| 	// | ||||
| 	// s += x - 58; if uint(s) > uint(len(src)) { etc }
 | ||||
| 	// | ||||
| 	// checks. In the asm version, we code it once instead of once per switch case. | ||||
| 	ADDQ CX, SI | ||||
| 	SUBQ $58, SI | ||||
| 	MOVQ SI, BX | ||||
| 	SUBQ R11, BX | ||||
| 	CMPQ BX, R12 | ||||
| 	JA   errCorrupt | ||||
| 
 | ||||
| 	// case x == 60: | ||||
| 	CMPL CX, $61 | ||||
| 	JEQ  tagLit61 | ||||
| 	JA   tagLit62Plus | ||||
| 
 | ||||
| 	// x = uint32(src[s-1]) | ||||
| 	MOVBLZX -1(SI), CX | ||||
| 	JMP     doLit | ||||
| 
 | ||||
| tagLit61: | ||||
| 	// case x == 61: | ||||
| 	// x = uint32(src[s-2]) | uint32(src[s-1])<<8 | ||||
| 	MOVWLZX -2(SI), CX | ||||
| 	JMP     doLit | ||||
| 
 | ||||
| tagLit62Plus: | ||||
| 	CMPL CX, $62 | ||||
| 	JA   tagLit63 | ||||
| 
 | ||||
| 	// case x == 62: | ||||
| 	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 | ||||
| 	MOVWLZX -3(SI), CX | ||||
| 	MOVBLZX -1(SI), BX | ||||
| 	SHLL    $16, BX | ||||
| 	ORL     BX, CX | ||||
| 	JMP     doLit | ||||
| 
 | ||||
| tagLit63: | ||||
| 	// case x == 63: | ||||
| 	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 | ||||
| 	MOVL -4(SI), CX | ||||
| 	JMP  doLit | ||||
| 
 | ||||
| // The code above handles literal tags. | ||||
| // ---------------------------------------- | ||||
| // The code below handles copy tags. | ||||
| 
 | ||||
| tagCopy4: | ||||
| 	// case tagCopy4: | ||||
| 	// s += 5 | ||||
| 	ADDQ $5, SI | ||||
| 
 | ||||
| 	// if uint(s) > uint(len(src)) { etc } | ||||
| 	MOVQ SI, BX | ||||
| 	SUBQ R11, BX | ||||
| 	CMPQ BX, R12 | ||||
| 	JA   errCorrupt | ||||
| 
 | ||||
| 	// length = 1 + int(src[s-5])>>2 | ||||
| 	SHRQ $2, CX | ||||
| 	INCQ CX | ||||
| 
 | ||||
| 	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) | ||||
| 	MOVLQZX -4(SI), DX | ||||
| 	JMP     doCopy | ||||
| 
 | ||||
| tagCopy2: | ||||
| 	// case tagCopy2: | ||||
| 	// s += 3 | ||||
| 	ADDQ $3, SI | ||||
| 
 | ||||
| 	// if uint(s) > uint(len(src)) { etc } | ||||
| 	MOVQ SI, BX | ||||
| 	SUBQ R11, BX | ||||
| 	CMPQ BX, R12 | ||||
| 	JA   errCorrupt | ||||
| 
 | ||||
| 	// length = 1 + int(src[s-3])>>2 | ||||
| 	SHRQ $2, CX | ||||
| 	INCQ CX | ||||
| 
 | ||||
| 	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) | ||||
| 	MOVWQZX -2(SI), DX | ||||
| 	JMP     doCopy | ||||
| 
 | ||||
| tagCopy: | ||||
| 	// We have a copy tag. We assume that: | ||||
| 	//	- BX == src[s] & 0x03 | ||||
| 	//	- CX == src[s] | ||||
| 	CMPQ BX, $2 | ||||
| 	JEQ  tagCopy2 | ||||
| 	JA   tagCopy4 | ||||
| 
 | ||||
| 	// case tagCopy1: | ||||
| 	// s += 2 | ||||
| 	ADDQ $2, SI | ||||
| 
 | ||||
| 	// if uint(s) > uint(len(src)) { etc } | ||||
| 	MOVQ SI, BX | ||||
| 	SUBQ R11, BX | ||||
| 	CMPQ BX, R12 | ||||
| 	JA   errCorrupt | ||||
| 
 | ||||
| 	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) | ||||
| 	MOVQ    CX, DX | ||||
| 	ANDQ    $0xe0, DX | ||||
| 	SHLQ    $3, DX | ||||
| 	MOVBQZX -1(SI), BX | ||||
| 	ORQ     BX, DX | ||||
| 
 | ||||
| 	// length = 4 + int(src[s-2])>>2&0x7 | ||||
| 	SHRQ $2, CX | ||||
| 	ANDQ $7, CX | ||||
| 	ADDQ $4, CX | ||||
| 
 | ||||
| doCopy: | ||||
| 	// This is the end of the outer "switch", when we have a copy tag. | ||||
| 	// | ||||
| 	// We assume that: | ||||
| 	//	- CX == length && CX > 0 | ||||
| 	//	- DX == offset | ||||
| 
 | ||||
| 	// if offset <= 0 { etc } | ||||
| 	CMPQ DX, $0 | ||||
| 	JLE  errCorrupt | ||||
| 
 | ||||
| 	// if d < offset { etc } | ||||
| 	MOVQ DI, BX | ||||
| 	SUBQ R8, BX | ||||
| 	CMPQ BX, DX | ||||
| 	JLT  errCorrupt | ||||
| 
 | ||||
| 	// if length > len(dst)-d { etc } | ||||
| 	MOVQ R10, BX | ||||
| 	SUBQ DI, BX | ||||
| 	CMPQ CX, BX | ||||
| 	JGT  errCorrupt | ||||
| 
 | ||||
| 	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
 | ||||
| 	// | ||||
| 	// Set: | ||||
| 	//	- R14 = len(dst)-d | ||||
| 	//	- R15 = &dst[d-offset] | ||||
| 	MOVQ R10, R14 | ||||
| 	SUBQ DI, R14 | ||||
| 	MOVQ DI, R15 | ||||
| 	SUBQ DX, R15 | ||||
| 
 | ||||
| 	// !!! Try a faster technique for short (16 or fewer bytes) forward copies. | ||||
| 	// | ||||
| 	// First, try using two 8-byte load/stores, similar to the doLit technique | ||||
| 	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is | ||||
| 	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores | ||||
| 	// and not one 16-byte load/store, and the first store has to be before the | ||||
| 	// second load, due to the overlap if offset is in the range [8, 16). | ||||
| 	// | ||||
| 	// if length > 16 || offset < 8 || len(dst)-d < 16 { | ||||
| 	//   goto slowForwardCopy | ||||
| 	// } | ||||
| 	// copy 16 bytes | ||||
| 	// d += length | ||||
| 	CMPQ CX, $16 | ||||
| 	JGT  slowForwardCopy | ||||
| 	CMPQ DX, $8 | ||||
| 	JLT  slowForwardCopy | ||||
| 	CMPQ R14, $16 | ||||
| 	JLT  slowForwardCopy | ||||
| 	MOVQ 0(R15), AX | ||||
| 	MOVQ AX, 0(DI) | ||||
| 	MOVQ 8(R15), BX | ||||
| 	MOVQ BX, 8(DI) | ||||
| 	ADDQ CX, DI | ||||
| 	JMP  loop | ||||
| 
 | ||||
| slowForwardCopy: | ||||
| 	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we | ||||
| 	// can still try 8-byte load stores, provided we can overrun up to 10 extra | ||||
| 	// bytes. As above, the overrun will be fixed up by subsequent iterations | ||||
| 	// of the outermost loop. | ||||
| 	// | ||||
| 	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its | ||||
| 	// commentary says: | ||||
| 	// | ||||
| 	// ---- | ||||
| 	// | ||||
| 	// The main part of this loop is a simple copy of eight bytes at a time | ||||
| 	// until we've copied (at least) the requested amount of bytes.  However, | ||||
| 	// if d and d-offset are less than eight bytes apart (indicating a | ||||
| 	// repeating pattern of length < 8), we first need to expand the pattern in | ||||
| 	// order to get the correct results. For instance, if the buffer looks like | ||||
| 	// this, with the eight-byte <d-offset> and <d> patterns marked as | ||||
| 	// intervals: | ||||
| 	// | ||||
| 	//    abxxxxxxxxxxxx | ||||
| 	//    [------]           d-offset | ||||
| 	//      [------]         d | ||||
| 	// | ||||
| 	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern | ||||
| 	// once, after which we can move <d> two bytes without moving <d-offset>: | ||||
| 	// | ||||
| 	//    ababxxxxxxxxxx | ||||
| 	//    [------]           d-offset | ||||
| 	//        [------]       d | ||||
| 	// | ||||
| 	// and repeat the exercise until the two no longer overlap. | ||||
| 	// | ||||
| 	// This allows us to do very well in the special case of one single byte | ||||
| 	// repeated many times, without taking a big hit for more general cases. | ||||
| 	// | ||||
| 	// The worst case of extra writing past the end of the match occurs when | ||||
| 	// offset == 1 and length == 1; the last copy will read from byte positions
 | ||||
| 	// [0..7] and write to [4..11], whereas it was only supposed to write to | ||||
| 	// position 1. Thus, ten excess bytes. | ||||
| 	// | ||||
| 	// ---- | ||||
| 	// | ||||
| 	// That "10 byte overrun" worst case is confirmed by Go's | ||||
| 	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy | ||||
| 	// and finishSlowForwardCopy algorithm. | ||||
| 	// | ||||
| 	// if length > len(dst)-d-10 { | ||||
| 	//   goto verySlowForwardCopy | ||||
| 	// } | ||||
| 	SUBQ $10, R14 | ||||
| 	CMPQ CX, R14 | ||||
| 	JGT  verySlowForwardCopy | ||||
| 
 | ||||
| makeOffsetAtLeast8: | ||||
| 	// !!! As above, expand the pattern so that offset >= 8 and we can use | ||||
| 	// 8-byte load/stores. | ||||
| 	// | ||||
| 	// for offset < 8 { | ||||
| 	//   copy 8 bytes from dst[d-offset:] to dst[d:] | ||||
| 	//   length -= offset | ||||
| 	//   d      += offset | ||||
| 	//   offset += offset | ||||
| 	//   // The two previous lines together means that d-offset, and therefore | ||||
| 	//   // R15, is unchanged. | ||||
| 	// } | ||||
| 	CMPQ DX, $8 | ||||
| 	JGE  fixUpSlowForwardCopy | ||||
| 	MOVQ (R15), BX | ||||
| 	MOVQ BX, (DI) | ||||
| 	SUBQ DX, CX | ||||
| 	ADDQ DX, DI | ||||
| 	ADDQ DX, DX | ||||
| 	JMP  makeOffsetAtLeast8 | ||||
| 
 | ||||
| fixUpSlowForwardCopy: | ||||
| 	// !!! Add length (which might be negative now) to d (implied by DI being | ||||
| 	// &dst[d]) so that d ends up at the right place when we jump back to the | ||||
| 	// top of the loop. Before we do that, though, we save DI to AX so that, if | ||||
| 	// length is positive, copying the remaining length bytes will write to the | ||||
| 	// right place. | ||||
| 	MOVQ DI, AX | ||||
| 	ADDQ CX, DI | ||||
| 
 | ||||
| finishSlowForwardCopy: | ||||
| 	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative | ||||
| 	// length means that we overrun, but as above, that will be fixed up by | ||||
| 	// subsequent iterations of the outermost loop. | ||||
| 	CMPQ CX, $0 | ||||
| 	JLE  loop | ||||
| 	MOVQ (R15), BX | ||||
| 	MOVQ BX, (AX) | ||||
| 	ADDQ $8, R15 | ||||
| 	ADDQ $8, AX | ||||
| 	SUBQ $8, CX | ||||
| 	JMP  finishSlowForwardCopy | ||||
| 
 | ||||
| verySlowForwardCopy: | ||||
| 	// verySlowForwardCopy is a simple implementation of forward copy. In C | ||||
| 	// parlance, this is a do/while loop instead of a while loop, since we know | ||||
| 	// that length > 0. In Go syntax: | ||||
| 	// | ||||
| 	// for { | ||||
| 	//   dst[d] = dst[d - offset] | ||||
| 	//   d++ | ||||
| 	//   length-- | ||||
| 	//   if length == 0 { | ||||
| 	//     break | ||||
| 	//   } | ||||
| 	// } | ||||
| 	MOVB (R15), BX | ||||
| 	MOVB BX, (DI) | ||||
| 	INCQ R15 | ||||
| 	INCQ DI | ||||
| 	DECQ CX | ||||
| 	JNZ  verySlowForwardCopy | ||||
| 	JMP  loop | ||||
| 
 | ||||
| // The code above handles copy tags. | ||||
| // ---------------------------------------- | ||||
| 
 | ||||
| end: | ||||
| 	// This is the end of the "for s < len(src)". | ||||
| 	// | ||||
| 	// if d != len(dst) { etc } | ||||
| 	CMPQ DI, R10 | ||||
| 	JNE  errCorrupt | ||||
| 
 | ||||
| 	// return 0 | ||||
| 	MOVQ $0, ret+48(FP) | ||||
| 	RET | ||||
| 
 | ||||
| errCorrupt: | ||||
| 	// return decodeErrCodeCorrupt | ||||
| 	MOVQ $1, ret+48(FP) | ||||
| 	RET | ||||
							
								
								
									
										494
									
								
								vendor/github.com/golang/snappy/decode_arm64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										494
									
								
								vendor/github.com/golang/snappy/decode_arm64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,494 @@ | |||
| // Copyright 2020 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !noasm | ||||
| 
 | ||||
| #include "textflag.h" | ||||
| 
 | ||||
| // The asm code generally follows the pure Go code in decode_other.go, except | ||||
| // where marked with a "!!!". | ||||
| 
 | ||||
| // func decode(dst, src []byte) int | ||||
| // | ||||
| // All local variables fit into registers. The non-zero stack size is only to | ||||
| // spill registers and push args when issuing a CALL. The register allocation: | ||||
| //	- R2	scratch | ||||
| //	- R3	scratch | ||||
| //	- R4	length or x | ||||
| //	- R5	offset | ||||
| //	- R6	&src[s] | ||||
| //	- R7	&dst[d] | ||||
| //	+ R8	dst_base | ||||
| //	+ R9	dst_len | ||||
| //	+ R10	dst_base + dst_len | ||||
| //	+ R11	src_base | ||||
| //	+ R12	src_len | ||||
| //	+ R13	src_base + src_len | ||||
| //	- R14	used by doCopy | ||||
| //	- R15	used by doCopy | ||||
| // | ||||
| // The registers R8-R13 (marked with a "+") are set at the start of the | ||||
| // function, and after a CALL returns, and are not otherwise modified. | ||||
| // | ||||
| // The d variable is implicitly R7 - R8,  and len(dst)-d is R10 - R7. | ||||
| // The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6. | ||||
| TEXT ·decode(SB), NOSPLIT, $56-56 | ||||
| 	// Initialize R6, R7 and R8-R13. | ||||
| 	MOVD dst_base+0(FP), R8 | ||||
| 	MOVD dst_len+8(FP), R9 | ||||
| 	MOVD R8, R7 | ||||
| 	MOVD R8, R10 | ||||
| 	ADD  R9, R10, R10 | ||||
| 	MOVD src_base+24(FP), R11 | ||||
| 	MOVD src_len+32(FP), R12 | ||||
| 	MOVD R11, R6 | ||||
| 	MOVD R11, R13 | ||||
| 	ADD  R12, R13, R13 | ||||
| 
 | ||||
| loop: | ||||
| 	// for s < len(src) | ||||
| 	CMP R13, R6 | ||||
| 	BEQ end | ||||
| 
 | ||||
| 	// R4 = uint32(src[s]) | ||||
| 	// | ||||
| 	// switch src[s] & 0x03 | ||||
| 	MOVBU (R6), R4 | ||||
| 	MOVW  R4, R3 | ||||
| 	ANDW  $3, R3 | ||||
| 	MOVW  $1, R1 | ||||
| 	CMPW  R1, R3 | ||||
| 	BGE   tagCopy | ||||
| 
 | ||||
| 	// ---------------------------------------- | ||||
| 	// The code below handles literal tags. | ||||
| 
 | ||||
| 	// case tagLiteral: | ||||
| 	// x := uint32(src[s] >> 2) | ||||
| 	// switch | ||||
| 	MOVW $60, R1 | ||||
| 	LSRW $2, R4, R4 | ||||
| 	CMPW R4, R1 | ||||
| 	BLS  tagLit60Plus | ||||
| 
 | ||||
| 	// case x < 60: | ||||
| 	// s++ | ||||
| 	ADD $1, R6, R6 | ||||
| 
 | ||||
| doLit: | ||||
| 	// This is the end of the inner "switch", when we have a literal tag. | ||||
| 	// | ||||
| 	// We assume that R4 == x and x fits in a uint32, where x is the variable | ||||
| 	// used in the pure Go decode_other.go code. | ||||
| 
 | ||||
| 	// length = int(x) + 1 | ||||
| 	// | ||||
| 	// Unlike the pure Go code, we don't need to check if length <= 0 because | ||||
| 	// R4 can hold 64 bits, so the increment cannot overflow. | ||||
| 	ADD $1, R4, R4 | ||||
| 
 | ||||
| 	// Prepare to check if copying length bytes will run past the end of dst or | ||||
| 	// src. | ||||
| 	// | ||||
| 	// R2 = len(dst) - d | ||||
| 	// R3 = len(src) - s | ||||
| 	MOVD R10, R2 | ||||
| 	SUB  R7, R2, R2 | ||||
| 	MOVD R13, R3 | ||||
| 	SUB  R6, R3, R3 | ||||
| 
 | ||||
| 	// !!! Try a faster technique for short (16 or fewer bytes) copies. | ||||
| 	// | ||||
| 	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { | ||||
| 	//   goto callMemmove // Fall back on calling runtime·memmove. | ||||
| 	// } | ||||
| 	// | ||||
| 	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s | ||||
| 	// against 21 instead of 16, because it cannot assume that all of its input | ||||
| 	// is contiguous in memory and so it needs to leave enough source bytes to | ||||
| 	// read the next tag without refilling buffers, but Go's Decode assumes | ||||
| 	// contiguousness (the src argument is a []byte). | ||||
| 	CMP $16, R4 | ||||
| 	BGT callMemmove | ||||
| 	CMP $16, R2 | ||||
| 	BLT callMemmove | ||||
| 	CMP $16, R3 | ||||
| 	BLT callMemmove | ||||
| 
 | ||||
| 	// !!! Implement the copy from src to dst as a 16-byte load and store. | ||||
| 	// (Decode's documentation says that dst and src must not overlap.) | ||||
| 	// | ||||
| 	// This always copies 16 bytes, instead of only length bytes, but that's | ||||
| 	// OK. If the input is a valid Snappy encoding then subsequent iterations | ||||
| 	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a | ||||
| 	// non-nil error), so the overrun will be ignored. | ||||
| 	// | ||||
| 	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or | ||||
| 	// 16-byte loads and stores. This technique probably wouldn't be as | ||||
| 	// effective on architectures that are fussier about alignment. | ||||
| 	LDP 0(R6), (R14, R15) | ||||
| 	STP (R14, R15), 0(R7) | ||||
| 
 | ||||
| 	// d += length | ||||
| 	// s += length | ||||
| 	ADD R4, R7, R7 | ||||
| 	ADD R4, R6, R6 | ||||
| 	B   loop | ||||
| 
 | ||||
| callMemmove: | ||||
| 	// if length > len(dst)-d || length > len(src)-s { etc } | ||||
| 	CMP R2, R4 | ||||
| 	BGT errCorrupt | ||||
| 	CMP R3, R4 | ||||
| 	BGT errCorrupt | ||||
| 
 | ||||
| 	// copy(dst[d:], src[s:s+length]) | ||||
| 	// | ||||
| 	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push | ||||
| 	// R7, R6 and R4 as arguments. Coincidentally, we also need to spill those | ||||
| 	// three registers to the stack, to save local variables across the CALL. | ||||
| 	MOVD R7, 8(RSP) | ||||
| 	MOVD R6, 16(RSP) | ||||
| 	MOVD R4, 24(RSP) | ||||
| 	MOVD R7, 32(RSP) | ||||
| 	MOVD R6, 40(RSP) | ||||
| 	MOVD R4, 48(RSP) | ||||
| 	CALL runtime·memmove(SB) | ||||
| 
 | ||||
| 	// Restore local variables: unspill registers from the stack and | ||||
| 	// re-calculate R8-R13. | ||||
| 	MOVD 32(RSP), R7 | ||||
| 	MOVD 40(RSP), R6 | ||||
| 	MOVD 48(RSP), R4 | ||||
| 	MOVD dst_base+0(FP), R8 | ||||
| 	MOVD dst_len+8(FP), R9 | ||||
| 	MOVD R8, R10 | ||||
| 	ADD  R9, R10, R10 | ||||
| 	MOVD src_base+24(FP), R11 | ||||
| 	MOVD src_len+32(FP), R12 | ||||
| 	MOVD R11, R13 | ||||
| 	ADD  R12, R13, R13 | ||||
| 
 | ||||
| 	// d += length | ||||
| 	// s += length | ||||
| 	ADD R4, R7, R7 | ||||
| 	ADD R4, R6, R6 | ||||
| 	B   loop | ||||
| 
 | ||||
| tagLit60Plus: | ||||
| 	// !!! This fragment does the | ||||
| 	// | ||||
| 	// s += x - 58; if uint(s) > uint(len(src)) { etc }
 | ||||
| 	// | ||||
| 	// checks. In the asm version, we code it once instead of once per switch case. | ||||
| 	ADD  R4, R6, R6 | ||||
| 	SUB  $58, R6, R6 | ||||
| 	MOVD R6, R3 | ||||
| 	SUB  R11, R3, R3 | ||||
| 	CMP  R12, R3 | ||||
| 	BGT  errCorrupt | ||||
| 
 | ||||
| 	// case x == 60: | ||||
| 	MOVW $61, R1 | ||||
| 	CMPW R1, R4 | ||||
| 	BEQ  tagLit61 | ||||
| 	BGT  tagLit62Plus | ||||
| 
 | ||||
| 	// x = uint32(src[s-1]) | ||||
| 	MOVBU -1(R6), R4 | ||||
| 	B     doLit | ||||
| 
 | ||||
| tagLit61: | ||||
| 	// case x == 61: | ||||
| 	// x = uint32(src[s-2]) | uint32(src[s-1])<<8 | ||||
| 	MOVHU -2(R6), R4 | ||||
| 	B     doLit | ||||
| 
 | ||||
| tagLit62Plus: | ||||
| 	CMPW $62, R4 | ||||
| 	BHI  tagLit63 | ||||
| 
 | ||||
| 	// case x == 62: | ||||
| 	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 | ||||
| 	MOVHU -3(R6), R4 | ||||
| 	MOVBU -1(R6), R3 | ||||
| 	ORR   R3<<16, R4 | ||||
| 	B     doLit | ||||
| 
 | ||||
| tagLit63: | ||||
| 	// case x == 63: | ||||
| 	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 | ||||
| 	MOVWU -4(R6), R4 | ||||
| 	B     doLit | ||||
| 
 | ||||
| 	// The code above handles literal tags. | ||||
| 	// ---------------------------------------- | ||||
| 	// The code below handles copy tags. | ||||
| 
 | ||||
| tagCopy4: | ||||
| 	// case tagCopy4: | ||||
| 	// s += 5 | ||||
| 	ADD $5, R6, R6 | ||||
| 
 | ||||
| 	// if uint(s) > uint(len(src)) { etc } | ||||
| 	MOVD R6, R3 | ||||
| 	SUB  R11, R3, R3 | ||||
| 	CMP  R12, R3 | ||||
| 	BGT  errCorrupt | ||||
| 
 | ||||
| 	// length = 1 + int(src[s-5])>>2 | ||||
| 	MOVD $1, R1 | ||||
| 	ADD  R4>>2, R1, R4 | ||||
| 
 | ||||
| 	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) | ||||
| 	MOVWU -4(R6), R5 | ||||
| 	B     doCopy | ||||
| 
 | ||||
| tagCopy2: | ||||
| 	// case tagCopy2: | ||||
| 	// s += 3 | ||||
| 	ADD $3, R6, R6 | ||||
| 
 | ||||
| 	// if uint(s) > uint(len(src)) { etc } | ||||
| 	MOVD R6, R3 | ||||
| 	SUB  R11, R3, R3 | ||||
| 	CMP  R12, R3 | ||||
| 	BGT  errCorrupt | ||||
| 
 | ||||
| 	// length = 1 + int(src[s-3])>>2 | ||||
| 	MOVD $1, R1 | ||||
| 	ADD  R4>>2, R1, R4 | ||||
| 
 | ||||
| 	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) | ||||
| 	MOVHU -2(R6), R5 | ||||
| 	B     doCopy | ||||
| 
 | ||||
| tagCopy: | ||||
| 	// We have a copy tag. We assume that: | ||||
| 	//	- R3 == src[s] & 0x03 | ||||
| 	//	- R4 == src[s] | ||||
| 	CMP $2, R3 | ||||
| 	BEQ tagCopy2 | ||||
| 	BGT tagCopy4 | ||||
| 
 | ||||
| 	// case tagCopy1: | ||||
| 	// s += 2 | ||||
| 	ADD $2, R6, R6 | ||||
| 
 | ||||
| 	// if uint(s) > uint(len(src)) { etc } | ||||
| 	MOVD R6, R3 | ||||
| 	SUB  R11, R3, R3 | ||||
| 	CMP  R12, R3 | ||||
| 	BGT  errCorrupt | ||||
| 
 | ||||
| 	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) | ||||
| 	MOVD  R4, R5 | ||||
| 	AND   $0xe0, R5 | ||||
| 	MOVBU -1(R6), R3 | ||||
| 	ORR   R5<<3, R3, R5 | ||||
| 
 | ||||
| 	// length = 4 + int(src[s-2])>>2&0x7 | ||||
| 	MOVD $7, R1 | ||||
| 	AND  R4>>2, R1, R4 | ||||
| 	ADD  $4, R4, R4 | ||||
| 
 | ||||
| doCopy: | ||||
| 	// This is the end of the outer "switch", when we have a copy tag. | ||||
| 	// | ||||
| 	// We assume that: | ||||
| 	//	- R4 == length && R4 > 0 | ||||
| 	//	- R5 == offset | ||||
| 
 | ||||
| 	// if offset <= 0 { etc } | ||||
| 	MOVD $0, R1 | ||||
| 	CMP  R1, R5 | ||||
| 	BLE  errCorrupt | ||||
| 
 | ||||
| 	// if d < offset { etc } | ||||
| 	MOVD R7, R3 | ||||
| 	SUB  R8, R3, R3 | ||||
| 	CMP  R5, R3 | ||||
| 	BLT  errCorrupt | ||||
| 
 | ||||
| 	// if length > len(dst)-d { etc } | ||||
| 	MOVD R10, R3 | ||||
| 	SUB  R7, R3, R3 | ||||
| 	CMP  R3, R4 | ||||
| 	BGT  errCorrupt | ||||
| 
 | ||||
| 	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
 | ||||
| 	// | ||||
| 	// Set: | ||||
| 	//	- R14 = len(dst)-d | ||||
| 	//	- R15 = &dst[d-offset] | ||||
| 	MOVD R10, R14 | ||||
| 	SUB  R7, R14, R14 | ||||
| 	MOVD R7, R15 | ||||
| 	SUB  R5, R15, R15 | ||||
| 
 | ||||
| 	// !!! Try a faster technique for short (16 or fewer bytes) forward copies. | ||||
| 	// | ||||
| 	// First, try using two 8-byte load/stores, similar to the doLit technique | ||||
| 	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is | ||||
| 	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores | ||||
| 	// and not one 16-byte load/store, and the first store has to be before the | ||||
| 	// second load, due to the overlap if offset is in the range [8, 16). | ||||
| 	// | ||||
| 	// if length > 16 || offset < 8 || len(dst)-d < 16 { | ||||
| 	//   goto slowForwardCopy | ||||
| 	// } | ||||
| 	// copy 16 bytes | ||||
| 	// d += length | ||||
| 	CMP  $16, R4 | ||||
| 	BGT  slowForwardCopy | ||||
| 	CMP  $8, R5 | ||||
| 	BLT  slowForwardCopy | ||||
| 	CMP  $16, R14 | ||||
| 	BLT  slowForwardCopy | ||||
| 	MOVD 0(R15), R2 | ||||
| 	MOVD R2, 0(R7) | ||||
| 	MOVD 8(R15), R3 | ||||
| 	MOVD R3, 8(R7) | ||||
| 	ADD  R4, R7, R7 | ||||
| 	B    loop | ||||
| 
 | ||||
| slowForwardCopy: | ||||
| 	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we | ||||
| 	// can still try 8-byte load stores, provided we can overrun up to 10 extra | ||||
| 	// bytes. As above, the overrun will be fixed up by subsequent iterations | ||||
| 	// of the outermost loop. | ||||
| 	// | ||||
| 	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its | ||||
| 	// commentary says: | ||||
| 	// | ||||
| 	// ---- | ||||
| 	// | ||||
| 	// The main part of this loop is a simple copy of eight bytes at a time | ||||
| 	// until we've copied (at least) the requested amount of bytes.  However, | ||||
| 	// if d and d-offset are less than eight bytes apart (indicating a | ||||
| 	// repeating pattern of length < 8), we first need to expand the pattern in | ||||
| 	// order to get the correct results. For instance, if the buffer looks like | ||||
| 	// this, with the eight-byte <d-offset> and <d> patterns marked as | ||||
| 	// intervals: | ||||
| 	// | ||||
| 	//    abxxxxxxxxxxxx | ||||
| 	//    [------]           d-offset | ||||
| 	//      [------]         d | ||||
| 	// | ||||
| 	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern | ||||
| 	// once, after which we can move <d> two bytes without moving <d-offset>: | ||||
| 	// | ||||
| 	//    ababxxxxxxxxxx | ||||
| 	//    [------]           d-offset | ||||
| 	//        [------]       d | ||||
| 	// | ||||
| 	// and repeat the exercise until the two no longer overlap. | ||||
| 	// | ||||
| 	// This allows us to do very well in the special case of one single byte | ||||
| 	// repeated many times, without taking a big hit for more general cases. | ||||
| 	// | ||||
| 	// The worst case of extra writing past the end of the match occurs when | ||||
| 	// offset == 1 and length == 1; the last copy will read from byte positions
 | ||||
| 	// [0..7] and write to [4..11], whereas it was only supposed to write to | ||||
| 	// position 1. Thus, ten excess bytes. | ||||
| 	// | ||||
| 	// ---- | ||||
| 	// | ||||
| 	// That "10 byte overrun" worst case is confirmed by Go's | ||||
| 	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy | ||||
| 	// and finishSlowForwardCopy algorithm. | ||||
| 	// | ||||
| 	// if length > len(dst)-d-10 { | ||||
| 	//   goto verySlowForwardCopy | ||||
| 	// } | ||||
| 	SUB $10, R14, R14 | ||||
| 	CMP R14, R4 | ||||
| 	BGT verySlowForwardCopy | ||||
| 
 | ||||
| makeOffsetAtLeast8: | ||||
| 	// !!! As above, expand the pattern so that offset >= 8 and we can use | ||||
| 	// 8-byte load/stores. | ||||
| 	// | ||||
| 	// for offset < 8 { | ||||
| 	//   copy 8 bytes from dst[d-offset:] to dst[d:] | ||||
| 	//   length -= offset | ||||
| 	//   d      += offset | ||||
| 	//   offset += offset | ||||
| 	//   // The two previous lines together means that d-offset, and therefore | ||||
| 	//   // R15, is unchanged. | ||||
| 	// } | ||||
| 	CMP  $8, R5 | ||||
| 	BGE  fixUpSlowForwardCopy | ||||
| 	MOVD (R15), R3 | ||||
| 	MOVD R3, (R7) | ||||
| 	SUB  R5, R4, R4 | ||||
| 	ADD  R5, R7, R7 | ||||
| 	ADD  R5, R5, R5 | ||||
| 	B    makeOffsetAtLeast8 | ||||
| 
 | ||||
| fixUpSlowForwardCopy: | ||||
| 	// !!! Add length (which might be negative now) to d (implied by R7 being | ||||
| 	// &dst[d]) so that d ends up at the right place when we jump back to the | ||||
| 	// top of the loop. Before we do that, though, we save R7 to R2 so that, if | ||||
| 	// length is positive, copying the remaining length bytes will write to the | ||||
| 	// right place. | ||||
| 	MOVD R7, R2 | ||||
| 	ADD  R4, R7, R7 | ||||
| 
 | ||||
| finishSlowForwardCopy: | ||||
| 	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative | ||||
| 	// length means that we overrun, but as above, that will be fixed up by | ||||
| 	// subsequent iterations of the outermost loop. | ||||
| 	MOVD $0, R1 | ||||
| 	CMP  R1, R4 | ||||
| 	BLE  loop | ||||
| 	MOVD (R15), R3 | ||||
| 	MOVD R3, (R2) | ||||
| 	ADD  $8, R15, R15 | ||||
| 	ADD  $8, R2, R2 | ||||
| 	SUB  $8, R4, R4 | ||||
| 	B    finishSlowForwardCopy | ||||
| 
 | ||||
| verySlowForwardCopy: | ||||
| 	// verySlowForwardCopy is a simple implementation of forward copy. In C | ||||
| 	// parlance, this is a do/while loop instead of a while loop, since we know | ||||
| 	// that length > 0. In Go syntax: | ||||
| 	// | ||||
| 	// for { | ||||
| 	//   dst[d] = dst[d - offset] | ||||
| 	//   d++ | ||||
| 	//   length-- | ||||
| 	//   if length == 0 { | ||||
| 	//     break | ||||
| 	//   } | ||||
| 	// } | ||||
| 	MOVB (R15), R3 | ||||
| 	MOVB R3, (R7) | ||||
| 	ADD  $1, R15, R15 | ||||
| 	ADD  $1, R7, R7 | ||||
| 	SUB  $1, R4, R4 | ||||
| 	CBNZ R4, verySlowForwardCopy | ||||
| 	B    loop | ||||
| 
 | ||||
| 	// The code above handles copy tags. | ||||
| 	// ---------------------------------------- | ||||
| 
 | ||||
| end: | ||||
| 	// This is the end of the "for s < len(src)". | ||||
| 	// | ||||
| 	// if d != len(dst) { etc } | ||||
| 	CMP R10, R7 | ||||
| 	BNE errCorrupt | ||||
| 
 | ||||
| 	// return 0 | ||||
| 	MOVD $0, ret+48(FP) | ||||
| 	RET | ||||
| 
 | ||||
| errCorrupt: | ||||
| 	// return decodeErrCodeCorrupt | ||||
| 	MOVD $1, R2 | ||||
| 	MOVD R2, ret+48(FP) | ||||
| 	RET | ||||
							
								
								
									
										15
									
								
								vendor/github.com/golang/snappy/decode_asm.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								vendor/github.com/golang/snappy/decode_asm.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,15 @@ | |||
| // Copyright 2016 The Snappy-Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !noasm | ||||
| // +build amd64 arm64 | ||||
| 
 | ||||
| package snappy | ||||
| 
 | ||||
| // decode has the same semantics as in decode_other.go. | ||||
| // | ||||
| //go:noescape | ||||
| func decode(dst, src []byte) int | ||||
							
								
								
									
										115
									
								
								vendor/github.com/golang/snappy/decode_other.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								vendor/github.com/golang/snappy/decode_other.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,115 @@ | |||
| // Copyright 2016 The Snappy-Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| // +build !amd64,!arm64 appengine !gc noasm | ||||
| 
 | ||||
| package snappy | ||||
| 
 | ||||
| // decode writes the decoding of src to dst. It assumes that the varint-encoded | ||||
| // length of the decompressed bytes has already been read, and that len(dst) | ||||
| // equals that length. | ||||
| // | ||||
| // It returns 0 on success or a decodeErrCodeXxx error code on failure. | ||||
| func decode(dst, src []byte) int { | ||||
| 	var d, s, offset, length int | ||||
| 	for s < len(src) { | ||||
| 		switch src[s] & 0x03 { | ||||
| 		case tagLiteral: | ||||
| 			x := uint32(src[s] >> 2) | ||||
| 			switch { | ||||
| 			case x < 60: | ||||
| 				s++ | ||||
| 			case x == 60: | ||||
| 				s += 2 | ||||
| 				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. | ||||
| 					return decodeErrCodeCorrupt | ||||
| 				} | ||||
| 				x = uint32(src[s-1]) | ||||
| 			case x == 61: | ||||
| 				s += 3 | ||||
| 				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. | ||||
| 					return decodeErrCodeCorrupt | ||||
| 				} | ||||
| 				x = uint32(src[s-2]) | uint32(src[s-1])<<8 | ||||
| 			case x == 62: | ||||
| 				s += 4 | ||||
| 				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. | ||||
| 					return decodeErrCodeCorrupt | ||||
| 				} | ||||
| 				x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 | ||||
| 			case x == 63: | ||||
| 				s += 5 | ||||
| 				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. | ||||
| 					return decodeErrCodeCorrupt | ||||
| 				} | ||||
| 				x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 | ||||
| 			} | ||||
| 			length = int(x) + 1 | ||||
| 			if length <= 0 { | ||||
| 				return decodeErrCodeUnsupportedLiteralLength | ||||
| 			} | ||||
| 			if length > len(dst)-d || length > len(src)-s { | ||||
| 				return decodeErrCodeCorrupt | ||||
| 			} | ||||
| 			copy(dst[d:], src[s:s+length]) | ||||
| 			d += length | ||||
| 			s += length | ||||
| 			continue | ||||
| 
 | ||||
| 		case tagCopy1: | ||||
| 			s += 2 | ||||
| 			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. | ||||
| 				return decodeErrCodeCorrupt | ||||
| 			} | ||||
| 			length = 4 + int(src[s-2])>>2&0x7 | ||||
| 			offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) | ||||
| 
 | ||||
| 		case tagCopy2: | ||||
| 			s += 3 | ||||
| 			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. | ||||
| 				return decodeErrCodeCorrupt | ||||
| 			} | ||||
| 			length = 1 + int(src[s-3])>>2 | ||||
| 			offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) | ||||
| 
 | ||||
| 		case tagCopy4: | ||||
| 			s += 5 | ||||
| 			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. | ||||
| 				return decodeErrCodeCorrupt | ||||
| 			} | ||||
| 			length = 1 + int(src[s-5])>>2 | ||||
| 			offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) | ||||
| 		} | ||||
| 
 | ||||
| 		if offset <= 0 || d < offset || length > len(dst)-d { | ||||
| 			return decodeErrCodeCorrupt | ||||
| 		} | ||||
| 		// Copy from an earlier sub-slice of dst to a later sub-slice. | ||||
| 		// If no overlap, use the built-in copy: | ||||
| 		if offset >= length { | ||||
| 			copy(dst[d:d+length], dst[d-offset:]) | ||||
| 			d += length | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		// Unlike the built-in copy function, this byte-by-byte copy always runs | ||||
| 		// forwards, even if the slices overlap. Conceptually, this is: | ||||
| 		// | ||||
| 		// d += forwardCopy(dst[d:d+length], dst[d-offset:]) | ||||
| 		// | ||||
| 		// We align the slices into a and b and show the compiler they are the same size. | ||||
| 		// This allows the loop to run without bounds checks. | ||||
| 		a := dst[d : d+length] | ||||
| 		b := dst[d-offset:] | ||||
| 		b = b[:len(a)] | ||||
| 		for i := range a { | ||||
| 			a[i] = b[i] | ||||
| 		} | ||||
| 		d += length | ||||
| 	} | ||||
| 	if d != len(dst) { | ||||
| 		return decodeErrCodeCorrupt | ||||
| 	} | ||||
| 	return 0 | ||||
| } | ||||
							
								
								
									
										289
									
								
								vendor/github.com/golang/snappy/encode.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										289
									
								
								vendor/github.com/golang/snappy/encode.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,289 @@ | |||
| // Copyright 2011 The Snappy-Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| package snappy | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| 	"errors" | ||||
| 	"io" | ||||
| ) | ||||
| 
 | ||||
| // Encode returns the encoded form of src. The returned slice may be a sub- | ||||
| // slice of dst if dst was large enough to hold the entire encoded block. | ||||
| // Otherwise, a newly allocated slice will be returned. | ||||
| // | ||||
| // The dst and src must not overlap. It is valid to pass a nil dst. | ||||
| // | ||||
| // Encode handles the Snappy block format, not the Snappy stream format. | ||||
| func Encode(dst, src []byte) []byte { | ||||
| 	if n := MaxEncodedLen(len(src)); n < 0 { | ||||
| 		panic(ErrTooLarge) | ||||
| 	} else if len(dst) < n { | ||||
| 		dst = make([]byte, n) | ||||
| 	} | ||||
| 
 | ||||
| 	// The block starts with the varint-encoded length of the decompressed bytes. | ||||
| 	d := binary.PutUvarint(dst, uint64(len(src))) | ||||
| 
 | ||||
| 	for len(src) > 0 { | ||||
| 		p := src | ||||
| 		src = nil | ||||
| 		if len(p) > maxBlockSize { | ||||
| 			p, src = p[:maxBlockSize], p[maxBlockSize:] | ||||
| 		} | ||||
| 		if len(p) < minNonLiteralBlockSize { | ||||
| 			d += emitLiteral(dst[d:], p) | ||||
| 		} else { | ||||
| 			d += encodeBlock(dst[d:], p) | ||||
| 		} | ||||
| 	} | ||||
| 	return dst[:d] | ||||
| } | ||||
| 
 | ||||
| // inputMargin is the minimum number of extra input bytes to keep, inside | ||||
| // encodeBlock's inner loop. On some architectures, this margin lets us | ||||
| // implement a fast path for emitLiteral, where the copy of short (<= 16 byte) | ||||
| // literals can be implemented as a single load to and store from a 16-byte | ||||
| // register. That literal's actual length can be as short as 1 byte, so this | ||||
| // can copy up to 15 bytes too much, but that's OK as subsequent iterations of | ||||
| // the encoding loop will fix up the copy overrun, and this inputMargin ensures | ||||
| // that we don't overrun the dst and src buffers. | ||||
| const inputMargin = 16 - 1 | ||||
| 
 | ||||
| // minNonLiteralBlockSize is the minimum size of the input to encodeBlock that | ||||
| // could be encoded with a copy tag. This is the minimum with respect to the | ||||
| // algorithm used by encodeBlock, not a minimum enforced by the file format. | ||||
| // | ||||
| // The encoded output must start with at least a 1 byte literal, as there are | ||||
| // no previous bytes to copy. A minimal (1 byte) copy after that, generated | ||||
| // from an emitCopy call in encodeBlock's main loop, would require at least | ||||
| // another inputMargin bytes, for the reason above: we want any emitLiteral | ||||
| // calls inside encodeBlock's main loop to use the fast path if possible, which | ||||
| // requires being able to overrun by inputMargin bytes. Thus, | ||||
| // minNonLiteralBlockSize equals 1 + 1 + inputMargin. | ||||
| // | ||||
| // The C++ code doesn't use this exact threshold, but it could, as discussed at | ||||
| // https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion | ||||
| // The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an | ||||
| // optimization. It should not affect the encoded form. This is tested by | ||||
| // TestSameEncodingAsCppShortCopies. | ||||
| const minNonLiteralBlockSize = 1 + 1 + inputMargin | ||||
| 
 | ||||
| // MaxEncodedLen returns the maximum length of a snappy block, given its | ||||
| // uncompressed length. | ||||
| // | ||||
| // It will return a negative value if srcLen is too large to encode. | ||||
| func MaxEncodedLen(srcLen int) int { | ||||
| 	n := uint64(srcLen) | ||||
| 	if n > 0xffffffff { | ||||
| 		return -1 | ||||
| 	} | ||||
| 	// Compressed data can be defined as: | ||||
| 	//    compressed := item* literal* | ||||
| 	//    item       := literal* copy | ||||
| 	// | ||||
| 	// The trailing literal sequence has a space blowup of at most 62/60 | ||||
| 	// since a literal of length 60 needs one tag byte + one extra byte | ||||
| 	// for length information. | ||||
| 	// | ||||
| 	// Item blowup is trickier to measure. Suppose the "copy" op copies | ||||
| 	// 4 bytes of data. Because of a special check in the encoding code, | ||||
| 	// we produce a 4-byte copy only if the offset is < 65536. Therefore | ||||
| 	// the copy op takes 3 bytes to encode, and this type of item leads | ||||
| 	// to at most the 62/60 blowup for representing literals. | ||||
| 	// | ||||
| 	// Suppose the "copy" op copies 5 bytes of data. If the offset is big | ||||
| 	// enough, it will take 5 bytes to encode the copy op. Therefore the | ||||
| 	// worst case here is a one-byte literal followed by a five-byte copy. | ||||
| 	// That is, 6 bytes of input turn into 7 bytes of "compressed" data. | ||||
| 	// | ||||
| 	// This last factor dominates the blowup, so the final estimate is: | ||||
| 	n = 32 + n + n/6 | ||||
| 	if n > 0xffffffff { | ||||
| 		return -1 | ||||
| 	} | ||||
| 	return int(n) | ||||
| } | ||||
| 
 | ||||
| var errClosed = errors.New("snappy: Writer is closed") | ||||
| 
 | ||||
| // NewWriter returns a new Writer that compresses to w. | ||||
| // | ||||
| // The Writer returned does not buffer writes. There is no need to Flush or | ||||
| // Close such a Writer. | ||||
| // | ||||
| // Deprecated: the Writer returned is not suitable for many small writes, only | ||||
| // for few large writes. Use NewBufferedWriter instead, which is efficient | ||||
| // regardless of the frequency and shape of the writes, and remember to Close | ||||
| // that Writer when done. | ||||
| func NewWriter(w io.Writer) *Writer { | ||||
| 	return &Writer{ | ||||
| 		w:    w, | ||||
| 		obuf: make([]byte, obufLen), | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // NewBufferedWriter returns a new Writer that compresses to w, using the | ||||
| // framing format described at | ||||
| // https://github.com/google/snappy/blob/master/framing_format.txt | ||||
| // | ||||
| // The Writer returned buffers writes. Users must call Close to guarantee all | ||||
| // data has been forwarded to the underlying io.Writer. They may also call | ||||
| // Flush zero or more times before calling Close. | ||||
| func NewBufferedWriter(w io.Writer) *Writer { | ||||
| 	return &Writer{ | ||||
| 		w:    w, | ||||
| 		ibuf: make([]byte, 0, maxBlockSize), | ||||
| 		obuf: make([]byte, obufLen), | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // Writer is an io.Writer that can write Snappy-compressed bytes. | ||||
| // | ||||
| // Writer handles the Snappy stream format, not the Snappy block format. | ||||
| type Writer struct { | ||||
| 	w   io.Writer | ||||
| 	err error | ||||
| 
 | ||||
| 	// ibuf is a buffer for the incoming (uncompressed) bytes. | ||||
| 	// | ||||
| 	// Its use is optional. For backwards compatibility, Writers created by the | ||||
| 	// NewWriter function have ibuf == nil, do not buffer incoming bytes, and | ||||
| 	// therefore do not need to be Flush'ed or Close'd. | ||||
| 	ibuf []byte | ||||
| 
 | ||||
| 	// obuf is a buffer for the outgoing (compressed) bytes. | ||||
| 	obuf []byte | ||||
| 
 | ||||
| 	// wroteStreamHeader is whether we have written the stream header. | ||||
| 	wroteStreamHeader bool | ||||
| } | ||||
| 
 | ||||
| // Reset discards the writer's state and switches the Snappy writer to write to | ||||
| // w. This permits reusing a Writer rather than allocating a new one. | ||||
| func (w *Writer) Reset(writer io.Writer) { | ||||
| 	w.w = writer | ||||
| 	w.err = nil | ||||
| 	if w.ibuf != nil { | ||||
| 		w.ibuf = w.ibuf[:0] | ||||
| 	} | ||||
| 	w.wroteStreamHeader = false | ||||
| } | ||||
| 
 | ||||
| // Write satisfies the io.Writer interface. | ||||
| func (w *Writer) Write(p []byte) (nRet int, errRet error) { | ||||
| 	if w.ibuf == nil { | ||||
| 		// Do not buffer incoming bytes. This does not perform or compress well | ||||
| 		// if the caller of Writer.Write writes many small slices. This | ||||
| 		// behavior is therefore deprecated, but still supported for backwards | ||||
| 		// compatibility with code that doesn't explicitly Flush or Close. | ||||
| 		return w.write(p) | ||||
| 	} | ||||
| 
 | ||||
| 	// The remainder of this method is based on bufio.Writer.Write from the | ||||
| 	// standard library. | ||||
| 
 | ||||
| 	for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { | ||||
| 		var n int | ||||
| 		if len(w.ibuf) == 0 { | ||||
| 			// Large write, empty buffer. | ||||
| 			// Write directly from p to avoid copy. | ||||
| 			n, _ = w.write(p) | ||||
| 		} else { | ||||
| 			n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) | ||||
| 			w.ibuf = w.ibuf[:len(w.ibuf)+n] | ||||
| 			w.Flush() | ||||
| 		} | ||||
| 		nRet += n | ||||
| 		p = p[n:] | ||||
| 	} | ||||
| 	if w.err != nil { | ||||
| 		return nRet, w.err | ||||
| 	} | ||||
| 	n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) | ||||
| 	w.ibuf = w.ibuf[:len(w.ibuf)+n] | ||||
| 	nRet += n | ||||
| 	return nRet, nil | ||||
| } | ||||
| 
 | ||||
| func (w *Writer) write(p []byte) (nRet int, errRet error) { | ||||
| 	if w.err != nil { | ||||
| 		return 0, w.err | ||||
| 	} | ||||
| 	for len(p) > 0 { | ||||
| 		obufStart := len(magicChunk) | ||||
| 		if !w.wroteStreamHeader { | ||||
| 			w.wroteStreamHeader = true | ||||
| 			copy(w.obuf, magicChunk) | ||||
| 			obufStart = 0 | ||||
| 		} | ||||
| 
 | ||||
| 		var uncompressed []byte | ||||
| 		if len(p) > maxBlockSize { | ||||
| 			uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] | ||||
| 		} else { | ||||
| 			uncompressed, p = p, nil | ||||
| 		} | ||||
| 		checksum := crc(uncompressed) | ||||
| 
 | ||||
| 		// Compress the buffer, discarding the result if the improvement | ||||
| 		// isn't at least 12.5%. | ||||
| 		compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) | ||||
| 		chunkType := uint8(chunkTypeCompressedData) | ||||
| 		chunkLen := 4 + len(compressed) | ||||
| 		obufEnd := obufHeaderLen + len(compressed) | ||||
| 		if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { | ||||
| 			chunkType = chunkTypeUncompressedData | ||||
| 			chunkLen = 4 + len(uncompressed) | ||||
| 			obufEnd = obufHeaderLen | ||||
| 		} | ||||
| 
 | ||||
| 		// Fill in the per-chunk header that comes before the body. | ||||
| 		w.obuf[len(magicChunk)+0] = chunkType | ||||
| 		w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) | ||||
| 		w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) | ||||
| 		w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) | ||||
| 		w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) | ||||
| 		w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) | ||||
| 		w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) | ||||
| 		w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) | ||||
| 
 | ||||
| 		if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { | ||||
| 			w.err = err | ||||
| 			return nRet, err | ||||
| 		} | ||||
| 		if chunkType == chunkTypeUncompressedData { | ||||
| 			if _, err := w.w.Write(uncompressed); err != nil { | ||||
| 				w.err = err | ||||
| 				return nRet, err | ||||
| 			} | ||||
| 		} | ||||
| 		nRet += len(uncompressed) | ||||
| 	} | ||||
| 	return nRet, nil | ||||
| } | ||||
| 
 | ||||
| // Flush flushes the Writer to its underlying io.Writer. | ||||
| func (w *Writer) Flush() error { | ||||
| 	if w.err != nil { | ||||
| 		return w.err | ||||
| 	} | ||||
| 	if len(w.ibuf) == 0 { | ||||
| 		return nil | ||||
| 	} | ||||
| 	w.write(w.ibuf) | ||||
| 	w.ibuf = w.ibuf[:0] | ||||
| 	return w.err | ||||
| } | ||||
| 
 | ||||
| // Close calls Flush and then closes the Writer. | ||||
| func (w *Writer) Close() error { | ||||
| 	w.Flush() | ||||
| 	ret := w.err | ||||
| 	if w.err == nil { | ||||
| 		w.err = errClosed | ||||
| 	} | ||||
| 	return ret | ||||
| } | ||||
							
								
								
									
										730
									
								
								vendor/github.com/golang/snappy/encode_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										730
									
								
								vendor/github.com/golang/snappy/encode_amd64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,730 @@ | |||
| // Copyright 2016 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !noasm | ||||
| 
 | ||||
| #include "textflag.h" | ||||
| 
 | ||||
| // The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a | ||||
| // Go toolchain regression. See https://github.com/golang/go/issues/15426 and | ||||
| // https://github.com/golang/snappy/issues/29 | ||||
| // | ||||
| // As a workaround, the package was built with a known good assembler, and | ||||
| // those instructions were disassembled by "objdump -d" to yield the | ||||
| //	4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15 | ||||
| // style comments, in AT&T asm syntax. Note that rsp here is a physical | ||||
| // register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm). | ||||
| // The instructions were then encoded as "BYTE $0x.." sequences, which assemble | ||||
| // fine on Go 1.6. | ||||
| 
 | ||||
| // The asm code generally follows the pure Go code in encode_other.go, except | ||||
| // where marked with a "!!!". | ||||
| 
 | ||||
| // ---------------------------------------------------------------------------- | ||||
| 
 | ||||
| // func emitLiteral(dst, lit []byte) int | ||||
| // | ||||
| // All local variables fit into registers. The register allocation: | ||||
| //	- AX	len(lit) | ||||
| //	- BX	n | ||||
| //	- DX	return value | ||||
| //	- DI	&dst[i] | ||||
| //	- R10	&lit[0] | ||||
| // | ||||
| // The 24 bytes of stack space is to call runtime·memmove. | ||||
| // | ||||
| // The unusual register allocation of local variables, such as R10 for the | ||||
| // source pointer, matches the allocation used at the call site in encodeBlock, | ||||
| // which makes it easier to manually inline this function. | ||||
| TEXT ·emitLiteral(SB), NOSPLIT, $24-56 | ||||
| 	MOVQ dst_base+0(FP), DI | ||||
| 	MOVQ lit_base+24(FP), R10 | ||||
| 	MOVQ lit_len+32(FP), AX | ||||
| 	MOVQ AX, DX | ||||
| 	MOVL AX, BX | ||||
| 	SUBL $1, BX | ||||
| 
 | ||||
| 	CMPL BX, $60 | ||||
| 	JLT  oneByte | ||||
| 	CMPL BX, $256 | ||||
| 	JLT  twoBytes | ||||
| 
 | ||||
| threeBytes: | ||||
| 	MOVB $0xf4, 0(DI) | ||||
| 	MOVW BX, 1(DI) | ||||
| 	ADDQ $3, DI | ||||
| 	ADDQ $3, DX | ||||
| 	JMP  memmove | ||||
| 
 | ||||
| twoBytes: | ||||
| 	MOVB $0xf0, 0(DI) | ||||
| 	MOVB BX, 1(DI) | ||||
| 	ADDQ $2, DI | ||||
| 	ADDQ $2, DX | ||||
| 	JMP  memmove | ||||
| 
 | ||||
| oneByte: | ||||
| 	SHLB $2, BX | ||||
| 	MOVB BX, 0(DI) | ||||
| 	ADDQ $1, DI | ||||
| 	ADDQ $1, DX | ||||
| 
 | ||||
| memmove: | ||||
| 	MOVQ DX, ret+48(FP) | ||||
| 
 | ||||
| 	// copy(dst[i:], lit) | ||||
| 	// | ||||
| 	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push | ||||
| 	// DI, R10 and AX as arguments. | ||||
| 	MOVQ DI, 0(SP) | ||||
| 	MOVQ R10, 8(SP) | ||||
| 	MOVQ AX, 16(SP) | ||||
| 	CALL runtime·memmove(SB) | ||||
| 	RET | ||||
| 
 | ||||
| // ---------------------------------------------------------------------------- | ||||
| 
 | ||||
| // func emitCopy(dst []byte, offset, length int) int | ||||
| // | ||||
| // All local variables fit into registers. The register allocation: | ||||
| //	- AX	length | ||||
| //	- SI	&dst[0] | ||||
| //	- DI	&dst[i] | ||||
| //	- R11	offset | ||||
| // | ||||
| // The unusual register allocation of local variables, such as R11 for the | ||||
| // offset, matches the allocation used at the call site in encodeBlock, which | ||||
| // makes it easier to manually inline this function. | ||||
| TEXT ·emitCopy(SB), NOSPLIT, $0-48 | ||||
| 	MOVQ dst_base+0(FP), DI | ||||
| 	MOVQ DI, SI | ||||
| 	MOVQ offset+24(FP), R11 | ||||
| 	MOVQ length+32(FP), AX | ||||
| 
 | ||||
| loop0: | ||||
| 	// for length >= 68 { etc } | ||||
| 	CMPL AX, $68 | ||||
| 	JLT  step1 | ||||
| 
 | ||||
| 	// Emit a length 64 copy, encoded as 3 bytes. | ||||
| 	MOVB $0xfe, 0(DI) | ||||
| 	MOVW R11, 1(DI) | ||||
| 	ADDQ $3, DI | ||||
| 	SUBL $64, AX | ||||
| 	JMP  loop0 | ||||
| 
 | ||||
| step1: | ||||
| 	// if length > 64 { etc } | ||||
| 	CMPL AX, $64 | ||||
| 	JLE  step2 | ||||
| 
 | ||||
| 	// Emit a length 60 copy, encoded as 3 bytes. | ||||
| 	MOVB $0xee, 0(DI) | ||||
| 	MOVW R11, 1(DI) | ||||
| 	ADDQ $3, DI | ||||
| 	SUBL $60, AX | ||||
| 
 | ||||
| step2: | ||||
| 	// if length >= 12 || offset >= 2048 { goto step3 } | ||||
| 	CMPL AX, $12 | ||||
| 	JGE  step3 | ||||
| 	CMPL R11, $2048 | ||||
| 	JGE  step3 | ||||
| 
 | ||||
| 	// Emit the remaining copy, encoded as 2 bytes. | ||||
| 	MOVB R11, 1(DI) | ||||
| 	SHRL $8, R11 | ||||
| 	SHLB $5, R11 | ||||
| 	SUBB $4, AX | ||||
| 	SHLB $2, AX | ||||
| 	ORB  AX, R11 | ||||
| 	ORB  $1, R11 | ||||
| 	MOVB R11, 0(DI) | ||||
| 	ADDQ $2, DI | ||||
| 
 | ||||
| 	// Return the number of bytes written. | ||||
| 	SUBQ SI, DI | ||||
| 	MOVQ DI, ret+40(FP) | ||||
| 	RET | ||||
| 
 | ||||
| step3: | ||||
| 	// Emit the remaining copy, encoded as 3 bytes. | ||||
| 	SUBL $1, AX | ||||
| 	SHLB $2, AX | ||||
| 	ORB  $2, AX | ||||
| 	MOVB AX, 0(DI) | ||||
| 	MOVW R11, 1(DI) | ||||
| 	ADDQ $3, DI | ||||
| 
 | ||||
| 	// Return the number of bytes written. | ||||
| 	SUBQ SI, DI | ||||
| 	MOVQ DI, ret+40(FP) | ||||
| 	RET | ||||
| 
 | ||||
| // ---------------------------------------------------------------------------- | ||||
| 
 | ||||
| // func extendMatch(src []byte, i, j int) int | ||||
| // | ||||
| // All local variables fit into registers. The register allocation: | ||||
| //	- DX	&src[0] | ||||
| //	- SI	&src[j] | ||||
| //	- R13	&src[len(src) - 8] | ||||
| //	- R14	&src[len(src)] | ||||
| //	- R15	&src[i] | ||||
| // | ||||
| // The unusual register allocation of local variables, such as R15 for a source | ||||
| // pointer, matches the allocation used at the call site in encodeBlock, which | ||||
| // makes it easier to manually inline this function. | ||||
| TEXT ·extendMatch(SB), NOSPLIT, $0-48 | ||||
| 	MOVQ src_base+0(FP), DX | ||||
| 	MOVQ src_len+8(FP), R14 | ||||
| 	MOVQ i+24(FP), R15 | ||||
| 	MOVQ j+32(FP), SI | ||||
| 	ADDQ DX, R14 | ||||
| 	ADDQ DX, R15 | ||||
| 	ADDQ DX, SI | ||||
| 	MOVQ R14, R13 | ||||
| 	SUBQ $8, R13 | ||||
| 
 | ||||
| cmp8: | ||||
| 	// As long as we are 8 or more bytes before the end of src, we can load and | ||||
| 	// compare 8 bytes at a time. If those 8 bytes are equal, repeat. | ||||
| 	CMPQ SI, R13 | ||||
| 	JA   cmp1 | ||||
| 	MOVQ (R15), AX | ||||
| 	MOVQ (SI), BX | ||||
| 	CMPQ AX, BX | ||||
| 	JNE  bsf | ||||
| 	ADDQ $8, R15 | ||||
| 	ADDQ $8, SI | ||||
| 	JMP  cmp8 | ||||
| 
 | ||||
| bsf: | ||||
| 	// If those 8 bytes were not equal, XOR the two 8 byte values, and return | ||||
| 	// the index of the first byte that differs. The BSF instruction finds the | ||||
| 	// least significant 1 bit, the amd64 architecture is little-endian, and | ||||
| 	// the shift by 3 converts a bit index to a byte index. | ||||
| 	XORQ AX, BX | ||||
| 	BSFQ BX, BX | ||||
| 	SHRQ $3, BX | ||||
| 	ADDQ BX, SI | ||||
| 
 | ||||
| 	// Convert from &src[ret] to ret. | ||||
| 	SUBQ DX, SI | ||||
| 	MOVQ SI, ret+40(FP) | ||||
| 	RET | ||||
| 
 | ||||
| cmp1: | ||||
| 	// In src's tail, compare 1 byte at a time. | ||||
| 	CMPQ SI, R14 | ||||
| 	JAE  extendMatchEnd | ||||
| 	MOVB (R15), AX | ||||
| 	MOVB (SI), BX | ||||
| 	CMPB AX, BX | ||||
| 	JNE  extendMatchEnd | ||||
| 	ADDQ $1, R15 | ||||
| 	ADDQ $1, SI | ||||
| 	JMP  cmp1 | ||||
| 
 | ||||
| extendMatchEnd: | ||||
| 	// Convert from &src[ret] to ret. | ||||
| 	SUBQ DX, SI | ||||
| 	MOVQ SI, ret+40(FP) | ||||
| 	RET | ||||
| 
 | ||||
| // ---------------------------------------------------------------------------- | ||||
| 
 | ||||
| // func encodeBlock(dst, src []byte) (d int) | ||||
| // | ||||
| // All local variables fit into registers, other than "var table". The register | ||||
| // allocation: | ||||
| //	- AX	.	. | ||||
| //	- BX	.	. | ||||
| //	- CX	56	shift (note that amd64 shifts by non-immediates must use CX). | ||||
| //	- DX	64	&src[0], tableSize | ||||
| //	- SI	72	&src[s] | ||||
| //	- DI	80	&dst[d] | ||||
| //	- R9	88	sLimit | ||||
| //	- R10	.	&src[nextEmit] | ||||
| //	- R11	96	prevHash, currHash, nextHash, offset | ||||
| //	- R12	104	&src[base], skip | ||||
| //	- R13	.	&src[nextS], &src[len(src) - 8] | ||||
| //	- R14	.	len(src), bytesBetweenHashLookups, &src[len(src)], x | ||||
| //	- R15	112	candidate | ||||
| // | ||||
| // The second column (56, 64, etc) is the stack offset to spill the registers | ||||
| // when calling other functions. We could pack this slightly tighter, but it's | ||||
| // simpler to have a dedicated spill map independent of the function called. | ||||
| // | ||||
| // "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An | ||||
| // extra 56 bytes, to call other functions, and an extra 64 bytes, to spill | ||||
| // local variables (registers) during calls gives 32768 + 56 + 64 = 32888. | ||||
| TEXT ·encodeBlock(SB), 0, $32888-56 | ||||
| 	MOVQ dst_base+0(FP), DI | ||||
| 	MOVQ src_base+24(FP), SI | ||||
| 	MOVQ src_len+32(FP), R14 | ||||
| 
 | ||||
| 	// shift, tableSize := uint32(32-8), 1<<8 | ||||
| 	MOVQ $24, CX | ||||
| 	MOVQ $256, DX | ||||
| 
 | ||||
| calcShift: | ||||
| 	// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
 | ||||
| 	//	shift-- | ||||
| 	// } | ||||
| 	CMPQ DX, $16384 | ||||
| 	JGE  varTable | ||||
| 	CMPQ DX, R14 | ||||
| 	JGE  varTable | ||||
| 	SUBQ $1, CX | ||||
| 	SHLQ $1, DX | ||||
| 	JMP  calcShift | ||||
| 
 | ||||
| varTable: | ||||
| 	// var table [maxTableSize]uint16 | ||||
| 	// | ||||
| 	// In the asm code, unlike the Go code, we can zero-initialize only the | ||||
| 	// first tableSize elements. Each uint16 element is 2 bytes and each MOVOU | ||||
| 	// writes 16 bytes, so we can do only tableSize/8 writes instead of the | ||||
| 	// 2048 writes that would zero-initialize all of table's 32768 bytes. | ||||
| 	SHRQ $3, DX | ||||
| 	LEAQ table-32768(SP), BX | ||||
| 	PXOR X0, X0 | ||||
| 
 | ||||
| memclr: | ||||
| 	MOVOU X0, 0(BX) | ||||
| 	ADDQ  $16, BX | ||||
| 	SUBQ  $1, DX | ||||
| 	JNZ   memclr | ||||
| 
 | ||||
| 	// !!! DX = &src[0] | ||||
| 	MOVQ SI, DX | ||||
| 
 | ||||
| 	// sLimit := len(src) - inputMargin | ||||
| 	MOVQ R14, R9 | ||||
| 	SUBQ $15, R9 | ||||
| 
 | ||||
| 	// !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't | ||||
| 	// change for the rest of the function. | ||||
| 	MOVQ CX, 56(SP) | ||||
| 	MOVQ DX, 64(SP) | ||||
| 	MOVQ R9, 88(SP) | ||||
| 
 | ||||
| 	// nextEmit := 0 | ||||
| 	MOVQ DX, R10 | ||||
| 
 | ||||
| 	// s := 1 | ||||
| 	ADDQ $1, SI | ||||
| 
 | ||||
| 	// nextHash := hash(load32(src, s), shift) | ||||
| 	MOVL  0(SI), R11 | ||||
| 	IMULL $0x1e35a7bd, R11 | ||||
| 	SHRL  CX, R11 | ||||
| 
 | ||||
| outer: | ||||
| 	// for { etc } | ||||
| 
 | ||||
| 	// skip := 32 | ||||
| 	MOVQ $32, R12 | ||||
| 
 | ||||
| 	// nextS := s | ||||
| 	MOVQ SI, R13 | ||||
| 
 | ||||
| 	// candidate := 0 | ||||
| 	MOVQ $0, R15 | ||||
| 
 | ||||
| inner0: | ||||
| 	// for { etc } | ||||
| 
 | ||||
| 	// s := nextS | ||||
| 	MOVQ R13, SI | ||||
| 
 | ||||
| 	// bytesBetweenHashLookups := skip >> 5 | ||||
| 	MOVQ R12, R14 | ||||
| 	SHRQ $5, R14 | ||||
| 
 | ||||
| 	// nextS = s + bytesBetweenHashLookups | ||||
| 	ADDQ R14, R13 | ||||
| 
 | ||||
| 	// skip += bytesBetweenHashLookups | ||||
| 	ADDQ R14, R12 | ||||
| 
 | ||||
| 	// if nextS > sLimit { goto emitRemainder } | ||||
| 	MOVQ R13, AX | ||||
| 	SUBQ DX, AX | ||||
| 	CMPQ AX, R9 | ||||
| 	JA   emitRemainder | ||||
| 
 | ||||
| 	// candidate = int(table[nextHash]) | ||||
| 	// XXX: MOVWQZX table-32768(SP)(R11*2), R15 | ||||
| 	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15 | ||||
| 	BYTE $0x4e | ||||
| 	BYTE $0x0f | ||||
| 	BYTE $0xb7 | ||||
| 	BYTE $0x7c | ||||
| 	BYTE $0x5c | ||||
| 	BYTE $0x78 | ||||
| 
 | ||||
| 	// table[nextHash] = uint16(s) | ||||
| 	MOVQ SI, AX | ||||
| 	SUBQ DX, AX | ||||
| 
 | ||||
| 	// XXX: MOVW AX, table-32768(SP)(R11*2) | ||||
| 	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2) | ||||
| 	BYTE $0x66 | ||||
| 	BYTE $0x42 | ||||
| 	BYTE $0x89 | ||||
| 	BYTE $0x44 | ||||
| 	BYTE $0x5c | ||||
| 	BYTE $0x78 | ||||
| 
 | ||||
| 	// nextHash = hash(load32(src, nextS), shift) | ||||
| 	MOVL  0(R13), R11 | ||||
| 	IMULL $0x1e35a7bd, R11 | ||||
| 	SHRL  CX, R11 | ||||
| 
 | ||||
| 	// if load32(src, s) != load32(src, candidate) { continue } break | ||||
| 	MOVL 0(SI), AX | ||||
| 	MOVL (DX)(R15*1), BX | ||||
| 	CMPL AX, BX | ||||
| 	JNE  inner0 | ||||
| 
 | ||||
| fourByteMatch: | ||||
| 	// As per the encode_other.go code: | ||||
| 	// | ||||
| 	// A 4-byte match has been found. We'll later see etc. | ||||
| 
 | ||||
| 	// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment | ||||
| 	// on inputMargin in encode.go. | ||||
| 	MOVQ SI, AX | ||||
| 	SUBQ R10, AX | ||||
| 	CMPQ AX, $16 | ||||
| 	JLE  emitLiteralFastPath | ||||
| 
 | ||||
| 	// ---------------------------------------- | ||||
| 	// Begin inline of the emitLiteral call. | ||||
| 	// | ||||
| 	// d += emitLiteral(dst[d:], src[nextEmit:s]) | ||||
| 
 | ||||
| 	MOVL AX, BX | ||||
| 	SUBL $1, BX | ||||
| 
 | ||||
| 	CMPL BX, $60 | ||||
| 	JLT  inlineEmitLiteralOneByte | ||||
| 	CMPL BX, $256 | ||||
| 	JLT  inlineEmitLiteralTwoBytes | ||||
| 
 | ||||
| inlineEmitLiteralThreeBytes: | ||||
| 	MOVB $0xf4, 0(DI) | ||||
| 	MOVW BX, 1(DI) | ||||
| 	ADDQ $3, DI | ||||
| 	JMP  inlineEmitLiteralMemmove | ||||
| 
 | ||||
| inlineEmitLiteralTwoBytes: | ||||
| 	MOVB $0xf0, 0(DI) | ||||
| 	MOVB BX, 1(DI) | ||||
| 	ADDQ $2, DI | ||||
| 	JMP  inlineEmitLiteralMemmove | ||||
| 
 | ||||
| inlineEmitLiteralOneByte: | ||||
| 	SHLB $2, BX | ||||
| 	MOVB BX, 0(DI) | ||||
| 	ADDQ $1, DI | ||||
| 
 | ||||
| inlineEmitLiteralMemmove: | ||||
| 	// Spill local variables (registers) onto the stack; call; unspill.
 | ||||
| 	// | ||||
| 	// copy(dst[i:], lit) | ||||
| 	// | ||||
| 	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push | ||||
| 	// DI, R10 and AX as arguments. | ||||
| 	MOVQ DI, 0(SP) | ||||
| 	MOVQ R10, 8(SP) | ||||
| 	MOVQ AX, 16(SP) | ||||
| 	ADDQ AX, DI              // Finish the "d +=" part of "d += emitLiteral(etc)". | ||||
| 	MOVQ SI, 72(SP) | ||||
| 	MOVQ DI, 80(SP) | ||||
| 	MOVQ R15, 112(SP) | ||||
| 	CALL runtime·memmove(SB) | ||||
| 	MOVQ 56(SP), CX | ||||
| 	MOVQ 64(SP), DX | ||||
| 	MOVQ 72(SP), SI | ||||
| 	MOVQ 80(SP), DI | ||||
| 	MOVQ 88(SP), R9 | ||||
| 	MOVQ 112(SP), R15 | ||||
| 	JMP  inner1 | ||||
| 
 | ||||
| inlineEmitLiteralEnd: | ||||
| 	// End inline of the emitLiteral call. | ||||
| 	// ---------------------------------------- | ||||
| 
 | ||||
| emitLiteralFastPath: | ||||
| 	// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". | ||||
| 	MOVB AX, BX | ||||
| 	SUBB $1, BX | ||||
| 	SHLB $2, BX | ||||
| 	MOVB BX, (DI) | ||||
| 	ADDQ $1, DI | ||||
| 
 | ||||
| 	// !!! Implement the copy from lit to dst as a 16-byte load and store. | ||||
| 	// (Encode's documentation says that dst and src must not overlap.) | ||||
| 	// | ||||
| 	// This always copies 16 bytes, instead of only len(lit) bytes, but that's | ||||
| 	// OK. Subsequent iterations will fix up the overrun. | ||||
| 	// | ||||
| 	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or | ||||
| 	// 16-byte loads and stores. This technique probably wouldn't be as | ||||
| 	// effective on architectures that are fussier about alignment. | ||||
| 	MOVOU 0(R10), X0 | ||||
| 	MOVOU X0, 0(DI) | ||||
| 	ADDQ  AX, DI | ||||
| 
 | ||||
| inner1: | ||||
| 	// for { etc } | ||||
| 
 | ||||
| 	// base := s | ||||
| 	MOVQ SI, R12 | ||||
| 
 | ||||
| 	// !!! offset := base - candidate | ||||
| 	MOVQ R12, R11 | ||||
| 	SUBQ R15, R11 | ||||
| 	SUBQ DX, R11 | ||||
| 
 | ||||
| 	// ---------------------------------------- | ||||
| 	// Begin inline of the extendMatch call. | ||||
| 	// | ||||
| 	// s = extendMatch(src, candidate+4, s+4) | ||||
| 
 | ||||
| 	// !!! R14 = &src[len(src)] | ||||
| 	MOVQ src_len+32(FP), R14 | ||||
| 	ADDQ DX, R14 | ||||
| 
 | ||||
| 	// !!! R13 = &src[len(src) - 8] | ||||
| 	MOVQ R14, R13 | ||||
| 	SUBQ $8, R13 | ||||
| 
 | ||||
| 	// !!! R15 = &src[candidate + 4] | ||||
| 	ADDQ $4, R15 | ||||
| 	ADDQ DX, R15 | ||||
| 
 | ||||
| 	// !!! s += 4 | ||||
| 	ADDQ $4, SI | ||||
| 
 | ||||
| inlineExtendMatchCmp8: | ||||
| 	// As long as we are 8 or more bytes before the end of src, we can load and | ||||
| 	// compare 8 bytes at a time. If those 8 bytes are equal, repeat. | ||||
| 	CMPQ SI, R13 | ||||
| 	JA   inlineExtendMatchCmp1 | ||||
| 	MOVQ (R15), AX | ||||
| 	MOVQ (SI), BX | ||||
| 	CMPQ AX, BX | ||||
| 	JNE  inlineExtendMatchBSF | ||||
| 	ADDQ $8, R15 | ||||
| 	ADDQ $8, SI | ||||
| 	JMP  inlineExtendMatchCmp8 | ||||
| 
 | ||||
| inlineExtendMatchBSF: | ||||
| 	// If those 8 bytes were not equal, XOR the two 8 byte values, and return | ||||
| 	// the index of the first byte that differs. The BSF instruction finds the | ||||
| 	// least significant 1 bit, the amd64 architecture is little-endian, and | ||||
| 	// the shift by 3 converts a bit index to a byte index. | ||||
| 	XORQ AX, BX | ||||
| 	BSFQ BX, BX | ||||
| 	SHRQ $3, BX | ||||
| 	ADDQ BX, SI | ||||
| 	JMP  inlineExtendMatchEnd | ||||
| 
 | ||||
| inlineExtendMatchCmp1: | ||||
| 	// In src's tail, compare 1 byte at a time. | ||||
| 	CMPQ SI, R14 | ||||
| 	JAE  inlineExtendMatchEnd | ||||
| 	MOVB (R15), AX | ||||
| 	MOVB (SI), BX | ||||
| 	CMPB AX, BX | ||||
| 	JNE  inlineExtendMatchEnd | ||||
| 	ADDQ $1, R15 | ||||
| 	ADDQ $1, SI | ||||
| 	JMP  inlineExtendMatchCmp1 | ||||
| 
 | ||||
| inlineExtendMatchEnd: | ||||
| 	// End inline of the extendMatch call. | ||||
| 	// ---------------------------------------- | ||||
| 
 | ||||
| 	// ---------------------------------------- | ||||
| 	// Begin inline of the emitCopy call. | ||||
| 	// | ||||
| 	// d += emitCopy(dst[d:], base-candidate, s-base) | ||||
| 
 | ||||
| 	// !!! length := s - base | ||||
| 	MOVQ SI, AX | ||||
| 	SUBQ R12, AX | ||||
| 
 | ||||
| inlineEmitCopyLoop0: | ||||
| 	// for length >= 68 { etc } | ||||
| 	CMPL AX, $68 | ||||
| 	JLT  inlineEmitCopyStep1 | ||||
| 
 | ||||
| 	// Emit a length 64 copy, encoded as 3 bytes. | ||||
| 	MOVB $0xfe, 0(DI) | ||||
| 	MOVW R11, 1(DI) | ||||
| 	ADDQ $3, DI | ||||
| 	SUBL $64, AX | ||||
| 	JMP  inlineEmitCopyLoop0 | ||||
| 
 | ||||
| inlineEmitCopyStep1: | ||||
| 	// if length > 64 { etc } | ||||
| 	CMPL AX, $64 | ||||
| 	JLE  inlineEmitCopyStep2 | ||||
| 
 | ||||
| 	// Emit a length 60 copy, encoded as 3 bytes. | ||||
| 	MOVB $0xee, 0(DI) | ||||
| 	MOVW R11, 1(DI) | ||||
| 	ADDQ $3, DI | ||||
| 	SUBL $60, AX | ||||
| 
 | ||||
| inlineEmitCopyStep2: | ||||
| 	// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } | ||||
| 	CMPL AX, $12 | ||||
| 	JGE  inlineEmitCopyStep3 | ||||
| 	CMPL R11, $2048 | ||||
| 	JGE  inlineEmitCopyStep3 | ||||
| 
 | ||||
| 	// Emit the remaining copy, encoded as 2 bytes. | ||||
| 	MOVB R11, 1(DI) | ||||
| 	SHRL $8, R11 | ||||
| 	SHLB $5, R11 | ||||
| 	SUBB $4, AX | ||||
| 	SHLB $2, AX | ||||
| 	ORB  AX, R11 | ||||
| 	ORB  $1, R11 | ||||
| 	MOVB R11, 0(DI) | ||||
| 	ADDQ $2, DI | ||||
| 	JMP  inlineEmitCopyEnd | ||||
| 
 | ||||
| inlineEmitCopyStep3: | ||||
| 	// Emit the remaining copy, encoded as 3 bytes. | ||||
| 	SUBL $1, AX | ||||
| 	SHLB $2, AX | ||||
| 	ORB  $2, AX | ||||
| 	MOVB AX, 0(DI) | ||||
| 	MOVW R11, 1(DI) | ||||
| 	ADDQ $3, DI | ||||
| 
 | ||||
| inlineEmitCopyEnd: | ||||
| 	// End inline of the emitCopy call. | ||||
| 	// ---------------------------------------- | ||||
| 
 | ||||
| 	// nextEmit = s | ||||
| 	MOVQ SI, R10 | ||||
| 
 | ||||
| 	// if s >= sLimit { goto emitRemainder } | ||||
| 	MOVQ SI, AX | ||||
| 	SUBQ DX, AX | ||||
| 	CMPQ AX, R9 | ||||
| 	JAE  emitRemainder | ||||
| 
 | ||||
| 	// As per the encode_other.go code: | ||||
| 	// | ||||
| 	// We could immediately etc. | ||||
| 
 | ||||
| 	// x := load64(src, s-1) | ||||
| 	MOVQ -1(SI), R14 | ||||
| 
 | ||||
| 	// prevHash := hash(uint32(x>>0), shift) | ||||
| 	MOVL  R14, R11 | ||||
| 	IMULL $0x1e35a7bd, R11 | ||||
| 	SHRL  CX, R11 | ||||
| 
 | ||||
| 	// table[prevHash] = uint16(s-1) | ||||
| 	MOVQ SI, AX | ||||
| 	SUBQ DX, AX | ||||
| 	SUBQ $1, AX | ||||
| 
 | ||||
| 	// XXX: MOVW AX, table-32768(SP)(R11*2) | ||||
| 	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2) | ||||
| 	BYTE $0x66 | ||||
| 	BYTE $0x42 | ||||
| 	BYTE $0x89 | ||||
| 	BYTE $0x44 | ||||
| 	BYTE $0x5c | ||||
| 	BYTE $0x78 | ||||
| 
 | ||||
| 	// currHash := hash(uint32(x>>8), shift) | ||||
| 	SHRQ  $8, R14 | ||||
| 	MOVL  R14, R11 | ||||
| 	IMULL $0x1e35a7bd, R11 | ||||
| 	SHRL  CX, R11 | ||||
| 
 | ||||
| 	// candidate = int(table[currHash]) | ||||
| 	// XXX: MOVWQZX table-32768(SP)(R11*2), R15 | ||||
| 	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15 | ||||
| 	BYTE $0x4e | ||||
| 	BYTE $0x0f | ||||
| 	BYTE $0xb7 | ||||
| 	BYTE $0x7c | ||||
| 	BYTE $0x5c | ||||
| 	BYTE $0x78 | ||||
| 
 | ||||
| 	// table[currHash] = uint16(s) | ||||
| 	ADDQ $1, AX | ||||
| 
 | ||||
| 	// XXX: MOVW AX, table-32768(SP)(R11*2) | ||||
| 	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2) | ||||
| 	BYTE $0x66 | ||||
| 	BYTE $0x42 | ||||
| 	BYTE $0x89 | ||||
| 	BYTE $0x44 | ||||
| 	BYTE $0x5c | ||||
| 	BYTE $0x78 | ||||
| 
 | ||||
| 	// if uint32(x>>8) == load32(src, candidate) { continue } | ||||
| 	MOVL (DX)(R15*1), BX | ||||
| 	CMPL R14, BX | ||||
| 	JEQ  inner1 | ||||
| 
 | ||||
| 	// nextHash = hash(uint32(x>>16), shift) | ||||
| 	SHRQ  $8, R14 | ||||
| 	MOVL  R14, R11 | ||||
| 	IMULL $0x1e35a7bd, R11 | ||||
| 	SHRL  CX, R11 | ||||
| 
 | ||||
| 	// s++ | ||||
| 	ADDQ $1, SI | ||||
| 
 | ||||
| 	// break out of the inner1 for loop, i.e. continue the outer loop. | ||||
| 	JMP outer | ||||
| 
 | ||||
| emitRemainder: | ||||
| 	// if nextEmit < len(src) { etc } | ||||
| 	MOVQ src_len+32(FP), AX | ||||
| 	ADDQ DX, AX | ||||
| 	CMPQ R10, AX | ||||
| 	JEQ  encodeBlockEnd | ||||
| 
 | ||||
| 	// d += emitLiteral(dst[d:], src[nextEmit:]) | ||||
| 	// | ||||
| 	// Push args. | ||||
| 	MOVQ DI, 0(SP) | ||||
| 	MOVQ $0, 8(SP)   // Unnecessary, as the callee ignores it, but conservative. | ||||
| 	MOVQ $0, 16(SP)  // Unnecessary, as the callee ignores it, but conservative. | ||||
| 	MOVQ R10, 24(SP) | ||||
| 	SUBQ R10, AX | ||||
| 	MOVQ AX, 32(SP) | ||||
| 	MOVQ AX, 40(SP)  // Unnecessary, as the callee ignores it, but conservative. | ||||
| 
 | ||||
| 	// Spill local variables (registers) onto the stack; call; unspill.
 | ||||
| 	MOVQ DI, 80(SP) | ||||
| 	CALL ·emitLiteral(SB) | ||||
| 	MOVQ 80(SP), DI | ||||
| 
 | ||||
| 	// Finish the "d +=" part of "d += emitLiteral(etc)". | ||||
| 	ADDQ 48(SP), DI | ||||
| 
 | ||||
| encodeBlockEnd: | ||||
| 	MOVQ dst_base+0(FP), AX | ||||
| 	SUBQ AX, DI | ||||
| 	MOVQ DI, d+48(FP) | ||||
| 	RET | ||||
							
								
								
									
										722
									
								
								vendor/github.com/golang/snappy/encode_arm64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										722
									
								
								vendor/github.com/golang/snappy/encode_arm64.s
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,722 @@ | |||
| // Copyright 2020 The Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !noasm | ||||
| 
 | ||||
| #include "textflag.h" | ||||
| 
 | ||||
| // The asm code generally follows the pure Go code in encode_other.go, except | ||||
| // where marked with a "!!!". | ||||
| 
 | ||||
| // ---------------------------------------------------------------------------- | ||||
| 
 | ||||
| // func emitLiteral(dst, lit []byte) int | ||||
| // | ||||
| // All local variables fit into registers. The register allocation: | ||||
| //	- R3	len(lit) | ||||
| //	- R4	n | ||||
| //	- R6	return value | ||||
| //	- R8	&dst[i] | ||||
| //	- R10	&lit[0] | ||||
| // | ||||
| // The 32 bytes of stack space is to call runtime·memmove. | ||||
| // | ||||
| // The unusual register allocation of local variables, such as R10 for the | ||||
| // source pointer, matches the allocation used at the call site in encodeBlock, | ||||
| // which makes it easier to manually inline this function. | ||||
| TEXT ·emitLiteral(SB), NOSPLIT, $32-56 | ||||
| 	MOVD dst_base+0(FP), R8 | ||||
| 	MOVD lit_base+24(FP), R10 | ||||
| 	MOVD lit_len+32(FP), R3 | ||||
| 	MOVD R3, R6 | ||||
| 	MOVW R3, R4 | ||||
| 	SUBW $1, R4, R4 | ||||
| 
 | ||||
| 	CMPW $60, R4 | ||||
| 	BLT  oneByte | ||||
| 	CMPW $256, R4 | ||||
| 	BLT  twoBytes | ||||
| 
 | ||||
| threeBytes: | ||||
| 	MOVD $0xf4, R2 | ||||
| 	MOVB R2, 0(R8) | ||||
| 	MOVW R4, 1(R8) | ||||
| 	ADD  $3, R8, R8 | ||||
| 	ADD  $3, R6, R6 | ||||
| 	B    memmove | ||||
| 
 | ||||
| twoBytes: | ||||
| 	MOVD $0xf0, R2 | ||||
| 	MOVB R2, 0(R8) | ||||
| 	MOVB R4, 1(R8) | ||||
| 	ADD  $2, R8, R8 | ||||
| 	ADD  $2, R6, R6 | ||||
| 	B    memmove | ||||
| 
 | ||||
| oneByte: | ||||
| 	LSLW $2, R4, R4 | ||||
| 	MOVB R4, 0(R8) | ||||
| 	ADD  $1, R8, R8 | ||||
| 	ADD  $1, R6, R6 | ||||
| 
 | ||||
| memmove: | ||||
| 	MOVD R6, ret+48(FP) | ||||
| 
 | ||||
| 	// copy(dst[i:], lit) | ||||
| 	// | ||||
| 	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push | ||||
| 	// R8, R10 and R3 as arguments. | ||||
| 	MOVD R8, 8(RSP) | ||||
| 	MOVD R10, 16(RSP) | ||||
| 	MOVD R3, 24(RSP) | ||||
| 	CALL runtime·memmove(SB) | ||||
| 	RET | ||||
| 
 | ||||
| // ---------------------------------------------------------------------------- | ||||
| 
 | ||||
| // func emitCopy(dst []byte, offset, length int) int | ||||
| // | ||||
| // All local variables fit into registers. The register allocation: | ||||
| //	- R3	length | ||||
| //	- R7	&dst[0] | ||||
| //	- R8	&dst[i] | ||||
| //	- R11	offset | ||||
| // | ||||
| // The unusual register allocation of local variables, such as R11 for the | ||||
| // offset, matches the allocation used at the call site in encodeBlock, which | ||||
| // makes it easier to manually inline this function. | ||||
| TEXT ·emitCopy(SB), NOSPLIT, $0-48 | ||||
| 	MOVD dst_base+0(FP), R8 | ||||
| 	MOVD R8, R7 | ||||
| 	MOVD offset+24(FP), R11 | ||||
| 	MOVD length+32(FP), R3 | ||||
| 
 | ||||
| loop0: | ||||
| 	// for length >= 68 { etc } | ||||
| 	CMPW $68, R3 | ||||
| 	BLT  step1 | ||||
| 
 | ||||
| 	// Emit a length 64 copy, encoded as 3 bytes. | ||||
| 	MOVD $0xfe, R2 | ||||
| 	MOVB R2, 0(R8) | ||||
| 	MOVW R11, 1(R8) | ||||
| 	ADD  $3, R8, R8 | ||||
| 	SUB  $64, R3, R3 | ||||
| 	B    loop0 | ||||
| 
 | ||||
| step1: | ||||
| 	// if length > 64 { etc } | ||||
| 	CMP $64, R3 | ||||
| 	BLE step2 | ||||
| 
 | ||||
| 	// Emit a length 60 copy, encoded as 3 bytes. | ||||
| 	MOVD $0xee, R2 | ||||
| 	MOVB R2, 0(R8) | ||||
| 	MOVW R11, 1(R8) | ||||
| 	ADD  $3, R8, R8 | ||||
| 	SUB  $60, R3, R3 | ||||
| 
 | ||||
| step2: | ||||
| 	// if length >= 12 || offset >= 2048 { goto step3 } | ||||
| 	CMP  $12, R3 | ||||
| 	BGE  step3 | ||||
| 	CMPW $2048, R11 | ||||
| 	BGE  step3 | ||||
| 
 | ||||
| 	// Emit the remaining copy, encoded as 2 bytes. | ||||
| 	MOVB R11, 1(R8) | ||||
| 	LSRW $3, R11, R11 | ||||
| 	AND  $0xe0, R11, R11 | ||||
| 	SUB  $4, R3, R3 | ||||
| 	LSLW $2, R3 | ||||
| 	AND  $0xff, R3, R3 | ||||
| 	ORRW R3, R11, R11 | ||||
| 	ORRW $1, R11, R11 | ||||
| 	MOVB R11, 0(R8) | ||||
| 	ADD  $2, R8, R8 | ||||
| 
 | ||||
| 	// Return the number of bytes written. | ||||
| 	SUB  R7, R8, R8 | ||||
| 	MOVD R8, ret+40(FP) | ||||
| 	RET | ||||
| 
 | ||||
| step3: | ||||
| 	// Emit the remaining copy, encoded as 3 bytes. | ||||
| 	SUB  $1, R3, R3 | ||||
| 	AND  $0xff, R3, R3 | ||||
| 	LSLW $2, R3, R3 | ||||
| 	ORRW $2, R3, R3 | ||||
| 	MOVB R3, 0(R8) | ||||
| 	MOVW R11, 1(R8) | ||||
| 	ADD  $3, R8, R8 | ||||
| 
 | ||||
| 	// Return the number of bytes written. | ||||
| 	SUB  R7, R8, R8 | ||||
| 	MOVD R8, ret+40(FP) | ||||
| 	RET | ||||
| 
 | ||||
| // ---------------------------------------------------------------------------- | ||||
| 
 | ||||
| // func extendMatch(src []byte, i, j int) int | ||||
| // | ||||
| // All local variables fit into registers. The register allocation: | ||||
| //	- R6	&src[0] | ||||
| //	- R7	&src[j] | ||||
| //	- R13	&src[len(src) - 8] | ||||
| //	- R14	&src[len(src)] | ||||
| //	- R15	&src[i] | ||||
| // | ||||
| // The unusual register allocation of local variables, such as R15 for a source | ||||
| // pointer, matches the allocation used at the call site in encodeBlock, which | ||||
| // makes it easier to manually inline this function. | ||||
| TEXT ·extendMatch(SB), NOSPLIT, $0-48 | ||||
| 	MOVD src_base+0(FP), R6 | ||||
| 	MOVD src_len+8(FP), R14 | ||||
| 	MOVD i+24(FP), R15 | ||||
| 	MOVD j+32(FP), R7 | ||||
| 	ADD  R6, R14, R14 | ||||
| 	ADD  R6, R15, R15 | ||||
| 	ADD  R6, R7, R7 | ||||
| 	MOVD R14, R13 | ||||
| 	SUB  $8, R13, R13 | ||||
| 
 | ||||
| cmp8: | ||||
| 	// As long as we are 8 or more bytes before the end of src, we can load and | ||||
| 	// compare 8 bytes at a time. If those 8 bytes are equal, repeat. | ||||
| 	CMP  R13, R7 | ||||
| 	BHI  cmp1 | ||||
| 	MOVD (R15), R3 | ||||
| 	MOVD (R7), R4 | ||||
| 	CMP  R4, R3 | ||||
| 	BNE  bsf | ||||
| 	ADD  $8, R15, R15 | ||||
| 	ADD  $8, R7, R7 | ||||
| 	B    cmp8 | ||||
| 
 | ||||
| bsf: | ||||
| 	// If those 8 bytes were not equal, XOR the two 8 byte values, and return | ||||
| 	// the index of the first byte that differs. | ||||
| 	// RBIT reverses the bit order, then CLZ counts the leading zeros, the | ||||
| 	// combination of which finds the least significant bit which is set. | ||||
| 	// The arm64 architecture is little-endian, and the shift by 3 converts | ||||
| 	// a bit index to a byte index. | ||||
| 	EOR  R3, R4, R4 | ||||
| 	RBIT R4, R4 | ||||
| 	CLZ  R4, R4 | ||||
| 	ADD  R4>>3, R7, R7 | ||||
| 
 | ||||
| 	// Convert from &src[ret] to ret. | ||||
| 	SUB  R6, R7, R7 | ||||
| 	MOVD R7, ret+40(FP) | ||||
| 	RET | ||||
| 
 | ||||
| cmp1: | ||||
| 	// In src's tail, compare 1 byte at a time. | ||||
| 	CMP  R7, R14 | ||||
| 	BLS  extendMatchEnd | ||||
| 	MOVB (R15), R3 | ||||
| 	MOVB (R7), R4 | ||||
| 	CMP  R4, R3 | ||||
| 	BNE  extendMatchEnd | ||||
| 	ADD  $1, R15, R15 | ||||
| 	ADD  $1, R7, R7 | ||||
| 	B    cmp1 | ||||
| 
 | ||||
| extendMatchEnd: | ||||
| 	// Convert from &src[ret] to ret. | ||||
| 	SUB  R6, R7, R7 | ||||
| 	MOVD R7, ret+40(FP) | ||||
| 	RET | ||||
| 
 | ||||
| // ---------------------------------------------------------------------------- | ||||
| 
 | ||||
| // func encodeBlock(dst, src []byte) (d int) | ||||
| // | ||||
| // All local variables fit into registers, other than "var table". The register | ||||
| // allocation: | ||||
| //	- R3	.	. | ||||
| //	- R4	.	. | ||||
| //	- R5	64	shift | ||||
| //	- R6	72	&src[0], tableSize | ||||
| //	- R7	80	&src[s] | ||||
| //	- R8	88	&dst[d] | ||||
| //	- R9	96	sLimit | ||||
| //	- R10	.	&src[nextEmit] | ||||
| //	- R11	104	prevHash, currHash, nextHash, offset | ||||
| //	- R12	112	&src[base], skip | ||||
| //	- R13	.	&src[nextS], &src[len(src) - 8] | ||||
| //	- R14	.	len(src), bytesBetweenHashLookups, &src[len(src)], x | ||||
| //	- R15	120	candidate | ||||
| //	- R16	.	hash constant, 0x1e35a7bd | ||||
| //	- R17	.	&table | ||||
| //	- .  	128	table | ||||
| // | ||||
| // The second column (64, 72, etc) is the stack offset to spill the registers | ||||
| // when calling other functions. We could pack this slightly tighter, but it's | ||||
| // simpler to have a dedicated spill map independent of the function called. | ||||
| // | ||||
| // "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An | ||||
| // extra 64 bytes, to call other functions, and an extra 64 bytes, to spill | ||||
| // local variables (registers) during calls gives 32768 + 64 + 64 = 32896. | ||||
| TEXT ·encodeBlock(SB), 0, $32896-56 | ||||
| 	MOVD dst_base+0(FP), R8 | ||||
| 	MOVD src_base+24(FP), R7 | ||||
| 	MOVD src_len+32(FP), R14 | ||||
| 
 | ||||
| 	// shift, tableSize := uint32(32-8), 1<<8 | ||||
| 	MOVD  $24, R5 | ||||
| 	MOVD  $256, R6 | ||||
| 	MOVW  $0xa7bd, R16 | ||||
| 	MOVKW $(0x1e35<<16), R16 | ||||
| 
 | ||||
| calcShift: | ||||
| 	// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
 | ||||
| 	//	shift-- | ||||
| 	// } | ||||
| 	MOVD $16384, R2 | ||||
| 	CMP  R2, R6 | ||||
| 	BGE  varTable | ||||
| 	CMP  R14, R6 | ||||
| 	BGE  varTable | ||||
| 	SUB  $1, R5, R5 | ||||
| 	LSL  $1, R6, R6 | ||||
| 	B    calcShift | ||||
| 
 | ||||
| varTable: | ||||
| 	// var table [maxTableSize]uint16 | ||||
| 	// | ||||
| 	// In the asm code, unlike the Go code, we can zero-initialize only the | ||||
| 	// first tableSize elements. Each uint16 element is 2 bytes and each | ||||
| 	// iterations writes 64 bytes, so we can do only tableSize/32 writes | ||||
| 	// instead of the 2048 writes that would zero-initialize all of table's | ||||
| 	// 32768 bytes. This clear could overrun the first tableSize elements, but | ||||
| 	// it won't overrun the allocated stack size. | ||||
| 	ADD  $128, RSP, R17 | ||||
| 	MOVD R17, R4 | ||||
| 
 | ||||
| 	// !!! R6 = &src[tableSize] | ||||
| 	ADD R6<<1, R17, R6 | ||||
| 
 | ||||
| memclr: | ||||
| 	STP.P (ZR, ZR), 64(R4) | ||||
| 	STP   (ZR, ZR), -48(R4) | ||||
| 	STP   (ZR, ZR), -32(R4) | ||||
| 	STP   (ZR, ZR), -16(R4) | ||||
| 	CMP   R4, R6 | ||||
| 	BHI   memclr | ||||
| 
 | ||||
| 	// !!! R6 = &src[0] | ||||
| 	MOVD R7, R6 | ||||
| 
 | ||||
| 	// sLimit := len(src) - inputMargin | ||||
| 	MOVD R14, R9 | ||||
| 	SUB  $15, R9, R9 | ||||
| 
 | ||||
| 	// !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't | ||||
| 	// change for the rest of the function. | ||||
| 	MOVD R5, 64(RSP) | ||||
| 	MOVD R6, 72(RSP) | ||||
| 	MOVD R9, 96(RSP) | ||||
| 
 | ||||
| 	// nextEmit := 0 | ||||
| 	MOVD R6, R10 | ||||
| 
 | ||||
| 	// s := 1 | ||||
| 	ADD $1, R7, R7 | ||||
| 
 | ||||
| 	// nextHash := hash(load32(src, s), shift) | ||||
| 	MOVW 0(R7), R11 | ||||
| 	MULW R16, R11, R11 | ||||
| 	LSRW R5, R11, R11 | ||||
| 
 | ||||
| outer: | ||||
| 	// for { etc } | ||||
| 
 | ||||
| 	// skip := 32 | ||||
| 	MOVD $32, R12 | ||||
| 
 | ||||
| 	// nextS := s | ||||
| 	MOVD R7, R13 | ||||
| 
 | ||||
| 	// candidate := 0 | ||||
| 	MOVD $0, R15 | ||||
| 
 | ||||
| inner0: | ||||
| 	// for { etc } | ||||
| 
 | ||||
| 	// s := nextS | ||||
| 	MOVD R13, R7 | ||||
| 
 | ||||
| 	// bytesBetweenHashLookups := skip >> 5 | ||||
| 	MOVD R12, R14 | ||||
| 	LSR  $5, R14, R14 | ||||
| 
 | ||||
| 	// nextS = s + bytesBetweenHashLookups | ||||
| 	ADD R14, R13, R13 | ||||
| 
 | ||||
| 	// skip += bytesBetweenHashLookups | ||||
| 	ADD R14, R12, R12 | ||||
| 
 | ||||
| 	// if nextS > sLimit { goto emitRemainder } | ||||
| 	MOVD R13, R3 | ||||
| 	SUB  R6, R3, R3 | ||||
| 	CMP  R9, R3 | ||||
| 	BHI  emitRemainder | ||||
| 
 | ||||
| 	// candidate = int(table[nextHash]) | ||||
| 	MOVHU 0(R17)(R11<<1), R15 | ||||
| 
 | ||||
| 	// table[nextHash] = uint16(s) | ||||
| 	MOVD R7, R3 | ||||
| 	SUB  R6, R3, R3 | ||||
| 
 | ||||
| 	MOVH R3, 0(R17)(R11<<1) | ||||
| 
 | ||||
| 	// nextHash = hash(load32(src, nextS), shift) | ||||
| 	MOVW 0(R13), R11 | ||||
| 	MULW R16, R11 | ||||
| 	LSRW R5, R11, R11 | ||||
| 
 | ||||
| 	// if load32(src, s) != load32(src, candidate) { continue } break | ||||
| 	MOVW 0(R7), R3 | ||||
| 	MOVW (R6)(R15*1), R4 | ||||
| 	CMPW R4, R3 | ||||
| 	BNE  inner0 | ||||
| 
 | ||||
| fourByteMatch: | ||||
| 	// As per the encode_other.go code: | ||||
| 	// | ||||
| 	// A 4-byte match has been found. We'll later see etc. | ||||
| 
 | ||||
| 	// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment | ||||
| 	// on inputMargin in encode.go. | ||||
| 	MOVD R7, R3 | ||||
| 	SUB  R10, R3, R3 | ||||
| 	CMP  $16, R3 | ||||
| 	BLE  emitLiteralFastPath | ||||
| 
 | ||||
| 	// ---------------------------------------- | ||||
| 	// Begin inline of the emitLiteral call. | ||||
| 	// | ||||
| 	// d += emitLiteral(dst[d:], src[nextEmit:s]) | ||||
| 
 | ||||
| 	MOVW R3, R4 | ||||
| 	SUBW $1, R4, R4 | ||||
| 
 | ||||
| 	MOVW $60, R2 | ||||
| 	CMPW R2, R4 | ||||
| 	BLT  inlineEmitLiteralOneByte | ||||
| 	MOVW $256, R2 | ||||
| 	CMPW R2, R4 | ||||
| 	BLT  inlineEmitLiteralTwoBytes | ||||
| 
 | ||||
| inlineEmitLiteralThreeBytes: | ||||
| 	MOVD $0xf4, R1 | ||||
| 	MOVB R1, 0(R8) | ||||
| 	MOVW R4, 1(R8) | ||||
| 	ADD  $3, R8, R8 | ||||
| 	B    inlineEmitLiteralMemmove | ||||
| 
 | ||||
| inlineEmitLiteralTwoBytes: | ||||
| 	MOVD $0xf0, R1 | ||||
| 	MOVB R1, 0(R8) | ||||
| 	MOVB R4, 1(R8) | ||||
| 	ADD  $2, R8, R8 | ||||
| 	B    inlineEmitLiteralMemmove | ||||
| 
 | ||||
| inlineEmitLiteralOneByte: | ||||
| 	LSLW $2, R4, R4 | ||||
| 	MOVB R4, 0(R8) | ||||
| 	ADD  $1, R8, R8 | ||||
| 
 | ||||
| inlineEmitLiteralMemmove: | ||||
| 	// Spill local variables (registers) onto the stack; call; unspill.
 | ||||
| 	// | ||||
| 	// copy(dst[i:], lit) | ||||
| 	// | ||||
| 	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push | ||||
| 	// R8, R10 and R3 as arguments. | ||||
| 	MOVD R8, 8(RSP) | ||||
| 	MOVD R10, 16(RSP) | ||||
| 	MOVD R3, 24(RSP) | ||||
| 
 | ||||
| 	// Finish the "d +=" part of "d += emitLiteral(etc)". | ||||
| 	ADD   R3, R8, R8 | ||||
| 	MOVD  R7, 80(RSP) | ||||
| 	MOVD  R8, 88(RSP) | ||||
| 	MOVD  R15, 120(RSP) | ||||
| 	CALL  runtime·memmove(SB) | ||||
| 	MOVD  64(RSP), R5 | ||||
| 	MOVD  72(RSP), R6 | ||||
| 	MOVD  80(RSP), R7 | ||||
| 	MOVD  88(RSP), R8 | ||||
| 	MOVD  96(RSP), R9 | ||||
| 	MOVD  120(RSP), R15 | ||||
| 	ADD   $128, RSP, R17 | ||||
| 	MOVW  $0xa7bd, R16 | ||||
| 	MOVKW $(0x1e35<<16), R16 | ||||
| 	B     inner1 | ||||
| 
 | ||||
| inlineEmitLiteralEnd: | ||||
| 	// End inline of the emitLiteral call. | ||||
| 	// ---------------------------------------- | ||||
| 
 | ||||
| emitLiteralFastPath: | ||||
| 	// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". | ||||
| 	MOVB R3, R4 | ||||
| 	SUBW $1, R4, R4 | ||||
| 	AND  $0xff, R4, R4 | ||||
| 	LSLW $2, R4, R4 | ||||
| 	MOVB R4, (R8) | ||||
| 	ADD  $1, R8, R8 | ||||
| 
 | ||||
| 	// !!! Implement the copy from lit to dst as a 16-byte load and store. | ||||
| 	// (Encode's documentation says that dst and src must not overlap.) | ||||
| 	// | ||||
| 	// This always copies 16 bytes, instead of only len(lit) bytes, but that's | ||||
| 	// OK. Subsequent iterations will fix up the overrun. | ||||
| 	// | ||||
| 	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or | ||||
| 	// 16-byte loads and stores. This technique probably wouldn't be as | ||||
| 	// effective on architectures that are fussier about alignment. | ||||
| 	LDP 0(R10), (R0, R1) | ||||
| 	STP (R0, R1), 0(R8) | ||||
| 	ADD R3, R8, R8 | ||||
| 
 | ||||
| inner1: | ||||
| 	// for { etc } | ||||
| 
 | ||||
| 	// base := s | ||||
| 	MOVD R7, R12 | ||||
| 
 | ||||
| 	// !!! offset := base - candidate | ||||
| 	MOVD R12, R11 | ||||
| 	SUB  R15, R11, R11 | ||||
| 	SUB  R6, R11, R11 | ||||
| 
 | ||||
| 	// ---------------------------------------- | ||||
| 	// Begin inline of the extendMatch call. | ||||
| 	// | ||||
| 	// s = extendMatch(src, candidate+4, s+4) | ||||
| 
 | ||||
| 	// !!! R14 = &src[len(src)] | ||||
| 	MOVD src_len+32(FP), R14 | ||||
| 	ADD  R6, R14, R14 | ||||
| 
 | ||||
| 	// !!! R13 = &src[len(src) - 8] | ||||
| 	MOVD R14, R13 | ||||
| 	SUB  $8, R13, R13 | ||||
| 
 | ||||
| 	// !!! R15 = &src[candidate + 4] | ||||
| 	ADD $4, R15, R15 | ||||
| 	ADD R6, R15, R15 | ||||
| 
 | ||||
| 	// !!! s += 4 | ||||
| 	ADD $4, R7, R7 | ||||
| 
 | ||||
| inlineExtendMatchCmp8: | ||||
| 	// As long as we are 8 or more bytes before the end of src, we can load and | ||||
| 	// compare 8 bytes at a time. If those 8 bytes are equal, repeat. | ||||
| 	CMP  R13, R7 | ||||
| 	BHI  inlineExtendMatchCmp1 | ||||
| 	MOVD (R15), R3 | ||||
| 	MOVD (R7), R4 | ||||
| 	CMP  R4, R3 | ||||
| 	BNE  inlineExtendMatchBSF | ||||
| 	ADD  $8, R15, R15 | ||||
| 	ADD  $8, R7, R7 | ||||
| 	B    inlineExtendMatchCmp8 | ||||
| 
 | ||||
| inlineExtendMatchBSF: | ||||
| 	// If those 8 bytes were not equal, XOR the two 8 byte values, and return | ||||
| 	// the index of the first byte that differs. | ||||
| 	// RBIT reverses the bit order, then CLZ counts the leading zeros, the | ||||
| 	// combination of which finds the least significant bit which is set. | ||||
| 	// The arm64 architecture is little-endian, and the shift by 3 converts | ||||
| 	// a bit index to a byte index. | ||||
| 	EOR  R3, R4, R4 | ||||
| 	RBIT R4, R4 | ||||
| 	CLZ  R4, R4 | ||||
| 	ADD  R4>>3, R7, R7 | ||||
| 	B    inlineExtendMatchEnd | ||||
| 
 | ||||
| inlineExtendMatchCmp1: | ||||
| 	// In src's tail, compare 1 byte at a time. | ||||
| 	CMP  R7, R14 | ||||
| 	BLS  inlineExtendMatchEnd | ||||
| 	MOVB (R15), R3 | ||||
| 	MOVB (R7), R4 | ||||
| 	CMP  R4, R3 | ||||
| 	BNE  inlineExtendMatchEnd | ||||
| 	ADD  $1, R15, R15 | ||||
| 	ADD  $1, R7, R7 | ||||
| 	B    inlineExtendMatchCmp1 | ||||
| 
 | ||||
| inlineExtendMatchEnd: | ||||
| 	// End inline of the extendMatch call. | ||||
| 	// ---------------------------------------- | ||||
| 
 | ||||
| 	// ---------------------------------------- | ||||
| 	// Begin inline of the emitCopy call. | ||||
| 	// | ||||
| 	// d += emitCopy(dst[d:], base-candidate, s-base) | ||||
| 
 | ||||
| 	// !!! length := s - base | ||||
| 	MOVD R7, R3 | ||||
| 	SUB  R12, R3, R3 | ||||
| 
 | ||||
| inlineEmitCopyLoop0: | ||||
| 	// for length >= 68 { etc } | ||||
| 	MOVW $68, R2 | ||||
| 	CMPW R2, R3 | ||||
| 	BLT  inlineEmitCopyStep1 | ||||
| 
 | ||||
| 	// Emit a length 64 copy, encoded as 3 bytes. | ||||
| 	MOVD $0xfe, R1 | ||||
| 	MOVB R1, 0(R8) | ||||
| 	MOVW R11, 1(R8) | ||||
| 	ADD  $3, R8, R8 | ||||
| 	SUBW $64, R3, R3 | ||||
| 	B    inlineEmitCopyLoop0 | ||||
| 
 | ||||
| inlineEmitCopyStep1: | ||||
| 	// if length > 64 { etc } | ||||
| 	MOVW $64, R2 | ||||
| 	CMPW R2, R3 | ||||
| 	BLE  inlineEmitCopyStep2 | ||||
| 
 | ||||
| 	// Emit a length 60 copy, encoded as 3 bytes. | ||||
| 	MOVD $0xee, R1 | ||||
| 	MOVB R1, 0(R8) | ||||
| 	MOVW R11, 1(R8) | ||||
| 	ADD  $3, R8, R8 | ||||
| 	SUBW $60, R3, R3 | ||||
| 
 | ||||
| inlineEmitCopyStep2: | ||||
| 	// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } | ||||
| 	MOVW $12, R2 | ||||
| 	CMPW R2, R3 | ||||
| 	BGE  inlineEmitCopyStep3 | ||||
| 	MOVW $2048, R2 | ||||
| 	CMPW R2, R11 | ||||
| 	BGE  inlineEmitCopyStep3 | ||||
| 
 | ||||
| 	// Emit the remaining copy, encoded as 2 bytes. | ||||
| 	MOVB R11, 1(R8) | ||||
| 	LSRW $8, R11, R11 | ||||
| 	LSLW $5, R11, R11 | ||||
| 	SUBW $4, R3, R3 | ||||
| 	AND  $0xff, R3, R3 | ||||
| 	LSLW $2, R3, R3 | ||||
| 	ORRW R3, R11, R11 | ||||
| 	ORRW $1, R11, R11 | ||||
| 	MOVB R11, 0(R8) | ||||
| 	ADD  $2, R8, R8 | ||||
| 	B    inlineEmitCopyEnd | ||||
| 
 | ||||
| inlineEmitCopyStep3: | ||||
| 	// Emit the remaining copy, encoded as 3 bytes. | ||||
| 	SUBW $1, R3, R3 | ||||
| 	LSLW $2, R3, R3 | ||||
| 	ORRW $2, R3, R3 | ||||
| 	MOVB R3, 0(R8) | ||||
| 	MOVW R11, 1(R8) | ||||
| 	ADD  $3, R8, R8 | ||||
| 
 | ||||
| inlineEmitCopyEnd: | ||||
| 	// End inline of the emitCopy call. | ||||
| 	// ---------------------------------------- | ||||
| 
 | ||||
| 	// nextEmit = s | ||||
| 	MOVD R7, R10 | ||||
| 
 | ||||
| 	// if s >= sLimit { goto emitRemainder } | ||||
| 	MOVD R7, R3 | ||||
| 	SUB  R6, R3, R3 | ||||
| 	CMP  R3, R9 | ||||
| 	BLS  emitRemainder | ||||
| 
 | ||||
| 	// As per the encode_other.go code: | ||||
| 	// | ||||
| 	// We could immediately etc. | ||||
| 
 | ||||
| 	// x := load64(src, s-1) | ||||
| 	MOVD -1(R7), R14 | ||||
| 
 | ||||
| 	// prevHash := hash(uint32(x>>0), shift) | ||||
| 	MOVW R14, R11 | ||||
| 	MULW R16, R11, R11 | ||||
| 	LSRW R5, R11, R11 | ||||
| 
 | ||||
| 	// table[prevHash] = uint16(s-1) | ||||
| 	MOVD R7, R3 | ||||
| 	SUB  R6, R3, R3 | ||||
| 	SUB  $1, R3, R3 | ||||
| 
 | ||||
| 	MOVHU R3, 0(R17)(R11<<1) | ||||
| 
 | ||||
| 	// currHash := hash(uint32(x>>8), shift) | ||||
| 	LSR  $8, R14, R14 | ||||
| 	MOVW R14, R11 | ||||
| 	MULW R16, R11, R11 | ||||
| 	LSRW R5, R11, R11 | ||||
| 
 | ||||
| 	// candidate = int(table[currHash]) | ||||
| 	MOVHU 0(R17)(R11<<1), R15 | ||||
| 
 | ||||
| 	// table[currHash] = uint16(s) | ||||
| 	ADD   $1, R3, R3 | ||||
| 	MOVHU R3, 0(R17)(R11<<1) | ||||
| 
 | ||||
| 	// if uint32(x>>8) == load32(src, candidate) { continue } | ||||
| 	MOVW (R6)(R15*1), R4 | ||||
| 	CMPW R4, R14 | ||||
| 	BEQ  inner1 | ||||
| 
 | ||||
| 	// nextHash = hash(uint32(x>>16), shift) | ||||
| 	LSR  $8, R14, R14 | ||||
| 	MOVW R14, R11 | ||||
| 	MULW R16, R11, R11 | ||||
| 	LSRW R5, R11, R11 | ||||
| 
 | ||||
| 	// s++ | ||||
| 	ADD $1, R7, R7 | ||||
| 
 | ||||
| 	// break out of the inner1 for loop, i.e. continue the outer loop. | ||||
| 	B outer | ||||
| 
 | ||||
| emitRemainder: | ||||
| 	// if nextEmit < len(src) { etc } | ||||
| 	MOVD src_len+32(FP), R3 | ||||
| 	ADD  R6, R3, R3 | ||||
| 	CMP  R3, R10 | ||||
| 	BEQ  encodeBlockEnd | ||||
| 
 | ||||
| 	// d += emitLiteral(dst[d:], src[nextEmit:]) | ||||
| 	// | ||||
| 	// Push args. | ||||
| 	MOVD R8, 8(RSP) | ||||
| 	MOVD $0, 16(RSP)  // Unnecessary, as the callee ignores it, but conservative. | ||||
| 	MOVD $0, 24(RSP)  // Unnecessary, as the callee ignores it, but conservative. | ||||
| 	MOVD R10, 32(RSP) | ||||
| 	SUB  R10, R3, R3 | ||||
| 	MOVD R3, 40(RSP) | ||||
| 	MOVD R3, 48(RSP)  // Unnecessary, as the callee ignores it, but conservative. | ||||
| 
 | ||||
| 	// Spill local variables (registers) onto the stack; call; unspill.
 | ||||
| 	MOVD R8, 88(RSP) | ||||
| 	CALL ·emitLiteral(SB) | ||||
| 	MOVD 88(RSP), R8 | ||||
| 
 | ||||
| 	// Finish the "d +=" part of "d += emitLiteral(etc)". | ||||
| 	MOVD 56(RSP), R1 | ||||
| 	ADD  R1, R8, R8 | ||||
| 
 | ||||
| encodeBlockEnd: | ||||
| 	MOVD dst_base+0(FP), R3 | ||||
| 	SUB  R3, R8, R8 | ||||
| 	MOVD R8, d+48(FP) | ||||
| 	RET | ||||
							
								
								
									
										30
									
								
								vendor/github.com/golang/snappy/encode_asm.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								vendor/github.com/golang/snappy/encode_asm.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,30 @@ | |||
| // Copyright 2016 The Snappy-Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| // +build !appengine | ||||
| // +build gc | ||||
| // +build !noasm | ||||
| // +build amd64 arm64 | ||||
| 
 | ||||
| package snappy | ||||
| 
 | ||||
| // emitLiteral has the same semantics as in encode_other.go. | ||||
| // | ||||
| //go:noescape | ||||
| func emitLiteral(dst, lit []byte) int | ||||
| 
 | ||||
| // emitCopy has the same semantics as in encode_other.go. | ||||
| // | ||||
| //go:noescape | ||||
| func emitCopy(dst []byte, offset, length int) int | ||||
| 
 | ||||
| // extendMatch has the same semantics as in encode_other.go. | ||||
| // | ||||
| //go:noescape | ||||
| func extendMatch(src []byte, i, j int) int | ||||
| 
 | ||||
| // encodeBlock has the same semantics as in encode_other.go. | ||||
| // | ||||
| //go:noescape | ||||
| func encodeBlock(dst, src []byte) (d int) | ||||
							
								
								
									
										238
									
								
								vendor/github.com/golang/snappy/encode_other.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										238
									
								
								vendor/github.com/golang/snappy/encode_other.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,238 @@ | |||
| // Copyright 2016 The Snappy-Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| // +build !amd64,!arm64 appengine !gc noasm | ||||
| 
 | ||||
| package snappy | ||||
| 
 | ||||
| func load32(b []byte, i int) uint32 { | ||||
| 	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. | ||||
| 	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | ||||
| } | ||||
| 
 | ||||
| func load64(b []byte, i int) uint64 { | ||||
| 	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. | ||||
| 	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | | ||||
| 		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 | ||||
| } | ||||
| 
 | ||||
| // emitLiteral writes a literal chunk and returns the number of bytes written. | ||||
| // | ||||
| // It assumes that: | ||||
| //	dst is long enough to hold the encoded bytes | ||||
| //	1 <= len(lit) && len(lit) <= 65536 | ||||
| func emitLiteral(dst, lit []byte) int { | ||||
| 	i, n := 0, uint(len(lit)-1) | ||||
| 	switch { | ||||
| 	case n < 60: | ||||
| 		dst[0] = uint8(n)<<2 | tagLiteral | ||||
| 		i = 1 | ||||
| 	case n < 1<<8: | ||||
| 		dst[0] = 60<<2 | tagLiteral | ||||
| 		dst[1] = uint8(n) | ||||
| 		i = 2 | ||||
| 	default: | ||||
| 		dst[0] = 61<<2 | tagLiteral | ||||
| 		dst[1] = uint8(n) | ||||
| 		dst[2] = uint8(n >> 8) | ||||
| 		i = 3 | ||||
| 	} | ||||
| 	return i + copy(dst[i:], lit) | ||||
| } | ||||
| 
 | ||||
| // emitCopy writes a copy chunk and returns the number of bytes written. | ||||
| // | ||||
| // It assumes that: | ||||
| //	dst is long enough to hold the encoded bytes | ||||
| //	1 <= offset && offset <= 65535 | ||||
| //	4 <= length && length <= 65535 | ||||
| func emitCopy(dst []byte, offset, length int) int { | ||||
| 	i := 0 | ||||
| 	// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The | ||||
| 	// threshold for this loop is a little higher (at 68 = 64 + 4), and the | ||||
| 	// length emitted down below is is a little lower (at 60 = 64 - 4), because | ||||
| 	// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed | ||||
| 	// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as | ||||
| 	// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as | ||||
| 	// 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a | ||||
| 	// tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an | ||||
| 	// encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1. | ||||
| 	for length >= 68 { | ||||
| 		// Emit a length 64 copy, encoded as 3 bytes. | ||||
| 		dst[i+0] = 63<<2 | tagCopy2 | ||||
| 		dst[i+1] = uint8(offset) | ||||
| 		dst[i+2] = uint8(offset >> 8) | ||||
| 		i += 3 | ||||
| 		length -= 64 | ||||
| 	} | ||||
| 	if length > 64 { | ||||
| 		// Emit a length 60 copy, encoded as 3 bytes. | ||||
| 		dst[i+0] = 59<<2 | tagCopy2 | ||||
| 		dst[i+1] = uint8(offset) | ||||
| 		dst[i+2] = uint8(offset >> 8) | ||||
| 		i += 3 | ||||
| 		length -= 60 | ||||
| 	} | ||||
| 	if length >= 12 || offset >= 2048 { | ||||
| 		// Emit the remaining copy, encoded as 3 bytes. | ||||
| 		dst[i+0] = uint8(length-1)<<2 | tagCopy2 | ||||
| 		dst[i+1] = uint8(offset) | ||||
| 		dst[i+2] = uint8(offset >> 8) | ||||
| 		return i + 3 | ||||
| 	} | ||||
| 	// Emit the remaining copy, encoded as 2 bytes. | ||||
| 	dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 | ||||
| 	dst[i+1] = uint8(offset) | ||||
| 	return i + 2 | ||||
| } | ||||
| 
 | ||||
| // extendMatch returns the largest k such that k <= len(src) and that | ||||
| // src[i:i+k-j] and src[j:k] have the same contents. | ||||
| // | ||||
| // It assumes that: | ||||
| //	0 <= i && i < j && j <= len(src) | ||||
| func extendMatch(src []byte, i, j int) int { | ||||
| 	for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { | ||||
| 	} | ||||
| 	return j | ||||
| } | ||||
| 
 | ||||
| func hash(u, shift uint32) uint32 { | ||||
| 	return (u * 0x1e35a7bd) >> shift | ||||
| } | ||||
| 
 | ||||
| // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It | ||||
| // assumes that the varint-encoded length of the decompressed bytes has already | ||||
| // been written. | ||||
| // | ||||
| // It also assumes that: | ||||
| //	len(dst) >= MaxEncodedLen(len(src)) && | ||||
| // 	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize | ||||
| func encodeBlock(dst, src []byte) (d int) { | ||||
| 	// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. | ||||
| 	// The table element type is uint16, as s < sLimit and sLimit < len(src) | ||||
| 	// and len(src) <= maxBlockSize and maxBlockSize == 65536. | ||||
| 	const ( | ||||
| 		maxTableSize = 1 << 14 | ||||
| 		// tableMask is redundant, but helps the compiler eliminate bounds | ||||
| 		// checks. | ||||
| 		tableMask = maxTableSize - 1 | ||||
| 	) | ||||
| 	shift := uint32(32 - 8) | ||||
| 	for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { | ||||
| 		shift-- | ||||
| 	} | ||||
| 	// In Go, all array elements are zero-initialized, so there is no advantage | ||||
| 	// to a smaller tableSize per se. However, it matches the C++ algorithm, | ||||
| 	// and in the asm versions of this code, we can get away with zeroing only | ||||
| 	// the first tableSize elements. | ||||
| 	var table [maxTableSize]uint16 | ||||
| 
 | ||||
| 	// sLimit is when to stop looking for offset/length copies. The inputMargin | ||||
| 	// lets us use a fast path for emitLiteral in the main loop, while we are | ||||
| 	// looking for copies. | ||||
| 	sLimit := len(src) - inputMargin | ||||
| 
 | ||||
| 	// nextEmit is where in src the next emitLiteral should start from. | ||||
| 	nextEmit := 0 | ||||
| 
 | ||||
| 	// The encoded form must start with a literal, as there are no previous | ||||
| 	// bytes to copy, so we start looking for hash matches at s == 1. | ||||
| 	s := 1 | ||||
| 	nextHash := hash(load32(src, s), shift) | ||||
| 
 | ||||
| 	for { | ||||
| 		// Copied from the C++ snappy implementation: | ||||
| 		// | ||||
| 		// Heuristic match skipping: If 32 bytes are scanned with no matches | ||||
| 		// found, start looking only at every other byte. If 32 more bytes are | ||||
| 		// scanned (or skipped), look at every third byte, etc.. When a match | ||||
| 		// is found, immediately go back to looking at every byte. This is a | ||||
| 		// small loss (~5% performance, ~0.1% density) for compressible data | ||||
| 		// due to more bookkeeping, but for non-compressible data (such as | ||||
| 		// JPEG) it's a huge win since the compressor quickly "realizes" the | ||||
| 		// data is incompressible and doesn't bother looking for matches | ||||
| 		// everywhere. | ||||
| 		// | ||||
| 		// The "skip" variable keeps track of how many bytes there are since | ||||
| 		// the last match; dividing it by 32 (ie. right-shifting by five) gives | ||||
| 		// the number of bytes to move ahead for each iteration. | ||||
| 		skip := 32 | ||||
| 
 | ||||
| 		nextS := s | ||||
| 		candidate := 0 | ||||
| 		for { | ||||
| 			s = nextS | ||||
| 			bytesBetweenHashLookups := skip >> 5 | ||||
| 			nextS = s + bytesBetweenHashLookups | ||||
| 			skip += bytesBetweenHashLookups | ||||
| 			if nextS > sLimit { | ||||
| 				goto emitRemainder | ||||
| 			} | ||||
| 			candidate = int(table[nextHash&tableMask]) | ||||
| 			table[nextHash&tableMask] = uint16(s) | ||||
| 			nextHash = hash(load32(src, nextS), shift) | ||||
| 			if load32(src, s) == load32(src, candidate) { | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		// A 4-byte match has been found. We'll later see if more than 4 bytes | ||||
| 		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit | ||||
| 		// them as literal bytes. | ||||
| 		d += emitLiteral(dst[d:], src[nextEmit:s]) | ||||
| 
 | ||||
| 		// Call emitCopy, and then see if another emitCopy could be our next | ||||
| 		// move. Repeat until we find no match for the input immediately after | ||||
| 		// what was consumed by the last emitCopy call. | ||||
| 		// | ||||
| 		// If we exit this loop normally then we need to call emitLiteral next, | ||||
| 		// though we don't yet know how big the literal will be. We handle that | ||||
| 		// by proceeding to the next iteration of the main loop. We also can | ||||
| 		// exit this loop via goto if we get close to exhausting the input. | ||||
| 		for { | ||||
| 			// Invariant: we have a 4-byte match at s, and no need to emit any | ||||
| 			// literal bytes prior to s. | ||||
| 			base := s | ||||
| 
 | ||||
| 			// Extend the 4-byte match as long as possible. | ||||
| 			// | ||||
| 			// This is an inlined version of: | ||||
| 			//	s = extendMatch(src, candidate+4, s+4) | ||||
| 			s += 4 | ||||
| 			for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 { | ||||
| 			} | ||||
| 
 | ||||
| 			d += emitCopy(dst[d:], base-candidate, s-base) | ||||
| 			nextEmit = s | ||||
| 			if s >= sLimit { | ||||
| 				goto emitRemainder | ||||
| 			} | ||||
| 
 | ||||
| 			// We could immediately start working at s now, but to improve | ||||
| 			// compression we first update the hash table at s-1 and at s. If | ||||
| 			// another emitCopy is not our next move, also calculate nextHash | ||||
| 			// at s+1. At least on GOARCH=amd64, these three hash calculations | ||||
| 			// are faster as one load64 call (with some shifts) instead of | ||||
| 			// three load32 calls. | ||||
| 			x := load64(src, s-1) | ||||
| 			prevHash := hash(uint32(x>>0), shift) | ||||
| 			table[prevHash&tableMask] = uint16(s - 1) | ||||
| 			currHash := hash(uint32(x>>8), shift) | ||||
| 			candidate = int(table[currHash&tableMask]) | ||||
| 			table[currHash&tableMask] = uint16(s) | ||||
| 			if uint32(x>>8) != load32(src, candidate) { | ||||
| 				nextHash = hash(uint32(x>>16), shift) | ||||
| 				s++ | ||||
| 				break | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| emitRemainder: | ||||
| 	if nextEmit < len(src) { | ||||
| 		d += emitLiteral(dst[d:], src[nextEmit:]) | ||||
| 	} | ||||
| 	return d | ||||
| } | ||||
							
								
								
									
										98
									
								
								vendor/github.com/golang/snappy/snappy.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								vendor/github.com/golang/snappy/snappy.go
									
										
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,98 @@ | |||
| // Copyright 2011 The Snappy-Go Authors. All rights reserved. | ||||
| // Use of this source code is governed by a BSD-style | ||||
| // license that can be found in the LICENSE file. | ||||
| 
 | ||||
| // Package snappy implements the Snappy compression format. It aims for very | ||||
| // high speeds and reasonable compression. | ||||
| // | ||||
| // There are actually two Snappy formats: block and stream. They are related, | ||||
| // but different: trying to decompress block-compressed data as a Snappy stream | ||||
| // will fail, and vice versa. The block format is the Decode and Encode | ||||
| // functions and the stream format is the Reader and Writer types. | ||||
| // | ||||
| // The block format, the more common case, is used when the complete size (the | ||||
| // number of bytes) of the original data is known upfront, at the time | ||||
| // compression starts. The stream format, also known as the framing format, is | ||||
| // for when that isn't always true. | ||||
| // | ||||
| // The canonical, C++ implementation is at https://github.com/google/snappy and | ||||
| // it only implements the block format. | ||||
| package snappy // import "github.com/golang/snappy" | ||||
| 
 | ||||
| import ( | ||||
| 	"hash/crc32" | ||||
| ) | ||||
| 
 | ||||
| /* | ||||
| Each encoded block begins with the varint-encoded length of the decoded data, | ||||
| followed by a sequence of chunks. Chunks begin and end on byte boundaries. The | ||||
| first byte of each chunk is broken into its 2 least and 6 most significant bits | ||||
| called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. | ||||
| Zero means a literal tag. All other values mean a copy tag. | ||||
| 
 | ||||
| For literal tags: | ||||
|   - If m < 60, the next 1 + m bytes are literal bytes. | ||||
|   - Otherwise, let n be the little-endian unsigned integer denoted by the next | ||||
|     m - 59 bytes. The next 1 + n bytes after that are literal bytes. | ||||
| 
 | ||||
| For copy tags, length bytes are copied from offset bytes ago, in the style of | ||||
| Lempel-Ziv compression algorithms. In particular: | ||||
|   - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). | ||||
|     The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 | ||||
|     of the offset. The next byte is bits 0-7 of the offset. | ||||
|   - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). | ||||
|     The length is 1 + m. The offset is the little-endian unsigned integer | ||||
|     denoted by the next 2 bytes. | ||||
|   - For l == 3, this tag is a legacy format that is no longer issued by most | ||||
|     encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in | ||||
|     [1, 65). The length is 1 + m. The offset is the little-endian unsigned | ||||
|     integer denoted by the next 4 bytes. | ||||
| */ | ||||
| const ( | ||||
| 	tagLiteral = 0x00 | ||||
| 	tagCopy1   = 0x01 | ||||
| 	tagCopy2   = 0x02 | ||||
| 	tagCopy4   = 0x03 | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	checksumSize    = 4 | ||||
| 	chunkHeaderSize = 4 | ||||
| 	magicChunk      = "\xff\x06\x00\x00" + magicBody | ||||
| 	magicBody       = "sNaPpY" | ||||
| 
 | ||||
| 	// maxBlockSize is the maximum size of the input to encodeBlock. It is not | ||||
| 	// part of the wire format per se, but some parts of the encoder assume | ||||
| 	// that an offset fits into a uint16. | ||||
| 	// | ||||
| 	// Also, for the framing format (Writer type instead of Encode function), | ||||
| 	// https://github.com/google/snappy/blob/master/framing_format.txt says | ||||
| 	// that "the uncompressed data in a chunk must be no longer than 65536 | ||||
| 	// bytes". | ||||
| 	maxBlockSize = 65536 | ||||
| 
 | ||||
| 	// maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is | ||||
| 	// hard coded to be a const instead of a variable, so that obufLen can also | ||||
| 	// be a const. Their equivalence is confirmed by | ||||
| 	// TestMaxEncodedLenOfMaxBlockSize. | ||||
| 	maxEncodedLenOfMaxBlockSize = 76490 | ||||
| 
 | ||||
| 	obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize | ||||
| 	obufLen       = obufHeaderLen + maxEncodedLenOfMaxBlockSize | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	chunkTypeCompressedData   = 0x00 | ||||
| 	chunkTypeUncompressedData = 0x01 | ||||
| 	chunkTypePadding          = 0xfe | ||||
| 	chunkTypeStreamIdentifier = 0xff | ||||
| ) | ||||
| 
 | ||||
| var crcTable = crc32.MakeTable(crc32.Castagnoli) | ||||
| 
 | ||||
| // crc implements the checksum specified in section 3 of | ||||
| // https://github.com/google/snappy/blob/master/framing_format.txt | ||||
| func crc(b []byte) uint32 { | ||||
| 	c := crc32.Update(0, crcTable, b) | ||||
| 	return uint32(c>>15|c<<17) + 0xa282ead8 | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue