mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-31 15:52:25 -05:00 
			
		
		
		
	Bumps [github.com/minio/minio-go/v7](https://github.com/minio/minio-go) from 7.0.77 to 7.0.78. - [Release notes](https://github.com/minio/minio-go/releases) - [Commits](https://github.com/minio/minio-go/compare/v7.0.77...v7.0.78) --- updated-dependencies: - dependency-name: github.com/minio/minio-go/v7 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
		
			
				
	
	
		
			21303 lines
		
	
	
	
		
			525 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			21303 lines
		
	
	
	
		
			525 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| // Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
 | |
| 
 | |
| //go:build !appengine && !noasm && gc && !noasm
 | |
| 
 | |
| #include "textflag.h"
 | |
| 
 | |
| // func _dummy_()
// _dummy_ performs no work at run time: its body is a single RET and it
// declares a zero-byte frame ($0).
// The preprocessor block placed inside it defines GOAMD64_v3 whenever
// GOAMD64_v4 is defined, so the `#ifdef GOAMD64_v3` guards later in this
// file (which pick TZCNTQ over BSFQ) are also taken when only the v4 macro
// is set.
// NOTE(review): presumably the toolchain defines only the macro for the
// exact GOAMD64 level being built, which is why v4 must imply v3 here -
// confirm against the Go assembler documentation.
 | |
| TEXT ·_dummy_(SB), $0
 | |
| #ifdef GOAMD64_v4
 | |
| #ifndef GOAMD64_v3
 | |
| #define GOAMD64_v3
 | |
| #endif
 | |
| #endif
 | |
// Nothing to execute; return to the caller immediately.
| 	RET
 | |
| 
 | |
| // func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeBlockAsm(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00000200, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeBlockAsm:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeBlockAsm
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  DX, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeBlockAsm:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x06, SI
 | |
| 	LEAL  4(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeBlockAsm
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	SHLQ  $0x10, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x32, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  (AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11
 | |
| 	JNE   no_repeat_found_encodeBlockAsm
 | |
| 	LEAL  1(DX), DI
 | |
| 	MOVL  12(SP), R8
 | |
| 	MOVL  DI, SI
 | |
| 	SUBL  16(SP), SI
 | |
| 	JZ    repeat_extend_back_end_encodeBlockAsm
 | |
| 
 | |
| repeat_extend_back_loop_encodeBlockAsm:
 | |
| 	CMPL DI, R8
 | |
| 	JBE  repeat_extend_back_end_encodeBlockAsm
 | |
| 	MOVB -1(BX)(SI*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_encodeBlockAsm
 | |
| 	LEAL -1(DI), DI
 | |
| 	DECL SI
 | |
| 	JNZ  repeat_extend_back_loop_encodeBlockAsm
 | |
| 
 | |
| repeat_extend_back_end_encodeBlockAsm:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	LEAQ 5(CX)(SI*1), SI
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_encodeBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| repeat_dst_size_check_encodeBlockAsm:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_encodeBlockAsm
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_encodeBlockAsm
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   three_bytes_repeat_emit_encodeBlockAsm
 | |
| 	CMPL SI, $0x01000000
 | |
| 	JB   four_bytes_repeat_emit_encodeBlockAsm
 | |
| 	MOVB $0xfc, (CX)
 | |
| 	MOVL SI, 1(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm
 | |
| 
 | |
| four_bytes_repeat_emit_encodeBlockAsm:
 | |
| 	MOVL SI, R11
 | |
| 	SHRL $0x10, R11
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	MOVB R11, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm
 | |
| 
 | |
| three_bytes_repeat_emit_encodeBlockAsm:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm
 | |
| 
 | |
| two_bytes_repeat_emit_encodeBlockAsm:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_encodeBlockAsm
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm
 | |
| 
 | |
| one_byte_repeat_emit_encodeBlockAsm:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_repeat_emit_encodeBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ R11, (CX)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_repeat_emit_encodeBlockAsm:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm
 | |
| 
 | |
| memmove_long_repeat_emit_encodeBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R12
 | |
| 	SHRQ  $0x05, R12
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R13
 | |
| 	SUBQ  R11, R13
 | |
| 	DECQ  R12
 | |
| 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R13*1), R11
 | |
| 	LEAQ  -32(CX)(R13*1), R14
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R14)
 | |
| 	MOVOA X5, 16(R14)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R13
 | |
| 	DECQ  R12
 | |
| 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R13*1), X4
 | |
| 	MOVOU -16(R10)(R13*1), X5
 | |
| 	MOVOA X4, -32(CX)(R13*1)
 | |
| 	MOVOA X5, -16(CX)(R13*1)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	CMPQ  R9, R13
 | |
| 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_repeat_emit_encodeBlockAsm:
 | |
| 	ADDL $0x05, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI
 | |
| 	MOVQ src_len+32(FP), R9
 | |
| 	SUBL DX, R9
 | |
| 	LEAQ (BX)(DX*1), R10
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_repeat_extend_encodeBlockAsm:
 | |
| 	CMPL R9, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_encodeBlockAsm
 | |
| 	MOVQ (R10)(R12*1), R11
 | |
| 	MOVQ 8(R10)(R12*1), R13
 | |
| 	XORQ (SI)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm
 | |
| 	XORQ 8(SI)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm
 | |
| 	LEAL -16(R9), R9
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm
 | |
| 
 | |
| matchlen_bsf_16repeat_extend_encodeBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  repeat_extend_forward_end_encodeBlockAsm
 | |
| 
 | |
| matchlen_match8_repeat_extend_encodeBlockAsm:
 | |
| 	CMPL R9, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_encodeBlockAsm
 | |
| 	MOVQ (R10)(R12*1), R11
 | |
| 	XORQ (SI)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm
 | |
| 	LEAL -8(R9), R9
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_repeat_extend_encodeBlockAsm
 | |
| 
 | |
| matchlen_bsf_8_repeat_extend_encodeBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  repeat_extend_forward_end_encodeBlockAsm
 | |
| 
 | |
| matchlen_match4_repeat_extend_encodeBlockAsm:
 | |
| 	CMPL R9, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_encodeBlockAsm
 | |
| 	MOVL (R10)(R12*1), R11
 | |
| 	CMPL (SI)(R12*1), R11
 | |
| 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm
 | |
| 	LEAL -4(R9), R9
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_repeat_extend_encodeBlockAsm:
 | |
| 	CMPL R9, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_encodeBlockAsm
 | |
| 	JB   repeat_extend_forward_end_encodeBlockAsm
 | |
| 	MOVW (R10)(R12*1), R11
 | |
| 	CMPW (SI)(R12*1), R11
 | |
| 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R9
 | |
| 	JZ   repeat_extend_forward_end_encodeBlockAsm
 | |
| 
 | |
| matchlen_match1_repeat_extend_encodeBlockAsm:
 | |
| 	MOVB (R10)(R12*1), R11
 | |
| 	CMPB (SI)(R12*1), R11
 | |
| 	JNE  repeat_extend_forward_end_encodeBlockAsm
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| repeat_extend_forward_end_encodeBlockAsm:
 | |
| 	ADDL  R12, DX
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  DI, SI
 | |
| 	MOVL  16(SP), DI
 | |
| 	TESTL R8, R8
 | |
| 	JZ    repeat_as_copy_encodeBlockAsm
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_match_repeat_encodeBlockAsm:
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_match_repeat_encodeBlockAsm
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_repeat_encodeBlockAsm
 | |
| 
 | |
| cant_repeat_two_offset_match_repeat_encodeBlockAsm:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_match_repeat_encodeBlockAsm
 | |
| 	CMPL SI, $0x00010100
 | |
| 	JB   repeat_four_match_repeat_encodeBlockAsm
 | |
| 	CMPL SI, $0x0100ffff
 | |
| 	JB   repeat_five_match_repeat_encodeBlockAsm
 | |
| 	LEAL -16842747(SI), SI
 | |
| 	MOVL $0xfffb001d, (CX)
 | |
| 	MOVB $0xff, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  emit_repeat_again_match_repeat_encodeBlockAsm
 | |
| 
 | |
| repeat_five_match_repeat_encodeBlockAsm:
 | |
| 	LEAL -65536(SI), SI
 | |
| 	MOVL SI, DI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	SARL $0x10, DI
 | |
| 	MOVB DI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_four_match_repeat_encodeBlockAsm:
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_three_match_repeat_encodeBlockAsm:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_two_match_repeat_encodeBlockAsm:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_two_offset_match_repeat_encodeBlockAsm:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_as_copy_encodeBlockAsm:
 | |
| 	// emitCopy
 | |
| 	CMPL DI, $0x00010000
 | |
| 	JB   two_byte_offset_repeat_as_copy_encodeBlockAsm
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  four_bytes_remain_repeat_as_copy_encodeBlockAsm
 | |
| 	MOVB $0xff, (CX)
 | |
| 	MOVL DI, 1(CX)
 | |
| 	LEAL -64(SI), SI
 | |
| 	ADDQ $0x05, CX
 | |
| 	CMPL SI, $0x04
 | |
| 	JB   four_bytes_remain_repeat_as_copy_encodeBlockAsm
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
 | |
| 	CMPL SI, $0x00010100
 | |
| 	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
 | |
| 	CMPL SI, $0x0100ffff
 | |
| 	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
 | |
| 	LEAL -16842747(SI), SI
 | |
| 	MOVL $0xfffb001d, (CX)
 | |
| 	MOVB $0xff, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
 | |
| 
 | |
| repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
 | |
| 	LEAL -65536(SI), SI
 | |
| 	MOVL SI, DI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	SARL $0x10, DI
 | |
| 	MOVB DI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| four_bytes_remain_repeat_as_copy_encodeBlockAsm:
 | |
| 	TESTL SI, SI
 | |
| 	JZ    repeat_end_emit_encodeBlockAsm
 | |
| 	XORL  R8, R8
 | |
| 	LEAL  -1(R8)(SI*4), SI
 | |
| 	MOVB  SI, (CX)
 | |
| 	MOVL  DI, 1(CX)
 | |
| 	ADDQ  $0x05, CX
 | |
| 	JMP   repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| two_byte_offset_repeat_as_copy_encodeBlockAsm:
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm
 | |
| 	MOVL $0x00000001, R8
 | |
| 	LEAL 16(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	MOVL DI, R9
 | |
| 	SHRL $0x08, R9
 | |
| 	SHLL $0x05, R9
 | |
| 	ORL  R9, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, SI
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(SI), SI
 | |
| 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 | |
| 
 | |
| emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 | |
| 	CMPL SI, $0x00010100
 | |
| 	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 | |
| 	CMPL SI, $0x0100ffff
 | |
| 	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 | |
| 	LEAL -16842747(SI), SI
 | |
| 	MOVL $0xfffb001d, (CX)
 | |
| 	MOVB $0xff, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 | |
| 
 | |
| repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	LEAL -65536(SI), SI
 | |
| 	MOVL SI, DI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	SARL $0x10, DI
 | |
| 	MOVB DI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| long_offset_short_repeat_as_copy_encodeBlockAsm:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	LEAL -60(SI), SI
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
 | |
| 	CMPL SI, $0x00010100
 | |
| 	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
 | |
| 	CMPL SI, $0x0100ffff
 | |
| 	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
 | |
| 	LEAL -16842747(SI), SI
 | |
| 	MOVL $0xfffb001d, (CX)
 | |
| 	MOVB $0xff, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
 | |
| 
 | |
| repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 | |
| 	LEAL -65536(SI), SI
 | |
| 	MOVL SI, DI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	SARL $0x10, DI
 | |
| 	MOVB DI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
 | |
| 	MOVL SI, R8
 | |
| 	SHLL $0x02, R8
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm
 | |
| 	LEAL -15(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_encodeBlockAsm:
 | |
| 	LEAL -2(R8), R8
 | |
| 	MOVB R8, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| repeat_end_emit_encodeBlockAsm:
 | |
| 	MOVL DX, 12(SP)
 | |
| 	JMP  search_loop_encodeBlockAsm
 | |
| 
 | |
| no_repeat_found_encodeBlockAsm:
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate_match_encodeBlockAsm
 | |
| 	SHRQ $0x08, DI
 | |
| 	MOVL (AX)(R10*4), SI
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI
 | |
| 	JEQ  candidate2_match_encodeBlockAsm
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	SHRQ $0x08, DI
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate3_match_encodeBlockAsm
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeBlockAsm
 | |
| 
 | |
| candidate3_match_encodeBlockAsm:
 | |
| 	ADDL $0x02, DX
 | |
| 	JMP  candidate_match_encodeBlockAsm
 | |
| 
 | |
| candidate2_match_encodeBlockAsm:
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	INCL DX
 | |
| 	MOVL R8, SI
 | |
| 
 | |
| candidate_match_encodeBlockAsm:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeBlockAsm
 | |
| 
 | |
| match_extend_back_loop_encodeBlockAsm:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeBlockAsm
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeBlockAsm
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeBlockAsm
 | |
| 	JMP  match_extend_back_loop_encodeBlockAsm
 | |
| 
 | |
| match_extend_back_end_encodeBlockAsm:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 5(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeBlockAsm:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeBlockAsm
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI
 | |
| 	SUBL R8, R9
 | |
| 	LEAL -1(R9), R8
 | |
| 	CMPL R8, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeBlockAsm
 | |
| 	CMPL R8, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeBlockAsm
 | |
| 	CMPL R8, $0x00010000
 | |
| 	JB   three_bytes_match_emit_encodeBlockAsm
 | |
| 	CMPL R8, $0x01000000
 | |
| 	JB   four_bytes_match_emit_encodeBlockAsm
 | |
| 	MOVB $0xfc, (CX)
 | |
| 	MOVL R8, 1(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm
 | |
| 
 | |
| four_bytes_match_emit_encodeBlockAsm:
 | |
| 	MOVL R8, R10
 | |
| 	SHRL $0x10, R10
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	MOVB R10, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm
 | |
| 
 | |
| three_bytes_match_emit_encodeBlockAsm:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm
 | |
| 
 | |
| two_bytes_match_emit_encodeBlockAsm:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB R8, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL R8, $0x40
 | |
| 	JB   memmove_match_emit_encodeBlockAsm
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm
 | |
| 
 | |
| one_byte_match_emit_encodeBlockAsm:
 | |
| 	SHLB $0x02, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ -8(DI)(R9*1), DI
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ DI, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU -16(DI)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeBlockAsm:
 | |
| 	MOVQ R8, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeBlockAsm
 | |
| 
 | |
| memmove_long_match_emit_encodeBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVQ  R9, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DI)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DI)(R12*1), X4
 | |
| 	MOVOU -16(DI)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R9, R12
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  R8, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeBlockAsm:
 | |
| match_nolit_loop_encodeBlockAsm:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP)
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), DI
 | |
| 	SUBL DX, DI
 | |
| 	LEAQ (BX)(DX*1), R8
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R10, R10
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeBlockAsm:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeBlockAsm
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm
 | |
| 	XORQ 8(SI)(R10*1), R11
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL 8(R10)(R11*1), R10
 | |
| 	JMP  match_nolit_end_encodeBlockAsm
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeBlockAsm:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeBlockAsm
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_encodeBlockAsm
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_encodeBlockAsm
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeBlockAsm:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeBlockAsm
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_encodeBlockAsm
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeBlockAsm:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeBlockAsm
 | |
| 	JB   match_nolit_end_encodeBlockAsm
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_encodeBlockAsm
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_encodeBlockAsm
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeBlockAsm:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_encodeBlockAsm
 | |
| 	LEAL 1(R10), R10
 | |
| 
 | |
| match_nolit_end_encodeBlockAsm:
 | |
| 	ADDL R10, DX
 | |
| 	MOVL 16(SP), SI
 | |
| 	ADDL $0x04, R10
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   two_byte_offset_match_nolit_encodeBlockAsm
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  four_bytes_remain_match_nolit_encodeBlockAsm
 | |
| 	MOVB $0xff, (CX)
 | |
| 	MOVL SI, 1(CX)
 | |
| 	LEAL -64(R10), R10
 | |
| 	ADDQ $0x05, CX
 | |
| 	CMPL R10, $0x04
 | |
| 	JB   four_bytes_remain_match_nolit_encodeBlockAsm
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
 | |
| 	MOVL R10, DI
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy
 | |
| 	CMPL R10, $0x0100ffff
 | |
| 	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy
 | |
| 	LEAL -16842747(R10), R10
 | |
| 	MOVL $0xfffb001d, (CX)
 | |
| 	MOVB $0xff, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
 | |
| 
 | |
| repeat_five_match_nolit_encodeBlockAsm_emit_copy:
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, SI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	SARL $0x10, SI
 | |
| 	MOVB SI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_four_match_nolit_encodeBlockAsm_emit_copy:
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm_emit_copy:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm_emit_copy:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
 | |
| 	XORQ DI, DI
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| four_bytes_remain_match_nolit_encodeBlockAsm:
 | |
| 	TESTL R10, R10
 | |
| 	JZ    match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 	XORL  DI, DI
 | |
| 	LEAL  -1(DI)(R10*4), R10
 | |
| 	MOVB  R10, (CX)
 | |
| 	MOVL  SI, 1(CX)
 | |
| 	ADDQ  $0x05, CX
 | |
| 	JMP   match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| two_byte_offset_match_nolit_encodeBlockAsm:
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  long_offset_short_match_nolit_encodeBlockAsm
 | |
| 	MOVL $0x00000001, DI
 | |
| 	LEAL 16(DI), DI
 | |
| 	MOVB SI, 1(CX)
 | |
| 	MOVL SI, R8
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, R10
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(R10), R10
 | |
| 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
 | |
| 
 | |
| emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	MOVL R10, DI
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b
 | |
| 	CMPL R10, $0x0100ffff
 | |
| 	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b
 | |
| 	LEAL -16842747(R10), R10
 | |
| 	MOVL $0xfffb001d, (CX)
 | |
| 	MOVB $0xff, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b
 | |
| 
 | |
| repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, SI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	SARL $0x10, SI
 | |
| 	MOVB SI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
 | |
| 	XORQ DI, DI
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| long_offset_short_match_nolit_encodeBlockAsm:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
 | |
| 	MOVL R10, DI
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
 | |
| 	CMPL R10, $0x0100ffff
 | |
| 	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
 | |
| 	LEAL -16842747(R10), R10
 | |
| 	MOVL $0xfffb001d, (CX)
 | |
| 	MOVB $0xff, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
 | |
| 
 | |
| repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, SI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	SARL $0x10, SI
 | |
| 	MOVB SI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
 | |
| 	XORQ DI, DI
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeBlockAsm:
 | |
| 	MOVL R10, DI
 | |
| 	SHLL $0x02, DI
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBlockAsm
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBlockAsm
 | |
| 	LEAL -15(DI), DI
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeBlockAsm:
 | |
| 	LEAL -2(DI), DI
 | |
| 	MOVB DI, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeBlockAsm:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeBlockAsm
 | |
| 	MOVQ -2(BX)(DX*1), DI
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeBlockAsm:
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9
 | |
| 	MOVQ  DI, R8
 | |
| 	SHRQ  $0x10, DI
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x10, R8
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x32, R8
 | |
| 	SHLQ  $0x10, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x32, SI
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10
 | |
| 	MOVL  (R10), SI
 | |
| 	MOVL  R9, (AX)(R8*4)
 | |
| 	MOVL  DX, (R10)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   match_nolit_loop_encodeBlockAsm
 | |
| 	INCL  DX
 | |
| 	JMP   search_loop_encodeBlockAsm
 | |
| 
 | |
| emit_remainder_encodeBlockAsm:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 5(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeBlockAsm:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeBlockAsm
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeBlockAsm
 | |
| 	CMPL DX, $0x00010000
 | |
| 	JB   three_bytes_emit_remainder_encodeBlockAsm
 | |
| 	CMPL DX, $0x01000000
 | |
| 	JB   four_bytes_emit_remainder_encodeBlockAsm
 | |
| 	MOVB $0xfc, (CX)
 | |
| 	MOVL DX, 1(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm
 | |
| 
 | |
| four_bytes_emit_remainder_encodeBlockAsm:
 | |
| 	MOVL DX, BX
 | |
| 	SHRL $0x10, BX
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	MOVB BL, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm
 | |
| 
 | |
| three_bytes_emit_remainder_encodeBlockAsm:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm
 | |
| 
 | |
| two_bytes_emit_remainder_encodeBlockAsm:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeBlockAsm
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm
 | |
| 
 | |
| one_byte_emit_remainder_encodeBlockAsm:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeBlockAsm:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeBlockAsm:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm
 | |
| 
 | |
| memmove_long_emit_remainder_encodeBlockAsm:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeBlockAsm:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeBlockAsm4MB(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00000200, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeBlockAsm4MB:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeBlockAsm4MB
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  DX, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeBlockAsm4MB:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x06, SI
 | |
| 	LEAL  4(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeBlockAsm4MB
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	SHLQ  $0x10, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x32, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  (AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11
 | |
| 	JNE   no_repeat_found_encodeBlockAsm4MB
 | |
| 	LEAL  1(DX), DI
 | |
| 	MOVL  12(SP), R8
 | |
| 	MOVL  DI, SI
 | |
| 	SUBL  16(SP), SI
 | |
| 	JZ    repeat_extend_back_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_extend_back_loop_encodeBlockAsm4MB:
 | |
| 	CMPL DI, R8
 | |
| 	JBE  repeat_extend_back_end_encodeBlockAsm4MB
 | |
| 	MOVB -1(BX)(SI*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_encodeBlockAsm4MB
 | |
| 	LEAL -1(DI), DI
 | |
| 	DECL SI
 | |
| 	JNZ  repeat_extend_back_loop_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_extend_back_end_encodeBlockAsm4MB:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	LEAQ 4(CX)(SI*1), SI
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_encodeBlockAsm4MB
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| repeat_dst_size_check_encodeBlockAsm4MB:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm4MB
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_encodeBlockAsm4MB
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_encodeBlockAsm4MB
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   three_bytes_repeat_emit_encodeBlockAsm4MB
 | |
| 	MOVL SI, R11
 | |
| 	SHRL $0x10, R11
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	MOVB R11, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
 | |
| 
 | |
| three_bytes_repeat_emit_encodeBlockAsm4MB:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
 | |
| 
 | |
| two_bytes_repeat_emit_encodeBlockAsm4MB:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_encodeBlockAsm4MB
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
 | |
| 
 | |
| one_byte_repeat_emit_encodeBlockAsm4MB:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_repeat_emit_encodeBlockAsm4MB:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ R11, (CX)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm4MB
 | |
| 
 | |
| memmove_long_repeat_emit_encodeBlockAsm4MB:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R12
 | |
| 	SHRQ  $0x05, R12
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R13
 | |
| 	SUBQ  R11, R13
 | |
| 	DECQ  R12
 | |
| 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R13*1), R11
 | |
| 	LEAQ  -32(CX)(R13*1), R14
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R14)
 | |
| 	MOVOA X5, 16(R14)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R13
 | |
| 	DECQ  R12
 | |
| 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R13*1), X4
 | |
| 	MOVOU -16(R10)(R13*1), X5
 | |
| 	MOVOA X4, -32(CX)(R13*1)
 | |
| 	MOVOA X5, -16(CX)(R13*1)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	CMPQ  R9, R13
 | |
| 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_repeat_emit_encodeBlockAsm4MB:
 | |
| 	ADDL $0x05, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI
 | |
| 	MOVQ src_len+32(FP), R9
 | |
| 	SUBL DX, R9
 | |
| 	LEAQ (BX)(DX*1), R10
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB:
 | |
| 	CMPL R9, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_encodeBlockAsm4MB
 | |
| 	MOVQ (R10)(R12*1), R11
 | |
| 	MOVQ 8(R10)(R12*1), R13
 | |
| 	XORQ (SI)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
 | |
| 	XORQ 8(SI)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm4MB
 | |
| 	LEAL -16(R9), R9
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB
 | |
| 
 | |
| matchlen_bsf_16repeat_extend_encodeBlockAsm4MB:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  repeat_extend_forward_end_encodeBlockAsm4MB
 | |
| 
 | |
| matchlen_match8_repeat_extend_encodeBlockAsm4MB:
 | |
| 	CMPL R9, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_encodeBlockAsm4MB
 | |
| 	MOVQ (R10)(R12*1), R11
 | |
| 	XORQ (SI)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
 | |
| 	LEAL -8(R9), R9
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_repeat_extend_encodeBlockAsm4MB
 | |
| 
 | |
| matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  repeat_extend_forward_end_encodeBlockAsm4MB
 | |
| 
 | |
| matchlen_match4_repeat_extend_encodeBlockAsm4MB:
 | |
| 	CMPL R9, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_encodeBlockAsm4MB
 | |
| 	MOVL (R10)(R12*1), R11
 | |
| 	CMPL (SI)(R12*1), R11
 | |
| 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm4MB
 | |
| 	LEAL -4(R9), R9
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_repeat_extend_encodeBlockAsm4MB:
 | |
| 	CMPL R9, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_encodeBlockAsm4MB
 | |
| 	JB   repeat_extend_forward_end_encodeBlockAsm4MB
 | |
| 	MOVW (R10)(R12*1), R11
 | |
| 	CMPW (SI)(R12*1), R11
 | |
| 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm4MB
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R9
 | |
| 	JZ   repeat_extend_forward_end_encodeBlockAsm4MB
 | |
| 
 | |
| matchlen_match1_repeat_extend_encodeBlockAsm4MB:
 | |
| 	MOVB (R10)(R12*1), R11
 | |
| 	CMPB (SI)(R12*1), R11
 | |
| 	JNE  repeat_extend_forward_end_encodeBlockAsm4MB
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| repeat_extend_forward_end_encodeBlockAsm4MB:
 | |
| 	ADDL  R12, DX
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  DI, SI
 | |
| 	MOVL  16(SP), DI
 | |
| 	TESTL R8, R8
 | |
| 	JZ    repeat_as_copy_encodeBlockAsm4MB
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_match_repeat_encodeBlockAsm4MB
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_repeat_encodeBlockAsm4MB
 | |
| 
 | |
| cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_match_repeat_encodeBlockAsm4MB
 | |
| 	CMPL SI, $0x00010100
 | |
| 	JB   repeat_four_match_repeat_encodeBlockAsm4MB
 | |
| 	LEAL -65536(SI), SI
 | |
| 	MOVL SI, DI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	SARL $0x10, DI
 | |
| 	MOVB DI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_four_match_repeat_encodeBlockAsm4MB:
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_three_match_repeat_encodeBlockAsm4MB:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_match_repeat_encodeBlockAsm4MB:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_offset_match_repeat_encodeBlockAsm4MB:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_as_copy_encodeBlockAsm4MB:
 | |
| 	// emitCopy
 | |
| 	CMPL DI, $0x00010000
 | |
| 	JB   two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
 | |
| 	MOVB $0xff, (CX)
 | |
| 	MOVL DI, 1(CX)
 | |
| 	LEAL -64(SI), SI
 | |
| 	ADDQ $0x05, CX
 | |
| 	CMPL SI, $0x04
 | |
| 	JB   four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
 | |
| 	CMPL SI, $0x00010100
 | |
| 	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
 | |
| 	LEAL -65536(SI), SI
 | |
| 	MOVL SI, DI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	SARL $0x10, DI
 | |
| 	MOVB DI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
 | |
| 	TESTL SI, SI
 | |
| 	JZ    repeat_end_emit_encodeBlockAsm4MB
 | |
| 	XORL  R8, R8
 | |
| 	LEAL  -1(R8)(SI*4), SI
 | |
| 	MOVB  SI, (CX)
 | |
| 	MOVL  DI, 1(CX)
 | |
| 	ADDQ  $0x05, CX
 | |
| 	JMP   repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm4MB
 | |
| 	MOVL $0x00000001, R8
 | |
| 	LEAL 16(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, SI
 | |
| 
 | |
| 	// emitRepeat - NOTE(review): the eight instructions following the unconditional JMP below carry no label, so no label-based jump can reach them; this looks like leftover generator output (the non-4MB variant has an emit_repeat_again label at this spot) - confirm and fix in gen.go rather than editing this generated file
 | |
| 	LEAL -4(SI), SI
 | |
| 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
 | |
| 	CMPL SI, $0x00010100
 | |
| 	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
 | |
| 	LEAL -65536(SI), SI
 | |
| 	MOVL SI, DI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	SARL $0x10, DI
 | |
| 	MOVB DI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| long_offset_short_repeat_as_copy_encodeBlockAsm4MB:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	LEAL -60(SI), SI
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
 | |
| 	CMPL SI, $0x00010100
 | |
| 	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
 | |
| 	LEAL -65536(SI), SI
 | |
| 	MOVL SI, DI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	SARL $0x10, DI
 | |
| 	MOVB DI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
 | |
| 	MOVL SI, R8
 | |
| 	SHLL $0x02, R8
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
 | |
| 	LEAL -15(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm4MB
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
 | |
| 	LEAL -2(R8), R8
 | |
| 	MOVB R8, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| repeat_end_emit_encodeBlockAsm4MB:
 | |
| 	MOVL DX, 12(SP)
 | |
| 	JMP  search_loop_encodeBlockAsm4MB
 | |
| 
 | |
| no_repeat_found_encodeBlockAsm4MB:
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate_match_encodeBlockAsm4MB
 | |
| 	SHRQ $0x08, DI
 | |
| 	MOVL (AX)(R10*4), SI
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI
 | |
| 	JEQ  candidate2_match_encodeBlockAsm4MB
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	SHRQ $0x08, DI
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate3_match_encodeBlockAsm4MB
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeBlockAsm4MB
 | |
| 
 | |
| candidate3_match_encodeBlockAsm4MB:
 | |
| 	ADDL $0x02, DX
 | |
| 	JMP  candidate_match_encodeBlockAsm4MB
 | |
| 
 | |
| candidate2_match_encodeBlockAsm4MB:
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	INCL DX
 | |
| 	MOVL R8, SI
 | |
| 
 | |
| candidate_match_encodeBlockAsm4MB:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeBlockAsm4MB
 | |
| 
 | |
| match_extend_back_loop_encodeBlockAsm4MB:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeBlockAsm4MB
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeBlockAsm4MB
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeBlockAsm4MB
 | |
| 	JMP  match_extend_back_loop_encodeBlockAsm4MB
 | |
| 
 | |
| match_extend_back_end_encodeBlockAsm4MB:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 4(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeBlockAsm4MB
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeBlockAsm4MB:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeBlockAsm4MB
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI
 | |
| 	SUBL R8, R9
 | |
| 	LEAL -1(R9), R8
 | |
| 	CMPL R8, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeBlockAsm4MB
 | |
| 	CMPL R8, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeBlockAsm4MB
 | |
| 	CMPL R8, $0x00010000
 | |
| 	JB   three_bytes_match_emit_encodeBlockAsm4MB
 | |
| 	MOVL R8, R10
 | |
| 	SHRL $0x10, R10
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	MOVB R10, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm4MB
 | |
| 
 | |
| three_bytes_match_emit_encodeBlockAsm4MB:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm4MB
 | |
| 
 | |
| two_bytes_match_emit_encodeBlockAsm4MB:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB R8, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL R8, $0x40
 | |
| 	JB   memmove_match_emit_encodeBlockAsm4MB
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm4MB
 | |
| 
 | |
| one_byte_match_emit_encodeBlockAsm4MB:
 | |
| 	SHLB $0x02, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeBlockAsm4MB:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ -8(DI)(R9*1), DI
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ DI, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU -16(DI)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeBlockAsm4MB:
 | |
| 	MOVQ R8, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeBlockAsm4MB
 | |
| 
 | |
| memmove_long_match_emit_encodeBlockAsm4MB:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVQ  R9, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DI)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DI)(R12*1), X4
 | |
| 	MOVOU -16(DI)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R9, R12
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  R8, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeBlockAsm4MB:
 | |
| match_nolit_loop_encodeBlockAsm4MB:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP)
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), DI
 | |
| 	SUBL DX, DI
 | |
| 	LEAQ (BX)(DX*1), R8
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R10, R10
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeBlockAsm4MB:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeBlockAsm4MB
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
 | |
| 	XORQ 8(SI)(R10*1), R11
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm4MB
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm4MB
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeBlockAsm4MB:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL 8(R10)(R11*1), R10
 | |
| 	JMP  match_nolit_end_encodeBlockAsm4MB
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeBlockAsm4MB:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeBlockAsm4MB
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_encodeBlockAsm4MB
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeBlockAsm4MB:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_encodeBlockAsm4MB
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeBlockAsm4MB:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeBlockAsm4MB
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_encodeBlockAsm4MB
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeBlockAsm4MB:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeBlockAsm4MB
 | |
| 	JB   match_nolit_end_encodeBlockAsm4MB
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_encodeBlockAsm4MB
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_encodeBlockAsm4MB
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeBlockAsm4MB:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_encodeBlockAsm4MB
 | |
| 	LEAL 1(R10), R10
 | |
| 
 | |
| match_nolit_end_encodeBlockAsm4MB:
 | |
| 	ADDL R10, DX
 | |
| 	MOVL 16(SP), SI
 | |
| 	ADDL $0x04, R10
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   two_byte_offset_match_nolit_encodeBlockAsm4MB
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  four_bytes_remain_match_nolit_encodeBlockAsm4MB
 | |
| 	MOVB $0xff, (CX)
 | |
| 	MOVL SI, 1(CX)
 | |
| 	LEAL -64(R10), R10
 | |
| 	ADDQ $0x05, CX
 | |
| 	CMPL R10, $0x04
 | |
| 	JB   four_bytes_remain_match_nolit_encodeBlockAsm4MB
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL R10, DI
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, SI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	SARL $0x10, SI
 | |
| 	MOVB SI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
 | |
| 	XORQ DI, DI
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| four_bytes_remain_match_nolit_encodeBlockAsm4MB:
 | |
| 	TESTL R10, R10
 | |
| 	JZ    match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 	XORL  DI, DI
 | |
| 	LEAL  -1(DI)(R10*4), R10
 | |
| 	MOVB  R10, (CX)
 | |
| 	MOVL  SI, 1(CX)
 | |
| 	ADDQ  $0x05, CX
 | |
| 	JMP   match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| two_byte_offset_match_nolit_encodeBlockAsm4MB: // emitCopy path taken when the offset in SI is below 0x10000; R10 = match length, SI = offset (both set at match_nolit_end)
 | |
| 	CMPL R10, $0x40 // length <= 64 is handled entirely by the short path
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm4MB
 | |
| 	CMPL SI, $0x00000800 // offsets >= 2048 do not fit the 2-byte form emitted below
 | |
| 	JAE  long_offset_short_match_nolit_encodeBlockAsm4MB
 | |
| 	MOVL $0x00000001, DI
 | |
| 	LEAL 16(DI), DI // DI = 0x11, the base tag byte
 | |
| 	MOVB SI, 1(CX) // second output byte = low 8 bits of the offset
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI // remaining offset bits move into bits 5+ of the tag
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX) // first output byte = 0x11 | (offset>>8)<<5
 | |
| 	ADDQ $0x02, CX // two bytes written
 | |
| 	SUBL $0x08, R10 // presumably the tag above covers 8 bytes of the match - confirm against the S2 format
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(R10), R10 // the repeat encoders below work on length-4
 | |
| 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b // unconditional: always goes straight to the 3/4/5-byte repeat forms
 | |
| 	MOVL R10, DI // NOTE(review): unreachable from here to the next label - it follows an unconditional JMP and carries no label; generator artifact, fix belongs in gen.go
 | |
| 	LEAL -4(R10), R10 // unreachable (see note above)
 | |
| 	CMPL DI, $0x08 // unreachable (see note above)
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b // unreachable; within the visible chunk this is the only reference to that label
 | |
| 	CMPL DI, $0x0c // unreachable (see note above)
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b // unreachable (see note above)
 | |
| 	CMPL SI, $0x00000800 // unreachable (see note above)
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b // unreachable; within the visible chunk this is the only reference to that label
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, SI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	SARL $0x10, SI
 | |
| 	MOVB SI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
 | |
| 	XORQ DI, DI
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| long_offset_short_match_nolit_encodeBlockAsm4MB:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL R10, DI
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, SI
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	SARL $0x10, SI
 | |
| 	MOVB SI, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
 | |
| 	XORQ DI, DI
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
 | |
| 	MOVL R10, DI
 | |
| 	SHLL $0x02, DI
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBlockAsm4MB
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBlockAsm4MB
 | |
| 	LEAL -15(DI), DI
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeBlockAsm4MB:
 | |
| 	LEAL -2(DI), DI
 | |
| 	MOVB DI, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeBlockAsm4MB:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeBlockAsm4MB
 | |
| 	MOVQ -2(BX)(DX*1), DI
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeBlockAsm4MB
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeBlockAsm4MB: // after a match: index positions DX-2 and DX in the hash table, then test for an immediate follow-on match at DX
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9 // hash multiplier
 | |
| 	MOVQ  DI, R8 // DI = 8 bytes loaded from -2(BX)(DX*1) just before the dst check
 | |
| 	SHRQ  $0x10, DI // drop two bytes: DI now starts at position DX
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x10, R8 // discard the top two bytes so only six bytes feed the hash
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x32, R8 // keep the top 14 bits: R8 = table index for position DX-2
 | |
| 	SHLQ  $0x10, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x32, SI // SI = table index for position DX
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10 // address of the 4-byte slot for position DX; AX is presumably the tmp table base set before this chunk - confirm
 | |
| 	MOVL  (R10), SI // SI = candidate position previously stored in that slot
 | |
| 	MOVL  R9, (AX)(R8*4) // store DX-2 in its slot
 | |
| 	MOVL  DX, (R10) // store DX in its slot (after the candidate was read)
 | |
| 	CMPL  (BX)(SI*1), DI // compare 4 bytes at the candidate with the 4 bytes at DX
 | |
| 	JEQ   match_nolit_loop_encodeBlockAsm4MB // equal: start another match with no literals in between
 | |
| 	INCL  DX // no match: resume the search one byte further on
 | |
| 	JMP   search_loop_encodeBlockAsm4MB
 | |
| 
 | |
| emit_remainder_encodeBlockAsm4MB: // check that the trailing literals still fit in dst before copying them
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX // AX = src_len - 12(SP); 12(SP) is the position up to which input has been emitted
 | |
| 	LEAQ 4(CX)(AX*1), AX // AX = output cursor + remaining bytes + 4 bytes of headroom
 | |
| 	CMPQ AX, (SP) // (SP) holds the output limit; its setup is before this chunk
 | |
| 	JB   emit_remainder_ok_encodeBlockAsm4MB
 | |
| 	MOVQ $0x00000000, ret+56(FP) // limit would be reached: return 0 (presumably "did not fit" for the Go caller - confirm)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeBlockAsm4MB:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm4MB
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeBlockAsm4MB
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeBlockAsm4MB
 | |
| 	CMPL DX, $0x00010000
 | |
| 	JB   three_bytes_emit_remainder_encodeBlockAsm4MB
 | |
| 	MOVL DX, BX
 | |
| 	SHRL $0x10, BX
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	MOVB BL, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
 | |
| 
 | |
| three_bytes_emit_remainder_encodeBlockAsm4MB:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
 | |
| 
 | |
| two_bytes_emit_remainder_encodeBlockAsm4MB:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeBlockAsm4MB
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
 | |
| 
 | |
| one_byte_emit_remainder_encodeBlockAsm4MB:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeBlockAsm4MB:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm4MB
 | |
| 
 | |
| memmove_long_emit_remainder_encodeBlockAsm4MB:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeBlockAsm4MB: // common exit: return the number of bytes written to dst
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX // CX is the output cursor, so CX - dst_base = bytes written
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeBlockAsm12B(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00000080, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeBlockAsm12B: // clear the tmp table (BX = tmp, X0 = 0, DX = 0x80 on entry), then initialise the stack slots
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX) // eight 16-byte stores = 128 bytes per iteration
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeBlockAsm12B // 0x80 iterations x 0x80 bytes = 16384 bytes, the declared size of tmp
 | |
| 	MOVL  $0x00000000, 12(SP) // 12(SP) = 0: position up to which input has been emitted
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX // BX = src_len - 9
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP) // 8(SP) = src_len - 8: bound tested by the search loop
 | |
| 	SHRQ  $0x05, DX // DX = src_len / 32
 | |
| 	SUBL  DX, BX // BX = src_len - 9 - src_len/32
 | |
| 	LEAQ  (CX)(BX*1), BX // CX = dst_base here, so BX = dst_base + that budget
 | |
| 	MOVQ  BX, (SP) // (SP) = output limit; the later CMPQ ..., (SP) checks return 0 once it is reached
 | |
| 	MOVL  $0x00000001, DX // scanning starts at position 1
 | |
| 	MOVL  DX, 16(SP) // 16(SP) = 1: initial repeat offset (subtracted from the position to find the repeat candidate)
 | |
| 	MOVQ  src_base+24(FP), BX // BX = src base for the rest of the function
 | |
| 
 | |
| search_loop_encodeBlockAsm12B:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x05, SI
 | |
| 	LEAL  4(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeBlockAsm12B
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x000000cf1bbcdcbb, R9
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11
 | |
| 	SHLQ  $0x18, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x34, R10
 | |
| 	SHLQ  $0x18, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x34, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  (AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x18, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x34, R10
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11
 | |
| 	JNE   no_repeat_found_encodeBlockAsm12B
 | |
| 	LEAL  1(DX), DI
 | |
| 	MOVL  12(SP), R8
 | |
| 	MOVL  DI, SI
 | |
| 	SUBL  16(SP), SI
 | |
| 	JZ    repeat_extend_back_end_encodeBlockAsm12B
 | |
| 
 | |
| repeat_extend_back_loop_encodeBlockAsm12B:
 | |
| 	CMPL DI, R8
 | |
| 	JBE  repeat_extend_back_end_encodeBlockAsm12B
 | |
| 	MOVB -1(BX)(SI*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_encodeBlockAsm12B
 | |
| 	LEAL -1(DI), DI
 | |
| 	DECL SI
 | |
| 	JNZ  repeat_extend_back_loop_encodeBlockAsm12B
 | |
| 
 | |
| repeat_extend_back_end_encodeBlockAsm12B:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	LEAQ 3(CX)(SI*1), SI
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_encodeBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| repeat_dst_size_check_encodeBlockAsm12B: // emit any pending literals in [12(SP), DI) before the repeat; DI = start of the repeat match
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm12B // nothing pending: skip literal emission
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP) // literals up to DI are about to be emitted
 | |
| 	LEAQ (BX)(SI*1), R10 // R10 = address of the first pending literal byte
 | |
| 	SUBL SI, R9 // R9 = literal byte count
 | |
| 	LEAL -1(R9), SI // SI = count - 1, the value stored in the literal header
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_encodeBlockAsm12B // < 60: length fits in the single tag byte
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_encodeBlockAsm12B // < 256: 0xf0 tag plus one length byte
 | |
| 	JB   three_bytes_repeat_emit_encodeBlockAsm12B // NOTE(review): never taken - flags are unchanged since the CMPL, so the JB above already caught this case; control falls through to the same label, which comes next (generator artifact)
 | |
| 
 | |
| three_bytes_repeat_emit_encodeBlockAsm12B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm12B
 | |
| 
 | |
| two_bytes_repeat_emit_encodeBlockAsm12B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_encodeBlockAsm12B
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm12B
 | |
| 
 | |
| one_byte_repeat_emit_encodeBlockAsm12B:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_repeat_emit_encodeBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ R11, (CX)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_repeat_emit_encodeBlockAsm12B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm12B
 | |
| 
 | |
| memmove_long_repeat_emit_encodeBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R12
 | |
| 	SHRQ  $0x05, R12
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R13
 | |
| 	SUBQ  R11, R13
 | |
| 	DECQ  R12
 | |
| 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R13*1), R11
 | |
| 	LEAQ  -32(CX)(R13*1), R14
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R14)
 | |
| 	MOVOA X5, 16(R14)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R13
 | |
| 	DECQ  R12
 | |
| 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R13*1), X4
 | |
| 	MOVOU -16(R10)(R13*1), X5
 | |
| 	MOVOA X4, -32(CX)(R13*1)
 | |
| 	MOVOA X5, -16(CX)(R13*1)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	CMPQ  R9, R13
 | |
| 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_repeat_emit_encodeBlockAsm12B:
 | |
| 	ADDL $0x05, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI
 | |
| 	MOVQ src_len+32(FP), R9
 | |
| 	SUBL DX, R9
 | |
| 	LEAQ (BX)(DX*1), R10
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_repeat_extend_encodeBlockAsm12B:
 | |
| 	CMPL R9, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_encodeBlockAsm12B
 | |
| 	MOVQ (R10)(R12*1), R11
 | |
| 	MOVQ 8(R10)(R12*1), R13
 | |
| 	XORQ (SI)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
 | |
| 	XORQ 8(SI)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm12B
 | |
| 	LEAL -16(R9), R9
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_16repeat_extend_encodeBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  repeat_extend_forward_end_encodeBlockAsm12B
 | |
| 
 | |
| matchlen_match8_repeat_extend_encodeBlockAsm12B:
 | |
| 	CMPL R9, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_encodeBlockAsm12B
 | |
| 	MOVQ (R10)(R12*1), R11
 | |
| 	XORQ (SI)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
 | |
| 	LEAL -8(R9), R9
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_repeat_extend_encodeBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_8_repeat_extend_encodeBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  repeat_extend_forward_end_encodeBlockAsm12B
 | |
| 
 | |
| matchlen_match4_repeat_extend_encodeBlockAsm12B:
 | |
| 	CMPL R9, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_encodeBlockAsm12B
 | |
| 	MOVL (R10)(R12*1), R11
 | |
| 	CMPL (SI)(R12*1), R11
 | |
| 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm12B
 | |
| 	LEAL -4(R9), R9
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_repeat_extend_encodeBlockAsm12B:
 | |
| 	CMPL R9, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_encodeBlockAsm12B
 | |
| 	JB   repeat_extend_forward_end_encodeBlockAsm12B
 | |
| 	MOVW (R10)(R12*1), R11
 | |
| 	CMPW (SI)(R12*1), R11
 | |
| 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm12B
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R9
 | |
| 	JZ   repeat_extend_forward_end_encodeBlockAsm12B
 | |
| 
 | |
| matchlen_match1_repeat_extend_encodeBlockAsm12B:
 | |
| 	MOVB (R10)(R12*1), R11
 | |
| 	CMPB (SI)(R12*1), R11
 | |
| 	JNE  repeat_extend_forward_end_encodeBlockAsm12B
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| repeat_extend_forward_end_encodeBlockAsm12B:
 | |
| 	ADDL  R12, DX
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  DI, SI
 | |
| 	MOVL  16(SP), DI
 | |
| 	TESTL R8, R8
 | |
| 	JZ    repeat_as_copy_encodeBlockAsm12B
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_match_repeat_encodeBlockAsm12B
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_repeat_encodeBlockAsm12B
 | |
| 
 | |
| cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_match_repeat_encodeBlockAsm12B
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| repeat_three_match_repeat_encodeBlockAsm12B:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| repeat_two_match_repeat_encodeBlockAsm12B:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| repeat_two_offset_match_repeat_encodeBlockAsm12B:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| repeat_as_copy_encodeBlockAsm12B:
 | |
| 	// emitCopy
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm12B
 | |
| 	MOVL $0x00000001, R8
 | |
| 	LEAL 16(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, SI
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(SI), SI
 | |
| 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| long_offset_short_repeat_as_copy_encodeBlockAsm12B:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	LEAL -60(SI), SI
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
 | |
| 	MOVL SI, R8
 | |
| 	SHLL $0x02, R8
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
 | |
| 	LEAL -15(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm12B
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
 | |
| 	LEAL -2(R8), R8
 | |
| 	MOVB R8, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| repeat_end_emit_encodeBlockAsm12B:
 | |
| 	MOVL DX, 12(SP)
 | |
| 	JMP  search_loop_encodeBlockAsm12B
 | |
| 
 | |
| no_repeat_found_encodeBlockAsm12B:
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate_match_encodeBlockAsm12B
 | |
| 	SHRQ $0x08, DI
 | |
| 	MOVL (AX)(R10*4), SI
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI
 | |
| 	JEQ  candidate2_match_encodeBlockAsm12B
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	SHRQ $0x08, DI
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate3_match_encodeBlockAsm12B
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeBlockAsm12B
 | |
| 
 | |
| candidate3_match_encodeBlockAsm12B:
 | |
| 	ADDL $0x02, DX
 | |
| 	JMP  candidate_match_encodeBlockAsm12B
 | |
| 
 | |
| candidate2_match_encodeBlockAsm12B:
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	INCL DX
 | |
| 	MOVL R8, SI
 | |
| 
 | |
| candidate_match_encodeBlockAsm12B:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeBlockAsm12B
 | |
| 
 | |
| match_extend_back_loop_encodeBlockAsm12B:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeBlockAsm12B
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeBlockAsm12B
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeBlockAsm12B
 | |
| 	JMP  match_extend_back_loop_encodeBlockAsm12B
 | |
| 
 | |
| match_extend_back_end_encodeBlockAsm12B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 3(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeBlockAsm12B:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeBlockAsm12B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI
 | |
| 	SUBL R8, R9
 | |
| 	LEAL -1(R9), R8
 | |
| 	CMPL R8, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeBlockAsm12B
 | |
| 	CMPL R8, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeBlockAsm12B
 | |
| 	JB   three_bytes_match_emit_encodeBlockAsm12B
 | |
| 
 | |
| three_bytes_match_emit_encodeBlockAsm12B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm12B
 | |
| 
 | |
| two_bytes_match_emit_encodeBlockAsm12B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB R8, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL R8, $0x40
 | |
| 	JB   memmove_match_emit_encodeBlockAsm12B
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm12B
 | |
| 
 | |
| one_byte_match_emit_encodeBlockAsm12B:
 | |
| 	SHLB $0x02, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ -8(DI)(R9*1), DI
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ DI, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU -16(DI)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeBlockAsm12B:
 | |
| 	MOVQ R8, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeBlockAsm12B
 | |
| 
 | |
| memmove_long_match_emit_encodeBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVQ  R9, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DI)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DI)(R12*1), X4
 | |
| 	MOVOU -16(DI)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R9, R12
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  R8, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeBlockAsm12B:
 | |
| match_nolit_loop_encodeBlockAsm12B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP)
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), DI
 | |
| 	SUBL DX, DI
 | |
| 	LEAQ (BX)(DX*1), R8
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R10, R10
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeBlockAsm12B:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeBlockAsm12B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm12B
 | |
| 	XORQ 8(SI)(R10*1), R11
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm12B
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL 8(R10)(R11*1), R10
 | |
| 	JMP  match_nolit_end_encodeBlockAsm12B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeBlockAsm12B:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeBlockAsm12B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm12B
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_encodeBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_encodeBlockAsm12B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeBlockAsm12B:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeBlockAsm12B
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_encodeBlockAsm12B
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeBlockAsm12B:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeBlockAsm12B
 | |
| 	JB   match_nolit_end_encodeBlockAsm12B
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_encodeBlockAsm12B
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_encodeBlockAsm12B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeBlockAsm12B:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_encodeBlockAsm12B
 | |
| 	LEAL 1(R10), R10
 | |
| 
 | |
| match_nolit_end_encodeBlockAsm12B:
 | |
| 	ADDL R10, DX
 | |
| 	MOVL 16(SP), SI
 | |
| 	ADDL $0x04, R10
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm12B
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  long_offset_short_match_nolit_encodeBlockAsm12B
 | |
| 	MOVL $0x00000001, DI
 | |
| 	LEAL 16(DI), DI
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, R10
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(R10), R10
 | |
| 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
 | |
| 	MOVL R10, DI
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
 | |
| 	XORQ DI, DI
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 | |
| 
 | |
| long_offset_short_match_nolit_encodeBlockAsm12B:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL R10, DI
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
 | |
| 	XORQ DI, DI
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeBlockAsm12B:
 | |
| 	MOVL R10, DI
 | |
| 	SHLL $0x02, DI
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBlockAsm12B
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBlockAsm12B
 | |
| 	LEAL -15(DI), DI
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeBlockAsm12B:
 | |
| 	LEAL -2(DI), DI
 | |
| 	MOVB DI, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeBlockAsm12B:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeBlockAsm12B
 | |
| 	MOVQ -2(BX)(DX*1), DI
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeBlockAsm12B:
 | |
| 	MOVQ  $0x000000cf1bbcdcbb, R9
 | |
| 	MOVQ  DI, R8
 | |
| 	SHRQ  $0x10, DI
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x18, R8
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x34, R8
 | |
| 	SHLQ  $0x18, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x34, SI
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10
 | |
| 	MOVL  (R10), SI
 | |
| 	MOVL  R9, (AX)(R8*4)
 | |
| 	MOVL  DX, (R10)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   match_nolit_loop_encodeBlockAsm12B
 | |
| 	INCL  DX
 | |
| 	JMP   search_loop_encodeBlockAsm12B
 | |
| 
 | |
| emit_remainder_encodeBlockAsm12B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeBlockAsm12B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm12B
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeBlockAsm12B
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeBlockAsm12B
 | |
| 	JB   three_bytes_emit_remainder_encodeBlockAsm12B
 | |
| 
 | |
| three_bytes_emit_remainder_encodeBlockAsm12B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm12B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeBlockAsm12B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeBlockAsm12B
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm12B
 | |
| 
 | |
| one_byte_emit_remainder_encodeBlockAsm12B:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeBlockAsm12B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeBlockAsm12B:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm12B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeBlockAsm12B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeBlockAsm12B:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeBlockAsm10B(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00000020, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeBlockAsm10B:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeBlockAsm10B
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  DX, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeBlockAsm10B:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x05, SI
 | |
| 	LEAL  4(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeBlockAsm10B
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x9e3779b1, R9
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11
 | |
| 	SHLQ  $0x20, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x36, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x36, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  (AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x20, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x36, R10
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11
 | |
| 	JNE   no_repeat_found_encodeBlockAsm10B
 | |
| 	LEAL  1(DX), DI
 | |
| 	MOVL  12(SP), R8
 | |
| 	MOVL  DI, SI
 | |
| 	SUBL  16(SP), SI
 | |
| 	JZ    repeat_extend_back_end_encodeBlockAsm10B
 | |
| 
 | |
| repeat_extend_back_loop_encodeBlockAsm10B:
 | |
| 	CMPL DI, R8
 | |
| 	JBE  repeat_extend_back_end_encodeBlockAsm10B
 | |
| 	MOVB -1(BX)(SI*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_encodeBlockAsm10B
 | |
| 	LEAL -1(DI), DI
 | |
| 	DECL SI
 | |
| 	JNZ  repeat_extend_back_loop_encodeBlockAsm10B
 | |
| 
 | |
| repeat_extend_back_end_encodeBlockAsm10B:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	LEAQ 3(CX)(SI*1), SI
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_encodeBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| repeat_dst_size_check_encodeBlockAsm10B:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm10B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_encodeBlockAsm10B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_encodeBlockAsm10B
 | |
| 	JB   three_bytes_repeat_emit_encodeBlockAsm10B
 | |
| 
 | |
| three_bytes_repeat_emit_encodeBlockAsm10B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm10B
 | |
| 
 | |
| two_bytes_repeat_emit_encodeBlockAsm10B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_encodeBlockAsm10B
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm10B
 | |
| 
 | |
| one_byte_repeat_emit_encodeBlockAsm10B:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_repeat_emit_encodeBlockAsm10B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ R11, (CX)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_repeat_emit_encodeBlockAsm10B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm10B
 | |
| 
 | |
| memmove_long_repeat_emit_encodeBlockAsm10B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R12
 | |
| 	SHRQ  $0x05, R12
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R13
 | |
| 	SUBQ  R11, R13
 | |
| 	DECQ  R12
 | |
| 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R13*1), R11
 | |
| 	LEAQ  -32(CX)(R13*1), R14
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R14)
 | |
| 	MOVOA X5, 16(R14)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R13
 | |
| 	DECQ  R12
 | |
| 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R13*1), X4
 | |
| 	MOVOU -16(R10)(R13*1), X5
 | |
| 	MOVOA X4, -32(CX)(R13*1)
 | |
| 	MOVOA X5, -16(CX)(R13*1)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	CMPQ  R9, R13
 | |
| 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_repeat_emit_encodeBlockAsm10B:
 | |
| 	ADDL $0x05, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI
 | |
| 	MOVQ src_len+32(FP), R9
 | |
| 	SUBL DX, R9
 | |
| 	LEAQ (BX)(DX*1), R10
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_repeat_extend_encodeBlockAsm10B:
 | |
| 	CMPL R9, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_encodeBlockAsm10B
 | |
| 	MOVQ (R10)(R12*1), R11
 | |
| 	MOVQ 8(R10)(R12*1), R13
 | |
| 	XORQ (SI)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
 | |
| 	XORQ 8(SI)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm10B
 | |
| 	LEAL -16(R9), R9
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_16repeat_extend_encodeBlockAsm10B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  repeat_extend_forward_end_encodeBlockAsm10B
 | |
| 
 | |
| matchlen_match8_repeat_extend_encodeBlockAsm10B:
 | |
| 	CMPL R9, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_encodeBlockAsm10B
 | |
| 	MOVQ (R10)(R12*1), R11
 | |
| 	XORQ (SI)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
 | |
| 	LEAL -8(R9), R9
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_repeat_extend_encodeBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_8_repeat_extend_encodeBlockAsm10B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  repeat_extend_forward_end_encodeBlockAsm10B
 | |
| 
 | |
| matchlen_match4_repeat_extend_encodeBlockAsm10B:
 | |
| 	CMPL R9, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_encodeBlockAsm10B
 | |
| 	MOVL (R10)(R12*1), R11
 | |
| 	CMPL (SI)(R12*1), R11
 | |
| 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm10B
 | |
| 	LEAL -4(R9), R9
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_repeat_extend_encodeBlockAsm10B:
 | |
| 	CMPL R9, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_encodeBlockAsm10B
 | |
| 	JB   repeat_extend_forward_end_encodeBlockAsm10B
 | |
| 	MOVW (R10)(R12*1), R11
 | |
| 	CMPW (SI)(R12*1), R11
 | |
| 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm10B
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R9
 | |
| 	JZ   repeat_extend_forward_end_encodeBlockAsm10B
 | |
| 
 | |
| matchlen_match1_repeat_extend_encodeBlockAsm10B:
 | |
| 	MOVB (R10)(R12*1), R11
 | |
| 	CMPB (SI)(R12*1), R11
 | |
| 	JNE  repeat_extend_forward_end_encodeBlockAsm10B
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| repeat_extend_forward_end_encodeBlockAsm10B:
 | |
| 	ADDL  R12, DX
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  DI, SI
 | |
| 	MOVL  16(SP), DI
 | |
| 	TESTL R8, R8
 | |
| 	JZ    repeat_as_copy_encodeBlockAsm10B
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_match_repeat_encodeBlockAsm10B
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_repeat_encodeBlockAsm10B
 | |
| 
 | |
| cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_match_repeat_encodeBlockAsm10B
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| repeat_three_match_repeat_encodeBlockAsm10B:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| repeat_two_match_repeat_encodeBlockAsm10B:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| repeat_two_offset_match_repeat_encodeBlockAsm10B:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| repeat_as_copy_encodeBlockAsm10B:
 | |
| 	// emitCopy
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm10B
 | |
| 	MOVL $0x00000001, R8
 | |
| 	LEAL 16(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, SI
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(SI), SI
 | |
| 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| long_offset_short_repeat_as_copy_encodeBlockAsm10B:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	LEAL -60(SI), SI
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL SI, R8
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
 | |
| 	MOVL SI, R8
 | |
| 	SHLL $0x02, R8
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
 | |
| 	LEAL -15(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm10B
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
 | |
| 	LEAL -2(R8), R8
 | |
| 	MOVB R8, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| repeat_end_emit_encodeBlockAsm10B:
 | |
| 	MOVL DX, 12(SP)
 | |
| 	JMP  search_loop_encodeBlockAsm10B
 | |
| 
 | |
| no_repeat_found_encodeBlockAsm10B:
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate_match_encodeBlockAsm10B
 | |
| 	SHRQ $0x08, DI
 | |
| 	MOVL (AX)(R10*4), SI
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI
 | |
| 	JEQ  candidate2_match_encodeBlockAsm10B
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	SHRQ $0x08, DI
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate3_match_encodeBlockAsm10B
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeBlockAsm10B
 | |
| 
 | |
| candidate3_match_encodeBlockAsm10B:
 | |
| 	ADDL $0x02, DX
 | |
| 	JMP  candidate_match_encodeBlockAsm10B
 | |
| 
 | |
| candidate2_match_encodeBlockAsm10B:
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	INCL DX
 | |
| 	MOVL R8, SI
 | |
| 
 | |
| candidate_match_encodeBlockAsm10B:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeBlockAsm10B
 | |
| 
 | |
| match_extend_back_loop_encodeBlockAsm10B:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeBlockAsm10B
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeBlockAsm10B
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeBlockAsm10B
 | |
| 	JMP  match_extend_back_loop_encodeBlockAsm10B
 | |
| 
 | |
| match_extend_back_end_encodeBlockAsm10B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 3(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeBlockAsm10B:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeBlockAsm10B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI
 | |
| 	SUBL R8, R9
 | |
| 	LEAL -1(R9), R8
 | |
| 	CMPL R8, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeBlockAsm10B
 | |
| 	CMPL R8, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeBlockAsm10B
 | |
| 	JB   three_bytes_match_emit_encodeBlockAsm10B
 | |
| 
 | |
| three_bytes_match_emit_encodeBlockAsm10B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm10B
 | |
| 
 | |
| two_bytes_match_emit_encodeBlockAsm10B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB R8, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL R8, $0x40
 | |
| 	JB   memmove_match_emit_encodeBlockAsm10B
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm10B
 | |
| 
 | |
| one_byte_match_emit_encodeBlockAsm10B:
 | |
| 	SHLB $0x02, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeBlockAsm10B:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ -8(DI)(R9*1), DI
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ DI, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU -16(DI)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeBlockAsm10B:
 | |
| 	MOVQ R8, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeBlockAsm10B
 | |
| 
 | |
| memmove_long_match_emit_encodeBlockAsm10B:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVQ  R9, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DI)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DI)(R12*1), X4
 | |
| 	MOVOU -16(DI)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R9, R12
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  R8, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeBlockAsm10B:
 | |
| match_nolit_loop_encodeBlockAsm10B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP)
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), DI
 | |
| 	SUBL DX, DI
 | |
| 	LEAQ (BX)(DX*1), R8
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R10, R10
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeBlockAsm10B:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeBlockAsm10B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm10B
 | |
| 	XORQ 8(SI)(R10*1), R11
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm10B
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeBlockAsm10B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL 8(R10)(R11*1), R10
 | |
| 	JMP  match_nolit_end_encodeBlockAsm10B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeBlockAsm10B:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeBlockAsm10B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm10B
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_encodeBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeBlockAsm10B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_encodeBlockAsm10B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeBlockAsm10B:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeBlockAsm10B
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_encodeBlockAsm10B
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeBlockAsm10B:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeBlockAsm10B
 | |
| 	JB   match_nolit_end_encodeBlockAsm10B
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_encodeBlockAsm10B
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_encodeBlockAsm10B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeBlockAsm10B:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_encodeBlockAsm10B
 | |
| 	LEAL 1(R10), R10
 | |
| 
 | |
| match_nolit_end_encodeBlockAsm10B:
 | |
| 	ADDL R10, DX
 | |
| 	MOVL 16(SP), SI
 | |
| 	ADDL $0x04, R10
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm10B
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  long_offset_short_match_nolit_encodeBlockAsm10B
 | |
| 	MOVL $0x00000001, DI
 | |
| 	LEAL 16(DI), DI
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, R10
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(R10), R10
 | |
| 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
 | |
| 	MOVL R10, DI
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
 | |
| 	XORQ DI, DI
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 | |
| 
 | |
| long_offset_short_match_nolit_encodeBlockAsm10B:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL R10, DI
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
 | |
| 	XORQ DI, DI
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeBlockAsm10B:
 | |
| 	MOVL R10, DI
 | |
| 	SHLL $0x02, DI
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBlockAsm10B
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBlockAsm10B
 | |
| 	LEAL -15(DI), DI
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeBlockAsm10B:
 | |
| 	LEAL -2(DI), DI
 | |
| 	MOVB DI, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeBlockAsm10B:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeBlockAsm10B
 | |
| 	MOVQ -2(BX)(DX*1), DI
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeBlockAsm10B:
 | |
| 	MOVQ  $0x9e3779b1, R9
 | |
| 	MOVQ  DI, R8
 | |
| 	SHRQ  $0x10, DI
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x20, R8
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x36, R8
 | |
| 	SHLQ  $0x20, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x36, SI
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10
 | |
| 	MOVL  (R10), SI
 | |
| 	MOVL  R9, (AX)(R8*4)
 | |
| 	MOVL  DX, (R10)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   match_nolit_loop_encodeBlockAsm10B
 | |
| 	INCL  DX
 | |
| 	JMP   search_loop_encodeBlockAsm10B
 | |
| 
 | |
| emit_remainder_encodeBlockAsm10B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeBlockAsm10B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm10B
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeBlockAsm10B
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeBlockAsm10B
 | |
| 	JB   three_bytes_emit_remainder_encodeBlockAsm10B
 | |
| 
 | |
| three_bytes_emit_remainder_encodeBlockAsm10B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm10B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeBlockAsm10B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeBlockAsm10B
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm10B
 | |
| 
 | |
| one_byte_emit_remainder_encodeBlockAsm10B:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeBlockAsm10B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeBlockAsm10B:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm10B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeBlockAsm10B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeBlockAsm10B:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeBlockAsm8B(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00000008, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeBlockAsm8B:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeBlockAsm8B
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  DX, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeBlockAsm8B:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x04, SI
 | |
| 	LEAL  4(DX)(SI*1), SI // SI = DX + 4 + (DX - 12(SP))/16: position to try next if nothing matches here
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeBlockAsm8B // next position reaches the search limit: flush the tail
 | |
| 	MOVQ  (BX)(DX*1), DI // DI = 8 src bytes at DX
 | |
| 	MOVL  SI, 20(SP) // 20(SP) = next position, reloaded when no candidate matches
 | |
| 	MOVQ  $0x9e3779b1, R9 // hash multiplier
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11 // R11 = bytes starting at DX+1
 | |
| 	SHLQ  $0x20, R10 // keep only the low 4 bytes as hash input
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x38, R10 // top 8 bits -> table index 0..255 for position DX
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x38, R11 // table index for position DX+1
 | |
| 	MOVL  (AX)(R10*4), SI // SI = previous position stored for hash(DX)
 | |
| 	MOVL  (AX)(R11*4), R8 // R8 = previous position stored for hash(DX+1)
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x20, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x38, R10 // R10 = table index for position DX+2 (used at no_repeat_found)
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11 // R11 = 4 src bytes at DX+1-offset, offset being 16(SP)
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11 // do the 4 bytes at DX+1 equal those one offset back?
 | |
| 	JNE   no_repeat_found_encodeBlockAsm8B
 | |
| 	LEAL  1(DX), DI // DI = start of the repeat match (DX+1)
 | |
| 	MOVL  12(SP), R8 // R8 = start of pending literals; stays live until repeat_extend_forward_end
 | |
| 	MOVL  DI, SI
 | |
| 	SUBL  16(SP), SI // SI = position of the earlier copy
 | |
| 	JZ    repeat_extend_back_end_encodeBlockAsm8B // earlier copy starts at 0: cannot extend backwards
 | |
| 
 | |
| repeat_extend_back_loop_encodeBlockAsm8B:
 | |
| 	CMPL DI, R8
 | |
| 	JBE  repeat_extend_back_end_encodeBlockAsm8B // stop once DI is not above R8
 | |
| 	MOVB -1(BX)(SI*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_encodeBlockAsm8B // preceding bytes differ
 | |
| 	LEAL -1(DI), DI // LEAL leaves flags alone; the JNZ below tests the DECL result
 | |
| 	DECL SI
 | |
| 	JNZ  repeat_extend_back_loop_encodeBlockAsm8B
 | |
| 
 | |
| repeat_extend_back_end_encodeBlockAsm8B:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI // SI = DI - 12(SP), the count of pending literal bytes
 | |
| 	LEAQ 3(CX)(SI*1), SI // dst cursor + literal bytes + 3
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_encodeBlockAsm8B // still below the dst limit
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would reach the dst limit: return 0
 | |
| 	RET
 | |
| 
 | |
| repeat_dst_size_check_encodeBlockAsm8B:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm8B // no pending literal bytes
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10 // R10 = address of the first literal byte in src
 | |
| 	SUBL SI, R9 // R9 = literal length
 | |
| 	LEAL -1(R9), SI // SI = length-1, the value written into the tag
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_encodeBlockAsm8B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_encodeBlockAsm8B
 | |
| 	JB   three_bytes_repeat_emit_encodeBlockAsm8B // same flags as the JB above, so never taken; execution falls through to the same label
 | |
| 
 | |
| three_bytes_repeat_emit_encodeBlockAsm8B:
 | |
| 	MOVB $0xf4, (CX) // tag byte 0xf4, then length-1 as 16 bits
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm8B
 | |
| 
 | |
| two_bytes_repeat_emit_encodeBlockAsm8B:
 | |
| 	MOVB $0xf0, (CX) // tag byte 0xf0, then length-1 as 8 bits
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_encodeBlockAsm8B // length-1 < 64: use the short copy
 | |
| 	JMP  memmove_long_repeat_emit_encodeBlockAsm8B
 | |
| 
 | |
| one_byte_repeat_emit_encodeBlockAsm8B:
 | |
| 	SHLB $0x02, SI // single tag byte = (length-1)<<2
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_repeat_emit_encodeBlockAsm8B:
 | |
| 	LEAQ (CX)(R9*1), SI // SI = dst cursor after the copy
 | |
| 
 | |
| 	// genMemMoveShort: copy R9 bytes from (R10) to (CX), dispatching on length
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
 | |
| 	MOVQ (R10), R11 // always moves a full 8 bytes, even when R9 < 8
 | |
| 	MOVQ R11, (CX)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11 // first 8 bytes
 | |
| 	MOVQ -8(R10)(R9*1), R10 // last 8 bytes; the two moves may overlap
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0 // first 16 bytes
 | |
| 	MOVOU -16(R10)(R9*1), X1 // last 16 bytes
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0 // first 32 bytes in X0,X1
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2 // last 32 bytes in X2,X3
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_repeat_emit_encodeBlockAsm8B:
 | |
| 	MOVQ SI, CX // advance the dst cursor past the literal bytes
 | |
| 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm8B
 | |
| 
 | |
| memmove_long_repeat_emit_encodeBlockAsm8B:
 | |
| 	LEAQ (CX)(R9*1), SI // SI = dst cursor after the copy
 | |
| 
 | |
| 	// genMemMoveLong: copy R9 bytes from (R10) to (CX) in 32-byte steps
 | |
| 	MOVOU (R10), X0 // first 32 source bytes kept in X0,X1
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2 // last 32 source bytes kept in X2,X3
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R12
 | |
| 	SHRQ  $0x05, R12 // R12 = length / 32
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11 // R11 = dst address modulo 32
 | |
| 	MOVQ  $0x00000040, R13
 | |
| 	SUBQ  R11, R13 // R13 = 64 - (dst mod 32)
 | |
| 	DECQ  R12
 | |
| 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R13*1), R11
 | |
| 	LEAQ  -32(CX)(R13*1), R14 // R14 = CX + 32 - (CX mod 32), a multiple of 32
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R14) // aligned store
 | |
| 	MOVOA X5, 16(R14)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R13
 | |
| 	DECQ  R12
 | |
| 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R13*1), X4
 | |
| 	MOVOU -16(R10)(R13*1), X5
 | |
| 	MOVOA X4, -32(CX)(R13*1)
 | |
| 	MOVOA X5, -16(CX)(R13*1)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	CMPQ  R9, R13
 | |
| 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 // repeat while length >= R13
 | |
| 	MOVOU X0, (CX) // write the saved first 32 bytes
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1) // write the saved last 32 bytes
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX // advance the dst cursor past the literal bytes
 | |
| 
 | |
| emit_literal_done_repeat_emit_encodeBlockAsm8B:
 | |
| 	ADDL $0x05, DX // DX += 5: one past the 4 bytes at DX+1 that search_loop compared
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI // SI = DX - offset
 | |
| 	MOVQ src_len+32(FP), R9
 | |
| 	SUBL DX, R9 // R9 = src bytes left from DX
 | |
| 	LEAQ (BX)(DX*1), R10 // R10 = &src[DX]
 | |
| 	LEAQ (BX)(SI*1), SI // SI = &src[DX-offset]
 | |
| 
 | |
| 	// matchLen: R12 = number of leading bytes equal at (R10) and (SI), at most R9
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_repeat_extend_encodeBlockAsm8B:
 | |
| 	CMPL R9, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_encodeBlockAsm8B
 | |
| 	MOVQ (R10)(R12*1), R11
 | |
| 	MOVQ 8(R10)(R12*1), R13
 | |
| 	XORQ (SI)(R12*1), R11 // nonzero bits mark differing bytes in the first 8
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
 | |
| 	XORQ 8(SI)(R12*1), R13 // same for the second 8
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm8B
 | |
| 	LEAL -16(R9), R9
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_16repeat_extend_encodeBlockAsm8B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13 // bit index / 8 = index of the first differing byte
 | |
| 	LEAL 8(R12)(R13*1), R12 // plus the 8 bytes that already matched
 | |
| 	JMP  repeat_extend_forward_end_encodeBlockAsm8B
 | |
| 
 | |
| matchlen_match8_repeat_extend_encodeBlockAsm8B:
 | |
| 	CMPL R9, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_encodeBlockAsm8B
 | |
| 	MOVQ (R10)(R12*1), R11
 | |
| 	XORQ (SI)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
 | |
| 	LEAL -8(R9), R9
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_repeat_extend_encodeBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_8_repeat_extend_encodeBlockAsm8B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11 // bit index / 8 = index of the first differing byte
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  repeat_extend_forward_end_encodeBlockAsm8B
 | |
| 
 | |
| matchlen_match4_repeat_extend_encodeBlockAsm8B:
 | |
| 	CMPL R9, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_encodeBlockAsm8B
 | |
| 	MOVL (R10)(R12*1), R11
 | |
| 	CMPL (SI)(R12*1), R11
 | |
| 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm8B
 | |
| 	LEAL -4(R9), R9
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_repeat_extend_encodeBlockAsm8B:
 | |
| 	CMPL R9, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_encodeBlockAsm8B // exactly one byte left
 | |
| 	JB   repeat_extend_forward_end_encodeBlockAsm8B // nothing left
 | |
| 	MOVW (R10)(R12*1), R11
 | |
| 	CMPW (SI)(R12*1), R11
 | |
| 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm8B
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R9
 | |
| 	JZ   repeat_extend_forward_end_encodeBlockAsm8B
 | |
| 
 | |
| matchlen_match1_repeat_extend_encodeBlockAsm8B:
 | |
| 	MOVB (R10)(R12*1), R11
 | |
| 	CMPB (SI)(R12*1), R11
 | |
| 	JNE  repeat_extend_forward_end_encodeBlockAsm8B
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| repeat_extend_forward_end_encodeBlockAsm8B:
 | |
| 	ADDL  R12, DX // move DX past the bytes matched going forward
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  DI, SI // SI = total match length (DI = match start)
 | |
| 	MOVL  16(SP), DI // DI = offset
 | |
| 	TESTL R8, R8 // R8 = value 12(SP) had when the repeat was detected in search_loop
 | |
| 	JZ    repeat_as_copy_encodeBlockAsm8B // it was 0: encode as a copy with explicit offset
 | |
| 
 | |
| 	// emitRepeat: encode a match of length SI without writing an offset
 | |
| 	MOVL SI, DI
 | |
| 	LEAL -4(SI), SI // SI = length-4
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_match_repeat_encodeBlockAsm8B // length <= 8: two-byte form
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm8B // target is the next label, so taken and not-taken end up in the same place
 | |
| 
 | |
| cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_match_repeat_encodeBlockAsm8B
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX) // four-byte form: bytes 0x19, 0x00, then 16-bit length-260
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 
 | |
| repeat_three_match_repeat_encodeBlockAsm8B:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX) // three-byte form: bytes 0x15, 0x00, then 8-bit length-8
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 
 | |
| repeat_two_match_repeat_encodeBlockAsm8B:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI // two-byte form: 16-bit value ((length-4)<<2)|1
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 	XORQ R8, R8 // from here to the next label: no label and preceded by an unconditional JMP, so not reachable
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 
 | |
| repeat_as_copy_encodeBlockAsm8B:
 | |
| 	// emitCopy: encode a match of length SI at offset DI
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B // length <= 64
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm8B // offset >= 2048
 | |
| 	MOVL $0x00000001, R8
 | |
| 	LEAL 16(R8), R8 // R8 = 17, the base tag value
 | |
| 	MOVB DI, 1(CX) // low 8 bits of the offset
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI // offset bits 8 and up go into tag bits 5 and up
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, SI // 8 bytes of the match are covered by the two bytes just written
 | |
| 
 | |
| 	// emitRepeat: encode the remaining length
 | |
| 	LEAL -4(SI), SI
 | |
| 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
 | |
| 	MOVL SI, DI // from here to the next label: no label and preceded by an unconditional JMP, so not reachable
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX) // four-byte form: bytes 0x19, 0x00, then a 16-bit length field
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX) // three-byte form: bytes 0x15, 0x00, then an 8-bit length field
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 	XORQ R8, R8 // from here to the next label: not reachable (follows an unconditional JMP, no label)
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 
 | |
| long_offset_short_repeat_as_copy_encodeBlockAsm8B:
 | |
| 	MOVB $0xee, (CX) // tag byte 0xee followed by the 16-bit offset
 | |
| 	MOVW DI, 1(CX)
 | |
| 	LEAL -60(SI), SI // 60 bytes of the match are covered by the three bytes just written
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat: encode the remaining length
 | |
| 	MOVL SI, DI
 | |
| 	LEAL -4(SI), SI
 | |
| 	CMPL DI, $0x08
 | |
| 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
 | |
| 	CMPL DI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short // target is the next label either way
 | |
| 
 | |
| cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
 | |
| 	CMPL SI, $0x00000104
 | |
| 	JB   repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
 | |
| 	LEAL -256(SI), SI
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW SI, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 
 | |
| repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
 | |
| 	LEAL -4(SI), SI
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB SI, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 
 | |
| repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
 | |
| 	SHLL $0x02, SI
 | |
| 	ORL  $0x01, SI
 | |
| 	MOVW SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 	XORQ R8, R8 // from here to the next label: not reachable (follows an unconditional JMP, no label)
 | |
| 	LEAL 1(R8)(SI*4), SI
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SARL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 
 | |
| two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
 | |
| 	MOVL SI, R8
 | |
| 	SHLL $0x02, R8 // R8 = length*4
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm8B // length >= 12: three-byte form
 | |
| 	LEAL -15(R8), R8 // length*4 - 15 == ((length-4)<<2)|1
 | |
| 	MOVB DI, 1(CX) // low 8 bits of the offset
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeBlockAsm8B
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
 | |
| 	LEAL -2(R8), R8 // length*4 - 2 == ((length-1)<<2)|2
 | |
| 	MOVB R8, (CX)
 | |
| 	MOVW DI, 1(CX) // 16-bit offset
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| repeat_end_emit_encodeBlockAsm8B:
 | |
| 	MOVL DX, 12(SP) // everything before DX is now emitted
 | |
| 	JMP  search_loop_encodeBlockAsm8B
 | |
| 
 | |
| no_repeat_found_encodeBlockAsm8B:
 | |
| 	CMPL (BX)(SI*1), DI // SI = table candidate for DX; compare its 4 bytes with the 4 at DX
 | |
| 	JEQ  candidate_match_encodeBlockAsm8B
 | |
| 	SHRQ $0x08, DI // DI now starts with the bytes at DX+1
 | |
| 	MOVL (AX)(R10*4), SI // R10 = table index for DX+2 computed in search_loop; SI = its stored position
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI // R8 = table candidate for DX+1
 | |
| 	JEQ  candidate2_match_encodeBlockAsm8B
 | |
| 	MOVL R9, (AX)(R10*4) // record DX+2 in the table
 | |
| 	SHRQ $0x08, DI // DI now starts with the bytes at DX+2
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate3_match_encodeBlockAsm8B
 | |
| 	MOVL 20(SP), DX // nothing matched: continue at the position saved in 20(SP)
 | |
| 	JMP  search_loop_encodeBlockAsm8B
 | |
| 
 | |
| candidate3_match_encodeBlockAsm8B:
 | |
| 	ADDL $0x02, DX // the match starts at DX+2
 | |
| 	JMP  candidate_match_encodeBlockAsm8B
 | |
| 
 | |
| candidate2_match_encodeBlockAsm8B:
 | |
| 	MOVL R9, (AX)(R10*4) // record DX+2 in the table before DX is changed
 | |
| 	INCL DX // the match starts at DX+1
 | |
| 	MOVL R8, SI // SI = the matching candidate position
 | |
| 
 | |
| candidate_match_encodeBlockAsm8B:
 | |
| 	MOVL  12(SP), DI // DI = lower bound for extending backwards
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeBlockAsm8B // candidate at position 0: nothing precedes it
 | |
| 
 | |
| match_extend_back_loop_encodeBlockAsm8B:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeBlockAsm8B // stop once DX is not above 12(SP)
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeBlockAsm8B // preceding bytes differ
 | |
| 	LEAL -1(DX), DX // LEAL leaves flags alone; the JZ below tests the DECL result
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeBlockAsm8B
 | |
| 	JMP  match_extend_back_loop_encodeBlockAsm8B
 | |
| 
 | |
| match_extend_back_end_encodeBlockAsm8B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI // DI = count of pending literal bytes
 | |
| 	LEAQ 3(CX)(DI*1), DI // dst cursor + literal bytes + 3
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeBlockAsm8B // still below the dst limit
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would reach the dst limit: return 0
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeBlockAsm8B:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeBlockAsm8B // no pending literal bytes
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI // DI = address of the first literal byte in src
 | |
| 	SUBL R8, R9 // R9 = literal length
 | |
| 	LEAL -1(R9), R8 // R8 = length-1, the value written into the tag
 | |
| 	CMPL R8, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeBlockAsm8B
 | |
| 	CMPL R8, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeBlockAsm8B
 | |
| 	JB   three_bytes_match_emit_encodeBlockAsm8B // same flags as the JB above, so never taken; execution falls through to the same label
 | |
| 
 | |
| three_bytes_match_emit_encodeBlockAsm8B:
 | |
| 	MOVB $0xf4, (CX) // tag byte 0xf4, then length-1 as 16 bits
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm8B
 | |
| 
 | |
| two_bytes_match_emit_encodeBlockAsm8B:
 | |
| 	MOVB $0xf0, (CX) // tag byte 0xf0, then length-1 as 8 bits
 | |
| 	MOVB R8, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL R8, $0x40
 | |
| 	JB   memmove_match_emit_encodeBlockAsm8B // length-1 < 64: use the short copy
 | |
| 	JMP  memmove_long_match_emit_encodeBlockAsm8B
 | |
| 
 | |
| one_byte_match_emit_encodeBlockAsm8B:
 | |
| 	SHLB $0x02, R8 // single tag byte = (length-1)<<2
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeBlockAsm8B:
 | |
| 	LEAQ (CX)(R9*1), R8 // R8 = dst cursor after the copy
 | |
| 
 | |
| 	// genMemMoveShort: copy R9 bytes from (DI) to (CX), dispatching on length
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
 | |
| 	MOVQ (DI), R10 // always moves a full 8 bytes, even when R9 < 8
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
 | |
| 	MOVQ (DI), R10 // first 8 bytes
 | |
| 	MOVQ -8(DI)(R9*1), DI // last 8 bytes; the two moves may overlap
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ DI, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
 | |
| 	MOVOU (DI), X0 // first 16 bytes
 | |
| 	MOVOU -16(DI)(R9*1), X1 // last 16 bytes
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
 | |
| 	MOVOU (DI), X0 // first 32 bytes in X0,X1
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2 // last 32 bytes in X2,X3
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeBlockAsm8B:
 | |
| 	MOVQ R8, CX // advance the dst cursor past the literal bytes
 | |
| 	JMP  emit_literal_done_match_emit_encodeBlockAsm8B
 | |
| 
 | |
| memmove_long_match_emit_encodeBlockAsm8B:
 | |
| 	LEAQ (CX)(R9*1), R8 // R8 = dst cursor after the copy
 | |
| 
 | |
| 	// genMemMoveLong: copy R9 bytes from (DI) to (CX) in 32-byte steps
 | |
| 	MOVOU (DI), X0 // first 32 source bytes kept in X0,X1
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2 // last 32 source bytes kept in X2,X3
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVQ  R9, R11
 | |
| 	SHRQ  $0x05, R11 // R11 = length / 32
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10 // R10 = dst address modulo 32
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12 // R12 = 64 - (dst mod 32)
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DI)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13 // R13 = CX + 32 - (CX mod 32), a multiple of 32
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13) // aligned store
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DI)(R12*1), X4
 | |
| 	MOVOU -16(DI)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R9, R12
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 // repeat while length >= R12
 | |
| 	MOVOU X0, (CX) // write the saved first 32 bytes
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1) // write the saved last 32 bytes
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  R8, CX // advance the dst cursor past the literal bytes
 | |
| 
 | |
| emit_literal_done_match_emit_encodeBlockAsm8B:
 | |
| match_nolit_loop_encodeBlockAsm8B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP) // 16(SP) = DX - SI, the offset of this match
 | |
| 	ADDL $0x04, DX // step both positions over the 4 bytes already compared equal
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), DI
 | |
| 	SUBL DX, DI // DI = src bytes left from DX
 | |
| 	LEAQ (BX)(DX*1), R8 // R8 = &src[DX]
 | |
| 	LEAQ (BX)(SI*1), SI // SI = &src[candidate]
 | |
| 
 | |
| 	// matchLen: R10 = number of leading bytes equal at (R8) and (SI), at most DI
 | |
| 	XORL R10, R10
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeBlockAsm8B:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeBlockAsm8B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9 // nonzero bits mark differing bytes in the first 8
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm8B
 | |
| 	XORQ 8(SI)(R10*1), R11 // same for the second 8
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm8B
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeBlockAsm8B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11 // bit index / 8 = index of the first differing byte
 | |
| 	LEAL 8(R10)(R11*1), R10 // plus the 8 bytes that already matched
 | |
| 	JMP  match_nolit_end_encodeBlockAsm8B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeBlockAsm8B:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeBlockAsm8B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm8B
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_encodeBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeBlockAsm8B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9 // bit index / 8 = index of the first differing byte
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_encodeBlockAsm8B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeBlockAsm8B:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeBlockAsm8B
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_encodeBlockAsm8B
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeBlockAsm8B:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeBlockAsm8B // exactly one byte left
 | |
| 	JB   match_nolit_end_encodeBlockAsm8B // nothing left
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_encodeBlockAsm8B
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_encodeBlockAsm8B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeBlockAsm8B:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_encodeBlockAsm8B
 | |
| 	LEAL 1(R10), R10
 | |
| 
 | |
| match_nolit_end_encodeBlockAsm8B:
 | |
| 	ADDL R10, DX // move DX past the bytes matched going forward
 | |
| 	MOVL 16(SP), SI // SI = offset
 | |
| 	ADDL $0x04, R10 // R10 = total match length, including the first 4 bytes
 | |
| 	MOVL DX, 12(SP) // everything before DX counts as emitted
 | |
| 
 | |
| 	// emitCopy: encode a match of length R10 at offset SI
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm8B // length <= 64
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  long_offset_short_match_nolit_encodeBlockAsm8B // offset >= 2048
 | |
| 	MOVL $0x00000001, DI
 | |
| 	LEAL 16(DI), DI // DI = 17, the base tag value
 | |
| 	MOVB SI, 1(CX) // low 8 bits of the offset
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI // offset bits 8 and up go into tag bits 5 and up
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, R10 // 8 bytes of the match are covered by the two bytes just written
 | |
| 
 | |
| 	// emitRepeat: encode the remaining length
 | |
| 	LEAL -4(R10), R10
 | |
| 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
 | |
| 	MOVL R10, SI // from here to the next label: no label and preceded by an unconditional JMP, so not reachable
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX) // four-byte form: bytes 0x19, 0x00, then a 16-bit length field
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX) // three-byte form: bytes 0x15, 0x00, then an 8-bit length field
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 | |
| 	XORQ DI, DI // from here to the next label: not reachable (follows an unconditional JMP, no label)
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 | |
| 
 | |
| long_offset_short_match_nolit_encodeBlockAsm8B:
 | |
| 	MOVB $0xee, (CX) // tag byte 0xee followed by the 16-bit offset
 | |
| 	MOVW SI, 1(CX)
 | |
| 	LEAL -60(R10), R10 // 60 bytes of the match are covered by the three bytes just written
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat: encode the remaining length
 | |
| 	MOVL R10, SI
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short // target is the next label either way
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R10, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R10, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 | |
| 	XORQ DI, DI // from here to the next label: not reachable (follows an unconditional JMP, no label)
 | |
| 	LEAL 1(DI)(R10*4), R10
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SARL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, R10
 | |
| 	MOVB R10, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeBlockAsm8B:
 | |
| 	MOVL R10, DI
 | |
| 	SHLL $0x02, DI // DI = length*4
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBlockAsm8B // length >= 12: three-byte form
 | |
| 	LEAL -15(DI), DI // length*4 - 15 == ((length-4)<<2)|1
 | |
| 	MOVB SI, 1(CX) // low 8 bits of the offset
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeBlockAsm8B:
 | |
| 	LEAL -2(DI), DI // length*4 - 2 == ((length-1)<<2)|2
 | |
| 	MOVB DI, (CX)
 | |
| 	MOVW SI, 1(CX) // 16-bit offset
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeBlockAsm8B:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeBlockAsm8B // DX reached the search limit
 | |
| 	MOVQ -2(BX)(DX*1), DI // DI = 8 src bytes starting at DX-2
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeBlockAsm8B
 | |
| 	MOVQ $0x00000000, ret+56(FP) // dst cursor reached the limit: return 0
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeBlockAsm8B:
 | |
| 	MOVQ  $0x9e3779b1, R9 // hash multiplier
 | |
| 	MOVQ  DI, R8
 | |
| 	SHRQ  $0x10, DI // DI = bytes starting at DX
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x20, R8
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x38, R8 // R8 = table index for position DX-2
 | |
| 	SHLQ  $0x20, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x38, SI // SI = table index for position DX
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10
 | |
| 	MOVL  (R10), SI // SI = previous position stored for hash(DX)
 | |
| 	MOVL  R9, (AX)(R8*4) // record DX-2
 | |
| 	MOVL  DX, (R10) // record DX
 | |
| 	CMPL  (BX)(SI*1), DI // do the 4 bytes at the candidate equal the 4 at DX?
 | |
| 	JEQ   match_nolit_loop_encodeBlockAsm8B // yes: another match with no literals in between
 | |
| 	INCL  DX
 | |
| 	JMP   search_loop_encodeBlockAsm8B
 | |
| 
 | |
| emit_remainder_encodeBlockAsm8B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX // AX = src_len - 12(SP), the bytes still to emit
 | |
| 	LEAQ 3(CX)(AX*1), AX // dst cursor + remaining bytes + 3
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeBlockAsm8B // still below the dst limit
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would reach the dst limit: return 0
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeBlockAsm8B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm8B // nothing left to emit
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX // AX = address of the first remaining src byte
 | |
| 	SUBL DX, SI // SI = literal length
 | |
| 	LEAL -1(SI), DX // DX = length-1, the value written into the tag
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeBlockAsm8B
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeBlockAsm8B
 | |
| 	JB   three_bytes_emit_remainder_encodeBlockAsm8B // same flags as the JB above, so never taken; execution falls through to the same label
 | |
| 
 | |
| three_bytes_emit_remainder_encodeBlockAsm8B:
 | |
| 	MOVB $0xf4, (CX) // tag byte 0xf4, then length-1 as 16 bits
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm8B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeBlockAsm8B:
 | |
| 	MOVB $0xf0, (CX) // tag byte 0xf0, then length-1 as 8 bits
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeBlockAsm8B // length-1 < 64: use the short copy
 | |
| 	JMP  memmove_long_emit_remainder_encodeBlockAsm8B
 | |
| 
 | |
| one_byte_emit_remainder_encodeBlockAsm8B:
 | |
| 	SHLB $0x02, DL // single tag byte = (length-1)<<2
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeBlockAsm8B:
 | |
| 	LEAQ (CX)(SI*1), DX // DX = dst cursor after the copy
 | |
| 	MOVL SI, BX // BX = length; the src base held in BX is no longer used after this
 | |
| 
 | |
| 	// genMemMoveShort: copy BX bytes from (AX) to (CX); every case moves exactly BX bytes
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
 | |
| 	MOVB (AX), SI // first byte
 | |
| 	MOVB -1(AX)(BX*1), AL // last byte (the same byte when BX == 1)
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7:
 | |
| 	MOVL (AX), SI // first 4 bytes
 | |
| 	MOVL -4(AX)(BX*1), AX // last 4 bytes; the two moves may overlap
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI // first 8 bytes
 | |
| 	MOVQ -8(AX)(BX*1), AX // last 8 bytes
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0 // first 16 bytes
 | |
| 	MOVOU -16(AX)(BX*1), X1 // last 16 bytes
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0 // first 32 bytes in X0,X1
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2 // last 32 bytes in X2,X3
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeBlockAsm8B:
 | |
| 	MOVQ DX, CX // advance the dst cursor past the literal bytes
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm8B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeBlockAsm8B:
 | |
| 	LEAQ (CX)(SI*1), DX // DX = dst cursor after the copy
 | |
| 	MOVL SI, BX // BX = length; the src base held in BX is no longer used after this
 | |
| 
 | |
| 	// genMemMoveLong: copy BX bytes from (AX) to (CX) in 32-byte steps
 | |
| 	MOVOU (AX), X0 // first 32 source bytes kept in X0,X1
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2 // last 32 source bytes kept in X2,X3
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI // DI = length / 32
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI // SI = dst address modulo 32
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8 // R8 = 64 - (dst mod 32)
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9 // R9 = CX + 32 - (CX mod 32), a multiple of 32
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9) // aligned store
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 // repeat while length >= R8
 | |
| 	MOVOU X0, (CX) // write the saved first 32 bytes
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1) // write the saved last 32 bytes
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX // advance the dst cursor past the literal bytes
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeBlockAsm8B:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX // CX = dst cursor - dst base = number of bytes written
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeBetterBlockAsm(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00001200, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeBetterBlockAsm:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeBetterBlockAsm
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -6(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  $0x00000000, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeBetterBlockAsm:
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	SHRL $0x07, SI
 | |
| 	CMPL SI, $0x63
 | |
| 	JBE  check_maxskip_ok_encodeBetterBlockAsm
 | |
| 	LEAL 100(DX), SI
 | |
| 	JMP  check_maxskip_cont_encodeBetterBlockAsm
 | |
| 
 | |
| check_maxskip_ok_encodeBetterBlockAsm:
 | |
| 	LEAL 1(DX)(SI*1), SI
 | |
| 
 | |
| check_maxskip_cont_encodeBetterBlockAsm:
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeBetterBlockAsm
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x00cf1bbcdcbfa563, R9
 | |
| 	MOVQ  $0x9e3779b1, SI
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x32, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  524288(AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	MOVL  DX, 524288(AX)(R11*4)
 | |
| 	MOVQ  (BX)(SI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	CMPQ  R10, DI
 | |
| 	JEQ   candidate_match_encodeBetterBlockAsm
 | |
| 	CMPQ  R11, DI
 | |
| 	JNE   no_short_found_encodeBetterBlockAsm
 | |
| 	MOVL  R8, SI
 | |
| 	JMP   candidate_match_encodeBetterBlockAsm
 | |
| 
 | |
| no_short_found_encodeBetterBlockAsm:
 | |
| 	CMPL R10, DI
 | |
| 	JEQ  candidate_match_encodeBetterBlockAsm
 | |
| 	CMPL R11, DI
 | |
| 	JEQ  candidateS_match_encodeBetterBlockAsm
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeBetterBlockAsm
 | |
| 
 | |
| candidateS_match_encodeBetterBlockAsm:
 | |
| 	SHRQ  $0x08, DI
 | |
| 	MOVQ  DI, R10
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	INCL  DX
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   candidate_match_encodeBetterBlockAsm
 | |
| 	DECL  DX
 | |
| 	MOVL  R8, SI
 | |
| 
 | |
| candidate_match_encodeBetterBlockAsm:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeBetterBlockAsm
 | |
| 
 | |
| match_extend_back_loop_encodeBetterBlockAsm:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeBetterBlockAsm
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeBetterBlockAsm
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeBetterBlockAsm
 | |
| 	JMP  match_extend_back_loop_encodeBetterBlockAsm
 | |
| 
 | |
| match_extend_back_end_encodeBetterBlockAsm:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 5(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeBetterBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeBetterBlockAsm:
 | |
| 	MOVL DX, DI
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeBetterBlockAsm:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	MOVQ 8(R9)(R12*1), R13
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
 | |
| 	XORQ 8(R10)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeBetterBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  match_nolit_end_encodeBetterBlockAsm
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeBetterBlockAsm:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeBetterBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  match_nolit_end_encodeBetterBlockAsm
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeBetterBlockAsm:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm
 | |
| 	MOVL (R9)(R12*1), R11
 | |
| 	CMPL (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeBetterBlockAsm:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm
 | |
| 	JB   match_nolit_end_encodeBetterBlockAsm
 | |
| 	MOVW (R9)(R12*1), R11
 | |
| 	CMPW (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   match_nolit_end_encodeBetterBlockAsm
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeBetterBlockAsm:
 | |
| 	MOVB (R9)(R12*1), R11
 | |
| 	CMPB (R10)(R12*1), R11
 | |
| 	JNE  match_nolit_end_encodeBetterBlockAsm
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| match_nolit_end_encodeBetterBlockAsm:
 | |
| 	MOVL DX, R8
 | |
| 	SUBL SI, R8
 | |
| 
 | |
| 	// Check if repeat
 | |
| 	CMPL 16(SP), R8
 | |
| 	JEQ  match_is_repeat_encodeBetterBlockAsm
 | |
| 	CMPL R12, $0x01
 | |
| 	JA   match_length_ok_encodeBetterBlockAsm
 | |
| 	CMPL R8, $0x0000ffff
 | |
| 	JBE  match_length_ok_encodeBetterBlockAsm
 | |
| 	MOVL 20(SP), DX
 | |
| 	INCL DX
 | |
| 	JMP  search_loop_encodeBetterBlockAsm
 | |
| 
 | |
| match_length_ok_encodeBetterBlockAsm:
 | |
| 	MOVL R8, 16(SP)
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeBetterBlockAsm
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeBetterBlockAsm
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   three_bytes_match_emit_encodeBetterBlockAsm
 | |
| 	CMPL SI, $0x01000000
 | |
| 	JB   four_bytes_match_emit_encodeBetterBlockAsm
 | |
| 	MOVB $0xfc, (CX)
 | |
| 	MOVL SI, 1(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
 | |
| 
 | |
| four_bytes_match_emit_encodeBetterBlockAsm:
 | |
| 	MOVL SI, R11
 | |
| 	SHRL $0x10, R11
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	MOVB R11, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
 | |
| 
 | |
| three_bytes_match_emit_encodeBetterBlockAsm:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
 | |
| 
 | |
| two_bytes_match_emit_encodeBetterBlockAsm:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_encodeBetterBlockAsm
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
 | |
| 
 | |
| one_byte_match_emit_encodeBetterBlockAsm:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeBetterBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x04
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4
 | |
| 	CMPQ R9, $0x08
 | |
| 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL -4(R10)(R9*1), R10
 | |
| 	MOVL R11, (CX)
 | |
| 	MOVL R10, -4(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeBetterBlockAsm:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm
 | |
| 
 | |
| memmove_long_match_emit_encodeBetterBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeBetterBlockAsm:
 | |
| 	ADDL R12, DX
 | |
| 	ADDL $0x04, R12
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL R8, $0x00010000
 | |
| 	JB   two_byte_offset_match_nolit_encodeBetterBlockAsm
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  four_bytes_remain_match_nolit_encodeBetterBlockAsm
 | |
| 	MOVB $0xff, (CX)
 | |
| 	MOVL R8, 1(CX)
 | |
| 	LEAL -64(R12), R12
 | |
| 	ADDQ $0x05, CX
 | |
| 	CMPL R12, $0x04
 | |
| 	JB   four_bytes_remain_match_nolit_encodeBetterBlockAsm
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy:
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy
 | |
| 	CMPL R12, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy
 | |
| 	CMPL R12, $0x0100ffff
 | |
| 	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy
 | |
| 	LEAL -16842747(R12), R12
 | |
| 	MOVL $0xfffb001d, (CX)
 | |
| 	MOVB $0xff, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy
 | |
| 
 | |
| repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy:
 | |
| 	LEAL -65536(R12), R12
 | |
| 	MOVL R12, R8
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	SARL $0x10, R8
 | |
| 	MOVB R8, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy:
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| four_bytes_remain_match_nolit_encodeBetterBlockAsm:
 | |
| 	TESTL R12, R12
 | |
| 	JZ    match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 	XORL  SI, SI
 | |
| 	LEAL  -1(SI)(R12*4), R12
 | |
| 	MOVB  R12, (CX)
 | |
| 	MOVL  R8, 1(CX)
 | |
| 	ADDQ  $0x05, CX
 | |
| 	JMP   match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| two_byte_offset_match_nolit_encodeBetterBlockAsm:
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm
 | |
| 	MOVL $0x00000001, SI
 | |
| 	LEAL 16(SI), SI
 | |
| 	MOVB R8, 1(CX)
 | |
| 	MOVL R8, R9
 | |
| 	SHRL $0x08, R9
 | |
| 	SHLL $0x05, R9
 | |
| 	ORL  R9, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, R12
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(R12), R12
 | |
| 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
 | |
| 
 | |
| emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
 | |
| 	CMPL R12, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
 | |
| 	CMPL R12, $0x0100ffff
 | |
| 	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
 | |
| 	LEAL -16842747(R12), R12
 | |
| 	MOVL $0xfffb001d, (CX)
 | |
| 	MOVB $0xff, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
 | |
| 
 | |
| repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
 | |
| 	LEAL -65536(R12), R12
 | |
| 	MOVL R12, R8
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	SARL $0x10, R8
 | |
| 	MOVB R8, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| long_offset_short_match_nolit_encodeBetterBlockAsm:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	LEAL -60(R12), R12
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short
 | |
| 	CMPL R12, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short
 | |
| 	CMPL R12, $0x0100ffff
 | |
| 	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short
 | |
| 	LEAL -16842747(R12), R12
 | |
| 	MOVL $0xfffb001d, (CX)
 | |
| 	MOVB $0xff, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short
 | |
| 
 | |
| repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 | |
| 	LEAL -65536(R12), R12
 | |
| 	MOVL R12, R8
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	SARL $0x10, R8
 | |
| 	MOVB R8, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeBetterBlockAsm:
 | |
| 	MOVL R12, SI
 | |
| 	SHLL $0x02, SI
 | |
| 	CMPL R12, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm
 | |
| 	LEAL -15(SI), SI
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeBetterBlockAsm:
 | |
| 	LEAL -2(SI), SI
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| match_is_repeat_encodeBetterBlockAsm:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm
 | |
| 	CMPL SI, $0x01000000
 | |
| 	JB   four_bytes_match_emit_repeat_encodeBetterBlockAsm
 | |
| 	MOVB $0xfc, (CX)
 | |
| 	MOVL SI, 1(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 | |
| 
 | |
| four_bytes_match_emit_repeat_encodeBetterBlockAsm:
 | |
| 	MOVL SI, R11
 | |
| 	SHRL $0x10, R11
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	MOVB R11, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 | |
| 
 | |
| three_bytes_match_emit_repeat_encodeBetterBlockAsm:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 | |
| 
 | |
| two_bytes_match_emit_repeat_encodeBetterBlockAsm:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 | |
| 
 | |
| one_byte_match_emit_repeat_encodeBetterBlockAsm:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_repeat_encodeBetterBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x04
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4
 | |
| 	CMPQ R9, $0x08
 | |
| 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL -4(R10)(R9*1), R10
 | |
| 	MOVL R11, (CX)
 | |
| 	MOVL R10, -4(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
 | |
| 
 | |
| memmove_long_match_emit_repeat_encodeBetterBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_repeat_encodeBetterBlockAsm:
 | |
| 	ADDL R12, DX
 | |
| 	ADDL $0x04, R12
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm:
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm
 | |
| 	CMPL R12, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_repeat_encodeBetterBlockAsm
 | |
| 	CMPL R12, $0x0100ffff
 | |
| 	JB   repeat_five_match_nolit_repeat_encodeBetterBlockAsm
 | |
| 	LEAL -16842747(R12), R12
 | |
| 	MOVL $0xfffb001d, (CX)
 | |
| 	MOVB $0xff, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_five_match_nolit_repeat_encodeBetterBlockAsm:
 | |
| 	LEAL -65536(R12), R12
 | |
| 	MOVL R12, R8
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	SARL $0x10, R8
 | |
| 	MOVB R8, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_four_match_nolit_repeat_encodeBetterBlockAsm:
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_three_match_nolit_repeat_encodeBetterBlockAsm:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_two_match_nolit_repeat_encodeBetterBlockAsm:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 | |
| 
 | |
| repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeBetterBlockAsm:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeBetterBlockAsm
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeBetterBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeBetterBlockAsm:
 | |
| 	MOVQ  $0x00cf1bbcdcbfa563, SI
 | |
| 	MOVQ  $0x9e3779b1, R8
 | |
| 	LEAQ  1(DI), DI
 | |
| 	LEAQ  -2(DX), R9
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  1(BX)(DI*1), R11
 | |
| 	MOVQ  (BX)(R9*1), R12
 | |
| 	MOVQ  1(BX)(R9*1), R13
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R8, R11
 | |
| 	SHRQ  $0x32, R11
 | |
| 	SHLQ  $0x08, R12
 | |
| 	IMULQ SI, R12
 | |
| 	SHRQ  $0x2f, R12
 | |
| 	SHLQ  $0x20, R13
 | |
| 	IMULQ R8, R13
 | |
| 	SHRQ  $0x32, R13
 | |
| 	LEAQ  1(DI), R8
 | |
| 	LEAQ  1(R9), R14
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R9, (AX)(R12*4)
 | |
| 	MOVL  R8, 524288(AX)(R11*4)
 | |
| 	MOVL  R14, 524288(AX)(R13*4)
 | |
| 	LEAQ  1(R9)(DI*1), R8
 | |
| 	SHRQ  $0x01, R8
 | |
| 	ADDQ  $0x01, DI
 | |
| 	SUBQ  $0x01, R9
 | |
| 
 | |
| index_loop_encodeBetterBlockAsm:
 | |
| 	CMPQ  R8, R9
 | |
| 	JAE   search_loop_encodeBetterBlockAsm
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	SHLQ  $0x08, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x2f, R11
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R8, (AX)(R11*4)
 | |
| 	ADDQ  $0x02, DI
 | |
| 	ADDQ  $0x02, R8
 | |
| 	JMP   index_loop_encodeBetterBlockAsm
 | |
| 
 | |
| emit_remainder_encodeBetterBlockAsm:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 5(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeBetterBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeBetterBlockAsm:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeBetterBlockAsm
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeBetterBlockAsm
 | |
| 	CMPL DX, $0x00010000
 | |
| 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm
 | |
| 	CMPL DX, $0x01000000
 | |
| 	JB   four_bytes_emit_remainder_encodeBetterBlockAsm
 | |
| 	MOVB $0xfc, (CX)
 | |
| 	MOVL DX, 1(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 | |
| 
 | |
| four_bytes_emit_remainder_encodeBetterBlockAsm:
 | |
| 	MOVL DX, BX
 | |
| 	SHRL $0x10, BX
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	MOVB BL, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 | |
| 
 | |
| three_bytes_emit_remainder_encodeBetterBlockAsm:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 | |
| 
 | |
| two_bytes_emit_remainder_encodeBetterBlockAsm:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeBetterBlockAsm
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 | |
| 
 | |
| one_byte_emit_remainder_encodeBetterBlockAsm:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeBetterBlockAsm:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeBetterBlockAsm:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm
 | |
| 
 | |
| memmove_long_emit_remainder_encodeBetterBlockAsm:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeBetterBlockAsm:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeBetterBlockAsm4MB(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00001200, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeBetterBlockAsm4MB:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeBetterBlockAsm4MB
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -6(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  $0x00000000, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeBetterBlockAsm4MB:
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	SHRL $0x07, SI
 | |
| 	CMPL SI, $0x63
 | |
| 	JBE  check_maxskip_ok_encodeBetterBlockAsm4MB
 | |
| 	LEAL 100(DX), SI
 | |
| 	JMP  check_maxskip_cont_encodeBetterBlockAsm4MB
 | |
| 
 | |
| check_maxskip_ok_encodeBetterBlockAsm4MB:
 | |
| 	LEAL 1(DX)(SI*1), SI
 | |
| 
 | |
| check_maxskip_cont_encodeBetterBlockAsm4MB:
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeBetterBlockAsm4MB
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x00cf1bbcdcbfa563, R9
 | |
| 	MOVQ  $0x9e3779b1, SI
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x32, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  524288(AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	MOVL  DX, 524288(AX)(R11*4)
 | |
| 	MOVQ  (BX)(SI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	CMPQ  R10, DI
 | |
| 	JEQ   candidate_match_encodeBetterBlockAsm4MB
 | |
| 	CMPQ  R11, DI
 | |
| 	JNE   no_short_found_encodeBetterBlockAsm4MB
 | |
| 	MOVL  R8, SI
 | |
| 	JMP   candidate_match_encodeBetterBlockAsm4MB
 | |
| 
 | |
| no_short_found_encodeBetterBlockAsm4MB:
 | |
| 	CMPL R10, DI
 | |
| 	JEQ  candidate_match_encodeBetterBlockAsm4MB
 | |
| 	CMPL R11, DI
 | |
| 	JEQ  candidateS_match_encodeBetterBlockAsm4MB
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeBetterBlockAsm4MB
 | |
| 
 | |
| candidateS_match_encodeBetterBlockAsm4MB:
 | |
| 	SHRQ  $0x08, DI
 | |
| 	MOVQ  DI, R10
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	INCL  DX
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   candidate_match_encodeBetterBlockAsm4MB
 | |
| 	DECL  DX
 | |
| 	MOVL  R8, SI
 | |
| 
 | |
| candidate_match_encodeBetterBlockAsm4MB:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| match_extend_back_loop_encodeBetterBlockAsm4MB:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeBetterBlockAsm4MB
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeBetterBlockAsm4MB
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeBetterBlockAsm4MB
 | |
| 	JMP  match_extend_back_loop_encodeBetterBlockAsm4MB
 | |
| 
 | |
| match_extend_back_end_encodeBetterBlockAsm4MB:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 4(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeBetterBlockAsm4MB
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeBetterBlockAsm4MB:
 | |
| 	MOVL DX, DI
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	MOVQ 8(R9)(R12*1), R13
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	XORQ 8(R10)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  match_nolit_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeBetterBlockAsm4MB:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  match_nolit_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeBetterBlockAsm4MB:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	MOVL (R9)(R12*1), R11
 | |
| 	CMPL (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeBetterBlockAsm4MB:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	JB   match_nolit_end_encodeBetterBlockAsm4MB
 | |
| 	MOVW (R9)(R12*1), R11
 | |
| 	CMPW (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   match_nolit_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeBetterBlockAsm4MB:
 | |
| 	MOVB (R9)(R12*1), R11
 | |
| 	CMPB (R10)(R12*1), R11
 | |
| 	JNE  match_nolit_end_encodeBetterBlockAsm4MB
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| match_nolit_end_encodeBetterBlockAsm4MB:
 | |
| 	MOVL DX, R8
 | |
| 	SUBL SI, R8
 | |
| 
 | |
| 	// Check if repeat
 | |
| 	CMPL 16(SP), R8
 | |
| 	JEQ  match_is_repeat_encodeBetterBlockAsm4MB
 | |
| 	CMPL R12, $0x01
 | |
| 	JA   match_length_ok_encodeBetterBlockAsm4MB
 | |
| 	CMPL R8, $0x0000ffff
 | |
| 	JBE  match_length_ok_encodeBetterBlockAsm4MB
 | |
| 	MOVL 20(SP), DX
 | |
| 	INCL DX
 | |
| 	JMP  search_loop_encodeBetterBlockAsm4MB
 | |
| 
 | |
| match_length_ok_encodeBetterBlockAsm4MB:
 | |
| 	MOVL R8, 16(SP)
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm4MB
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeBetterBlockAsm4MB
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeBetterBlockAsm4MB
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   three_bytes_match_emit_encodeBetterBlockAsm4MB
 | |
| 	MOVL SI, R11
 | |
| 	SHRL $0x10, R11
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	MOVB R11, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
 | |
| 
 | |
| three_bytes_match_emit_encodeBetterBlockAsm4MB:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
 | |
| 
 | |
| two_bytes_match_emit_encodeBetterBlockAsm4MB:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_encodeBetterBlockAsm4MB
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
 | |
| 
 | |
| one_byte_match_emit_encodeBetterBlockAsm4MB:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeBetterBlockAsm4MB:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x04
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4
 | |
| 	CMPQ R9, $0x08
 | |
| 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL -4(R10)(R9*1), R10
 | |
| 	MOVL R11, (CX)
 | |
| 	MOVL R10, -4(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm4MB
 | |
| 
 | |
| memmove_long_match_emit_encodeBetterBlockAsm4MB:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
 | |
| 	ADDL R12, DX
 | |
| 	ADDL $0x04, R12
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL R8, $0x00010000
 | |
| 	JB   two_byte_offset_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	MOVB $0xff, (CX)
 | |
| 	MOVL R8, 1(CX)
 | |
| 	LEAL -64(R12), R12
 | |
| 	ADDQ $0x05, CX
 | |
| 	CMPL R12, $0x04
 | |
| 	JB   four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
 | |
| 	CMPL R12, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy
 | |
| 	LEAL -65536(R12), R12
 | |
| 	MOVL R12, R8
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	SARL $0x10, R8
 | |
| 	MOVB R8, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
 | |
| 	TESTL R12, R12
 | |
| 	JZ    match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 	XORL  SI, SI
 | |
| 	LEAL  -1(SI)(R12*4), R12
 | |
| 	MOVB  R12, (CX)
 | |
| 	MOVL  R8, 1(CX)
 | |
| 	ADDQ  $0x05, CX
 | |
| 	JMP   match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	MOVL $0x00000001, SI
 | |
| 	LEAL 16(SI), SI
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, R12
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(R12), R12
 | |
| 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
 | |
| 	CMPL R12, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
 | |
| 	LEAL -65536(R12), R12
 | |
| 	MOVL R12, R8
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	SARL $0x10, R8
 | |
| 	MOVB R8, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| long_offset_short_match_nolit_encodeBetterBlockAsm4MB:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	LEAL -60(R12), R12
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
 | |
| 	CMPL R12, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
 | |
| 	LEAL -65536(R12), R12
 | |
| 	MOVL R12, R8
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	SARL $0x10, R8
 | |
| 	MOVB R8, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
 | |
| 	MOVL R12, SI
 | |
| 	SHLL $0x02, SI
 | |
| 	CMPL R12, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
 | |
| 	LEAL -15(SI), SI
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
 | |
| 	LEAL -2(SI), SI
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| match_is_repeat_encodeBetterBlockAsm4MB:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 	MOVL SI, R11
 | |
| 	SHRL $0x10, R11
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	MOVB R11, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 
 | |
| three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 
 | |
| two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 
 | |
| one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x04
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4
 | |
| 	CMPQ R9, $0x08
 | |
| 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL -4(R10)(R9*1), R10
 | |
| 	MOVL R11, (CX)
 | |
| 	MOVL R10, -4(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
 | |
| 
 | |
| memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	ADDL R12, DX
 | |
| 	ADDL $0x04, R12
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
 | |
| 	CMPL R12, $0x00010100
 | |
| 	JB   repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB
 | |
| 	LEAL -65536(R12), R12
 | |
| 	MOVL R12, R8
 | |
| 	MOVW $0x001d, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	SARL $0x10, R8
 | |
| 	MOVB R8, 4(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 | |
| 
 | |
| repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeBetterBlockAsm4MB
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeBetterBlockAsm4MB
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeBetterBlockAsm4MB:
 | |
| 	MOVQ  $0x00cf1bbcdcbfa563, SI
 | |
| 	MOVQ  $0x9e3779b1, R8
 | |
| 	LEAQ  1(DI), DI
 | |
| 	LEAQ  -2(DX), R9
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  1(BX)(DI*1), R11
 | |
| 	MOVQ  (BX)(R9*1), R12
 | |
| 	MOVQ  1(BX)(R9*1), R13
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R8, R11
 | |
| 	SHRQ  $0x32, R11
 | |
| 	SHLQ  $0x08, R12
 | |
| 	IMULQ SI, R12
 | |
| 	SHRQ  $0x2f, R12
 | |
| 	SHLQ  $0x20, R13
 | |
| 	IMULQ R8, R13
 | |
| 	SHRQ  $0x32, R13
 | |
| 	LEAQ  1(DI), R8
 | |
| 	LEAQ  1(R9), R14
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R9, (AX)(R12*4)
 | |
| 	MOVL  R8, 524288(AX)(R11*4)
 | |
| 	MOVL  R14, 524288(AX)(R13*4)
 | |
| 	LEAQ  1(R9)(DI*1), R8
 | |
| 	SHRQ  $0x01, R8
 | |
| 	ADDQ  $0x01, DI
 | |
| 	SUBQ  $0x01, R9
 | |
| 
 | |
| index_loop_encodeBetterBlockAsm4MB:
 | |
| 	CMPQ  R8, R9
 | |
| 	JAE   search_loop_encodeBetterBlockAsm4MB
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	SHLQ  $0x08, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x2f, R11
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R8, (AX)(R11*4)
 | |
| 	ADDQ  $0x02, DI
 | |
| 	ADDQ  $0x02, R8
 | |
| 	JMP   index_loop_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_remainder_encodeBetterBlockAsm4MB:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 4(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeBetterBlockAsm4MB
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeBetterBlockAsm4MB:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 	CMPL DX, $0x00010000
 | |
| 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 	MOVL DX, BX
 | |
| 	SHRL $0x10, BX
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	MOVB BL, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 
 | |
| three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 
 | |
| two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 
 | |
| one_byte_emit_remainder_encodeBetterBlockAsm4MB:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeBetterBlockAsm4MB:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
 | |
| 
 | |
| memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeBetterBlockAsm12B(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00000280, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeBetterBlockAsm12B:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeBetterBlockAsm12B
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -6(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  $0x00000000, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeBetterBlockAsm12B:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x06, SI
 | |
| 	LEAL  1(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeBetterBlockAsm12B
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9
 | |
| 	MOVQ  $0x9e3779b1, SI
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x34, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  65536(AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	MOVL  DX, 65536(AX)(R11*4)
 | |
| 	MOVQ  (BX)(SI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	CMPQ  R10, DI
 | |
| 	JEQ   candidate_match_encodeBetterBlockAsm12B
 | |
| 	CMPQ  R11, DI
 | |
| 	JNE   no_short_found_encodeBetterBlockAsm12B
 | |
| 	MOVL  R8, SI
 | |
| 	JMP   candidate_match_encodeBetterBlockAsm12B
 | |
| 
 | |
| no_short_found_encodeBetterBlockAsm12B:
 | |
| 	CMPL R10, DI
 | |
| 	JEQ  candidate_match_encodeBetterBlockAsm12B
 | |
| 	CMPL R11, DI
 | |
| 	JEQ  candidateS_match_encodeBetterBlockAsm12B
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeBetterBlockAsm12B
 | |
| 
 | |
| candidateS_match_encodeBetterBlockAsm12B:
 | |
| 	SHRQ  $0x08, DI
 | |
| 	MOVQ  DI, R10
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	INCL  DX
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   candidate_match_encodeBetterBlockAsm12B
 | |
| 	DECL  DX
 | |
| 	MOVL  R8, SI
 | |
| 
 | |
| candidate_match_encodeBetterBlockAsm12B:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| match_extend_back_loop_encodeBetterBlockAsm12B:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeBetterBlockAsm12B
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeBetterBlockAsm12B
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeBetterBlockAsm12B
 | |
| 	JMP  match_extend_back_loop_encodeBetterBlockAsm12B
 | |
| 
 | |
| match_extend_back_end_encodeBetterBlockAsm12B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 3(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeBetterBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeBetterBlockAsm12B:
 | |
| 	MOVL DX, DI
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm12B
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	MOVQ 8(R9)(R12*1), R13
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
 | |
| 	XORQ 8(R10)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  match_nolit_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeBetterBlockAsm12B:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm12B
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  match_nolit_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeBetterBlockAsm12B:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm12B
 | |
| 	MOVL (R9)(R12*1), R11
 | |
| 	CMPL (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm12B
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeBetterBlockAsm12B:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm12B
 | |
| 	JB   match_nolit_end_encodeBetterBlockAsm12B
 | |
| 	MOVW (R9)(R12*1), R11
 | |
| 	CMPW (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm12B
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   match_nolit_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeBetterBlockAsm12B:
 | |
| 	MOVB (R9)(R12*1), R11
 | |
| 	CMPB (R10)(R12*1), R11
 | |
| 	JNE  match_nolit_end_encodeBetterBlockAsm12B
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| match_nolit_end_encodeBetterBlockAsm12B:
 | |
| 	MOVL DX, R8
 | |
| 	SUBL SI, R8
 | |
| 
 | |
| 	// Check if repeat
 | |
| 	CMPL 16(SP), R8
 | |
| 	JEQ  match_is_repeat_encodeBetterBlockAsm12B
 | |
| 	MOVL R8, 16(SP)
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm12B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeBetterBlockAsm12B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeBetterBlockAsm12B
 | |
| 	JB   three_bytes_match_emit_encodeBetterBlockAsm12B
 | |
| 
 | |
| three_bytes_match_emit_encodeBetterBlockAsm12B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm12B
 | |
| 
 | |
| two_bytes_match_emit_encodeBetterBlockAsm12B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_encodeBetterBlockAsm12B
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm12B
 | |
| 
 | |
| one_byte_match_emit_encodeBetterBlockAsm12B:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeBetterBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x04
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4
 | |
| 	CMPQ R9, $0x08
 | |
| 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL -4(R10)(R9*1), R10
 | |
| 	MOVL R11, (CX)
 | |
| 	MOVL R10, -4(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeBetterBlockAsm12B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm12B
 | |
| 
 | |
| memmove_long_match_emit_encodeBetterBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeBetterBlockAsm12B:
 | |
| 	ADDL R12, DX
 | |
| 	ADDL $0x04, R12
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm12B
 | |
| 	MOVL $0x00000001, SI
 | |
| 	LEAL 16(SI), SI
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, R12
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(R12), R12
 | |
| 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| long_offset_short_match_nolit_encodeBetterBlockAsm12B:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	LEAL -60(R12), R12
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B:
 | |
| 	MOVL R12, SI
 | |
| 	SHLL $0x02, SI
 | |
| 	CMPL R12, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm12B
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm12B
 | |
| 	LEAL -15(SI), SI
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeBetterBlockAsm12B:
 | |
| 	LEAL -2(SI), SI
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| match_is_repeat_encodeBetterBlockAsm12B:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 
 | |
| three_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 
 | |
| two_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 
 | |
| one_byte_match_emit_repeat_encodeBetterBlockAsm12B:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_repeat_encodeBetterBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x04
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4
 | |
| 	CMPQ R9, $0x08
 | |
| 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL -4(R10)(R9*1), R10
 | |
| 	MOVL R11, (CX)
 | |
| 	MOVL R10, -4(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
 | |
| 
 | |
| memmove_long_match_emit_repeat_encodeBetterBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B:
 | |
| 	ADDL R12, DX
 | |
| 	ADDL $0x04, R12
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 | |
| 
 | |
| repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeBetterBlockAsm12B:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeBetterBlockAsm12B
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeBetterBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeBetterBlockAsm12B:
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, SI
 | |
| 	MOVQ  $0x9e3779b1, R8
 | |
| 	LEAQ  1(DI), DI
 | |
| 	LEAQ  -2(DX), R9
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  1(BX)(DI*1), R11
 | |
| 	MOVQ  (BX)(R9*1), R12
 | |
| 	MOVQ  1(BX)(R9*1), R13
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R8, R11
 | |
| 	SHRQ  $0x34, R11
 | |
| 	SHLQ  $0x10, R12
 | |
| 	IMULQ SI, R12
 | |
| 	SHRQ  $0x32, R12
 | |
| 	SHLQ  $0x20, R13
 | |
| 	IMULQ R8, R13
 | |
| 	SHRQ  $0x34, R13
 | |
| 	LEAQ  1(DI), R8
 | |
| 	LEAQ  1(R9), R14
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R9, (AX)(R12*4)
 | |
| 	MOVL  R8, 65536(AX)(R11*4)
 | |
| 	MOVL  R14, 65536(AX)(R13*4)
 | |
| 	LEAQ  1(R9)(DI*1), R8
 | |
| 	SHRQ  $0x01, R8
 | |
| 	ADDQ  $0x01, DI
 | |
| 	SUBQ  $0x01, R9
 | |
| 
 | |
| index_loop_encodeBetterBlockAsm12B:
 | |
| 	CMPQ  R8, R9
 | |
| 	JAE   search_loop_encodeBetterBlockAsm12B
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	SHLQ  $0x10, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x32, R11
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R8, (AX)(R11*4)
 | |
| 	ADDQ  $0x02, DI
 | |
| 	ADDQ  $0x02, R8
 | |
| 	JMP   index_loop_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_remainder_encodeBetterBlockAsm12B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeBetterBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeBetterBlockAsm12B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeBetterBlockAsm12B
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeBetterBlockAsm12B
 | |
| 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm12B
 | |
| 
 | |
| three_bytes_emit_remainder_encodeBetterBlockAsm12B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm12B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeBetterBlockAsm12B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeBetterBlockAsm12B
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm12B
 | |
| 
 | |
| one_byte_emit_remainder_encodeBetterBlockAsm12B:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeBetterBlockAsm12B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeBetterBlockAsm12B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeBetterBlockAsm12B:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeBetterBlockAsm10B(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x000000a0, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeBetterBlockAsm10B:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeBetterBlockAsm10B
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -6(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  $0x00000000, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeBetterBlockAsm10B:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x05, SI
 | |
| 	LEAL  1(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeBetterBlockAsm10B
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9
 | |
| 	MOVQ  $0x9e3779b1, SI
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x34, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x36, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  16384(AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	MOVL  DX, 16384(AX)(R11*4)
 | |
| 	MOVQ  (BX)(SI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	CMPQ  R10, DI
 | |
| 	JEQ   candidate_match_encodeBetterBlockAsm10B
 | |
| 	CMPQ  R11, DI
 | |
| 	JNE   no_short_found_encodeBetterBlockAsm10B
 | |
| 	MOVL  R8, SI
 | |
| 	JMP   candidate_match_encodeBetterBlockAsm10B
 | |
| 
 | |
| no_short_found_encodeBetterBlockAsm10B:
 | |
| 	CMPL R10, DI
 | |
| 	JEQ  candidate_match_encodeBetterBlockAsm10B
 | |
| 	CMPL R11, DI
 | |
| 	JEQ  candidateS_match_encodeBetterBlockAsm10B
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeBetterBlockAsm10B
 | |
| 
 | |
| candidateS_match_encodeBetterBlockAsm10B:
 | |
| 	SHRQ  $0x08, DI
 | |
| 	MOVQ  DI, R10
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x34, R10
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	INCL  DX
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   candidate_match_encodeBetterBlockAsm10B
 | |
| 	DECL  DX
 | |
| 	MOVL  R8, SI
 | |
| 
 | |
| candidate_match_encodeBetterBlockAsm10B:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| match_extend_back_loop_encodeBetterBlockAsm10B:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeBetterBlockAsm10B
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeBetterBlockAsm10B
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeBetterBlockAsm10B
 | |
| 	JMP  match_extend_back_loop_encodeBetterBlockAsm10B
 | |
| 
 | |
| match_extend_back_end_encodeBetterBlockAsm10B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 3(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeBetterBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeBetterBlockAsm10B:
 | |
| 	MOVL DX, DI
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm10B
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	MOVQ 8(R9)(R12*1), R13
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
 | |
| 	XORQ 8(R10)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  match_nolit_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeBetterBlockAsm10B:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm10B
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  match_nolit_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeBetterBlockAsm10B:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm10B
 | |
| 	MOVL (R9)(R12*1), R11
 | |
| 	CMPL (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm10B
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeBetterBlockAsm10B:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm10B
 | |
| 	JB   match_nolit_end_encodeBetterBlockAsm10B
 | |
| 	MOVW (R9)(R12*1), R11
 | |
| 	CMPW (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm10B
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   match_nolit_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeBetterBlockAsm10B:
 | |
| 	MOVB (R9)(R12*1), R11
 | |
| 	CMPB (R10)(R12*1), R11
 | |
| 	JNE  match_nolit_end_encodeBetterBlockAsm10B
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| match_nolit_end_encodeBetterBlockAsm10B:
 | |
| 	MOVL DX, R8
 | |
| 	SUBL SI, R8
 | |
| 
 | |
| 	// Check if repeat
 | |
| 	CMPL 16(SP), R8
 | |
| 	JEQ  match_is_repeat_encodeBetterBlockAsm10B
 | |
| 	MOVL R8, 16(SP)
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm10B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeBetterBlockAsm10B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeBetterBlockAsm10B
 | |
| 	JB   three_bytes_match_emit_encodeBetterBlockAsm10B
 | |
| 
 | |
| three_bytes_match_emit_encodeBetterBlockAsm10B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm10B
 | |
| 
 | |
| two_bytes_match_emit_encodeBetterBlockAsm10B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_encodeBetterBlockAsm10B
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm10B
 | |
| 
 | |
| one_byte_match_emit_encodeBetterBlockAsm10B:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeBetterBlockAsm10B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x04
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4
 | |
| 	CMPQ R9, $0x08
 | |
| 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL -4(R10)(R9*1), R10
 | |
| 	MOVL R11, (CX)
 | |
| 	MOVL R10, -4(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeBetterBlockAsm10B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm10B
 | |
| 
 | |
| memmove_long_match_emit_encodeBetterBlockAsm10B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeBetterBlockAsm10B:
 | |
| 	ADDL R12, DX
 | |
| 	ADDL $0x04, R12
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm10B
 | |
| 	MOVL $0x00000001, SI
 | |
| 	LEAL 16(SI), SI
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, R12
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(R12), R12
 | |
| 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| long_offset_short_match_nolit_encodeBetterBlockAsm10B:
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	LEAL -60(R12), R12
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B:
 | |
| 	MOVL R12, SI
 | |
| 	SHLL $0x02, SI
 | |
| 	CMPL R12, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm10B
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm10B
 | |
| 	LEAL -15(SI), SI
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeBetterBlockAsm10B:
 | |
| 	LEAL -2(SI), SI
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| match_is_repeat_encodeBetterBlockAsm10B:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 
 | |
| three_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 
 | |
| two_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 
 | |
| one_byte_match_emit_repeat_encodeBetterBlockAsm10B:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_repeat_encodeBetterBlockAsm10B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x04
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4
 | |
| 	CMPQ R9, $0x08
 | |
| 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL -4(R10)(R9*1), R10
 | |
| 	MOVL R11, (CX)
 | |
| 	MOVL R10, -4(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
 | |
| 
 | |
| memmove_long_match_emit_repeat_encodeBetterBlockAsm10B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B:
 | |
| 	ADDL R12, DX
 | |
| 	ADDL $0x04, R12
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX)
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX)
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B:
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 | |
| 
 | |
| repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeBetterBlockAsm10B
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeBetterBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeBetterBlockAsm10B:
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, SI
 | |
| 	MOVQ  $0x9e3779b1, R8
 | |
| 	LEAQ  1(DI), DI
 | |
| 	LEAQ  -2(DX), R9
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  1(BX)(DI*1), R11
 | |
| 	MOVQ  (BX)(R9*1), R12
 | |
| 	MOVQ  1(BX)(R9*1), R13
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x34, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R8, R11
 | |
| 	SHRQ  $0x36, R11
 | |
| 	SHLQ  $0x10, R12
 | |
| 	IMULQ SI, R12
 | |
| 	SHRQ  $0x34, R12
 | |
| 	SHLQ  $0x20, R13
 | |
| 	IMULQ R8, R13
 | |
| 	SHRQ  $0x36, R13
 | |
| 	LEAQ  1(DI), R8
 | |
| 	LEAQ  1(R9), R14
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R9, (AX)(R12*4)
 | |
| 	MOVL  R8, 16384(AX)(R11*4)
 | |
| 	MOVL  R14, 16384(AX)(R13*4)
 | |
| 	LEAQ  1(R9)(DI*1), R8
 | |
| 	SHRQ  $0x01, R8
 | |
| 	ADDQ  $0x01, DI
 | |
| 	SUBQ  $0x01, R9
 | |
| 
 | |
| index_loop_encodeBetterBlockAsm10B:
 | |
| 	CMPQ  R8, R9
 | |
| 	JAE   search_loop_encodeBetterBlockAsm10B
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x34, R10
 | |
| 	SHLQ  $0x10, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x34, R11
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R8, (AX)(R11*4)
 | |
| 	ADDQ  $0x02, DI
 | |
| 	ADDQ  $0x02, R8
 | |
| 	JMP   index_loop_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_remainder_encodeBetterBlockAsm10B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeBetterBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeBetterBlockAsm10B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeBetterBlockAsm10B
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeBetterBlockAsm10B
 | |
| 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm10B
 | |
| 
 | |
| three_bytes_emit_remainder_encodeBetterBlockAsm10B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm10B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeBetterBlockAsm10B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeBetterBlockAsm10B
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm10B
 | |
| 
 | |
| one_byte_emit_remainder_encodeBetterBlockAsm10B:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeBetterBlockAsm10B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeBetterBlockAsm10B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
 | |
| // Requires: BMI, SSE2. Encodes src into dst using tmp as a 5120-byte scratch area holding two hash tables; returns the number of bytes written to dst, or 0 when the output would reach the dst limit computed in the prologue.
 | |
| TEXT ·encodeBetterBlockAsm8B(SB), $24-64 // frame slots: (SP)=dst limit, 8(SP)=search limit, 12(SP)=start of pending literals, 16(SP)=previous match offset, 20(SP)=next search position
 | |
| 	MOVQ tmp+48(FP), AX // AX = base of the hash tables in tmp
 | |
| 	MOVQ dst_base+0(FP), CX // CX = dst write cursor
 | |
| 	MOVQ $0x00000028, DX // 40 iterations x 128 bytes = 5120 bytes to clear
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeBetterBlockAsm8B: // zero tmp, 128 bytes per iteration
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeBetterBlockAsm8B
 | |
| 	MOVL  $0x00000000, 12(SP) // no literals pending: next byte to emit is src[0]
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -6(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP) // 8(SP) = src_len-8; searching stops once the position reaches it (8-byte loads stay in bounds)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX // BX = src_len - 6 - src_len/32
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP) // (SP) = dst_base + src_len - 6 - src_len/32; reaching it aborts with return value 0
 | |
| 	MOVL  $0x00000001, DX // DX = current src position, starting at 1
 | |
| 	MOVL  $0x00000000, 16(SP) // previous match offset; 0 never equals a real offset
 | |
| 	MOVQ  src_base+24(FP), BX // BX = src base
 | |
| 
 | |
| search_loop_encodeBetterBlockAsm8B: // probe both hash tables for a match at position DX
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x04, SI // extra skip: one byte per 16 bytes of pending literals
 | |
| 	LEAL  1(DX)(SI*1), SI // SI = position to try next if nothing matches here
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeBetterBlockAsm8B
 | |
| 	MOVQ  (BX)(DX*1), DI // DI = 8 bytes of src at the current position
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9 // multiplier for the hash over the low 6 bytes
 | |
| 	MOVQ  $0x9e3779b1, SI // multiplier for the hash over the low 4 bytes
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHLQ  $0x10, R10 // discard the top 2 bytes
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x36, R10 // keep 10 bits: index into the 1024-entry table at AX
 | |
| 	SHLQ  $0x20, R11 // discard the top 4 bytes
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x38, R11 // keep 8 bits: index into the 256-entry table at AX+4096
 | |
| 	MOVL  (AX)(R10*4), SI // SI = candidate from the 6-byte-hash table
 | |
| 	MOVL  4096(AX)(R11*4), R8 // R8 = candidate from the 4-byte-hash table
 | |
| 	MOVL  DX, (AX)(R10*4) // record the current position in both tables
 | |
| 	MOVL  DX, 4096(AX)(R11*4)
 | |
| 	MOVQ  (BX)(SI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	CMPQ  R10, DI // 8-byte match at the first candidate?
 | |
| 	JEQ   candidate_match_encodeBetterBlockAsm8B
 | |
| 	CMPQ  R11, DI // 8-byte match at the second candidate?
 | |
| 	JNE   no_short_found_encodeBetterBlockAsm8B
 | |
| 	MOVL  R8, SI
 | |
| 	JMP   candidate_match_encodeBetterBlockAsm8B
 | |
| 
 | |
| no_short_found_encodeBetterBlockAsm8B: // no 8-byte match; fall back to 4-byte comparisons
 | |
| 	CMPL R10, DI
 | |
| 	JEQ  candidate_match_encodeBetterBlockAsm8B
 | |
| 	CMPL R11, DI
 | |
| 	JEQ  candidateS_match_encodeBetterBlockAsm8B
 | |
| 	MOVL 20(SP), DX // nothing matched: advance to the precomputed next position
 | |
| 	JMP  search_loop_encodeBetterBlockAsm8B
 | |
| 
 | |
| candidateS_match_encodeBetterBlockAsm8B: // only the 4-byte-hash candidate matched; first try the 6-byte-hash table at position+1
 | |
| 	SHRQ  $0x08, DI // DI = src bytes starting one position later
 | |
| 	MOVQ  DI, R10
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x36, R10
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	INCL  DX
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   candidate_match_encodeBetterBlockAsm8B
 | |
| 	DECL  DX // that probe failed: restore the position and use the 4-byte-hash candidate
 | |
| 	MOVL  R8, SI
 | |
| 
 | |
| candidate_match_encodeBetterBlockAsm8B: // DX = match position, SI = candidate position
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| match_extend_back_loop_encodeBetterBlockAsm8B: // grow the match backwards while the preceding bytes agree
 | |
| 	CMPL DX, DI // never extend into bytes that were already emitted
 | |
| 	JBE  match_extend_back_end_encodeBetterBlockAsm8B
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeBetterBlockAsm8B
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeBetterBlockAsm8B
 | |
| 	JMP  match_extend_back_loop_encodeBetterBlockAsm8B
 | |
| 
 | |
| match_extend_back_end_encodeBetterBlockAsm8B: // bail out if the pending literals plus 3 bytes would reach the dst limit
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 3(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeBetterBlockAsm8B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeBetterBlockAsm8B:
 | |
| 	MOVL DX, DI // DI = match start, kept for literal emission and table indexing
 | |
| 	ADDL $0x04, DX // the first 4 bytes are already known to match
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8 // R8 = src bytes remaining after the current position
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 
 | |
| 	// matchLen: R12 = number of further matching bytes between (R9) and (R10), limited by R8
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B: // compare 16 bytes per iteration
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm8B
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	MOVQ 8(R9)(R12*1), R13
 | |
| 	XORQ (R10)(R12*1), R11 // non-zero result marks the differing bits
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
 | |
| 	XORQ 8(R10)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B: // mismatch in the second 8 bytes
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13 // bit index -> byte index of the first difference
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  match_nolit_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeBetterBlockAsm8B: // fewer than 16 bytes left: try one 8-byte compare
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm8B
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B: // mismatch in the first 8 bytes
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11 // bit index -> byte index of the first difference
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  match_nolit_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeBetterBlockAsm8B: // fewer than 8 bytes left: try 4, then 2, then 1
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm8B
 | |
| 	MOVL (R9)(R12*1), R11
 | |
| 	CMPL (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm8B
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeBetterBlockAsm8B:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm8B
 | |
| 	JB   match_nolit_end_encodeBetterBlockAsm8B // nothing left to compare
 | |
| 	MOVW (R9)(R12*1), R11
 | |
| 	CMPW (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm8B
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   match_nolit_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeBetterBlockAsm8B:
 | |
| 	MOVB (R9)(R12*1), R11
 | |
| 	CMPB (R10)(R12*1), R11
 | |
| 	JNE  match_nolit_end_encodeBetterBlockAsm8B
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| match_nolit_end_encodeBetterBlockAsm8B: // R12 = match length beyond the first 4 bytes
 | |
| 	MOVL DX, R8
 | |
| 	SUBL SI, R8 // R8 = match offset (distance back to the candidate)
 | |
| 
 | |
| 	// Check if repeat: same offset as the previous match takes the repeat path
 | |
| 	CMPL 16(SP), R8
 | |
| 	JEQ  match_is_repeat_encodeBetterBlockAsm8B
 | |
| 	MOVL R8, 16(SP)
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI // pending literals are src[12(SP):DI]; skip emission when empty
 | |
| 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm8B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10 // R10 = address of the first pending literal byte
 | |
| 	SUBL SI, R9 // R9 = literal length
 | |
| 	LEAL -1(R9), SI // SI = length-1, the value stored in the literal header
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeBetterBlockAsm8B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeBetterBlockAsm8B
 | |
| 	JB   three_bytes_match_emit_encodeBetterBlockAsm8B // never taken (same flags as the JB above); execution falls through to the same label
 | |
| 
 | |
| three_bytes_match_emit_encodeBetterBlockAsm8B: // header: 0xf4 followed by a 16-bit length-1
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm8B
 | |
| 
 | |
| two_bytes_match_emit_encodeBetterBlockAsm8B: // header: 0xf0 followed by an 8-bit length-1
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40 // up to 64 bytes use the short copy, longer runs the long copy
 | |
| 	JB   memmove_match_emit_encodeBetterBlockAsm8B
 | |
| 	JMP  memmove_long_match_emit_encodeBetterBlockAsm8B
 | |
| 
 | |
| one_byte_match_emit_encodeBetterBlockAsm8B: // header: single byte (length-1)<<2
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeBetterBlockAsm8B:
 | |
| 	LEAQ (CX)(R9*1), SI // SI = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveShort: copy R9 (at most 64) bytes from (R10) to (CX) using overlapping head/tail moves
 | |
| 	CMPQ R9, $0x04
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4
 | |
| 	CMPQ R9, $0x08
 | |
| 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4: // lengths 1..4: always moves 4 bytes; CX still advances by the true length via SI
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7:
 | |
| 	MOVL (R10), R11
 | |
| 	MOVL -4(R10)(R9*1), R10
 | |
| 	MOVL R11, (CX)
 | |
| 	MOVL R10, -4(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeBetterBlockAsm8B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm8B
 | |
| 
 | |
| memmove_long_match_emit_encodeBetterBlockAsm8B:
 | |
| 	LEAQ (CX)(R9*1), SI // SI = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveLong: copy R9 (more than 64) bytes from (R10) to (CX); the middle uses aligned stores, head and tail are written last
 | |
| 	MOVOU (R10), X0 // X0..X3 = first and last 32 source bytes, stored after the loop
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14 // R14 = 64 - (CX mod 32), so CX+R14-32 is 32-byte aligned for the MOVOA stores
 | |
| 	DECQ  R13 // DECQ leaves CF alone; CF is clear from the SUBQ above
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 // taken whenever R13 is still non-zero
 | |
| 	LEAQ  -32(R10)(R14*1), R11 // NOTE(review): both jumps into this routine have length-1 >= 0x40, so the JA above looks always taken and this block appears unreachable - confirm
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: // 32 bytes per iteration: unaligned loads, aligned stores
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX) // write the saved head and tail with unaligned stores
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeBetterBlockAsm8B: // literals written; now encode the match itself
 | |
| 	ADDL R12, DX // DX = first src position after the match
 | |
| 	ADDL $0x04, R12 // R12 = full match length
 | |
| 	MOVL DX, 12(SP) // everything before DX is now emitted
 | |
| 
 | |
| 	// emitCopy: R8 = offset, R12 = length
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm8B
 | |
| 	MOVL $0x00000001, SI // length > 64 and offset < 2048: start with a 2-byte copy of length 8
 | |
| 	LEAL 16(SI), SI // SI = 17 = ((8-4)<<2) | 1
 | |
| 	MOVB R8, 1(CX) // low 8 bits of the offset
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8 // high offset bits go into the top 3 bits of the tag
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	SUBL $0x08, R12 // 8 bytes covered; the rest is emitted as a repeat
 | |
| 
 | |
| 	// emitRepeat: encode the remaining R12 bytes as a repeat of the same offset
 | |
| 	LEAL -4(R12), R12
 | |
| 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
 | |
| 	MOVL R12, SI // unreachable: follows an unconditional JMP and carries no label (through the JAE below)
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX) // 4-byte form: bytes 0x19 0x00, then a 16-bit length field
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX) // 3-byte form: bytes 0x15 0x00, then an 8-bit length field
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: // 2-byte form: tag (R12<<2)|1 followed by a zero byte
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 	XORQ SI, SI // unreachable: follows an unconditional JMP and carries no label (through the JMP below)
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| long_offset_short_match_nolit_encodeBetterBlockAsm8B: // length > 64 and offset >= 2048: 3-byte copy of length 60, remainder as repeat
 | |
| 	MOVB $0xee, (CX) // 0xee = ((60-1)<<2) | 2
 | |
| 	MOVW R8, 1(CX) // 16-bit offset
 | |
| 	LEAL -60(R12), R12
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| 	// emitRepeat: encode the remaining R12 bytes as a repeat of the same offset
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short // target is the next instruction; both outcomes continue below
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX) // 4-byte form: bytes 0x19 0x00, then a 16-bit length field
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX) // 3-byte form: bytes 0x15 0x00, then an 8-bit length field
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: // 2-byte form: tag (R12<<2)|1 followed by a zero byte
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 	XORQ SI, SI // unreachable: follows an unconditional JMP and carries no label (through the JMP below)
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: // length <= 64: a single copy element suffices
 | |
| 	MOVL R12, SI
 | |
| 	SHLL $0x02, SI
 | |
| 	CMPL R12, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm8B
 | |
| 	LEAL -15(SI), SI // 4*len-15 = ((len-4)<<2) | 1; NOTE(review): no offset < 2048 check on this path - presumably the caller bounds src so offsets fit in 11 bits, confirm
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeBetterBlockAsm8B: // 3-byte copy: tag, then 16-bit offset
 | |
| 	LEAL -2(SI), SI // 4*len-2 = ((len-1)<<2) | 2
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| match_is_repeat_encodeBetterBlockAsm8B: // same offset as the previous match: emit pending literals, then a repeat
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
 | |
| 	MOVL DI, R8
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R9 // R9 = address of the first pending literal byte
 | |
| 	SUBL SI, R8 // R8 = literal length (the offset is no longer needed on this path)
 | |
| 	LEAL -1(R8), SI // SI = length-1, the value stored in the literal header
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm8B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm8B
 | |
| 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm8B // never taken (same flags as the JB above); execution falls through to the same label
 | |
| 
 | |
| three_bytes_match_emit_repeat_encodeBetterBlockAsm8B: // header: 0xf4 followed by a 16-bit length-1
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
 | |
| 
 | |
| two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: // header: 0xf0 followed by an 8-bit length-1
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm8B
 | |
| 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
 | |
| 
 | |
| one_byte_match_emit_repeat_encodeBetterBlockAsm8B: // header: single byte (length-1)<<2
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_repeat_encodeBetterBlockAsm8B:
 | |
| 	LEAQ (CX)(R8*1), SI // SI = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveShort: copy R8 (at most 64) bytes from (R9) to (CX) using overlapping head/tail moves
 | |
| 	CMPQ R8, $0x04
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4
 | |
| 	CMPQ R8, $0x08
 | |
| 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7
 | |
| 	CMPQ R8, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16
 | |
| 	CMPQ R8, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4: // lengths 1..4: always moves 4 bytes; CX still advances by the true length via SI
 | |
| 	MOVL (R9), R10
 | |
| 	MOVL R10, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7:
 | |
| 	MOVL (R9), R10
 | |
| 	MOVL -4(R9)(R8*1), R9
 | |
| 	MOVL R10, (CX)
 | |
| 	MOVL R9, -4(CX)(R8*1)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16:
 | |
| 	MOVQ (R9), R10
 | |
| 	MOVQ -8(R9)(R8*1), R9
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ R9, -8(CX)(R8*1)
 | |
| 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU -16(R9)(R8*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R8*1)
 | |
| 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
 | |
| 
 | |
| memmove_long_match_emit_repeat_encodeBetterBlockAsm8B:
 | |
| 	LEAQ (CX)(R8*1), SI // SI = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveLong: copy R8 (more than 64) bytes from (R9) to (CX); the middle uses aligned stores, head and tail are written last
 | |
| 	MOVOU (R9), X0 // X0..X3 = first and last 32 source bytes, stored after the loop
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVQ  R8, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R13
 | |
| 	SUBQ  R10, R13 // R13 = 64 - (CX mod 32), so CX+R13-32 is 32-byte aligned for the MOVOA stores
 | |
| 	DECQ  R11 // DECQ leaves CF alone; CF is clear from the SUBQ above
 | |
| 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 // taken whenever R11 is still non-zero
 | |
| 	LEAQ  -32(R9)(R13*1), R10 // NOTE(review): both jumps into this routine have length-1 >= 0x40, so the JA above looks always taken and this block appears unreachable - confirm
 | |
| 	LEAQ  -32(CX)(R13*1), R14
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R14)
 | |
| 	MOVOA X5, 16(R14)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R13
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: // 32 bytes per iteration: unaligned loads, aligned stores
 | |
| 	MOVOU -32(R9)(R13*1), X4
 | |
| 	MOVOU -16(R9)(R13*1), X5
 | |
| 	MOVOA X4, -32(CX)(R13*1)
 | |
| 	MOVOA X5, -16(CX)(R13*1)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	CMPQ  R8, R13
 | |
| 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX) // write the saved head and tail with unaligned stores
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B: // literals written; now encode the repeat
 | |
| 	ADDL R12, DX // DX = first src position after the match
 | |
| 	ADDL $0x04, R12 // R12 = full match length
 | |
| 	MOVL DX, 12(SP) // everything before DX is now emitted
 | |
| 
 | |
| 	// emitRepeat: encode R12 bytes as a repeat of the previous offset
 | |
| 	MOVL R12, SI
 | |
| 	LEAL -4(R12), R12
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B // target is the next instruction; both outcomes continue below
 | |
| 
 | |
| cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B:
 | |
| 	CMPL R12, $0x00000104
 | |
| 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B
 | |
| 	LEAL -256(R12), R12
 | |
| 	MOVW $0x0019, (CX) // 4-byte form: bytes 0x19 0x00, then a 16-bit length field
 | |
| 	MOVW R12, 2(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B:
 | |
| 	LEAL -4(R12), R12
 | |
| 	MOVW $0x0015, (CX) // 3-byte form: bytes 0x15 0x00, then an 8-bit length field
 | |
| 	MOVB R12, 2(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 
 | |
| repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B: // 2-byte form: tag (R12<<2)|1 followed by a zero byte
 | |
| 	SHLL $0x02, R12
 | |
| 	ORL  $0x01, R12
 | |
| 	MOVW R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 | |
| 	XORQ SI, SI // unreachable: follows an unconditional JMP and carries no label (through the ADDQ below)
 | |
| 	LEAL 1(SI)(R12*4), R12
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SARL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, R12
 | |
| 	MOVB R12, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeBetterBlockAsm8B: // match encoded: stop searching near the end of src, or abort when dst is full
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeBetterBlockAsm8B
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeBetterBlockAsm8B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeBetterBlockAsm8B: // add positions inside the match to the hash tables before resuming the search
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, SI // 6-byte hash multiplier
 | |
| 	MOVQ  $0x9e3779b1, R8 // 4-byte hash multiplier
 | |
| 	LEAQ  1(DI), DI // DI = match start + 1
 | |
| 	LEAQ  -2(DX), R9 // R9 = match end - 2
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  1(BX)(DI*1), R11
 | |
| 	MOVQ  (BX)(R9*1), R12
 | |
| 	MOVQ  1(BX)(R9*1), R13
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x36, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R8, R11
 | |
| 	SHRQ  $0x38, R11
 | |
| 	SHLQ  $0x10, R12
 | |
| 	IMULQ SI, R12
 | |
| 	SHRQ  $0x36, R12
 | |
| 	SHLQ  $0x20, R13
 | |
| 	IMULQ R8, R13
 | |
| 	SHRQ  $0x38, R13
 | |
| 	LEAQ  1(DI), R8
 | |
| 	LEAQ  1(R9), R14
 | |
| 	MOVL  DI, (AX)(R10*4) // 6-byte-hash table: positions DI and R9
 | |
| 	MOVL  R9, (AX)(R12*4)
 | |
| 	MOVL  R8, 4096(AX)(R11*4) // 4-byte-hash table: positions DI+1 and R9+1
 | |
| 	MOVL  R14, 4096(AX)(R13*4)
 | |
| 	LEAQ  1(R9)(DI*1), R8
 | |
| 	SHRQ  $0x01, R8 // R8 = midpoint (DI+R9+1)/2
 | |
| 	ADDQ  $0x01, DI
 | |
| 	SUBQ  $0x01, R9
 | |
| 
 | |
| index_loop_encodeBetterBlockAsm8B: // index every second position from both DI and the midpoint R8 until R8 reaches R9
 | |
| 	CMPQ  R8, R9
 | |
| 	JAE   search_loop_encodeBetterBlockAsm8B
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x36, R10
 | |
| 	SHLQ  $0x10, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x36, R11
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R8, (AX)(R11*4)
 | |
| 	ADDQ  $0x02, DI
 | |
| 	ADDQ  $0x02, R8
 | |
| 	JMP   index_loop_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_remainder_encodeBetterBlockAsm8B: // search finished: check that the trailing literals plus 3 bytes stay below the dst limit
 | |
| 	MOVQ src_len+32(FP), AX // AX is reused from here on; the hash tables are no longer needed
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeBetterBlockAsm8B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeBetterBlockAsm8B: // emit src[12(SP):src_len] as one literal run, if non-empty
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX // AX = address of the first remaining literal byte
 | |
| 	SUBL DX, SI // SI = literal length
 | |
| 	LEAL -1(SI), DX // DX = length-1, the value stored in the literal header
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeBetterBlockAsm8B
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeBetterBlockAsm8B
 | |
| 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm8B // never taken (same flags as the JB above); execution falls through to the same label
 | |
| 
 | |
| three_bytes_emit_remainder_encodeBetterBlockAsm8B: // header: 0xf4 followed by a 16-bit length-1
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm8B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeBetterBlockAsm8B: // header: 0xf0 followed by an 8-bit length-1
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeBetterBlockAsm8B
 | |
| 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm8B
 | |
| 
 | |
| one_byte_emit_remainder_encodeBetterBlockAsm8B: // header: single byte (length-1)<<2
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeBetterBlockAsm8B:
 | |
| 	LEAQ (CX)(SI*1), DX // DX = dst cursor after the literals
 | |
| 	MOVL SI, BX // BX = length; the src base is no longer needed
 | |
| 
 | |
| 	// genMemMoveShort: copy BX (1..64) bytes from (AX) to (CX); lengths 1..3 get exact-size moves here, unlike the mid-block copies
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeBetterBlockAsm8B:
 | |
| 	LEAQ (CX)(SI*1), DX // DX = dst cursor after the literals
 | |
| 	MOVL SI, BX // BX = length; the src base is no longer needed
 | |
| 
 | |
| 	// genMemMoveLong: copy BX (more than 64) bytes from (AX) to (CX); the middle uses aligned stores, head and tail are written last
 | |
| 	MOVOU (AX), X0 // X0..X3 = first and last 32 source bytes, stored after the loop
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8 // R8 = 64 - (CX mod 32), so CX+R8-32 is 32-byte aligned for the MOVOA stores
 | |
| 	DECQ  DI // DECQ leaves CF alone; CF is clear from the SUBQ above
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 // taken whenever DI is still non-zero
 | |
| 	LEAQ  -32(AX)(R8*1), SI // NOTE(review): both jumps into this routine have length-1 >= 0x40, so the JA above looks always taken and this block appears unreachable - confirm
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: // 32 bytes per iteration: unaligned loads, aligned stores
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX) // write the saved head and tail with unaligned stores
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeBetterBlockAsm8B: // return the number of bytes written: CX - dst_base
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
 | |
| // Requires: BMI, SSE2. Encodes src into dst using tmp as a match table (cleared on entry); returns the number of bytes written to dst, or 0 when one of the dst limit checks below fails.
 | |
| TEXT ·encodeSnappyBlockAsm(SB), $24-64 // 24-byte local frame, 64 bytes of arguments + result
 | |
| 	MOVQ tmp+48(FP), AX // AX = tmp, base of the match table
 | |
| 	MOVQ dst_base+0(FP), CX // CX = dst write pointer
 | |
| 	MOVQ $0x00000200, DX // 0x200 iterations x 0x80 bytes = 64 KiB to clear
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeSnappyBlockAsm:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeSnappyBlockAsm
 | |
| 	MOVL  $0x00000000, 12(SP) // 12(SP) = start of the literal bytes not yet emitted
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP) // 8(SP) = src_len - 8: positions at/after this are not searched
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP) // (SP) = dst + src_len - 9 - src_len/32: output pointer limit
 | |
| 	MOVL  $0x00000001, DX // DX = current source position, starts at 1
 | |
| 	MOVL  DX, 16(SP) // 16(SP) = current repeat offset, starts at 1
 | |
| 	MOVQ  src_base+24(FP), BX // BX = src base
 | |
| 
 | |
| search_loop_encodeSnappyBlockAsm:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x06, SI
 | |
| 	LEAL  4(DX)(SI*1), SI // SI = DX + 4 + (DX - pending literal start)/64: next position to probe
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeSnappyBlockAsm // next probe would reach the search limit
 | |
| 	MOVQ  (BX)(DX*1), DI // DI = 8 source bytes at DX
 | |
| 	MOVL  SI, 20(SP) // 20(SP) = saved next probe position
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9 // hash multiplier
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10 // R10 = 14-bit hash of the 6 bytes at DX
 | |
| 	SHLQ  $0x10, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x32, R11 // R11 = 14-bit hash of the 6 bytes at DX+1
 | |
| 	MOVL  (AX)(R10*4), SI // SI = first candidate position
 | |
| 	MOVL  (AX)(R11*4), R8 // R8 = second candidate position
 | |
| 	MOVL  DX, (AX)(R10*4) // record DX under its hash
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4) // record DX+1 under its hash
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10 // R10 = 14-bit hash of the 6 bytes at DX+2
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11 // R11 = 4 bytes at DX+1 - repeat offset
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11 // against the 4 bytes at DX+1
 | |
| 	JNE   no_repeat_found_encodeSnappyBlockAsm
 | |
| 	LEAL  1(DX), DI // DI = start of the repeat match
 | |
| 	MOVL  12(SP), SI
 | |
| 	MOVL  DI, R8
 | |
| 	SUBL  16(SP), R8 // R8 = matching position one repeat offset back
 | |
| 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm
 | |
| 
 | |
| repeat_extend_back_loop_encodeSnappyBlockAsm:
 | |
| 	CMPL DI, SI
 | |
| 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm // do not extend into already emitted input
 | |
| 	MOVB -1(BX)(R8*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm
 | |
| 	LEAL -1(DI), DI
 | |
| 	DECL R8
 | |
| 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm
 | |
| 
 | |
| repeat_extend_back_end_encodeSnappyBlockAsm:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	LEAQ 5(CX)(SI*1), SI // dst pointer after pending literals plus 5 bytes
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_encodeSnappyBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would pass the output limit: return 0
 | |
| 	RET
 | |
| 
 | |
| repeat_dst_size_check_encodeSnappyBlockAsm:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm // no pending literal bytes
 | |
| 	MOVL DI, R8
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R9 // R9 = address of first literal byte
 | |
| 	SUBL SI, R8 // R8 = literal length
 | |
| 	LEAL -1(R8), SI // SI = length - 1, the value stored in the tag
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm
 | |
| 	CMPL SI, $0x01000000
 | |
| 	JB   four_bytes_repeat_emit_encodeSnappyBlockAsm
 | |
| 	MOVB $0xfc, (CX) // tag 0xfc followed by a 4-byte length-1
 | |
| 	MOVL SI, 1(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| four_bytes_repeat_emit_encodeSnappyBlockAsm:
 | |
| 	MOVL SI, R10
 | |
| 	SHRL $0x10, R10
 | |
| 	MOVB $0xf8, (CX) // tag 0xf8 followed by a 3-byte length-1
 | |
| 	MOVW SI, 1(CX)
 | |
| 	MOVB R10, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| three_bytes_repeat_emit_encodeSnappyBlockAsm:
 | |
| 	MOVB $0xf4, (CX) // tag 0xf4 followed by a 2-byte length-1
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| two_bytes_repeat_emit_encodeSnappyBlockAsm:
 | |
| 	MOVB $0xf0, (CX) // tag 0xf0 followed by a 1-byte length-1
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_encodeSnappyBlockAsm // at most 64 bytes: short copy
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| one_byte_repeat_emit_encodeSnappyBlockAsm:
 | |
| 	SHLB $0x02, SI // single tag byte: (length-1) << 2
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_repeat_emit_encodeSnappyBlockAsm:
 | |
| 	LEAQ (CX)(R8*1), SI // SI = dst pointer after the copy
 | |
| 
 | |
| 	// genMemMoveShort: copy R8 bytes (at most 64 here) from (R9) to (CX), dispatching on size
 | |
| 	CMPQ R8, $0x08
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8
 | |
| 	CMPQ R8, $0x10
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16
 | |
| 	CMPQ R8, $0x20
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8:
 | |
| 	MOVQ (R9), R10 // always moves a full 8 bytes, even when R8 < 8
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16:
 | |
| 	MOVQ (R9), R10
 | |
| 	MOVQ -8(R9)(R8*1), R9 // last 8 bytes; may overlap the first 8
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ R9, -8(CX)(R8*1)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU -16(R9)(R8*1), X1 // last 16 bytes; may overlap the first 16
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R8*1)
 | |
| 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 
 | |
| memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
 | |
| 	MOVQ SI, CX // advance dst pointer past the copied literals
 | |
| 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| memmove_long_repeat_emit_encodeSnappyBlockAsm:
 | |
| 	LEAQ (CX)(R8*1), SI // SI = dst pointer after the copy
 | |
| 
 | |
| 	// genMemMoveLong: copy R8 bytes from (R9) to (CX); head and tail go through X0-X3, the middle through 32-byte steps with aligned stores
 | |
| 	MOVOU (R9), X0 // first 32 source bytes, stored last
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2 // last 32 source bytes, stored last
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVQ  R8, R11
 | |
| 	SHRQ  $0x05, R11 // R11 = number of 32-byte chunks
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12 // R12 = 64 - (dst & 31), so CX + R12 - 32 is 32-byte aligned
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R9)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R9)(R12*1), X4
 | |
| 	MOVOU -16(R9)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R8, R12
 | |
| 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX) // unaligned head and tail written after the aligned middle
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
 | |
| 	ADDL $0x05, DX // step past DX+1 .. DX+4, the 4 bytes already compared
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8 // R8 = source bytes remaining from DX
 | |
| 	LEAQ (BX)(DX*1), R9 // R9 = address of current position
 | |
| 	LEAQ (BX)(SI*1), SI // SI = address one repeat offset back
 | |
| 
 | |
| 	// matchLen: R11 = number of equal bytes at (R9) and (SI), bounded by R8
 | |
| 	XORL R11, R11
 | |
| 
 | |
| matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	MOVQ 8(R9)(R11*1), R12
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
 | |
| 	XORQ 8(SI)(R11*1), R12
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R11), R11
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm
 | |
| 
 | |
| matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R12, R12
 | |
| 
 | |
| #else
 | |
| 	BSFQ R12, R12
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R12 // lowest differing bit index / 8 = count of equal bytes in this word
 | |
| 	LEAL 8(R11)(R12*1), R11
 | |
| 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm
 | |
| 
 | |
| matchlen_match8_repeat_extend_encodeSnappyBlockAsm:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R11), R11
 | |
| 	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm
 | |
| 
 | |
| matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R10, R10
 | |
| 
 | |
| #else
 | |
| 	BSFQ R10, R10
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R10 // lowest differing bit index / 8 = count of equal bytes in this word
 | |
| 	LEAL (R11)(R10*1), R11
 | |
| 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm
 | |
| 
 | |
| matchlen_match4_repeat_extend_encodeSnappyBlockAsm:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm
 | |
| 	MOVL (R9)(R11*1), R10
 | |
| 	CMPL (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R11), R11
 | |
| 
 | |
| matchlen_match2_repeat_extend_encodeSnappyBlockAsm:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm
 | |
| 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm // nothing left to compare
 | |
| 	MOVW (R9)(R11*1), R10
 | |
| 	CMPW (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm
 | |
| 	LEAL 2(R11), R11
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm
 | |
| 
 | |
| matchlen_match1_repeat_extend_encodeSnappyBlockAsm:
 | |
| 	MOVB (R9)(R11*1), R10
 | |
| 	CMPB (SI)(R11*1), R10
 | |
| 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm
 | |
| 	LEAL 1(R11), R11
 | |
| 
 | |
| repeat_extend_forward_end_encodeSnappyBlockAsm:
 | |
| 	ADDL R11, DX // DX = end of the match
 | |
| 	MOVL DX, SI
 | |
| 	SUBL DI, SI // SI = copy length (end - match start)
 | |
| 	MOVL 16(SP), DI // DI = copy offset (the repeat offset)
 | |
| 
 | |
| 	// emitCopy: write copy ops for length SI at offset DI to (CX)
 | |
| 	CMPL DI, $0x00010000
 | |
| 	JB   two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
 | |
| 
 | |
| four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
 | |
| 	MOVB $0xff, (CX) // tag 0xff + 4-byte offset; accounts for 64 bytes of the match
 | |
| 	MOVL DI, 1(CX)
 | |
| 	LEAL -64(SI), SI
 | |
| 	ADDQ $0x05, CX
 | |
| 	CMPL SI, $0x04
 | |
| 	JB   four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
 | |
| 	JMP  four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm
 | |
| 
 | |
| four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
 | |
| 	TESTL SI, SI
 | |
| 	JZ    repeat_end_emit_encodeSnappyBlockAsm
 | |
| 	XORL  R8, R8
 | |
| 	LEAL  -1(R8)(SI*4), SI // tag = length*4 - 1, i.e. ((length-1) << 2) | 3
 | |
| 	MOVB  SI, (CX)
 | |
| 	MOVL  DI, 1(CX)
 | |
| 	ADDQ  $0x05, CX
 | |
| 	JMP   repeat_end_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
 | |
| 	MOVB $0xee, (CX) // tag 0xee + 2-byte offset; accounts for 60 bytes of the match
 | |
| 	MOVW DI, 1(CX)
 | |
| 	LEAL -60(SI), SI
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
 | |
| 
 | |
| two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
 | |
| 	MOVL SI, R8
 | |
| 	SHLL $0x02, R8 // R8 = length << 2
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
 | |
| 	LEAL -15(R8), R8 // length*4 - 15, i.e. ((length-4) << 2) | 1
 | |
| 	MOVB DI, 1(CX) // low 8 bits of the offset
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8 // offset bits above the low 8 go into tag bits 5 and up
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
 | |
| 	LEAL -2(R8), R8 // tag = length*4 - 2, i.e. ((length-1) << 2) | 2
 | |
| 	MOVB R8, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| repeat_end_emit_encodeSnappyBlockAsm:
 | |
| 	MOVL DX, 12(SP) // everything before DX is now emitted
 | |
| 	JMP  search_loop_encodeSnappyBlockAsm
 | |
| 
 | |
| no_repeat_found_encodeSnappyBlockAsm:
 | |
| 	CMPL (BX)(SI*1), DI // 4 bytes at first candidate vs 4 bytes at DX
 | |
| 	JEQ  candidate_match_encodeSnappyBlockAsm
 | |
| 	SHRQ $0x08, DI // DI now starts at the byte from DX+1
 | |
| 	MOVL (AX)(R10*4), SI // SI = candidate for DX+2
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI // 4 bytes at second candidate vs 4 bytes at DX+1
 | |
| 	JEQ  candidate2_match_encodeSnappyBlockAsm
 | |
| 	MOVL R9, (AX)(R10*4) // record DX+2 under its hash
 | |
| 	SHRQ $0x08, DI // DI now starts at the byte from DX+2
 | |
| 	CMPL (BX)(SI*1), DI // 4 bytes at third candidate vs 4 bytes at DX+2
 | |
| 	JEQ  candidate3_match_encodeSnappyBlockAsm
 | |
| 	MOVL 20(SP), DX // no match: continue at the saved next probe position
 | |
| 	JMP  search_loop_encodeSnappyBlockAsm
 | |
| 
 | |
| candidate3_match_encodeSnappyBlockAsm:
 | |
| 	ADDL $0x02, DX // match begins at DX+2
 | |
| 	JMP  candidate_match_encodeSnappyBlockAsm
 | |
| 
 | |
| candidate2_match_encodeSnappyBlockAsm:
 | |
| 	MOVL R9, (AX)(R10*4) // record DX+2 under its hash
 | |
| 	INCL DX // match begins at DX+1
 | |
| 	MOVL R8, SI // SI = matching candidate position
 | |
| 
 | |
| candidate_match_encodeSnappyBlockAsm:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeSnappyBlockAsm // candidate at position 0 cannot move back
 | |
| 
 | |
| match_extend_back_loop_encodeSnappyBlockAsm:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeSnappyBlockAsm // do not extend into already emitted input
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeSnappyBlockAsm
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeSnappyBlockAsm
 | |
| 	JMP  match_extend_back_loop_encodeSnappyBlockAsm
 | |
| 
 | |
| match_extend_back_end_encodeSnappyBlockAsm:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 5(CX)(DI*1), DI // dst pointer after pending literals plus 5 bytes
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeSnappyBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would pass the output limit: return 0
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeSnappyBlockAsm:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm // no pending literal bytes
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI // DI = address of first literal byte
 | |
| 	SUBL R8, R9 // R9 = literal length
 | |
| 	LEAL -1(R9), R8 // R8 = length - 1, the value stored in the tag
 | |
| 	CMPL R8, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeSnappyBlockAsm
 | |
| 	CMPL R8, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeSnappyBlockAsm
 | |
| 	CMPL R8, $0x00010000
 | |
| 	JB   three_bytes_match_emit_encodeSnappyBlockAsm
 | |
| 	CMPL R8, $0x01000000
 | |
| 	JB   four_bytes_match_emit_encodeSnappyBlockAsm
 | |
| 	MOVB $0xfc, (CX) // tag 0xfc followed by a 4-byte length-1
 | |
| 	MOVL R8, 1(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| four_bytes_match_emit_encodeSnappyBlockAsm:
 | |
| 	MOVL R8, R10
 | |
| 	SHRL $0x10, R10
 | |
| 	MOVB $0xf8, (CX) // tag 0xf8 followed by a 3-byte length-1
 | |
| 	MOVW R8, 1(CX)
 | |
| 	MOVB R10, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| three_bytes_match_emit_encodeSnappyBlockAsm:
 | |
| 	MOVB $0xf4, (CX) // tag 0xf4 followed by a 2-byte length-1
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| two_bytes_match_emit_encodeSnappyBlockAsm:
 | |
| 	MOVB $0xf0, (CX) // tag 0xf0 followed by a 1-byte length-1
 | |
| 	MOVB R8, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL R8, $0x40
 | |
| 	JB   memmove_match_emit_encodeSnappyBlockAsm // at most 64 bytes: short copy
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| one_byte_match_emit_encodeSnappyBlockAsm:
 | |
| 	SHLB $0x02, R8 // single tag byte: (length-1) << 2
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeSnappyBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), R8 // R8 = dst pointer after the copy
 | |
| 
 | |
| 	// genMemMoveShort: copy R9 bytes (at most 64 here) from (DI) to (CX), dispatching on size
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8:
 | |
| 	MOVQ (DI), R10 // always moves a full 8 bytes, even when R9 < 8
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ -8(DI)(R9*1), DI // last 8 bytes; may overlap the first 8
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ DI, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU -16(DI)(R9*1), X1 // last 16 bytes; may overlap the first 16
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeSnappyBlockAsm:
 | |
| 	MOVQ R8, CX // advance dst pointer past the copied literals
 | |
| 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm
 | |
| 
 | |
| memmove_long_match_emit_encodeSnappyBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), R8 // R8 = dst pointer after the copy
 | |
| 
 | |
| 	// genMemMoveLong: copy R9 bytes from (DI) to (CX); head and tail go through X0-X3, the middle through 32-byte steps with aligned stores
 | |
| 	MOVOU (DI), X0 // first 32 source bytes, stored last
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2 // last 32 source bytes, stored last
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVQ  R9, R11
 | |
| 	SHRQ  $0x05, R11 // R11 = number of 32-byte chunks
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12 // R12 = 64 - (dst & 31), so CX + R12 - 32 is 32-byte aligned
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DI)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DI)(R12*1), X4
 | |
| 	MOVOU -16(DI)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R9, R12
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX) // unaligned head and tail written after the aligned middle
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  R8, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeSnappyBlockAsm:
 | |
| match_nolit_loop_encodeSnappyBlockAsm:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP) // repeat offset = match position - candidate position
 | |
| 	ADDL $0x04, DX // first 4 bytes are already known to match
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), DI
 | |
| 	SUBL DX, DI // DI = source bytes remaining from DX
 | |
| 	LEAQ (BX)(DX*1), R8 // R8 = address of current position
 | |
| 	LEAQ (BX)(SI*1), SI // SI = address of candidate
 | |
| 
 | |
| 	// matchLen: R10 = number of equal bytes at (R8) and (SI), bounded by DI
 | |
| 	XORL R10, R10
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
 | |
| 	XORQ 8(SI)(R10*1), R11
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeSnappyBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11 // lowest differing bit index / 8 = count of equal bytes in this word
 | |
| 	LEAL 8(R10)(R11*1), R10
 | |
| 	JMP  match_nolit_end_encodeSnappyBlockAsm
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeSnappyBlockAsm:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9 // lowest differing bit index / 8 = count of equal bytes in this word
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_encodeSnappyBlockAsm
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeSnappyBlockAsm:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeSnappyBlockAsm:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm
 | |
| 	JB   match_nolit_end_encodeSnappyBlockAsm // nothing left to compare
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_encodeSnappyBlockAsm
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeSnappyBlockAsm:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_encodeSnappyBlockAsm
 | |
| 	LEAL 1(R10), R10
 | |
| 
 | |
| match_nolit_end_encodeSnappyBlockAsm:
 | |
| 	ADDL R10, DX // DX = end of the match
 | |
| 	MOVL 16(SP), SI // SI = copy offset
 | |
| 	ADDL $0x04, R10 // R10 = copy length, including the 4 bytes skipped before matchLen
 | |
| 	MOVL DX, 12(SP) // everything before DX counts as emitted
 | |
| 
 | |
| 	// emitCopy: write copy ops for length R10 at offset SI to (CX)
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   two_byte_offset_match_nolit_encodeSnappyBlockAsm
 | |
| 
 | |
| four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  four_bytes_remain_match_nolit_encodeSnappyBlockAsm
 | |
| 	MOVB $0xff, (CX) // tag 0xff + 4-byte offset; accounts for 64 bytes of the match
 | |
| 	MOVL SI, 1(CX)
 | |
| 	LEAL -64(R10), R10
 | |
| 	ADDQ $0x05, CX
 | |
| 	CMPL R10, $0x04
 | |
| 	JB   four_bytes_remain_match_nolit_encodeSnappyBlockAsm
 | |
| 	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm
 | |
| 
 | |
| four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
 | |
| 	TESTL R10, R10
 | |
| 	JZ    match_nolit_emitcopy_end_encodeSnappyBlockAsm
 | |
| 	XORL  DI, DI
 | |
| 	LEAL  -1(DI)(R10*4), R10 // tag = length*4 - 1, i.e. ((length-1) << 2) | 3
 | |
| 	MOVB  R10, (CX)
 | |
| 	MOVL  SI, 1(CX)
 | |
| 	ADDQ  $0x05, CX
 | |
| 	JMP   match_nolit_emitcopy_end_encodeSnappyBlockAsm
 | |
| 
 | |
| two_byte_offset_match_nolit_encodeSnappyBlockAsm:
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
 | |
| 	MOVB $0xee, (CX) // tag 0xee + 2-byte offset; accounts for 60 bytes of the match
 | |
| 	MOVW SI, 1(CX)
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
 | |
| 	MOVL R10, DI
 | |
| 	SHLL $0x02, DI // DI = length << 2
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
 | |
| 	LEAL -15(DI), DI // length*4 - 15, i.e. ((length-4) << 2) | 1
 | |
| 	MOVB SI, 1(CX) // low 8 bits of the offset
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI // offset bits above the low 8 go into tag bits 5 and up
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeSnappyBlockAsm:
 | |
| 	LEAL -2(DI), DI // tag = length*4 - 2, i.e. ((length-1) << 2) | 2
 | |
| 	MOVB DI, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeSnappyBlockAsm:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeSnappyBlockAsm // reached the search limit
 | |
| 	MOVQ -2(BX)(DX*1), DI // DI = 8 source bytes at DX-2
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP) // dst pointer reached the output limit: return 0
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeSnappyBlockAsm:
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9 // hash multiplier
 | |
| 	MOVQ  DI, R8
 | |
| 	SHRQ  $0x10, DI // DI now starts at the byte from DX
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x10, R8
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x32, R8 // R8 = 14-bit hash of the 6 bytes at DX-2
 | |
| 	SHLQ  $0x10, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x32, SI // SI = 14-bit hash of the 6 bytes at DX
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10
 | |
| 	MOVL  (R10), SI // SI = candidate position for DX
 | |
| 	MOVL  R9, (AX)(R8*4) // record DX-2 under its hash
 | |
| 	MOVL  DX, (R10) // record DX under its hash
 | |
| 	CMPL  (BX)(SI*1), DI // 4 bytes at candidate vs 4 bytes at DX
 | |
| 	JEQ   match_nolit_loop_encodeSnappyBlockAsm // immediate match with no literals in between
 | |
| 	INCL  DX
 | |
| 	JMP   search_loop_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_remainder_encodeSnappyBlockAsm:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 5(CX)(AX*1), AX // dst pointer after all remaining bytes plus 5
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeSnappyBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would pass the output limit: return 0
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeSnappyBlockAsm:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm // nothing left to emit
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX // AX = address of first remaining byte (table pointer no longer needed)
 | |
| 	SUBL DX, SI // SI = remaining length
 | |
| 	LEAL -1(SI), DX // DX = length - 1, the value stored in the tag
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm
 | |
| 	CMPL DX, $0x00010000
 | |
| 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm
 | |
| 	CMPL DX, $0x01000000
 | |
| 	JB   four_bytes_emit_remainder_encodeSnappyBlockAsm
 | |
| 	MOVB $0xfc, (CX) // tag 0xfc followed by a 4-byte length-1
 | |
| 	MOVL DX, 1(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 | |
| 
 | |
| four_bytes_emit_remainder_encodeSnappyBlockAsm:
 | |
| 	MOVL DX, BX
 | |
| 	SHRL $0x10, BX
 | |
| 	MOVB $0xf8, (CX) // tag 0xf8 followed by a 3-byte length-1
 | |
| 	MOVW DX, 1(CX)
 | |
| 	MOVB BL, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 | |
| 
 | |
| three_bytes_emit_remainder_encodeSnappyBlockAsm:
 | |
| 	MOVB $0xf4, (CX) // tag 0xf4 followed by a 2-byte length-1
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 | |
| 
 | |
| two_bytes_emit_remainder_encodeSnappyBlockAsm:
 | |
| 	MOVB $0xf0, (CX) // tag 0xf0 followed by a 1-byte length-1
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeSnappyBlockAsm // at most 64 bytes: short copy
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 | |
| 
 | |
| one_byte_emit_remainder_encodeSnappyBlockAsm:
 | |
| 	SHLB $0x02, DL // single tag byte: (length-1) << 2
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeSnappyBlockAsm:
 | |
| 	LEAQ (CX)(SI*1), DX // DX = dst pointer after the copy
 | |
| 	MOVL SI, BX // BX = byte count
 | |
| 
 | |
| 	// genMemMoveShort: copy BX bytes (at most 64 here) from (AX) to (CX) with exact-size moves
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL // last byte; same as the first when BX == 1
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX // last 4 bytes; may overlap the first 4
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX // last 8 bytes; may overlap the first 8
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1 // last 16 bytes; may overlap the first 16
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
 | |
| 	MOVQ DX, CX // advance dst pointer past the copied bytes
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm
 | |
| 
 | |
| memmove_long_emit_remainder_encodeSnappyBlockAsm:
 | |
| 	LEAQ (CX)(SI*1), DX // DX = dst pointer after the copy
 | |
| 	MOVL SI, BX // BX = byte count
 | |
| 
 | |
| 	// genMemMoveLong: copy BX bytes from (AX) to (CX); head and tail go through X0-X3, the middle through 32-byte steps with aligned stores
 | |
| 	MOVOU (AX), X0 // first 32 source bytes, stored last
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2 // last 32 source bytes, stored last
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI // DI = number of 32-byte chunks
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8 // R8 = 64 - (dst & 31), so CX + R8 - 32 is 32-byte aligned
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX) // unaligned head and tail written after the aligned middle
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX // CX = bytes written = dst pointer - dst base
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeSnappyBlockAsm64K(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00000200, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeSnappyBlockAsm64K:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeSnappyBlockAsm64K
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  DX, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeSnappyBlockAsm64K:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x06, SI
 | |
| 	LEAL  4(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeSnappyBlockAsm64K
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	SHLQ  $0x10, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x32, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  (AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11
 | |
| 	JNE   no_repeat_found_encodeSnappyBlockAsm64K
 | |
| 	LEAL  1(DX), DI
 | |
| 	MOVL  12(SP), SI
 | |
| 	MOVL  DI, R8
 | |
| 	SUBL  16(SP), R8
 | |
| 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm64K
 | |
| 
 | |
| repeat_extend_back_loop_encodeSnappyBlockAsm64K:
 | |
| 	CMPL DI, SI
 | |
| 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm64K
 | |
| 	MOVB -1(BX)(R8*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm64K
 | |
| 	LEAL -1(DI), DI
 | |
| 	DECL R8
 | |
| 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm64K
 | |
| 
 | |
| repeat_extend_back_end_encodeSnappyBlockAsm64K:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	LEAQ 3(CX)(SI*1), SI
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_encodeSnappyBlockAsm64K
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| repeat_dst_size_check_encodeSnappyBlockAsm64K:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
 | |
| 	MOVL DI, R8
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R9
 | |
| 	SUBL SI, R8
 | |
| 	LEAL -1(R8), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm64K
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm64K
 | |
| 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| three_bytes_repeat_emit_encodeSnappyBlockAsm64K:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| two_bytes_repeat_emit_encodeSnappyBlockAsm64K:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_encodeSnappyBlockAsm64K
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| one_byte_repeat_emit_encodeSnappyBlockAsm64K:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_repeat_emit_encodeSnappyBlockAsm64K:
 | |
| 	LEAQ (CX)(R8*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R8, $0x08
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8
 | |
| 	CMPQ R8, $0x10
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
 | |
| 	CMPQ R8, $0x20
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8:
 | |
| 	MOVQ (R9), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
 | |
| 	MOVQ (R9), R10
 | |
| 	MOVQ -8(R9)(R8*1), R9
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ R9, -8(CX)(R8*1)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU -16(R9)(R8*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R8*1)
 | |
| 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 
 | |
| memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| memmove_long_repeat_emit_encodeSnappyBlockAsm64K:
 | |
| 	LEAQ (CX)(R8*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVQ  R8, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R9)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R9)(R12*1), X4
 | |
| 	MOVOU -16(R9)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R8, R12
 | |
| 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K:
 | |
| 	ADDL $0x05, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R11, R11
 | |
| 
 | |
| matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	MOVQ 8(R9)(R11*1), R12
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
 | |
| 	XORQ 8(SI)(R11*1), R12
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R11), R11
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K
 | |
| 
 | |
| matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R12, R12
 | |
| 
 | |
| #else
 | |
| 	BSFQ R12, R12
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R12
 | |
| 	LEAL 8(R11)(R12*1), R11
 | |
| 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm64K
 | |
| 
 | |
| matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R11), R11
 | |
| 	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
 | |
| 
 | |
| matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R10, R10
 | |
| 
 | |
| #else
 | |
| 	BSFQ R10, R10
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R10
 | |
| 	LEAL (R11)(R10*1), R11
 | |
| 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm64K
 | |
| 
 | |
| matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
 | |
| 	MOVL (R9)(R11*1), R10
 | |
| 	CMPL (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R11), R11
 | |
| 
 | |
| matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
 | |
| 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm64K
 | |
| 	MOVW (R9)(R11*1), R10
 | |
| 	CMPW (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
 | |
| 	LEAL 2(R11), R11
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm64K
 | |
| 
 | |
| matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K:
 | |
| 	MOVB (R9)(R11*1), R10
 | |
| 	CMPB (SI)(R11*1), R10
 | |
| 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm64K
 | |
| 	LEAL 1(R11), R11
 | |
| 
 | |
| repeat_extend_forward_end_encodeSnappyBlockAsm64K:
 | |
| 	ADDL R11, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL DI, SI
 | |
| 	MOVL 16(SP), DI
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K:
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	LEAL -60(SI), SI
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K
 | |
| 
 | |
| two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K:
 | |
| 	MOVL SI, R8
 | |
| 	SHLL $0x02, R8
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
 | |
| 	LEAL -15(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K:
 | |
| 	LEAL -2(R8), R8
 | |
| 	MOVB R8, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| repeat_end_emit_encodeSnappyBlockAsm64K:
 | |
| 	MOVL DX, 12(SP)
 | |
| 	JMP  search_loop_encodeSnappyBlockAsm64K
 | |
| 
 | |
| no_repeat_found_encodeSnappyBlockAsm64K:
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate_match_encodeSnappyBlockAsm64K
 | |
| 	SHRQ $0x08, DI
 | |
| 	MOVL (AX)(R10*4), SI
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI
 | |
| 	JEQ  candidate2_match_encodeSnappyBlockAsm64K
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	SHRQ $0x08, DI
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate3_match_encodeSnappyBlockAsm64K
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeSnappyBlockAsm64K
 | |
| 
 | |
| candidate3_match_encodeSnappyBlockAsm64K:
 | |
| 	ADDL $0x02, DX
 | |
| 	JMP  candidate_match_encodeSnappyBlockAsm64K
 | |
| 
 | |
| candidate2_match_encodeSnappyBlockAsm64K:
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	INCL DX
 | |
| 	MOVL R8, SI
 | |
| 
 | |
| candidate_match_encodeSnappyBlockAsm64K:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeSnappyBlockAsm64K
 | |
| 
 | |
| match_extend_back_loop_encodeSnappyBlockAsm64K:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeSnappyBlockAsm64K
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeSnappyBlockAsm64K
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeSnappyBlockAsm64K
 | |
| 	JMP  match_extend_back_loop_encodeSnappyBlockAsm64K
 | |
| 
 | |
| match_extend_back_end_encodeSnappyBlockAsm64K:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 3(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeSnappyBlockAsm64K
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeSnappyBlockAsm64K:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm64K
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI
 | |
| 	SUBL R8, R9
 | |
| 	LEAL -1(R9), R8
 | |
| 	CMPL R8, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeSnappyBlockAsm64K
 | |
| 	CMPL R8, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeSnappyBlockAsm64K
 | |
| 	JB   three_bytes_match_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| three_bytes_match_emit_encodeSnappyBlockAsm64K:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| two_bytes_match_emit_encodeSnappyBlockAsm64K:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB R8, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL R8, $0x40
 | |
| 	JB   memmove_match_emit_encodeSnappyBlockAsm64K
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| one_byte_match_emit_encodeSnappyBlockAsm64K:
 | |
| 	SHLB $0x02, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeSnappyBlockAsm64K:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ -8(DI)(R9*1), DI
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ DI, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU -16(DI)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeSnappyBlockAsm64K:
 | |
| 	MOVQ R8, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| memmove_long_match_emit_encodeSnappyBlockAsm64K:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVQ  R9, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DI)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DI)(R12*1), X4
 | |
| 	MOVOU -16(DI)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R9, R12
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  R8, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeSnappyBlockAsm64K:
 | |
| match_nolit_loop_encodeSnappyBlockAsm64K:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP)
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), DI
 | |
| 	SUBL DX, DI
 | |
| 	LEAQ (BX)(DX*1), R8
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R10, R10
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm64K
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
 | |
| 	XORQ 8(SI)(R10*1), R11
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL 8(R10)(R11*1), R10
 | |
| 	JMP  match_nolit_end_encodeSnappyBlockAsm64K
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeSnappyBlockAsm64K:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_encodeSnappyBlockAsm64K
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeSnappyBlockAsm64K:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeSnappyBlockAsm64K:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
 | |
| 	JB   match_nolit_end_encodeSnappyBlockAsm64K
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_encodeSnappyBlockAsm64K
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeSnappyBlockAsm64K:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_encodeSnappyBlockAsm64K
 | |
| 	LEAL 1(R10), R10
 | |
| 
 | |
| match_nolit_end_encodeSnappyBlockAsm64K:
 | |
| 	ADDL R10, DX
 | |
| 	MOVL 16(SP), SI
 | |
| 	ADDL $0x04, R10
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_match_nolit_encodeSnappyBlockAsm64K:
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm64K
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K:
 | |
| 	MOVL R10, DI
 | |
| 	SHLL $0x02, DI
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
 | |
| 	LEAL -15(DI), DI
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeSnappyBlockAsm64K:
 | |
| 	LEAL -2(DI), DI
 | |
| 	MOVB DI, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeSnappyBlockAsm64K:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeSnappyBlockAsm64K
 | |
| 	MOVQ -2(BX)(DX*1), DI
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm64K
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeSnappyBlockAsm64K:
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9
 | |
| 	MOVQ  DI, R8
 | |
| 	SHRQ  $0x10, DI
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x10, R8
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x32, R8
 | |
| 	SHLQ  $0x10, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x32, SI
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10
 | |
| 	MOVL  (R10), SI
 | |
| 	MOVL  R9, (AX)(R8*4)
 | |
| 	MOVL  DX, (R10)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   match_nolit_loop_encodeSnappyBlockAsm64K
 | |
| 	INCL  DX
 | |
| 	JMP   search_loop_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_remainder_encodeSnappyBlockAsm64K:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeSnappyBlockAsm64K
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeSnappyBlockAsm64K:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 
 | |
| three_bytes_emit_remainder_encodeSnappyBlockAsm64K:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 
 | |
| two_bytes_emit_remainder_encodeSnappyBlockAsm64K:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 
 | |
| one_byte_emit_remainder_encodeSnappyBlockAsm64K:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeSnappyBlockAsm64K:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
 | |
| 
 | |
| memmove_long_emit_remainder_encodeSnappyBlockAsm64K:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeSnappyBlockAsm12B(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00000080, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeSnappyBlockAsm12B:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeSnappyBlockAsm12B
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  DX, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeSnappyBlockAsm12B:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x05, SI
 | |
| 	LEAL  4(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeSnappyBlockAsm12B
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x000000cf1bbcdcbb, R9
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11
 | |
| 	SHLQ  $0x18, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x34, R10
 | |
| 	SHLQ  $0x18, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x34, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  (AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x18, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x34, R10
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11
 | |
| 	JNE   no_repeat_found_encodeSnappyBlockAsm12B
 | |
| 	LEAL  1(DX), DI
 | |
| 	MOVL  12(SP), SI
 | |
| 	MOVL  DI, R8
 | |
| 	SUBL  16(SP), R8
 | |
| 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm12B
 | |
| 
 | |
| repeat_extend_back_loop_encodeSnappyBlockAsm12B:
 | |
| 	CMPL DI, SI
 | |
| 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm12B
 | |
| 	MOVB -1(BX)(R8*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm12B
 | |
| 	LEAL -1(DI), DI
 | |
| 	DECL R8
 | |
| 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm12B
 | |
| 
 | |
| repeat_extend_back_end_encodeSnappyBlockAsm12B:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	LEAQ 3(CX)(SI*1), SI
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_encodeSnappyBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| repeat_dst_size_check_encodeSnappyBlockAsm12B:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
 | |
| 	MOVL DI, R8
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R9
 | |
| 	SUBL SI, R8
 | |
| 	LEAL -1(R8), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm12B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm12B
 | |
| 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| three_bytes_repeat_emit_encodeSnappyBlockAsm12B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_encodeSnappyBlockAsm12B
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| one_byte_repeat_emit_encodeSnappyBlockAsm12B:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_repeat_emit_encodeSnappyBlockAsm12B:
 | |
| 	LEAQ (CX)(R8*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R8, $0x08
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8
 | |
| 	CMPQ R8, $0x10
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
 | |
| 	CMPQ R8, $0x20
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8:
 | |
| 	MOVQ (R9), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
 | |
| 	MOVQ (R9), R10
 | |
| 	MOVQ -8(R9)(R8*1), R9
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ R9, -8(CX)(R8*1)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU -16(R9)(R8*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R8*1)
 | |
| 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 
 | |
| memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| memmove_long_repeat_emit_encodeSnappyBlockAsm12B:
 | |
| 	LEAQ (CX)(R8*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVQ  R8, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R9)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R9)(R12*1), X4
 | |
| 	MOVOU -16(R9)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R8, R12
 | |
| 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
 | |
| 	ADDL $0x05, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R11, R11
 | |
| 
 | |
| matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	MOVQ 8(R9)(R11*1), R12
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
 | |
| 	XORQ 8(SI)(R11*1), R12
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R11), R11
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R12, R12
 | |
| 
 | |
| #else
 | |
| 	BSFQ R12, R12
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R12
 | |
| 	LEAL 8(R11)(R12*1), R11
 | |
| 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm12B
 | |
| 
 | |
| matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R11), R11
 | |
| 	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R10, R10
 | |
| 
 | |
| #else
 | |
| 	BSFQ R10, R10
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R10
 | |
| 	LEAL (R11)(R10*1), R11
 | |
| 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm12B
 | |
| 
 | |
| matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
 | |
| 	MOVL (R9)(R11*1), R10
 | |
| 	CMPL (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R11), R11
 | |
| 
 | |
| matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
 | |
| 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm12B
 | |
| 	MOVW (R9)(R11*1), R10
 | |
| 	CMPW (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
 | |
| 	LEAL 2(R11), R11
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm12B
 | |
| 
 | |
| matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B:
 | |
| 	MOVB (R9)(R11*1), R10
 | |
| 	CMPB (SI)(R11*1), R10
 | |
| 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm12B
 | |
| 	LEAL 1(R11), R11
 | |
| 
 | |
| repeat_extend_forward_end_encodeSnappyBlockAsm12B:
 | |
| 	ADDL R11, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL DI, SI
 | |
| 	MOVL 16(SP), DI
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	LEAL -60(SI), SI
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B
 | |
| 
 | |
| two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
 | |
| 	MOVL SI, R8
 | |
| 	SHLL $0x02, R8
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
 | |
| 	LEAL -15(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
 | |
| 	LEAL -2(R8), R8
 | |
| 	MOVB R8, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| repeat_end_emit_encodeSnappyBlockAsm12B:
 | |
| 	MOVL DX, 12(SP)
 | |
| 	JMP  search_loop_encodeSnappyBlockAsm12B
 | |
| 
 | |
| no_repeat_found_encodeSnappyBlockAsm12B:
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate_match_encodeSnappyBlockAsm12B
 | |
| 	SHRQ $0x08, DI
 | |
| 	MOVL (AX)(R10*4), SI
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI
 | |
| 	JEQ  candidate2_match_encodeSnappyBlockAsm12B
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	SHRQ $0x08, DI
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate3_match_encodeSnappyBlockAsm12B
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeSnappyBlockAsm12B
 | |
| 
 | |
| candidate3_match_encodeSnappyBlockAsm12B:
 | |
| 	ADDL $0x02, DX
 | |
| 	JMP  candidate_match_encodeSnappyBlockAsm12B
 | |
| 
 | |
| candidate2_match_encodeSnappyBlockAsm12B:
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	INCL DX
 | |
| 	MOVL R8, SI
 | |
| 
 | |
| candidate_match_encodeSnappyBlockAsm12B:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeSnappyBlockAsm12B
 | |
| 
 | |
| match_extend_back_loop_encodeSnappyBlockAsm12B:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeSnappyBlockAsm12B
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeSnappyBlockAsm12B
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeSnappyBlockAsm12B
 | |
| 	JMP  match_extend_back_loop_encodeSnappyBlockAsm12B
 | |
| 
 | |
| match_extend_back_end_encodeSnappyBlockAsm12B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 3(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeSnappyBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeSnappyBlockAsm12B:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm12B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI
 | |
| 	SUBL R8, R9
 | |
| 	LEAL -1(R9), R8
 | |
| 	CMPL R8, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeSnappyBlockAsm12B
 | |
| 	CMPL R8, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeSnappyBlockAsm12B
 | |
| 	JB   three_bytes_match_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| three_bytes_match_emit_encodeSnappyBlockAsm12B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| two_bytes_match_emit_encodeSnappyBlockAsm12B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB R8, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL R8, $0x40
 | |
| 	JB   memmove_match_emit_encodeSnappyBlockAsm12B
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| one_byte_match_emit_encodeSnappyBlockAsm12B:
 | |
| 	SHLB $0x02, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeSnappyBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ -8(DI)(R9*1), DI
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ DI, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU -16(DI)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
 | |
| 	MOVQ R8, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| memmove_long_match_emit_encodeSnappyBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVQ  R9, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DI)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DI)(R12*1), X4
 | |
| 	MOVOU -16(DI)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R9, R12
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  R8, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
 | |
| match_nolit_loop_encodeSnappyBlockAsm12B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP)
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), DI
 | |
| 	SUBL DX, DI
 | |
| 	LEAQ (BX)(DX*1), R8
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R10, R10
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm12B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
 | |
| 	XORQ 8(SI)(R10*1), R11
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL 8(R10)(R11*1), R10
 | |
| 	JMP  match_nolit_end_encodeSnappyBlockAsm12B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeSnappyBlockAsm12B:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_encodeSnappyBlockAsm12B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeSnappyBlockAsm12B:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeSnappyBlockAsm12B:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
 | |
| 	JB   match_nolit_end_encodeSnappyBlockAsm12B
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_encodeSnappyBlockAsm12B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeSnappyBlockAsm12B:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_encodeSnappyBlockAsm12B
 | |
| 	LEAL 1(R10), R10
 | |
| 
 | |
| match_nolit_end_encodeSnappyBlockAsm12B:
 | |
| 	ADDL R10, DX
 | |
| 	MOVL 16(SP), SI
 | |
| 	ADDL $0x04, R10
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm12B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
 | |
| 	MOVL R10, DI
 | |
| 	SHLL $0x02, DI
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
 | |
| 	LEAL -15(DI), DI
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
 | |
| 	LEAL -2(DI), DI
 | |
| 	MOVB DI, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeSnappyBlockAsm12B
 | |
| 	MOVQ -2(BX)(DX*1), DI
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeSnappyBlockAsm12B:
 | |
| 	MOVQ  $0x000000cf1bbcdcbb, R9
 | |
| 	MOVQ  DI, R8
 | |
| 	SHRQ  $0x10, DI
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x18, R8
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x34, R8
 | |
| 	SHLQ  $0x18, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x34, SI
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10
 | |
| 	MOVL  (R10), SI
 | |
| 	MOVL  R9, (AX)(R8*4)
 | |
| 	MOVL  DX, (R10)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   match_nolit_loop_encodeSnappyBlockAsm12B
 | |
| 	INCL  DX
 | |
| 	JMP   search_loop_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_remainder_encodeSnappyBlockAsm12B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeSnappyBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeSnappyBlockAsm12B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 
 | |
| three_bytes_emit_remainder_encodeSnappyBlockAsm12B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 
 | |
| one_byte_emit_remainder_encodeSnappyBlockAsm12B:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeSnappyBlockAsm12B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeSnappyBlockAsm10B(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00000020, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeSnappyBlockAsm10B:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeSnappyBlockAsm10B
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  DX, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeSnappyBlockAsm10B:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x05, SI
 | |
| 	LEAL  4(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeSnappyBlockAsm10B
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x9e3779b1, R9
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11
 | |
| 	SHLQ  $0x20, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x36, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x36, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  (AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x20, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x36, R10
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11
 | |
| 	JNE   no_repeat_found_encodeSnappyBlockAsm10B
 | |
| 	LEAL  1(DX), DI
 | |
| 	MOVL  12(SP), SI
 | |
| 	MOVL  DI, R8
 | |
| 	SUBL  16(SP), R8
 | |
| 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm10B
 | |
| 
 | |
| repeat_extend_back_loop_encodeSnappyBlockAsm10B:
 | |
| 	CMPL DI, SI
 | |
| 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm10B
 | |
| 	MOVB -1(BX)(R8*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm10B
 | |
| 	LEAL -1(DI), DI
 | |
| 	DECL R8
 | |
| 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm10B
 | |
| 
 | |
| repeat_extend_back_end_encodeSnappyBlockAsm10B:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	LEAQ 3(CX)(SI*1), SI
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_encodeSnappyBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| repeat_dst_size_check_encodeSnappyBlockAsm10B:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
 | |
| 	MOVL DI, R8
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R9
 | |
| 	SUBL SI, R8
 | |
| 	LEAL -1(R8), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm10B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm10B
 | |
| 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| three_bytes_repeat_emit_encodeSnappyBlockAsm10B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_encodeSnappyBlockAsm10B
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| one_byte_repeat_emit_encodeSnappyBlockAsm10B:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_repeat_emit_encodeSnappyBlockAsm10B:
 | |
| 	LEAQ (CX)(R8*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R8, $0x08
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8
 | |
| 	CMPQ R8, $0x10
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
 | |
| 	CMPQ R8, $0x20
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8:
 | |
| 	MOVQ (R9), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
 | |
| 	MOVQ (R9), R10
 | |
| 	MOVQ -8(R9)(R8*1), R9
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ R9, -8(CX)(R8*1)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU -16(R9)(R8*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R8*1)
 | |
| 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 
 | |
| memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
 | |
| 	LEAQ (CX)(R8*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVQ  R8, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R9)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R9)(R12*1), X4
 | |
| 	MOVOU -16(R9)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R8, R12
 | |
| 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
 | |
| 	ADDL $0x05, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R11, R11
 | |
| 
 | |
| matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	MOVQ 8(R9)(R11*1), R12
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
 | |
| 	XORQ 8(SI)(R11*1), R12
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R11), R11
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R12, R12
 | |
| 
 | |
| #else
 | |
| 	BSFQ R12, R12
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R12
 | |
| 	LEAL 8(R11)(R12*1), R11
 | |
| 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm10B
 | |
| 
 | |
| matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R11), R11
 | |
| 	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R10, R10
 | |
| 
 | |
| #else
 | |
| 	BSFQ R10, R10
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R10
 | |
| 	LEAL (R11)(R10*1), R11
 | |
| 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm10B
 | |
| 
 | |
| matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
 | |
| 	MOVL (R9)(R11*1), R10
 | |
| 	CMPL (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R11), R11
 | |
| 
 | |
| matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
 | |
| 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm10B
 | |
| 	MOVW (R9)(R11*1), R10
 | |
| 	CMPW (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
 | |
| 	LEAL 2(R11), R11
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm10B
 | |
| 
 | |
| matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B:
 | |
| 	MOVB (R9)(R11*1), R10
 | |
| 	CMPB (SI)(R11*1), R10
 | |
| 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm10B
 | |
| 	LEAL 1(R11), R11
 | |
| 
 | |
| repeat_extend_forward_end_encodeSnappyBlockAsm10B:
 | |
| 	ADDL R11, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL DI, SI
 | |
| 	MOVL 16(SP), DI
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	LEAL -60(SI), SI
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B
 | |
| 
 | |
| two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
 | |
| 	MOVL SI, R8
 | |
| 	SHLL $0x02, R8
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
 | |
| 	LEAL -15(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
 | |
| 	LEAL -2(R8), R8
 | |
| 	MOVB R8, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| repeat_end_emit_encodeSnappyBlockAsm10B:
 | |
| 	MOVL DX, 12(SP)
 | |
| 	JMP  search_loop_encodeSnappyBlockAsm10B
 | |
| 
 | |
| no_repeat_found_encodeSnappyBlockAsm10B:
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate_match_encodeSnappyBlockAsm10B
 | |
| 	SHRQ $0x08, DI
 | |
| 	MOVL (AX)(R10*4), SI
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI
 | |
| 	JEQ  candidate2_match_encodeSnappyBlockAsm10B
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	SHRQ $0x08, DI
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate3_match_encodeSnappyBlockAsm10B
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeSnappyBlockAsm10B
 | |
| 
 | |
| candidate3_match_encodeSnappyBlockAsm10B:
 | |
| 	ADDL $0x02, DX
 | |
| 	JMP  candidate_match_encodeSnappyBlockAsm10B
 | |
| 
 | |
| candidate2_match_encodeSnappyBlockAsm10B:
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	INCL DX
 | |
| 	MOVL R8, SI
 | |
| 
 | |
| candidate_match_encodeSnappyBlockAsm10B:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeSnappyBlockAsm10B
 | |
| 
 | |
| match_extend_back_loop_encodeSnappyBlockAsm10B:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeSnappyBlockAsm10B
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeSnappyBlockAsm10B
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeSnappyBlockAsm10B
 | |
| 	JMP  match_extend_back_loop_encodeSnappyBlockAsm10B
 | |
| 
 | |
| match_extend_back_end_encodeSnappyBlockAsm10B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 3(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeSnappyBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeSnappyBlockAsm10B:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm10B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI
 | |
| 	SUBL R8, R9
 | |
| 	LEAL -1(R9), R8
 | |
| 	CMPL R8, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeSnappyBlockAsm10B
 | |
| 	CMPL R8, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeSnappyBlockAsm10B
 | |
| 	JB   three_bytes_match_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| three_bytes_match_emit_encodeSnappyBlockAsm10B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| two_bytes_match_emit_encodeSnappyBlockAsm10B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB R8, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL R8, $0x40
 | |
| 	JB   memmove_match_emit_encodeSnappyBlockAsm10B
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| one_byte_match_emit_encodeSnappyBlockAsm10B:
 | |
| 	SHLB $0x02, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeSnappyBlockAsm10B:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ -8(DI)(R9*1), DI
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ DI, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU -16(DI)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
 | |
| 	MOVQ R8, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| memmove_long_match_emit_encodeSnappyBlockAsm10B:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVQ  R9, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DI)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DI)(R12*1), X4
 | |
| 	MOVOU -16(DI)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R9, R12
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  R8, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
 | |
| match_nolit_loop_encodeSnappyBlockAsm10B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP)
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), DI
 | |
| 	SUBL DX, DI
 | |
| 	LEAQ (BX)(DX*1), R8
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R10, R10
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm10B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
 | |
| 	XORQ 8(SI)(R10*1), R11
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL 8(R10)(R11*1), R10
 | |
| 	JMP  match_nolit_end_encodeSnappyBlockAsm10B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeSnappyBlockAsm10B:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_encodeSnappyBlockAsm10B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeSnappyBlockAsm10B:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeSnappyBlockAsm10B:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
 | |
| 	JB   match_nolit_end_encodeSnappyBlockAsm10B
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_encodeSnappyBlockAsm10B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeSnappyBlockAsm10B:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_encodeSnappyBlockAsm10B
 | |
| 	LEAL 1(R10), R10
 | |
| 
 | |
| match_nolit_end_encodeSnappyBlockAsm10B:
 | |
| 	ADDL R10, DX
 | |
| 	MOVL 16(SP), SI
 | |
| 	ADDL $0x04, R10
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm10B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
 | |
| 	MOVL R10, DI
 | |
| 	SHLL $0x02, DI
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
 | |
| 	LEAL -15(DI), DI
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
 | |
| 	LEAL -2(DI), DI
 | |
| 	MOVB DI, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeSnappyBlockAsm10B
 | |
| 	MOVQ -2(BX)(DX*1), DI
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeSnappyBlockAsm10B:
 | |
| 	MOVQ  $0x9e3779b1, R9
 | |
| 	MOVQ  DI, R8
 | |
| 	SHRQ  $0x10, DI
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x20, R8
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x36, R8
 | |
| 	SHLQ  $0x20, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x36, SI
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10
 | |
| 	MOVL  (R10), SI
 | |
| 	MOVL  R9, (AX)(R8*4)
 | |
| 	MOVL  DX, (R10)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   match_nolit_loop_encodeSnappyBlockAsm10B
 | |
| 	INCL  DX
 | |
| 	JMP   search_loop_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_remainder_encodeSnappyBlockAsm10B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeSnappyBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeSnappyBlockAsm10B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 
 | |
| three_bytes_emit_remainder_encodeSnappyBlockAsm10B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 
 | |
| one_byte_emit_remainder_encodeSnappyBlockAsm10B:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeSnappyBlockAsm10B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeSnappyBlockAsm8B(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00000008, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeSnappyBlockAsm8B:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeSnappyBlockAsm8B
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  DX, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeSnappyBlockAsm8B:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x04, SI
 | |
| 	LEAL  4(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeSnappyBlockAsm8B
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x9e3779b1, R9
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11
 | |
| 	SHLQ  $0x20, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x38, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x38, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  (AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x20, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x38, R10
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11
 | |
| 	JNE   no_repeat_found_encodeSnappyBlockAsm8B
 | |
| 	LEAL  1(DX), DI
 | |
| 	MOVL  12(SP), SI
 | |
| 	MOVL  DI, R8
 | |
| 	SUBL  16(SP), R8
 | |
| 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm8B
 | |
| 
 | |
| repeat_extend_back_loop_encodeSnappyBlockAsm8B:
 | |
| 	CMPL DI, SI
 | |
| 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm8B
 | |
| 	MOVB -1(BX)(R8*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm8B
 | |
| 	LEAL -1(DI), DI
 | |
| 	DECL R8
 | |
| 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm8B
 | |
| 
 | |
| repeat_extend_back_end_encodeSnappyBlockAsm8B:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	LEAQ 3(CX)(SI*1), SI
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_encodeSnappyBlockAsm8B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| repeat_dst_size_check_encodeSnappyBlockAsm8B:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
 | |
| 	MOVL DI, R8
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R9
 | |
| 	SUBL SI, R8
 | |
| 	LEAL -1(R8), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm8B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm8B
 | |
| 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| three_bytes_repeat_emit_encodeSnappyBlockAsm8B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_encodeSnappyBlockAsm8B
 | |
| 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| one_byte_repeat_emit_encodeSnappyBlockAsm8B:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_repeat_emit_encodeSnappyBlockAsm8B:
 | |
| 	LEAQ (CX)(R8*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R8, $0x08
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
 | |
| 	CMPQ R8, $0x10
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
 | |
| 	CMPQ R8, $0x20
 | |
| 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
 | |
| 	MOVQ (R9), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
 | |
| 	MOVQ (R9), R10
 | |
| 	MOVQ -8(R9)(R8*1), R9
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ R9, -8(CX)(R8*1)
 | |
| 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU -16(R9)(R8*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R8*1)
 | |
| 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 
 | |
| memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
 | |
| 	LEAQ (CX)(R8*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R9), X0
 | |
| 	MOVOU 16(R9), X1
 | |
| 	MOVOU -32(R9)(R8*1), X2
 | |
| 	MOVOU -16(R9)(R8*1), X3
 | |
| 	MOVQ  R8, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R9)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R9)(R12*1), X4
 | |
| 	MOVOU -16(R9)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R8, R12
 | |
| 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R8*1)
 | |
| 	MOVOU X3, -16(CX)(R8*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
 | |
| 	ADDL $0x05, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R11, R11
 | |
| 
 | |
| matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	MOVQ 8(R9)(R11*1), R12
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
 | |
| 	XORQ 8(SI)(R11*1), R12
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R11), R11
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R12, R12
 | |
| 
 | |
| #else
 | |
| 	BSFQ R12, R12
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R12
 | |
| 	LEAL 8(R11)(R12*1), R11
 | |
| 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm8B
 | |
| 
 | |
| matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R11), R11
 | |
| 	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R10, R10
 | |
| 
 | |
| #else
 | |
| 	BSFQ R10, R10
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R10
 | |
| 	LEAL (R11)(R10*1), R11
 | |
| 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm8B
 | |
| 
 | |
| matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
 | |
| 	MOVL (R9)(R11*1), R10
 | |
| 	CMPL (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R11), R11
 | |
| 
 | |
| matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
 | |
| 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm8B
 | |
| 	MOVW (R9)(R11*1), R10
 | |
| 	CMPW (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
 | |
| 	LEAL 2(R11), R11
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm8B
 | |
| 
 | |
| matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B:
 | |
| 	MOVB (R9)(R11*1), R10
 | |
| 	CMPB (SI)(R11*1), R10
 | |
| 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm8B
 | |
| 	LEAL 1(R11), R11
 | |
| 
 | |
| repeat_extend_forward_end_encodeSnappyBlockAsm8B:
 | |
| 	ADDL R11, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL DI, SI
 | |
| 	MOVL 16(SP), DI
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	LEAL -60(SI), SI
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B
 | |
| 
 | |
| two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
 | |
| 	MOVL SI, R8
 | |
| 	SHLL $0x02, R8
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
 | |
| 	LEAL -15(R8), R8
 | |
| 	MOVB DI, 1(CX)
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
 | |
| 	LEAL -2(R8), R8
 | |
| 	MOVB R8, (CX)
 | |
| 	MOVW DI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| repeat_end_emit_encodeSnappyBlockAsm8B:
 | |
| 	MOVL DX, 12(SP)
 | |
| 	JMP  search_loop_encodeSnappyBlockAsm8B
 | |
| 
 | |
| no_repeat_found_encodeSnappyBlockAsm8B:
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate_match_encodeSnappyBlockAsm8B
 | |
| 	SHRQ $0x08, DI
 | |
| 	MOVL (AX)(R10*4), SI
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI
 | |
| 	JEQ  candidate2_match_encodeSnappyBlockAsm8B
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	SHRQ $0x08, DI
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate3_match_encodeSnappyBlockAsm8B
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeSnappyBlockAsm8B
 | |
| 
 | |
| candidate3_match_encodeSnappyBlockAsm8B:
 | |
| 	ADDL $0x02, DX
 | |
| 	JMP  candidate_match_encodeSnappyBlockAsm8B
 | |
| 
 | |
| candidate2_match_encodeSnappyBlockAsm8B:
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	INCL DX
 | |
| 	MOVL R8, SI
 | |
| 
 | |
| candidate_match_encodeSnappyBlockAsm8B:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeSnappyBlockAsm8B
 | |
| 
 | |
| match_extend_back_loop_encodeSnappyBlockAsm8B:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeSnappyBlockAsm8B
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeSnappyBlockAsm8B
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeSnappyBlockAsm8B
 | |
| 	JMP  match_extend_back_loop_encodeSnappyBlockAsm8B
 | |
| 
 | |
| match_extend_back_end_encodeSnappyBlockAsm8B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 3(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeSnappyBlockAsm8B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeSnappyBlockAsm8B:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm8B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI
 | |
| 	SUBL R8, R9
 | |
| 	LEAL -1(R9), R8
 | |
| 	CMPL R8, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeSnappyBlockAsm8B
 | |
| 	CMPL R8, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeSnappyBlockAsm8B
 | |
| 	JB   three_bytes_match_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| three_bytes_match_emit_encodeSnappyBlockAsm8B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| two_bytes_match_emit_encodeSnappyBlockAsm8B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB R8, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL R8, $0x40
 | |
| 	JB   memmove_match_emit_encodeSnappyBlockAsm8B
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| one_byte_match_emit_encodeSnappyBlockAsm8B:
 | |
| 	SHLB $0x02, R8
 | |
| 	MOVB R8, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeSnappyBlockAsm8B:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ R10, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
 | |
| 	MOVQ (DI), R10
 | |
| 	MOVQ -8(DI)(R9*1), DI
 | |
| 	MOVQ R10, (CX)
 | |
| 	MOVQ DI, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU -16(DI)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
 | |
| 	MOVQ R8, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| memmove_long_match_emit_encodeSnappyBlockAsm8B:
 | |
| 	LEAQ (CX)(R9*1), R8
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DI), X0
 | |
| 	MOVOU 16(DI), X1
 | |
| 	MOVOU -32(DI)(R9*1), X2
 | |
| 	MOVOU -16(DI)(R9*1), X3
 | |
| 	MOVQ  R9, R11
 | |
| 	SHRQ  $0x05, R11
 | |
| 	MOVQ  CX, R10
 | |
| 	ANDL  $0x0000001f, R10
 | |
| 	MOVQ  $0x00000040, R12
 | |
| 	SUBQ  R10, R12
 | |
| 	DECQ  R11
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DI)(R12*1), R10
 | |
| 	LEAQ  -32(CX)(R12*1), R13
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
 | |
| 	MOVOU (R10), X4
 | |
| 	MOVOU 16(R10), X5
 | |
| 	MOVOA X4, (R13)
 | |
| 	MOVOA X5, 16(R13)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	ADDQ  $0x20, R10
 | |
| 	ADDQ  $0x20, R12
 | |
| 	DECQ  R11
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DI)(R12*1), X4
 | |
| 	MOVOU -16(DI)(R12*1), X5
 | |
| 	MOVOA X4, -32(CX)(R12*1)
 | |
| 	MOVOA X5, -16(CX)(R12*1)
 | |
| 	ADDQ  $0x20, R12
 | |
| 	CMPQ  R9, R12
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  R8, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
 | |
| match_nolit_loop_encodeSnappyBlockAsm8B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP)
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), DI
 | |
| 	SUBL DX, DI
 | |
| 	LEAQ (BX)(DX*1), R8
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R10, R10
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm8B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
 | |
| 	XORQ 8(SI)(R10*1), R11
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL 8(R10)(R11*1), R10
 | |
| 	JMP  match_nolit_end_encodeSnappyBlockAsm8B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeSnappyBlockAsm8B:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_encodeSnappyBlockAsm8B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeSnappyBlockAsm8B:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeSnappyBlockAsm8B:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
 | |
| 	JB   match_nolit_end_encodeSnappyBlockAsm8B
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_encodeSnappyBlockAsm8B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeSnappyBlockAsm8B:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_encodeSnappyBlockAsm8B
 | |
| 	LEAL 1(R10), R10
 | |
| 
 | |
| match_nolit_end_encodeSnappyBlockAsm8B:
 | |
| 	ADDL R10, DX
 | |
| 	MOVL 16(SP), SI
 | |
| 	ADDL $0x04, R10
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm8B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
 | |
| 	MOVL R10, DI
 | |
| 	SHLL $0x02, DI
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
 | |
| 	LEAL -15(DI), DI
 | |
| 	MOVB SI, 1(CX)
 | |
| 	SHRL $0x08, SI
 | |
| 	SHLL $0x05, SI
 | |
| 	ORL  SI, DI
 | |
| 	MOVB DI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
 | |
| 	LEAL -2(DI), DI
 | |
| 	MOVB DI, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeSnappyBlockAsm8B
 | |
| 	MOVQ -2(BX)(DX*1), DI
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm8B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeSnappyBlockAsm8B:
 | |
| 	MOVQ  $0x9e3779b1, R9
 | |
| 	MOVQ  DI, R8
 | |
| 	SHRQ  $0x10, DI
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x20, R8
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x38, R8
 | |
| 	SHLQ  $0x20, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x38, SI
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10
 | |
| 	MOVL  (R10), SI
 | |
| 	MOVL  R9, (AX)(R8*4)
 | |
| 	MOVL  DX, (R10)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   match_nolit_loop_encodeSnappyBlockAsm8B
 | |
| 	INCL  DX
 | |
| 	JMP   search_loop_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_remainder_encodeSnappyBlockAsm8B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeSnappyBlockAsm8B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeSnappyBlockAsm8B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 
 | |
| three_bytes_emit_remainder_encodeSnappyBlockAsm8B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 
 | |
| one_byte_emit_remainder_encodeSnappyBlockAsm8B:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeSnappyBlockAsm8B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeSnappyBetterBlockAsm(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00001200, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeSnappyBetterBlockAsm:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeSnappyBetterBlockAsm
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  $0x00000000, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeSnappyBetterBlockAsm:
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	SHRL $0x07, SI
 | |
| 	CMPL SI, $0x63
 | |
| 	JBE  check_maxskip_ok_encodeSnappyBetterBlockAsm
 | |
| 	LEAL 100(DX), SI
 | |
| 	JMP  check_maxskip_cont_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| check_maxskip_ok_encodeSnappyBetterBlockAsm:
 | |
| 	LEAL 1(DX)(SI*1), SI
 | |
| 
 | |
| check_maxskip_cont_encodeSnappyBetterBlockAsm:
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x00cf1bbcdcbfa563, R9
 | |
| 	MOVQ  $0x9e3779b1, SI
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x32, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  524288(AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	MOVL  DX, 524288(AX)(R11*4)
 | |
| 	MOVQ  (BX)(SI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	CMPQ  R10, DI
 | |
| 	JEQ   candidate_match_encodeSnappyBetterBlockAsm
 | |
| 	CMPQ  R11, DI
 | |
| 	JNE   no_short_found_encodeSnappyBetterBlockAsm
 | |
| 	MOVL  R8, SI
 | |
| 	JMP   candidate_match_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| no_short_found_encodeSnappyBetterBlockAsm:
 | |
| 	CMPL R10, DI
 | |
| 	JEQ  candidate_match_encodeSnappyBetterBlockAsm
 | |
| 	CMPL R11, DI
 | |
| 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| candidateS_match_encodeSnappyBetterBlockAsm:
 | |
| 	SHRQ  $0x08, DI
 | |
| 	MOVQ  DI, R10
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	INCL  DX
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   candidate_match_encodeSnappyBetterBlockAsm
 | |
| 	DECL  DX
 | |
| 	MOVL  R8, SI
 | |
| 
 | |
| candidate_match_encodeSnappyBetterBlockAsm:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| match_extend_back_loop_encodeSnappyBetterBlockAsm:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm
 | |
| 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| match_extend_back_end_encodeSnappyBetterBlockAsm:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 5(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeSnappyBetterBlockAsm:
 | |
| 	MOVL DX, DI
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	MOVQ 8(R9)(R12*1), R13
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	XORQ 8(R10)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	MOVL (R9)(R12*1), R11
 | |
| 	CMPL (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	JB   match_nolit_end_encodeSnappyBetterBlockAsm
 | |
| 	MOVW (R9)(R12*1), R11
 | |
| 	CMPW (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm:
 | |
| 	MOVB (R9)(R12*1), R11
 | |
| 	CMPB (R10)(R12*1), R11
 | |
| 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| match_nolit_end_encodeSnappyBetterBlockAsm:
 | |
| 	MOVL DX, R8
 | |
| 	SUBL SI, R8
 | |
| 
 | |
| 	// Check if repeat
 | |
| 	CMPL R12, $0x01
 | |
| 	JA   match_length_ok_encodeSnappyBetterBlockAsm
 | |
| 	CMPL R8, $0x0000ffff
 | |
| 	JBE  match_length_ok_encodeSnappyBetterBlockAsm
 | |
| 	MOVL 20(SP), DX
 | |
| 	INCL DX
 | |
| 	JMP  search_loop_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| match_length_ok_encodeSnappyBetterBlockAsm:
 | |
| 	MOVL R8, 16(SP)
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm
 | |
| 	CMPL SI, $0x01000000
 | |
| 	JB   four_bytes_match_emit_encodeSnappyBetterBlockAsm
 | |
| 	MOVB $0xfc, (CX)
 | |
| 	MOVL SI, 1(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| four_bytes_match_emit_encodeSnappyBetterBlockAsm:
 | |
| 	MOVL SI, R11
 | |
| 	SHRL $0x10, R11
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	MOVB R11, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| three_bytes_match_emit_encodeSnappyBetterBlockAsm:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| two_bytes_match_emit_encodeSnappyBetterBlockAsm:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| one_byte_match_emit_encodeSnappyBetterBlockAsm:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeSnappyBetterBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| memmove_long_match_emit_encodeSnappyBetterBlockAsm:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeSnappyBetterBlockAsm:
 | |
| 	ADDL R12, DX
 | |
| 	ADDL $0x04, R12
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL R8, $0x00010000
 | |
| 	JB   two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm:
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	MOVB $0xff, (CX)
 | |
| 	MOVL R8, 1(CX)
 | |
| 	LEAL -64(R12), R12
 | |
| 	ADDQ $0x05, CX
 | |
| 	CMPL R12, $0x04
 | |
| 	JB   four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm:
 | |
| 	TESTL R12, R12
 | |
| 	JZ    match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
 | |
| 	XORL  SI, SI
 | |
| 	LEAL  -1(SI)(R12*4), R12
 | |
| 	MOVB  R12, (CX)
 | |
| 	MOVL  R8, 1(CX)
 | |
| 	ADDQ  $0x05, CX
 | |
| 	JMP   match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm:
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	LEAL -60(R12), R12
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm:
 | |
| 	MOVL R12, SI
 | |
| 	SHLL $0x02, SI
 | |
| 	CMPL R12, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
 | |
| 	LEAL -15(SI), SI
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm:
 | |
| 	LEAL -2(SI), SI
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeSnappyBetterBlockAsm:
 | |
| 	MOVQ  $0x00cf1bbcdcbfa563, SI
 | |
| 	MOVQ  $0x9e3779b1, R8
 | |
| 	LEAQ  1(DI), DI
 | |
| 	LEAQ  -2(DX), R9
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  1(BX)(DI*1), R11
 | |
| 	MOVQ  (BX)(R9*1), R12
 | |
| 	MOVQ  1(BX)(R9*1), R13
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R8, R11
 | |
| 	SHRQ  $0x32, R11
 | |
| 	SHLQ  $0x08, R12
 | |
| 	IMULQ SI, R12
 | |
| 	SHRQ  $0x2f, R12
 | |
| 	SHLQ  $0x20, R13
 | |
| 	IMULQ R8, R13
 | |
| 	SHRQ  $0x32, R13
 | |
| 	LEAQ  1(DI), R8
 | |
| 	LEAQ  1(R9), R14
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R9, (AX)(R12*4)
 | |
| 	MOVL  R8, 524288(AX)(R11*4)
 | |
| 	MOVL  R14, 524288(AX)(R13*4)
 | |
| 	LEAQ  1(R9)(DI*1), R8
 | |
| 	SHRQ  $0x01, R8
 | |
| 	ADDQ  $0x01, DI
 | |
| 	SUBQ  $0x01, R9
 | |
| 
 | |
| index_loop_encodeSnappyBetterBlockAsm:
 | |
| 	CMPQ  R8, R9
 | |
| 	JAE   search_loop_encodeSnappyBetterBlockAsm
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x2f, R10
 | |
| 	SHLQ  $0x08, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x2f, R11
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R8, (AX)(R11*4)
 | |
| 	ADDQ  $0x02, DI
 | |
| 	ADDQ  $0x02, R8
 | |
| 	JMP   index_loop_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| emit_remainder_encodeSnappyBetterBlockAsm:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 5(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeSnappyBetterBlockAsm:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 	CMPL DX, $0x00010000
 | |
| 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 	CMPL DX, $0x01000000
 | |
| 	JB   four_bytes_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 	MOVB $0xfc, (CX)
 | |
| 	MOVL DX, 1(CX)
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| four_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
 | |
| 	MOVL DX, BX
 | |
| 	SHRL $0x10, BX
 | |
| 	MOVB $0xf8, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	MOVB BL, 3(CX)
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| three_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| two_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| one_byte_emit_remainder_encodeSnappyBetterBlockAsm:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeSnappyBetterBlockAsm:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
 | |
| 
 | |
| memmove_long_emit_remainder_encodeSnappyBetterBlockAsm:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int
 | |
| // Requires: BMI, SSE2. Encodes src into dst using two position tables kept in tmp; stores the number of bytes written in ret, or 0 as soon as the output cursor would reach the limit saved at (SP).
 | |
| TEXT ·encodeSnappyBetterBlockAsm64K(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX // AX = tmp: long table at offset 0, short table at offset 262144
 | |
| 	MOVQ dst_base+0(FP), CX // CX = output write cursor, starts at dst
 | |
| 	MOVQ $0x00000900, DX // 0x900 iterations * 128 bytes = 294912 bytes, i.e. all of tmp
 | |
| 	MOVQ AX, BX // BX = zeroing cursor
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeSnappyBetterBlockAsm64K: // clear tmp, 128 bytes per iteration
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeSnappyBetterBlockAsm64K
 | |
| 	MOVL  $0x00000000, 12(SP) // 12(SP) = src index of the first byte not yet emitted
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP) // 8(SP) = src_len - 8: search stops at this position
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP) // (SP) = dst + src_len - 9 - src_len/32: output limit
 | |
| 	MOVL  $0x00000001, DX // DX = current src position, starts at 1
 | |
| 	MOVL  $0x00000000, 16(SP) // 16(SP) = offset of the most recent match (written below, not read in this function)
 | |
| 	MOVQ  src_base+24(FP), BX // BX = src base pointer from here on
 | |
| 
 | |
| search_loop_encodeSnappyBetterBlockAsm64K: // probe both tables at position DX
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI // SI = distance from the first unemitted byte
 | |
| 	SHRL  $0x07, SI
 | |
| 	LEAL  1(DX)(SI*1), SI // SI = DX + 1 + distance/128: next position to probe
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeSnappyBetterBlockAsm64K // next position reaches the search limit
 | |
| 	MOVQ  (BX)(DX*1), DI // DI = 8 source bytes at the current position
 | |
| 	MOVL  SI, 20(SP) // 20(SP) = next position, reloaded when nothing matches
 | |
| 	MOVQ  $0x00cf1bbcdcbfa563, R9 // multiplier for the long-table hash
 | |
| 	MOVQ  $0x9e3779b1, SI // multiplier for the short-table hash
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHLQ  $0x08, R10 // drop the top byte: the long hash covers the low 7 bytes
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x30, R10 // keep the top 16 bits as the long-table index
 | |
| 	SHLQ  $0x20, R11 // keep only the low 4 bytes for the short hash
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x33, R11 // keep the top 13 bits as the short-table index
 | |
| 	MOVL  (AX)(R10*4), SI // SI = long-table candidate position
 | |
| 	MOVL  262144(AX)(R11*4), R8 // R8 = short-table candidate position
 | |
| 	MOVL  DX, (AX)(R10*4) // record the current position in both tables
 | |
| 	MOVL  DX, 262144(AX)(R11*4)
 | |
| 	MOVQ  (BX)(SI*1), R10 // R10 = 8 bytes at the long candidate
 | |
| 	MOVQ  (BX)(R8*1), R11 // R11 = 8 bytes at the short candidate
 | |
| 	CMPQ  R10, DI
 | |
| 	JEQ   candidate_match_encodeSnappyBetterBlockAsm64K // long candidate matches all 8 bytes
 | |
| 	CMPQ  R11, DI
 | |
| 	JNE   no_short_found_encodeSnappyBetterBlockAsm64K
 | |
| 	MOVL  R8, SI // short candidate matches all 8 bytes: use it
 | |
| 	JMP   candidate_match_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| no_short_found_encodeSnappyBetterBlockAsm64K: // no 8-byte match: retry comparing only 4 bytes
 | |
| 	CMPL R10, DI
 | |
| 	JEQ  candidate_match_encodeSnappyBetterBlockAsm64K
 | |
| 	CMPL R11, DI
 | |
| 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm64K
 | |
| 	MOVL 20(SP), DX // nothing matched: advance to the precomputed next position
 | |
| 	JMP  search_loop_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| candidateS_match_encodeSnappyBetterBlockAsm64K: // short candidate matched 4 bytes; first try the long table at DX+1
 | |
| 	SHRQ  $0x08, DI // DI = source bytes starting at DX+1
 | |
| 	MOVQ  DI, R10
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x30, R10
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	INCL  DX
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   candidate_match_encodeSnappyBetterBlockAsm64K // long candidate at DX+1 matches 4 bytes
 | |
| 	DECL  DX // otherwise restore DX and fall back to the short candidate
 | |
| 	MOVL  R8, SI
 | |
| 
 | |
| candidate_match_encodeSnappyBetterBlockAsm64K: // DX = match position, SI = candidate position
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm64K // candidate at index 0 cannot be extended backwards
 | |
| 
 | |
| match_extend_back_loop_encodeSnappyBetterBlockAsm64K: // grow the match backwards while the preceding bytes agree
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm64K // do not go back past the first unemitted byte
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm64K
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm64K
 | |
| 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| match_extend_back_end_encodeSnappyBetterBlockAsm64K:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI // DI = number of pending literal bytes
 | |
| 	LEAQ 3(CX)(DI*1), DI // output cursor + 3 + pending literal bytes
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm64K
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would reach the output limit: return 0
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeSnappyBetterBlockAsm64K:
 | |
| 	MOVL DX, DI // DI = match start position
 | |
| 	ADDL $0x04, DX // step both positions past the first 4 bytes
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8 // R8 = bytes left in src after DX
 | |
| 	LEAQ (BX)(DX*1), R9 // R9 = &src[DX]
 | |
| 	LEAQ (BX)(SI*1), R10 // R10 = &src[SI]
 | |
| 
 | |
| 	// matchLen: R12 = count of equal bytes at R9 and R10, comparing at most R8 bytes
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K: // compare 16 bytes per iteration
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	MOVQ 8(R9)(R12*1), R13
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K // difference within the first 8 bytes
 | |
| 	XORQ 8(R10)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K // difference within the second 8 bytes
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K: // R13 = XOR of the second 8 bytes, nonzero
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13 // lowest differing bit index / 8 = equal bytes in this word
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K: // fewer than 16 bytes left
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K: // R11 = XOR of an 8-byte word, nonzero
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11 // lowest differing bit index / 8 = equal bytes in this word
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: // fewer than 8 bytes left
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
 | |
| 	MOVL (R9)(R12*1), R11
 | |
| 	CMPL (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K // exactly one byte left
 | |
| 	JB   match_nolit_end_encodeSnappyBetterBlockAsm64K // nothing left
 | |
| 	MOVW (R9)(R12*1), R11
 | |
| 	CMPW (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K:
 | |
| 	MOVB (R9)(R12*1), R11
 | |
| 	CMPB (R10)(R12*1), R11
 | |
| 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm64K
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| match_nolit_end_encodeSnappyBetterBlockAsm64K: // R12 = bytes matched beyond the first 4
 | |
| 	MOVL DX, R8
 | |
| 	SUBL SI, R8 // R8 = match offset (distance back to the candidate)
 | |
| 
 | |
| 	// Check if repeat
 | |
| 	MOVL R8, 16(SP)
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K // no literal bytes pending before the match
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10 // R10 = address of the first pending literal byte
 | |
| 	SUBL SI, R9 // R9 = literal length
 | |
| 	LEAL -1(R9), SI // SI = literal length - 1, the value stored in the header
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm64K
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm64K
 | |
| 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm64K // never taken (flags unchanged since the JB above); execution falls through to the same label
 | |
| 
 | |
| three_bytes_match_emit_encodeSnappyBetterBlockAsm64K: // header: 0xf4, then length-1 as 16 bits
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| two_bytes_match_emit_encodeSnappyBetterBlockAsm64K: // header: 0xf0, then length-1 as one byte
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm64K // length-1 below 64: short copy path
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| one_byte_match_emit_encodeSnappyBetterBlockAsm64K: // header: single byte (length-1) << 2
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeSnappyBetterBlockAsm64K: // copy R9 literal bytes from R10 to CX
 | |
| 	LEAQ (CX)(R9*1), SI // SI = output cursor after the literal
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8: // always moves 8 bytes, even when R9 < 8; CX is set from SI afterwards
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: // first 8 and last 8 bytes, possibly overlapping
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: // first 16 and last 16 bytes, possibly overlapping
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: // first 32 and last 32 bytes, possibly overlapping
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K:
 | |
| 	MOVQ SI, CX // advance the output cursor past the literal
 | |
| 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| memmove_long_match_emit_encodeSnappyBetterBlockAsm64K: // copy R9 literal bytes from R10 to CX, long path
 | |
| 	LEAQ (CX)(R9*1), SI // SI = output cursor after the literal
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0 // X0..X3 = first 32 and last 32 source bytes, stored after the loop
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13 // R13 = length / 32
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11 // R11 = destination address modulo 32
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14 // R14 = 64 - misalignment, so CX + R14 - 32 is 32-byte aligned
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 // taken when R13 is nonzero after the decrement (CF is clear from the SUBQ; DECQ leaves CF alone)
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: // 32 bytes per iteration, aligned stores
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX) // write the saved head and tail with unaligned stores
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX // advance the output cursor past the literal
 | |
| 
 | |
| emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K:
 | |
| 	ADDL R12, DX // DX = position just after the match
 | |
| 	ADDL $0x04, R12 // R12 = total match length, including the first 4 bytes
 | |
| 	MOVL DX, 12(SP) // everything before DX is now accounted for
 | |
| 
 | |
| 	// emitCopy: R12 = length, R8 = offset
 | |
| two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K: // while length > 64, emit a 3-byte chunk and subtract 60
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	LEAL -60(R12), R12
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K: // length <= 64
 | |
| 	MOVL R12, SI
 | |
| 	SHLL $0x02, SI // SI = length << 2
 | |
| 	CMPL R12, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K // length >= 12: 3-byte form
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K // offset >= 2048: 3-byte form
 | |
| 	LEAL -15(SI), SI // (length << 2) - 15 = ((length - 4) << 2) | 1
 | |
| 	MOVB R8, 1(CX) // second byte = low 8 bits of the offset
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8 // offset bits above the low 8 move to bit 5 and up of the first byte
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K:
 | |
| 	LEAL -2(SI), SI // (length << 2) - 2 = ((length - 1) << 2) | 2
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVW R8, 1(CX) // 16-bit offset
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeSnappyBetterBlockAsm64K // reached the search limit
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K
 | |
| 	MOVQ $0x00000000, ret+56(FP) // output cursor reached the limit: return 0
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: // record positions inside the match in the tables (DI = match start, DX = match end)
 | |
| 	MOVQ  $0x00cf1bbcdcbfa563, SI // SI = long-hash multiplier
 | |
| 	MOVQ  $0x9e3779b1, R8 // R8 = short-hash multiplier
 | |
| 	LEAQ  1(DI), DI // DI = match start + 1
 | |
| 	LEAQ  -2(DX), R9 // R9 = match end - 2
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  1(BX)(DI*1), R11
 | |
| 	MOVQ  (BX)(R9*1), R12
 | |
| 	MOVQ  1(BX)(R9*1), R13
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x30, R10 // R10 = long index for position DI
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R8, R11
 | |
| 	SHRQ  $0x33, R11 // R11 = short index for position DI+1
 | |
| 	SHLQ  $0x08, R12
 | |
| 	IMULQ SI, R12
 | |
| 	SHRQ  $0x30, R12 // R12 = long index for position R9
 | |
| 	SHLQ  $0x20, R13
 | |
| 	IMULQ R8, R13
 | |
| 	SHRQ  $0x33, R13 // R13 = short index for position R9+1
 | |
| 	LEAQ  1(DI), R8
 | |
| 	LEAQ  1(R9), R14
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R9, (AX)(R12*4)
 | |
| 	MOVL  R8, 262144(AX)(R11*4)
 | |
| 	MOVL  R14, 262144(AX)(R13*4)
 | |
| 	LEAQ  1(R9)(DI*1), R8
 | |
| 	SHRQ  $0x01, R8 // R8 = (DI + R9 + 1) / 2, the midpoint
 | |
| 	ADDQ  $0x01, DI
 | |
| 	SUBQ  $0x01, R9
 | |
| 
 | |
| index_loop_encodeSnappyBetterBlockAsm64K: // add long-table entries at DI and R8, stepping both by 2, until R8 >= R9
 | |
| 	CMPQ  R8, R9
 | |
| 	JAE   search_loop_encodeSnappyBetterBlockAsm64K
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	SHLQ  $0x08, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x30, R10
 | |
| 	SHLQ  $0x08, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x30, R11
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R8, (AX)(R11*4)
 | |
| 	ADDQ  $0x02, DI
 | |
| 	ADDQ  $0x02, R8
 | |
| 	JMP   index_loop_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| emit_remainder_encodeSnappyBetterBlockAsm64K: // emit src[12(SP):src_len] as one final literal
 | |
| 	MOVQ src_len+32(FP), AX // AX is reused from here on; the tables are no longer accessed
 | |
| 	SUBL 12(SP), AX // AX = remaining literal length
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm64K
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would reach the output limit: return 0
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K // nothing left to emit
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX // AX = address of the first remaining byte
 | |
| 	SUBL DX, SI // SI = literal length
 | |
| 	LEAL -1(SI), DX // DX = literal length - 1, the value stored in the header
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
 | |
| 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K // never taken (flags unchanged since the JB above); execution falls through to the same label
 | |
| 
 | |
| three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: // header: 0xf4, then length-1 as 16 bits
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: // header: 0xf0, then length-1 as one byte
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm64K // length-1 below 64: short copy path
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K: // header: single byte (length-1) << 2
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: // copy SI bytes from AX to CX, exact-size short path
 | |
| 	LEAQ (CX)(SI*1), DX // DX = output cursor after the literal
 | |
| 	MOVL SI, BX // BX = length; the src base pointer is no longer needed
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2: // first byte and last byte (the same byte when length is 1)
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3: // one 2-byte move plus one 1-byte move
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7: // first 4 and last 4 bytes, possibly overlapping
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: // first 8 and last 8 bytes, possibly overlapping
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: // first 16 and last 16 bytes, possibly overlapping
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: // first 32 and last 32 bytes, possibly overlapping
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K:
 | |
| 	MOVQ DX, CX // advance the output cursor past the literal
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
 | |
| 
 | |
| memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K: // copy SI bytes from AX to CX, long path
 | |
| 	LEAQ (CX)(SI*1), DX // DX = output cursor after the literal
 | |
| 	MOVL SI, BX // BX = length
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0 // X0..X3 = first 32 and last 32 source bytes, stored after the loop
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI // DI = length / 32
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI // SI = destination address modulo 32
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8 // R8 = 64 - misalignment, so CX + R8 - 32 is 32-byte aligned
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 // taken when DI is nonzero after the decrement (CF is clear from the SUBQ; DECQ leaves CF alone)
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: // 32 bytes per iteration, aligned stores
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX) // write the saved head and tail with unaligned stores
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX // advance the output cursor past the literal
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX // CX = number of bytes written to dst
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeSnappyBetterBlockAsm12B(SB), $24-64
 | |
| 	MOVQ tmp+48(FP), AX
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	MOVQ $0x00000280, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  $0x00000000, 16(SP)
 | |
| 	MOVQ  src_base+24(FP), BX
 | |
| 
 | |
| search_loop_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x06, SI
 | |
| 	LEAL  1(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9
 | |
| 	MOVQ  $0x9e3779b1, SI
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x34, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  65536(AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	MOVL  DX, 65536(AX)(R11*4)
 | |
| 	MOVQ  (BX)(SI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	CMPQ  R10, DI
 | |
| 	JEQ   candidate_match_encodeSnappyBetterBlockAsm12B
 | |
| 	CMPQ  R11, DI
 | |
| 	JNE   no_short_found_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVL  R8, SI
 | |
| 	JMP   candidate_match_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| no_short_found_encodeSnappyBetterBlockAsm12B:
 | |
| 	CMPL R10, DI
 | |
| 	JEQ  candidate_match_encodeSnappyBetterBlockAsm12B
 | |
| 	CMPL R11, DI
 | |
| 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| candidateS_match_encodeSnappyBetterBlockAsm12B:
 | |
| 	SHRQ  $0x08, DI
 | |
| 	MOVQ  DI, R10
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	INCL  DX
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   candidate_match_encodeSnappyBetterBlockAsm12B
 | |
| 	DECL  DX
 | |
| 	MOVL  R8, SI
 | |
| 
 | |
| candidate_match_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| match_extend_back_loop_encodeSnappyBetterBlockAsm12B:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm12B
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm12B
 | |
| 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| match_extend_back_end_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 3(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVL DX, DI
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	MOVQ 8(R9)(R12*1), R13
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	XORQ 8(R10)(R12*1), R13
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13
 | |
| 	LEAL 8(R12)(R13*1), R12
 | |
| 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVL (R9)(R12*1), R11
 | |
| 	CMPL (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	JB   match_nolit_end_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVW (R9)(R12*1), R11
 | |
| 	CMPW (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVB (R9)(R12*1), R11
 | |
| 	CMPB (R10)(R12*1), R11
 | |
| 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm12B
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| match_nolit_end_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVL DX, R8
 | |
| 	SUBL SI, R8
 | |
| 
 | |
| 	// Check if repeat
 | |
| 	MOVL R8, 16(SP)
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10
 | |
| 	SUBL SI, R9
 | |
| 	LEAL -1(R9), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm12B
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm12B
 | |
| 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| three_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| two_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm12B
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| one_byte_match_emit_encodeSnappyBetterBlockAsm12B:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeSnappyBetterBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVQ SI, CX
 | |
| 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| memmove_long_match_emit_encodeSnappyBetterBlockAsm12B:
 | |
| 	LEAQ (CX)(R9*1), SI
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX
 | |
| 
 | |
| emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B:
 | |
| 	ADDL R12, DX
 | |
| 	ADDL $0x04, R12
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B:
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVB $0xee, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	LEAL -60(R12), R12
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVL R12, SI
 | |
| 	SHLL $0x02, SI
 | |
| 	CMPL R12, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
 | |
| 	LEAL -15(SI), SI
 | |
| 	MOVB R8, 1(CX)
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B:
 | |
| 	LEAL -2(SI), SI
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, SI
 | |
| 	MOVQ  $0x9e3779b1, R8
 | |
| 	LEAQ  1(DI), DI
 | |
| 	LEAQ  -2(DX), R9
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  1(BX)(DI*1), R11
 | |
| 	MOVQ  (BX)(R9*1), R12
 | |
| 	MOVQ  1(BX)(R9*1), R13
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R8, R11
 | |
| 	SHRQ  $0x34, R11
 | |
| 	SHLQ  $0x10, R12
 | |
| 	IMULQ SI, R12
 | |
| 	SHRQ  $0x32, R12
 | |
| 	SHLQ  $0x20, R13
 | |
| 	IMULQ R8, R13
 | |
| 	SHRQ  $0x34, R13
 | |
| 	LEAQ  1(DI), R8
 | |
| 	LEAQ  1(R9), R14
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R9, (AX)(R12*4)
 | |
| 	MOVL  R8, 65536(AX)(R11*4)
 | |
| 	MOVL  R14, 65536(AX)(R13*4)
 | |
| 	LEAQ  1(R9)(DI*1), R8
 | |
| 	SHRQ  $0x01, R8
 | |
| 	ADDQ  $0x01, DI
 | |
| 	SUBQ  $0x01, R9
 | |
| 
 | |
| index_loop_encodeSnappyBetterBlockAsm12B:
 | |
| 	CMPQ  R8, R9
 | |
| 	JAE   search_loop_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x32, R10
 | |
| 	SHLQ  $0x10, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x32, R11
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R8, (AX)(R11*4)
 | |
| 	ADDQ  $0x02, DI
 | |
| 	ADDQ  $0x02, R8
 | |
| 	JMP   index_loop_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| emit_remainder_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVQ $0x00000000, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), DX
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B:
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeSnappyBetterBlockAsm12B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2:
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3:
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7:
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVQ DX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B:
 | |
| 	LEAQ (CX)(SI*1), DX
 | |
| 	MOVL SI, BX
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
 | |
| // Requires: BMI, SSE2. Encodes src into dst using tmp as hash-table scratch; the result is the byte count written to dst, or 0 when the output limit is reached.
 | |
| TEXT ·encodeSnappyBetterBlockAsm10B(SB), $24-64 // frame: (SP) output limit, 8(SP) search limit, 12(SP) next literal start, 16(SP) last match offset, 20(SP) next search offset
 | |
| 	MOVQ tmp+48(FP), AX // AX = tmp, base address of the two hash tables
 | |
| 	MOVQ dst_base+0(FP), CX // CX = output write pointer, starts at dst
 | |
| 	MOVQ $0x000000a0, DX // 160 iterations x 128 bytes = 20480 bytes, i.e. all of tmp
 | |
| 	MOVQ AX, BX // BX = zeroing cursor
 | |
| 	PXOR X0, X0 // X0 = 16 zero bytes
 | |
| 
 | |
| zero_loop_encodeSnappyBetterBlockAsm10B: // clear tmp, 128 bytes per iteration
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeSnappyBetterBlockAsm10B
 | |
| 	MOVL  $0x00000000, 12(SP) // 12(SP) = src offset of the first byte not yet emitted
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP) // 8(SP) = src_len - 8: searching stops at this src offset
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX // BX = src_len - 9 - src_len/32
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP) // (SP) = dst + BX: output limit checked before each emit
 | |
| 	MOVL  $0x00000001, DX // DX = current src offset, starts at 1
 | |
| 	MOVL  $0x00000000, 16(SP) // 16(SP) = last match offset, initially 0
 | |
| 	MOVQ  src_base+24(FP), BX // BX = src base address for the main loop
 | |
| 
 | |
| search_loop_encodeSnappyBetterBlockAsm10B: // probe both hash tables for a match at src offset DX
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x05, SI // skip = (DX - next literal start) / 32
 | |
| 	LEAL  1(DX)(SI*1), SI // SI = next offset to try = DX + 1 + skip
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeSnappyBetterBlockAsm10B // at or past the search limit: emit the tail as literals
 | |
| 	MOVQ  (BX)(DX*1), DI // DI = 8 src bytes at DX
 | |
| 	MOVL  SI, 20(SP) // keep the next offset for the no-match path
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9 // multiplier for the 6-byte hash
 | |
| 	MOVQ  $0x9e3779b1, SI // multiplier for the 4-byte hash
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHLQ  $0x10, R10 // shift out the top 2 bytes so the hash covers the low 6 bytes
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x34, R10 // keep the top 12 bits: index into the table at (AX)
 | |
| 	SHLQ  $0x20, R11 // shift out the top 4 bytes so the hash covers the low 4 bytes
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x36, R11 // keep the top 10 bits: index into the table at 16384(AX)
 | |
| 	MOVL  (AX)(R10*4), SI // SI = candidate offset from the 6-byte-hash table
 | |
| 	MOVL  16384(AX)(R11*4), R8 // R8 = candidate offset from the 4-byte-hash table
 | |
| 	MOVL  DX, (AX)(R10*4) // record the current offset in both tables
 | |
| 	MOVL  DX, 16384(AX)(R11*4)
 | |
| 	MOVQ  (BX)(SI*1), R10 // R10, R11 = 8 src bytes at each candidate
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	CMPQ  R10, DI
 | |
| 	JEQ   candidate_match_encodeSnappyBetterBlockAsm10B // all 8 bytes equal at the first candidate
 | |
| 	CMPQ  R11, DI
 | |
| 	JNE   no_short_found_encodeSnappyBetterBlockAsm10B
 | |
| 	MOVL  R8, SI // all 8 bytes equal at the second candidate: use it
 | |
| 	JMP   candidate_match_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| no_short_found_encodeSnappyBetterBlockAsm10B: // no 8-byte match; compare only the low 4 bytes
 | |
| 	CMPL R10, DI
 | |
| 	JEQ  candidate_match_encodeSnappyBetterBlockAsm10B
 | |
| 	CMPL R11, DI
 | |
| 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm10B
 | |
| 	MOVL 20(SP), DX // no match: move on to the saved next offset
 | |
| 	JMP  search_loop_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| candidateS_match_encodeSnappyBetterBlockAsm10B: // 4-byte match via the second table; first try offset DX+1 in the first table
 | |
| 	SHRQ  $0x08, DI // DI = the loaded src bytes starting at DX+1
 | |
| 	MOVQ  DI, R10
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x34, R10 // 12-bit hash of the bytes at DX+1
 | |
| 	MOVL  (AX)(R10*4), SI // SI = first-table candidate for DX+1
 | |
| 	INCL  DX
 | |
| 	MOVL  DX, (AX)(R10*4) // record DX+1 in the first table
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   candidate_match_encodeSnappyBetterBlockAsm10B // 4 bytes match at DX+1: use that candidate
 | |
| 	DECL  DX // otherwise go back to DX with the second-table candidate
 | |
| 	MOVL  R8, SI
 | |
| 
 | |
| candidate_match_encodeSnappyBetterBlockAsm10B: // DX = match position, SI = candidate position
 | |
| 	MOVL  12(SP), DI // DI = next literal start, the lower bound for DX
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm10B // candidate at src offset 0 cannot move backwards
 | |
| 
 | |
| match_extend_back_loop_encodeSnappyBetterBlockAsm10B: // extend the match backwards while the preceding bytes are equal
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm10B // reached the start of the pending literals
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm10B
 | |
| 	LEAL -1(DX), DX // LEAL keeps the flags untouched; DECL below sets ZF for the JZ
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm10B // candidate reached src offset 0
 | |
| 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| match_extend_back_end_encodeSnappyBetterBlockAsm10B: // make sure the pending literals fit below the output limit
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI // DI = number of pending literal bytes
 | |
| 	LEAQ 3(CX)(DI*1), DI // output pointer after those bytes plus 3 more
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would reach the output limit: return 0
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeSnappyBetterBlockAsm10B: // set up the forward match-length measurement
 | |
| 	MOVL DX, DI // DI = start of the match
 | |
| 	ADDL $0x04, DX // step past the first 4 bytes at both positions
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8 // R8 = src bytes remaining from DX
 | |
| 	LEAQ (BX)(DX*1), R9 // R9 = &src[DX]
 | |
| 	LEAQ (BX)(SI*1), R10 // R10 = &src[SI]
 | |
| 
 | |
| 	// matchLen: R12 = number of equal bytes at (R9) and (R10), examining at most R8 bytes
 | |
| 	XORL R12, R12
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B: // compare 16 bytes per iteration
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	MOVQ 8(R9)(R12*1), R13
 | |
| 	XORQ (R10)(R12*1), R11 // nonzero result = a difference in the first 8 bytes
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
 | |
| 	XORQ 8(R10)(R12*1), R13 // nonzero result = a difference in the second 8 bytes
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B: // difference lies in the second 8 bytes
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13 // bit index / 8 = count of equal low-order bytes
 | |
| 	LEAL 8(R12)(R13*1), R12 // add the 8 equal bytes of the first word
 | |
| 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B: // fewer than 16 bytes left: try one 8-byte compare
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B: // difference lies in the 8 bytes held in R11
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11 // bit index / 8 = count of equal low-order bytes
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: // fewer than 8 bytes left: try one 4-byte compare
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
 | |
| 	MOVL (R9)(R12*1), R11
 | |
| 	CMPL (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B: // then a 2-byte compare when at least 2 bytes remain
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B // exactly 1 byte left
 | |
| 	JB   match_nolit_end_encodeSnappyBetterBlockAsm10B // 0 bytes left
 | |
| 	MOVW (R9)(R12*1), R11
 | |
| 	CMPW (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B: // final single-byte compare
 | |
| 	MOVB (R9)(R12*1), R11
 | |
| 	CMPB (R10)(R12*1), R11
 | |
| 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm10B
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| match_nolit_end_encodeSnappyBetterBlockAsm10B: // match measured: emit the literals that precede it
 | |
| 	MOVL DX, R8
 | |
| 	SUBL SI, R8 // R8 = match offset (distance from DX back to the candidate)
 | |
| 
 | |
| 	// Check if repeat
 | |
| 	MOVL R8, 16(SP) // save the offset
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B // no literal bytes before the match
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10 // R10 = address of the first pending literal byte
 | |
| 	SUBL SI, R9 // R9 = literal length
 | |
| 	LEAL -1(R9), SI // SI = length - 1, the value written into the tag
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm10B // length-1 below 60 fits in the tag byte itself
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm10B // length-1 below 256 fits in one extra byte
 | |
| 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm10B // never taken (same flags as the JB above); execution falls through to the same label
 | |
| 
 | |
| three_bytes_match_emit_encodeSnappyBetterBlockAsm10B: // tag 0xf4 followed by a 16-bit length-1
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| two_bytes_match_emit_encodeSnappyBetterBlockAsm10B: // tag 0xf0 followed by an 8-bit length-1
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm10B // length-1 below 64: use the short copy
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| one_byte_match_emit_encodeSnappyBetterBlockAsm10B: // single tag byte = (length-1) << 2; falls through to the short copy
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeSnappyBetterBlockAsm10B: // short literal copy before a match
 | |
| 	LEAQ (CX)(R9*1), SI // SI = output pointer after the R9 literal bytes
 | |
| 
 | |
| 	// genMemMoveShort: copy R9 bytes from (R10) to (CX), dispatching on the length
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8: // length <= 8: always moves a full 8 bytes; CX still advances by R9 only
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: // first 8 and last 8 bytes; the two moves may overlap
 | |
| 	MOVQ (R10), R11
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: // first 16 and last 16 bytes; the two moves may overlap
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: // first 32 and last 32 bytes; the two halves may overlap
 | |
| 	MOVOU (R10), X0
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B:
 | |
| 	MOVQ SI, CX // advance the output pointer past the literals
 | |
| 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| memmove_long_match_emit_encodeSnappyBetterBlockAsm10B: // long literal copy before a match
 | |
| 	LEAQ (CX)(R9*1), SI // SI = output pointer after the R9 literal bytes
 | |
| 
 | |
| 	// genMemMoveLong: copy R9 bytes from (R10) to (CX) in 32-byte chunks
 | |
| 	MOVOU (R10), X0 // X0..X3 = first 32 and last 32 source bytes, stored at the very end
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13 // R13 = length / 32
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11 // R11 = CX mod 32
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14 // R14 = 64 - (CX mod 32), so CX + R14 is a multiple of 32
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 // NOTE(review): DECQ leaves CF untouched, so this appears taken whenever R13 is still nonzero - confirm
 | |
| 	LEAQ  -32(R10)(R14*1), R11 // R11 = source cursor
 | |
| 	LEAQ  -32(CX)(R14*1), R15 // R15 = destination cursor, a multiple of 32
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: // copy 32 bytes from (R11) to (R15) and advance both cursors and R14
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back // NOTE(review): JNA repeats only while CF or ZF is set - confirm the intended trip count
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: // copy the 32 bytes ending at offset R14, using aligned stores
 | |
| 	MOVOU -32(R10)(R14*1), X4
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1)
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 // continue while R14 <= length
 | |
| 	MOVOU X0, (CX) // store the saved head and tail with unaligned moves
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX // advance the output pointer past the literals
 | |
| 
 | |
| emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B: // literals are out; now encode the match itself
 | |
| 	ADDL R12, DX // DX = src offset just past the match
 | |
| 	ADDL $0x04, R12 // R12 = total match length, including the 4 bytes stepped over earlier
 | |
| 	MOVL DX, 12(SP) // pending literals now start after the match
 | |
| 
 | |
| 	// emitCopy: encode a copy of R12 bytes at offset R8
 | |
| two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B:
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B // 64 bytes or fewer: one last copy element
 | |
| 	MOVB $0xee, (CX) // tag 0xee = (59 << 2) | 2; 60 is subtracted from the length below
 | |
| 	MOVW R8, 1(CX) // 16-bit offset
 | |
| 	LEAL -60(R12), R12
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B: // choose between the 2-byte and 3-byte copy forms
 | |
| 	MOVL R12, SI
 | |
| 	SHLL $0x02, SI // SI = length << 2
 | |
| 	CMPL R12, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B // length >= 12 takes the 3-byte form
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B // offset >= 2048 takes the 3-byte form
 | |
| 	LEAL -15(SI), SI // SI = ((length - 4) << 2) | 1
 | |
| 	MOVB R8, 1(CX) // low 8 bits of the offset
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8 // offset bits 8 and above move to tag bits 5 and above
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B: // 3-byte form: tag plus 16-bit offset
 | |
| 	LEAL -2(SI), SI // SI = ((length - 1) << 2) | 2
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVW R8, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B: // after a copy: stop, fail, or index the match and keep searching
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeSnappyBetterBlockAsm10B // at or past the search limit
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP) // output pointer reached the limit: return 0
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: // add positions near both ends of the match to the tables
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, SI // multiplier for the 6-byte hash
 | |
| 	MOVQ  $0x9e3779b1, R8 // multiplier for the 4-byte hash
 | |
| 	LEAQ  1(DI), DI // DI = match start + 1
 | |
| 	LEAQ  -2(DX), R9 // R9 = match end - 2
 | |
| 	MOVQ  (BX)(DI*1), R10 // src bytes at DI, DI+1, R9 and R9+1
 | |
| 	MOVQ  1(BX)(DI*1), R11
 | |
| 	MOVQ  (BX)(R9*1), R12
 | |
| 	MOVQ  1(BX)(R9*1), R13
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x34, R10 // R10 = 12-bit hash of the bytes at DI
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R8, R11
 | |
| 	SHRQ  $0x36, R11 // R11 = 10-bit hash of the bytes at DI+1
 | |
| 	SHLQ  $0x10, R12
 | |
| 	IMULQ SI, R12
 | |
| 	SHRQ  $0x34, R12 // R12 = 12-bit hash of the bytes at R9
 | |
| 	SHLQ  $0x20, R13
 | |
| 	IMULQ R8, R13
 | |
| 	SHRQ  $0x36, R13 // R13 = 10-bit hash of the bytes at R9+1
 | |
| 	LEAQ  1(DI), R8
 | |
| 	LEAQ  1(R9), R14
 | |
| 	MOVL  DI, (AX)(R10*4) // table at (AX) records DI and R9
 | |
| 	MOVL  R9, (AX)(R12*4)
 | |
| 	MOVL  R8, 16384(AX)(R11*4) // table at 16384(AX) records DI+1 and R9+1
 | |
| 	MOVL  R14, 16384(AX)(R13*4)
 | |
| 	LEAQ  1(R9)(DI*1), R8
 | |
| 	SHRQ  $0x01, R8 // R8 = (DI + R9 + 1) / 2
 | |
| 	ADDQ  $0x01, DI
 | |
| 	SUBQ  $0x01, R9
 | |
| 
 | |
| index_loop_encodeSnappyBetterBlockAsm10B: // record DI and R8 in the table at (AX), stepping both by 2, until R8 reaches R9
 | |
| 	CMPQ  R8, R9
 | |
| 	JAE   search_loop_encodeSnappyBetterBlockAsm10B // done indexing: resume searching at DX
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x34, R10
 | |
| 	SHLQ  $0x10, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x34, R11
 | |
| 	MOVL  DI, (AX)(R10*4)
 | |
| 	MOVL  R8, (AX)(R11*4)
 | |
| 	ADDQ  $0x02, DI
 | |
| 	ADDQ  $0x02, R8
 | |
| 	JMP   index_loop_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| emit_remainder_encodeSnappyBetterBlockAsm10B: // emit everything from 12(SP) to the end of src as one literal run
 | |
| 	MOVQ src_len+32(FP), AX // AX is reused from here on; the hash tables are not accessed again
 | |
| 	SUBL 12(SP), AX // AX = number of bytes still to emit
 | |
| 	LEAQ 3(CX)(AX*1), AX // output pointer after those bytes plus 3 more
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm10B
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would reach the output limit: return 0
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B // nothing left to emit
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX // AX = address of the first remaining src byte
 | |
| 	SUBL DX, SI // SI = remaining length
 | |
| 	LEAL -1(SI), DX // DX = length - 1, the value written into the tag
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B // length-1 below 60 fits in the tag byte itself
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B // length-1 below 256 fits in one extra byte
 | |
| 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B // never taken (same flags as the JB above); execution falls through to the same label
 | |
| 
 | |
| three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: // tag 0xf4 followed by a 16-bit length-1
 | |
| 	MOVB $0xf4, (CX)
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: // tag 0xf0 followed by an 8-bit length-1
 | |
| 	MOVB $0xf0, (CX)
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm10B // length-1 below 64: use the short copy
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B: // single tag byte = (length-1) << 2; falls through to the short copy
 | |
| 	SHLB $0x02, DL
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: // short copy of the final literal run
 | |
| 	LEAQ (CX)(SI*1), DX // DX = output pointer after the copy
 | |
| 	MOVL SI, BX // BX = length (the src base held in BX is dropped here)
 | |
| 
 | |
| 	// genMemMoveShort: copy BX bytes from (AX) to (CX) with moves sized to the length
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2: // first and last byte (the same byte when the length is 1)
 | |
| 	MOVB (AX), SI
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3: // two bytes, then the third
 | |
| 	MOVW (AX), SI
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7: // first 4 and last 4 bytes; the two moves may overlap
 | |
| 	MOVL (AX), SI
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: // first 8 and last 8 bytes; the two moves may overlap
 | |
| 	MOVQ (AX), SI
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: // first 16 and last 16 bytes; the two moves may overlap
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: // first 32 and last 32 bytes; the two halves may overlap
 | |
| 	MOVOU (AX), X0
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B:
 | |
| 	MOVQ DX, CX // advance the output pointer past the copied bytes
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B: // long copy of the final literal run
 | |
| 	LEAQ (CX)(SI*1), DX // DX = output pointer after the copy
 | |
| 	MOVL SI, BX // BX = length
 | |
| 
 | |
| 	// genMemMoveLong: copy BX bytes from (AX) to (CX) in 32-byte chunks
 | |
| 	MOVOU (AX), X0 // X0..X3 = first 32 and last 32 source bytes, stored at the very end
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI // DI = length / 32
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI // SI = CX mod 32
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8 // R8 = 64 - (CX mod 32), so CX + R8 is a multiple of 32
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 // NOTE(review): DECQ leaves CF untouched, so this appears taken whenever DI is still nonzero - confirm
 | |
| 	LEAQ  -32(AX)(R8*1), SI // SI = source cursor
 | |
| 	LEAQ  -32(CX)(R8*1), R9 // R9 = destination cursor, a multiple of 32
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: // copy 32 bytes from (SI) to (R9) and advance both cursors and R8
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back // NOTE(review): JNA repeats only while CF or ZF is set - confirm the intended trip count
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: // copy the 32 bytes ending at offset R8, using aligned stores
 | |
| 	MOVOU -32(AX)(R8*1), X4
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1)
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 // continue while R8 <= length
 | |
| 	MOVOU X0, (CX) // store the saved head and tail with unaligned moves
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX // advance the output pointer past the copied bytes
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B: // return the number of bytes written
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX // CX = output pointer - dst base
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·encodeSnappyBetterBlockAsm8B(SB), $24-64 // returns the number of bytes written to dst, or 0 when the output would reach the dst limit kept in (SP)
 | |
| 	MOVQ tmp+48(FP), AX // AX = tmp: 4096-byte table at +0 and 1024-byte table at +4096 (see the loads below)
 | |
| 	MOVQ dst_base+0(FP), CX // CX = dst write cursor
 | |
| 	MOVQ $0x00000028, DX // 40 iterations x 128 bytes = 5120 bytes to clear
 | |
| 	MOVQ AX, BX // BX = clear cursor
 | |
| 	PXOR X0, X0 // X0 = 16 zero bytes
 | |
| 
 | |
| zero_loop_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVOU X0, (BX) // each iteration stores 8 x 16 zero bytes
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_encodeSnappyBetterBlockAsm8B
 | |
| 	MOVL  $0x00000000, 12(SP) // 12(SP) = src offset where the pending (not yet emitted) literals start
 | |
| 	MOVQ  src_len+32(FP), DX
 | |
| 	LEAQ  -9(DX), BX // BX = src_len - 9
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP) // 8(SP) = src_len - 8: searching stops once the position reaches this
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX // BX = src_len - 9 - src_len/32
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP) // (SP) = dst limit pointer; output reaching it makes the function return 0
 | |
| 	MOVL  $0x00000001, DX // DX = current src position, starting at 1
 | |
| 	MOVL  $0x00000000, 16(SP) // 16(SP) = last match offset (written at match_nolit_end; never read in this function)
 | |
| 	MOVQ  src_base+24(FP), BX // BX = src base pointer
 | |
| 
 | |
| search_loop_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x04, SI
 | |
| 	LEAL  1(DX)(SI*1), SI // SI = DX + 1 + (DX - 12(SP))/16: the position to try next if nothing matches here
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_encodeSnappyBetterBlockAsm8B // next position is at or past the search limit
 | |
| 	MOVQ  (BX)(DX*1), DI // DI = 8 bytes of src at DX
 | |
| 	MOVL  SI, 20(SP) // 20(SP) = saved next position
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9 // R9 = multiplier for the 6-byte hash
 | |
| 	MOVQ  $0x9e3779b1, SI // SI = multiplier for the 4-byte hash
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHLQ  $0x10, R10 // discard the top 2 bytes so only the low 6 bytes are hashed
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x36, R10 // keep the top 10 bits: index 0..1023
 | |
| 	SHLQ  $0x20, R11 // only the low 4 bytes are hashed
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x38, R11 // keep the top 8 bits: index 0..255
 | |
| 	MOVL  (AX)(R10*4), SI // SI = candidate position from the 6-byte-hash table
 | |
| 	MOVL  4096(AX)(R11*4), R8 // R8 = candidate position from the 4-byte-hash table
 | |
| 	MOVL  DX, (AX)(R10*4) // record the current position in both tables
 | |
| 	MOVL  DX, 4096(AX)(R11*4)
 | |
| 	MOVQ  (BX)(SI*1), R10 // R10 = 8 bytes at candidate SI
 | |
| 	MOVQ  (BX)(R8*1), R11 // R11 = 8 bytes at candidate R8
 | |
| 	CMPQ  R10, DI
 | |
| 	JEQ   candidate_match_encodeSnappyBetterBlockAsm8B // all 8 bytes equal at SI
 | |
| 	CMPQ  R11, DI
 | |
| 	JNE   no_short_found_encodeSnappyBetterBlockAsm8B
 | |
| 	MOVL  R8, SI // all 8 bytes equal at R8: use it as the candidate
 | |
| 	JMP   candidate_match_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| no_short_found_encodeSnappyBetterBlockAsm8B:
 | |
| 	CMPL R10, DI // from here on only the low 4 bytes are compared
 | |
| 	JEQ  candidate_match_encodeSnappyBetterBlockAsm8B
 | |
| 	CMPL R11, DI
 | |
| 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm8B
 | |
| 	MOVL 20(SP), DX // no match: advance to the saved next position
 | |
| 	JMP  search_loop_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| candidateS_match_encodeSnappyBetterBlockAsm8B:
 | |
| 	SHRQ  $0x08, DI // DI = src bytes starting at DX+1 (the top byte becomes zero)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x36, R10 // R10 = 6-byte hash for position DX+1
 | |
| 	MOVL  (AX)(R10*4), SI // SI = candidate for DX+1
 | |
| 	INCL  DX
 | |
| 	MOVL  DX, (AX)(R10*4) // table entry now holds DX+1
 | |
| 	CMPL  (BX)(SI*1), DI // 4-byte compare against the DX+1 candidate
 | |
| 	JEQ   candidate_match_encodeSnappyBetterBlockAsm8B
 | |
| 	DECL  DX // no match at DX+1: restore DX and fall back to the 4-byte candidate R8
 | |
| 	MOVL  R8, SI
 | |
| 
 | |
| candidate_match_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVL  12(SP), DI // DI = lower bound for extending the match backwards
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm8B // candidate at offset 0 cannot be extended backwards
 | |
| 
 | |
| match_extend_back_loop_encodeSnappyBetterBlockAsm8B:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm8B // stop once DX <= start of pending literals
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm8B // preceding bytes differ
 | |
| 	LEAL -1(DX), DX // step both positions back by one byte
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm8B // candidate reached offset 0
 | |
| 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| match_extend_back_end_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI // DI = length of the pending literals
 | |
| 	LEAQ 3(CX)(DI*1), DI // DI = CX + literal length + 3
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm8B // still below the dst limit
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would reach the dst limit: return 0
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVL DX, DI // DI = match start, i.e. the end of the pending literals
 | |
| 	ADDL $0x04, DX // skip 4 bytes at both positions before measuring the match length
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+32(FP), R8
 | |
| 	SUBL DX, R8 // R8 = bytes left in src from DX
 | |
| 	LEAQ (BX)(DX*1), R9 // R9 = &src[DX]
 | |
| 	LEAQ (BX)(SI*1), R10 // R10 = &src[SI]
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R12, R12 // R12 = number of matching bytes found so far
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B // fewer than 16 bytes left
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	MOVQ 8(R9)(R12*1), R13
 | |
| 	XORQ (R10)(R12*1), R11 // nonzero result = a difference within the first 8 bytes
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
 | |
| 	XORQ 8(R10)(R12*1), R13 // nonzero result = a difference within the second 8 bytes
 | |
| 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R12), R12
 | |
| 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R13, R13
 | |
| 
 | |
| #else
 | |
| 	BSFQ R13, R13
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R13 // bit index of the first difference -> byte index
 | |
| 	LEAL 8(R12)(R13*1), R12 // add the 8 bytes that matched plus the matching bytes of the second word
 | |
| 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
 | |
| 	MOVQ (R9)(R12*1), R11
 | |
| 	XORQ (R10)(R12*1), R11
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R12), R12
 | |
| 	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11 // bit index of the first difference -> byte index
 | |
| 	LEAL (R12)(R11*1), R12
 | |
| 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
 | |
| 	MOVL (R9)(R12*1), R11
 | |
| 	CMPL (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R12), R12
 | |
| 
 | |
| matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B // exactly one byte left
 | |
| 	JB   match_nolit_end_encodeSnappyBetterBlockAsm8B // nothing left
 | |
| 	MOVW (R9)(R12*1), R11
 | |
| 	CMPW (R10)(R12*1), R11
 | |
| 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
 | |
| 	LEAL 2(R12), R12
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVB (R9)(R12*1), R11
 | |
| 	CMPB (R10)(R12*1), R11
 | |
| 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm8B
 | |
| 	LEAL 1(R12), R12
 | |
| 
 | |
| match_nolit_end_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVL DX, R8
 | |
| 	SUBL SI, R8 // R8 = match offset (DX - SI)
 | |
| 
 | |
| 	// Check if repeat
 | |
| 	MOVL R8, 16(SP) // remember the offset
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B // no pending literals before the match
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R10 // R10 = address of the first pending literal byte
 | |
| 	SUBL SI, R9 // R9 = literal length
 | |
| 	LEAL -1(R9), SI // SI = length - 1, the value encoded in the tag
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm8B // length-1 < 60 fits in the tag byte itself
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm8B // length-1 < 256 fits in one extra byte
 | |
| 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm8B // never taken (same flags as the JB above); control falls through to the label below
 | |
| 
 | |
| three_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVB $0xf4, (CX) // tag 0xf4 followed by a 16-bit length-1
 | |
| 	MOVW SI, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| two_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVB $0xf0, (CX) // tag 0xf0 followed by an 8-bit length-1
 | |
| 	MOVB SI, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm8B // length <= 64: use the short copy
 | |
| 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| one_byte_match_emit_encodeSnappyBetterBlockAsm8B:
 | |
| 	SHLB $0x02, SI // tag = (length-1) << 2
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_encodeSnappyBetterBlockAsm8B:
 | |
| 	LEAQ (CX)(R9*1), SI // SI = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8:
 | |
| 	MOVQ (R10), R11 // always moves 8 bytes, even when R9 < 8; the cursor still advances by R9 only
 | |
| 	MOVQ R11, (CX)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
 | |
| 	MOVQ (R10), R11 // first 8 and last 8 bytes; the two moves may overlap
 | |
| 	MOVQ -8(R10)(R9*1), R10
 | |
| 	MOVQ R11, (CX)
 | |
| 	MOVQ R10, -8(CX)(R9*1)
 | |
| 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
 | |
| 	MOVOU (R10), X0 // first 16 and last 16 bytes; the two moves may overlap
 | |
| 	MOVOU -16(R10)(R9*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(R9*1)
 | |
| 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
 | |
| 	MOVOU (R10), X0 // first 32 and last 32 bytes; the two halves may overlap
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVQ SI, CX // advance the dst cursor past the literals
 | |
| 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| memmove_long_match_emit_encodeSnappyBetterBlockAsm8B:
 | |
| 	LEAQ (CX)(R9*1), SI // SI = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (R10), X0 // X0,X1 = first 32 source bytes; X2,X3 = last 32 source bytes (stored after the loop)
 | |
| 	MOVOU 16(R10), X1
 | |
| 	MOVOU -32(R10)(R9*1), X2
 | |
| 	MOVOU -16(R10)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13 // R13 = length / 32
 | |
| 	MOVQ  CX, R11
 | |
| 	ANDL  $0x0000001f, R11 // R11 = CX mod 32
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R11, R14 // R14 = 64 - (CX mod 32), so CX+R14-32 is a multiple of 32
 | |
| 	DECQ  R13
 | |
| 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(R10)(R14*1), R11
 | |
| 	LEAQ  -32(CX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
 | |
| 	MOVOU (R11), X4
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(R10)(R14*1), X4 // copy 32 bytes per iteration at offset R14-32 from both bases
 | |
| 	MOVOU -16(R10)(R14*1), X5
 | |
| 	MOVOA X4, -32(CX)(R14*1) // aligned store: the destination CX+R14-32 is a multiple of 32
 | |
| 	MOVOA X5, -16(CX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 // continue while R9 >= R14
 | |
| 	MOVOU X0, (CX) // finally store the saved first and last 32 bytes
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(R9*1)
 | |
| 	MOVOU X3, -16(CX)(R9*1)
 | |
| 	MOVQ  SI, CX // advance the dst cursor past the literals
 | |
| 
 | |
| emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B:
 | |
| 	ADDL R12, DX // DX = end of the match
 | |
| 	ADDL $0x04, R12 // R12 = full match length, including the 4 bytes skipped before matchLen
 | |
| 	MOVL DX, 12(SP) // the next literals start right after the match
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B:
 | |
| 	CMPL R12, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B // 64 or fewer bytes left: emit the final copy
 | |
| 	MOVB $0xee, (CX) // 3-byte copy: tag 0xee + 16-bit offset, accounting for 60 bytes of the match
 | |
| 	MOVW R8, 1(CX)
 | |
| 	LEAL -60(R12), R12
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVL R12, SI
 | |
| 	SHLL $0x02, SI // SI = 4 * length
 | |
| 	CMPL R12, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B // length >= 12 uses the 3-byte form
 | |
| 	LEAL -15(SI), SI // SI = ((length-4) << 2) | 1
 | |
| 	MOVB R8, 1(CX) // low 8 bits of the offset
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8 // offset bits 8 and up are placed at tag bits 5 and up
 | |
| 	ORL  R8, SI
 | |
| 	MOVB SI, (CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B:
 | |
| 	LEAL -2(SI), SI // SI = ((length-1) << 2) | 2
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVW R8, 1(CX) // 16-bit offset
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_encodeSnappyBetterBlockAsm8B // position reached the search limit
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B
 | |
| 	MOVQ $0x00000000, ret+56(FP) // dst cursor reached the limit: return 0
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, SI // SI = 6-byte hash multiplier
 | |
| 	MOVQ  $0x9e3779b1, R8 // R8 = 4-byte hash multiplier
 | |
| 	LEAQ  1(DI), DI // DI = match start + 1
 | |
| 	LEAQ  -2(DX), R9 // R9 = match end - 2
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  1(BX)(DI*1), R11
 | |
| 	MOVQ  (BX)(R9*1), R12
 | |
| 	MOVQ  1(BX)(R9*1), R13
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x36, R10 // R10 = 6-byte hash of src at DI
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R8, R11
 | |
| 	SHRQ  $0x38, R11 // R11 = 4-byte hash of src at DI+1
 | |
| 	SHLQ  $0x10, R12
 | |
| 	IMULQ SI, R12
 | |
| 	SHRQ  $0x36, R12 // R12 = 6-byte hash of src at R9
 | |
| 	SHLQ  $0x20, R13
 | |
| 	IMULQ R8, R13
 | |
| 	SHRQ  $0x38, R13 // R13 = 4-byte hash of src at R9+1
 | |
| 	LEAQ  1(DI), R8
 | |
| 	LEAQ  1(R9), R14
 | |
| 	MOVL  DI, (AX)(R10*4) // record the four positions in their tables
 | |
| 	MOVL  R9, (AX)(R12*4)
 | |
| 	MOVL  R8, 4096(AX)(R11*4)
 | |
| 	MOVL  R14, 4096(AX)(R13*4)
 | |
| 	LEAQ  1(R9)(DI*1), R8
 | |
| 	SHRQ  $0x01, R8 // R8 = (DI + R9 + 1) / 2
 | |
| 	ADDQ  $0x01, DI
 | |
| 	SUBQ  $0x01, R9
 | |
| 
 | |
| index_loop_encodeSnappyBetterBlockAsm8B:
 | |
| 	CMPQ  R8, R9
 | |
| 	JAE   search_loop_encodeSnappyBetterBlockAsm8B // done once R8 >= R9; resume searching at DX
 | |
| 	MOVQ  (BX)(DI*1), R10
 | |
| 	MOVQ  (BX)(R8*1), R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ SI, R10
 | |
| 	SHRQ  $0x36, R10
 | |
| 	SHLQ  $0x10, R11
 | |
| 	IMULQ SI, R11
 | |
| 	SHRQ  $0x36, R11
 | |
| 	MOVL  DI, (AX)(R10*4) // record positions DI and R8 in the 6-byte-hash table
 | |
| 	MOVL  R8, (AX)(R11*4)
 | |
| 	ADDQ  $0x02, DI // both cursors advance by 2 per iteration
 | |
| 	ADDQ  $0x02, R8
 | |
| 	JMP   index_loop_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| emit_remainder_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVQ src_len+32(FP), AX // AX is reused from here on; the hash tables are not accessed again
 | |
| 	SUBL 12(SP), AX // AX = number of trailing literal bytes
 | |
| 	LEAQ 3(CX)(AX*1), AX // AX = CX + remaining length + 3
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm8B
 | |
| 	MOVQ $0x00000000, ret+56(FP) // would reach the dst limit: return 0
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVQ src_len+32(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B // nothing left to emit
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX // AX = address of the first remaining literal byte
 | |
| 	SUBL DX, SI // SI = literal length
 | |
| 	LEAL -1(SI), DX // DX = length - 1, the value encoded in the tag
 | |
| 	CMPL DX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B
 | |
| 	CMPL DX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
 | |
| 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B // never taken (same flags as the JB above); control falls through to the label below
 | |
| 
 | |
| three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVB $0xf4, (CX) // tag 0xf4 followed by a 16-bit length-1
 | |
| 	MOVW DX, 1(CX)
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVB $0xf0, (CX) // tag 0xf0 followed by an 8-bit length-1
 | |
| 	MOVB DL, 1(CX)
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DX, $0x40
 | |
| 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm8B // length <= 64: use the short copy
 | |
| 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B:
 | |
| 	SHLB $0x02, DL // tag = (length-1) << 2
 | |
| 	MOVB DL, (CX)
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
 | |
| 	LEAQ (CX)(SI*1), DX // DX = dst cursor after the literals
 | |
| 	MOVL SI, BX // BX = literal length (the src base is no longer needed)
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ BX, $0x03
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3
 | |
| 	CMPQ BX, $0x08
 | |
| 	JB   emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7
 | |
| 	CMPQ BX, $0x10
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
 | |
| 	CMPQ BX, $0x20
 | |
| 	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2:
 | |
| 	MOVB (AX), SI // first and last byte (the same byte when the length is 1)
 | |
| 	MOVB -1(AX)(BX*1), AL
 | |
| 	MOVB SI, (CX)
 | |
| 	MOVB AL, -1(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3:
 | |
| 	MOVW (AX), SI // 2 bytes + 1 byte
 | |
| 	MOVB 2(AX), AL
 | |
| 	MOVW SI, (CX)
 | |
| 	MOVB AL, 2(CX)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7:
 | |
| 	MOVL (AX), SI // first 4 and last 4 bytes; the two moves may overlap
 | |
| 	MOVL -4(AX)(BX*1), AX
 | |
| 	MOVL SI, (CX)
 | |
| 	MOVL AX, -4(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
 | |
| 	MOVQ (AX), SI // first 8 and last 8 bytes; the two moves may overlap
 | |
| 	MOVQ -8(AX)(BX*1), AX
 | |
| 	MOVQ SI, (CX)
 | |
| 	MOVQ AX, -8(CX)(BX*1)
 | |
| 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
 | |
| 	MOVOU (AX), X0 // first 16 and last 16 bytes; the two moves may overlap
 | |
| 	MOVOU -16(AX)(BX*1), X1
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, -16(CX)(BX*1)
 | |
| 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
 | |
| 	MOVOU (AX), X0 // first 32 and last 32 bytes; the two halves may overlap
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVOU X0, (CX)
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 
 | |
| memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVQ DX, CX // advance the dst cursor past the literals
 | |
| 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
 | |
| 
 | |
| memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B:
 | |
| 	LEAQ (CX)(SI*1), DX // DX = dst cursor after the literals
 | |
| 	MOVL SI, BX // BX = literal length (the src base is no longer needed)
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (AX), X0 // X0,X1 = first 32 source bytes; X2,X3 = last 32 source bytes (stored after the loop)
 | |
| 	MOVOU 16(AX), X1
 | |
| 	MOVOU -32(AX)(BX*1), X2
 | |
| 	MOVOU -16(AX)(BX*1), X3
 | |
| 	MOVQ  BX, DI
 | |
| 	SHRQ  $0x05, DI // DI = length / 32
 | |
| 	MOVQ  CX, SI
 | |
| 	ANDL  $0x0000001f, SI // SI = CX mod 32
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8 // R8 = 64 - (CX mod 32), so CX+R8-32 is a multiple of 32
 | |
| 	DECQ  DI
 | |
| 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(AX)(R8*1), SI
 | |
| 	LEAQ  -32(CX)(R8*1), R9
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(AX)(R8*1), X4 // copy 32 bytes per iteration at offset R8-32 from both bases
 | |
| 	MOVOU -16(AX)(R8*1), X5
 | |
| 	MOVOA X4, -32(CX)(R8*1) // aligned store: the destination CX+R8-32 is a multiple of 32
 | |
| 	MOVOA X5, -16(CX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  BX, R8
 | |
| 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 // continue while BX >= R8
 | |
| 	MOVOU X0, (CX) // finally store the saved first and last 32 bytes
 | |
| 	MOVOU X1, 16(CX)
 | |
| 	MOVOU X2, -32(CX)(BX*1)
 | |
| 	MOVOU X3, -16(CX)(BX*1)
 | |
| 	MOVQ  DX, CX // advance the dst cursor past the literals
 | |
| 
 | |
| emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B:
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	SUBQ AX, CX // CX = number of bytes written to dst
 | |
| 	MOVQ CX, ret+56(FP)
 | |
| 	RET
 | |
| 
 | |
| // func calcBlockSize(src []byte, tmp *[32768]byte) int
 | |
| // Requires: BMI, SSE2
 | |
| TEXT ·calcBlockSize(SB), $24-40
 | |
| 	MOVQ tmp+24(FP), AX
 | |
| 	XORQ CX, CX
 | |
| 	MOVQ $0x00000100, DX
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| 
 | |
| zero_loop_calcBlockSize:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_calcBlockSize
 | |
| 	MOVL  $0x00000000, 12(SP)
 | |
| 	MOVQ  src_len+8(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP)
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX
 | |
| 	LEAQ  (CX)(BX*1), BX
 | |
| 	MOVQ  BX, (SP)
 | |
| 	MOVL  $0x00000001, DX
 | |
| 	MOVL  DX, 16(SP)
 | |
| 	MOVQ  src_base+0(FP), BX
 | |
| 
 | |
| search_loop_calcBlockSize:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x05, SI
 | |
| 	LEAL  4(DX)(SI*1), SI
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_calcBlockSize
 | |
| 	MOVQ  (BX)(DX*1), DI
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x33, R10
 | |
| 	SHLQ  $0x10, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x33, R11
 | |
| 	MOVL  (AX)(R10*4), SI
 | |
| 	MOVL  (AX)(R11*4), R8
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x10, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x33, R10
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11
 | |
| 	JNE   no_repeat_found_calcBlockSize
 | |
| 	LEAL  1(DX), DI
 | |
| 	MOVL  12(SP), SI
 | |
| 	MOVL  DI, R8
 | |
| 	SUBL  16(SP), R8
 | |
| 	JZ    repeat_extend_back_end_calcBlockSize
 | |
| 
 | |
| repeat_extend_back_loop_calcBlockSize:
 | |
| 	CMPL DI, SI
 | |
| 	JBE  repeat_extend_back_end_calcBlockSize
 | |
| 	MOVB -1(BX)(R8*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_calcBlockSize
 | |
| 	LEAL -1(DI), DI
 | |
| 	DECL R8
 | |
| 	JNZ  repeat_extend_back_loop_calcBlockSize
 | |
| 
 | |
| repeat_extend_back_end_calcBlockSize:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	LEAQ 5(CX)(SI*1), SI
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_calcBlockSize
 | |
| 	MOVQ $0x00000000, ret+32(FP)
 | |
| 	RET
 | |
| 
 | |
| repeat_dst_size_check_calcBlockSize:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_calcBlockSize
 | |
| 	MOVL DI, R8
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R9
 | |
| 	SUBL SI, R8
 | |
| 	LEAL -1(R8), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_calcBlockSize
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_calcBlockSize
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   three_bytes_repeat_emit_calcBlockSize
 | |
| 	CMPL SI, $0x01000000
 | |
| 	JB   four_bytes_repeat_emit_calcBlockSize
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_repeat_emit_calcBlockSize
 | |
| 
 | |
| four_bytes_repeat_emit_calcBlockSize:
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_repeat_emit_calcBlockSize
 | |
| 
 | |
| three_bytes_repeat_emit_calcBlockSize:
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_calcBlockSize
 | |
| 
 | |
| two_bytes_repeat_emit_calcBlockSize:
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_calcBlockSize
 | |
| 	JMP  memmove_long_repeat_emit_calcBlockSize
 | |
| 
 | |
| one_byte_repeat_emit_calcBlockSize:
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_repeat_emit_calcBlockSize:
 | |
| 	LEAQ (CX)(R8*1), CX
 | |
| 	JMP  emit_literal_done_repeat_emit_calcBlockSize
 | |
| 
 | |
| memmove_long_repeat_emit_calcBlockSize:
 | |
| 	LEAQ (CX)(R8*1), CX
 | |
| 
 | |
| emit_literal_done_repeat_emit_calcBlockSize:
 | |
| 	ADDL $0x05, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI
 | |
| 	MOVQ src_len+8(FP), R8
 | |
| 	SUBL DX, R8
 | |
| 	LEAQ (BX)(DX*1), R9
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R11, R11
 | |
| 
 | |
| matchlen_loopback_16_repeat_extend_calcBlockSize:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_calcBlockSize
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	MOVQ 8(R9)(R11*1), R12
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSize
 | |
| 	XORQ 8(SI)(R11*1), R12
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_calcBlockSize
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R11), R11
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_calcBlockSize
 | |
| 
 | |
| matchlen_bsf_16repeat_extend_calcBlockSize:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R12, R12
 | |
| 
 | |
| #else
 | |
| 	BSFQ R12, R12
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R12
 | |
| 	LEAL 8(R11)(R12*1), R11
 | |
| 	JMP  repeat_extend_forward_end_calcBlockSize
 | |
| 
 | |
| matchlen_match8_repeat_extend_calcBlockSize:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_calcBlockSize
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSize
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R11), R11
 | |
| 	JMP  matchlen_match4_repeat_extend_calcBlockSize
 | |
| 
 | |
| matchlen_bsf_8_repeat_extend_calcBlockSize:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R10, R10
 | |
| 
 | |
| #else
 | |
| 	BSFQ R10, R10
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R10
 | |
| 	LEAL (R11)(R10*1), R11
 | |
| 	JMP  repeat_extend_forward_end_calcBlockSize
 | |
| 
 | |
| matchlen_match4_repeat_extend_calcBlockSize:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_calcBlockSize
 | |
| 	MOVL (R9)(R11*1), R10
 | |
| 	CMPL (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match2_repeat_extend_calcBlockSize
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R11), R11
 | |
| 
 | |
| matchlen_match2_repeat_extend_calcBlockSize:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_calcBlockSize
 | |
| 	JB   repeat_extend_forward_end_calcBlockSize
 | |
| 	MOVW (R9)(R11*1), R10
 | |
| 	CMPW (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match1_repeat_extend_calcBlockSize
 | |
| 	LEAL 2(R11), R11
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   repeat_extend_forward_end_calcBlockSize
 | |
| 
 | |
| matchlen_match1_repeat_extend_calcBlockSize:
 | |
| 	MOVB (R9)(R11*1), R10
 | |
| 	CMPB (SI)(R11*1), R10
 | |
| 	JNE  repeat_extend_forward_end_calcBlockSize
 | |
| 	LEAL 1(R11), R11
 | |
| 
 | |
| repeat_extend_forward_end_calcBlockSize:
 | |
| 	ADDL R11, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL DI, SI
 | |
| 	MOVL 16(SP), DI
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL DI, $0x00010000
 | |
| 	JB   two_byte_offset_repeat_as_copy_calcBlockSize
 | |
| 
 | |
| four_bytes_loop_back_repeat_as_copy_calcBlockSize:
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  four_bytes_remain_repeat_as_copy_calcBlockSize
 | |
| 	LEAL -64(SI), SI
 | |
| 	ADDQ $0x05, CX
 | |
| 	CMPL SI, $0x04
 | |
| 	JB   four_bytes_remain_repeat_as_copy_calcBlockSize
 | |
| 	JMP  four_bytes_loop_back_repeat_as_copy_calcBlockSize
 | |
| 
 | |
| four_bytes_remain_repeat_as_copy_calcBlockSize:
 | |
| 	TESTL SI, SI
 | |
| 	JZ    repeat_end_emit_calcBlockSize
 | |
| 	XORL  SI, SI
 | |
| 	ADDQ  $0x05, CX
 | |
| 	JMP   repeat_end_emit_calcBlockSize
 | |
| 
 | |
| two_byte_offset_repeat_as_copy_calcBlockSize:
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_calcBlockSize
 | |
| 	LEAL -60(SI), SI
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_repeat_as_copy_calcBlockSize
 | |
| 
 | |
| two_byte_offset_short_repeat_as_copy_calcBlockSize:
 | |
| 	MOVL SI, R8
 | |
| 	SHLL $0x02, R8
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_calcBlockSize
 | |
| 	CMPL DI, $0x00000800
 | |
| 	JAE  emit_copy_three_repeat_as_copy_calcBlockSize
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_calcBlockSize
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_calcBlockSize:
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| repeat_end_emit_calcBlockSize:
 | |
| 	MOVL DX, 12(SP)
 | |
| 	JMP  search_loop_calcBlockSize
 | |
| 
 | |
| no_repeat_found_calcBlockSize:
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate_match_calcBlockSize
 | |
| 	SHRQ $0x08, DI
 | |
| 	MOVL (AX)(R10*4), SI
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI
 | |
| 	JEQ  candidate2_match_calcBlockSize
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	SHRQ $0x08, DI
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate3_match_calcBlockSize
 | |
| 	MOVL 20(SP), DX
 | |
| 	JMP  search_loop_calcBlockSize
 | |
| 
 | |
| candidate3_match_calcBlockSize:
 | |
| 	ADDL $0x02, DX
 | |
| 	JMP  candidate_match_calcBlockSize
 | |
| 
 | |
| candidate2_match_calcBlockSize:
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	INCL DX
 | |
| 	MOVL R8, SI
 | |
| 
 | |
| candidate_match_calcBlockSize:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_calcBlockSize
 | |
| 
 | |
| match_extend_back_loop_calcBlockSize:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_calcBlockSize
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_calcBlockSize
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_calcBlockSize
 | |
| 	JMP  match_extend_back_loop_calcBlockSize
 | |
| 
 | |
| match_extend_back_end_calcBlockSize:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 5(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_calcBlockSize
 | |
| 	MOVQ $0x00000000, ret+32(FP)
 | |
| 	RET
 | |
| 
 | |
| match_dst_size_check_calcBlockSize:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_calcBlockSize
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI
 | |
| 	SUBL R8, R9
 | |
| 	LEAL -1(R9), DI
 | |
| 	CMPL DI, $0x3c
 | |
| 	JB   one_byte_match_emit_calcBlockSize
 | |
| 	CMPL DI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_calcBlockSize
 | |
| 	CMPL DI, $0x00010000
 | |
| 	JB   three_bytes_match_emit_calcBlockSize
 | |
| 	CMPL DI, $0x01000000
 | |
| 	JB   four_bytes_match_emit_calcBlockSize
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_match_emit_calcBlockSize
 | |
| 
 | |
| four_bytes_match_emit_calcBlockSize:
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_match_emit_calcBlockSize
 | |
| 
 | |
| three_bytes_match_emit_calcBlockSize:
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_calcBlockSize
 | |
| 
 | |
| two_bytes_match_emit_calcBlockSize:
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DI, $0x40
 | |
| 	JB   memmove_match_emit_calcBlockSize
 | |
| 	JMP  memmove_long_match_emit_calcBlockSize
 | |
| 
 | |
| one_byte_match_emit_calcBlockSize:
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_match_emit_calcBlockSize:
 | |
| 	LEAQ (CX)(R9*1), CX
 | |
| 	JMP  emit_literal_done_match_emit_calcBlockSize
 | |
| 
 | |
| memmove_long_match_emit_calcBlockSize:
 | |
| 	LEAQ (CX)(R9*1), CX
 | |
| 
 | |
| emit_literal_done_match_emit_calcBlockSize:
 | |
| match_nolit_loop_calcBlockSize:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP)
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+8(FP), DI
 | |
| 	SUBL DX, DI
 | |
| 	LEAQ (BX)(DX*1), R8
 | |
| 	LEAQ (BX)(SI*1), SI
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R10, R10
 | |
| 
 | |
| matchlen_loopback_16_match_nolit_calcBlockSize:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_calcBlockSize
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_calcBlockSize
 | |
| 	XORQ 8(SI)(R10*1), R11
 | |
| 	JNZ  matchlen_bsf_16match_nolit_calcBlockSize
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_calcBlockSize
 | |
| 
 | |
| matchlen_bsf_16match_nolit_calcBlockSize:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL 8(R10)(R11*1), R10
 | |
| 	JMP  match_nolit_end_calcBlockSize
 | |
| 
 | |
| matchlen_match8_match_nolit_calcBlockSize:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_calcBlockSize
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_calcBlockSize
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_calcBlockSize
 | |
| 
 | |
| matchlen_bsf_8_match_nolit_calcBlockSize:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_calcBlockSize
 | |
| 
 | |
| matchlen_match4_match_nolit_calcBlockSize:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_calcBlockSize
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_calcBlockSize
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_calcBlockSize:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_calcBlockSize
 | |
| 	JB   match_nolit_end_calcBlockSize
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_calcBlockSize
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_calcBlockSize
 | |
| 
 | |
| matchlen_match1_match_nolit_calcBlockSize:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_calcBlockSize
 | |
| 	LEAL 1(R10), R10
 | |
| 
 | |
| match_nolit_end_calcBlockSize:
 | |
| 	ADDL R10, DX
 | |
| 	MOVL 16(SP), SI
 | |
| 	ADDL $0x04, R10
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL SI, $0x00010000
 | |
| 	JB   two_byte_offset_match_nolit_calcBlockSize
 | |
| 
 | |
| four_bytes_loop_back_match_nolit_calcBlockSize:
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  four_bytes_remain_match_nolit_calcBlockSize
 | |
| 	LEAL -64(R10), R10
 | |
| 	ADDQ $0x05, CX
 | |
| 	CMPL R10, $0x04
 | |
| 	JB   four_bytes_remain_match_nolit_calcBlockSize
 | |
| 	JMP  four_bytes_loop_back_match_nolit_calcBlockSize
 | |
| 
 | |
| four_bytes_remain_match_nolit_calcBlockSize:
 | |
| 	TESTL R10, R10
 | |
| 	JZ    match_nolit_emitcopy_end_calcBlockSize
 | |
| 	XORL  SI, SI
 | |
| 	ADDQ  $0x05, CX
 | |
| 	JMP   match_nolit_emitcopy_end_calcBlockSize
 | |
| 
 | |
| two_byte_offset_match_nolit_calcBlockSize:
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_calcBlockSize
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_calcBlockSize
 | |
| 
 | |
| two_byte_offset_short_match_nolit_calcBlockSize:
 | |
| 	MOVL R10, DI
 | |
| 	SHLL $0x02, DI
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_calcBlockSize
 | |
| 	CMPL SI, $0x00000800
 | |
| 	JAE  emit_copy_three_match_nolit_calcBlockSize
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_calcBlockSize
 | |
| 
 | |
| emit_copy_three_match_nolit_calcBlockSize:
 | |
| 	ADDQ $0x03, CX
 | |
| 
 | |
| match_nolit_emitcopy_end_calcBlockSize:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_calcBlockSize
 | |
| 	MOVQ -2(BX)(DX*1), DI
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_calcBlockSize
 | |
| 	MOVQ $0x00000000, ret+32(FP)
 | |
| 	RET
 | |
| 
 | |
| match_nolit_dst_ok_calcBlockSize:
 | |
| 	MOVQ  $0x0000cf1bbcdcbf9b, R9
 | |
| 	MOVQ  DI, R8
 | |
| 	SHRQ  $0x10, DI
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x10, R8
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x33, R8
 | |
| 	SHLQ  $0x10, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x33, SI
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10
 | |
| 	MOVL  (R10), SI
 | |
| 	MOVL  R9, (AX)(R8*4)
 | |
| 	MOVL  DX, (R10)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   match_nolit_loop_calcBlockSize
 | |
| 	INCL  DX
 | |
| 	JMP   search_loop_calcBlockSize
 | |
| 
 | |
| emit_remainder_calcBlockSize:
 | |
| 	MOVQ src_len+8(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 5(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_calcBlockSize
 | |
| 	MOVQ $0x00000000, ret+32(FP)
 | |
| 	RET
 | |
| 
 | |
| emit_remainder_ok_calcBlockSize:
 | |
| 	MOVQ src_len+8(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_calcBlockSize
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX
 | |
| 	SUBL DX, SI
 | |
| 	LEAL -1(SI), AX
 | |
| 	CMPL AX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_calcBlockSize
 | |
| 	CMPL AX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_calcBlockSize
 | |
| 	CMPL AX, $0x00010000
 | |
| 	JB   three_bytes_emit_remainder_calcBlockSize
 | |
| 	CMPL AX, $0x01000000
 | |
| 	JB   four_bytes_emit_remainder_calcBlockSize
 | |
| 	ADDQ $0x05, CX
 | |
| 	JMP  memmove_long_emit_remainder_calcBlockSize
 | |
| 
 | |
| four_bytes_emit_remainder_calcBlockSize:
 | |
| 	ADDQ $0x04, CX
 | |
| 	JMP  memmove_long_emit_remainder_calcBlockSize
 | |
| 
 | |
| three_bytes_emit_remainder_calcBlockSize:
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_calcBlockSize
 | |
| 
 | |
| two_bytes_emit_remainder_calcBlockSize:
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL AX, $0x40
 | |
| 	JB   memmove_emit_remainder_calcBlockSize
 | |
| 	JMP  memmove_long_emit_remainder_calcBlockSize
 | |
| 
 | |
| one_byte_emit_remainder_calcBlockSize:
 | |
| 	ADDQ $0x01, CX
 | |
| 
 | |
| memmove_emit_remainder_calcBlockSize:
 | |
| 	LEAQ (CX)(SI*1), AX
 | |
| 	MOVQ AX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_calcBlockSize
 | |
| 
 | |
| memmove_long_emit_remainder_calcBlockSize:
 | |
| 	LEAQ (CX)(SI*1), AX
 | |
| 	MOVQ AX, CX
 | |
| 
 | |
| emit_literal_done_emit_remainder_calcBlockSize:
 | |
| 	MOVQ CX, ret+32(FP)
 | |
| 	RET
 | |
| 
 | |
| // func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int
 | |
| // Requires: BMI, SSE2. Size-only pass: walks src and adds up, in CX, the header, literal and copy byte counts it encounters; no output buffer is involved. Returns 0 as soon as the count reaches the limit kept at (SP). Stack slots: (SP) size limit, 8(SP) search limit, 12(SP) start of the not-yet-counted literal run, 16(SP) repeat offset, 20(SP) next probe position.
 | |
| TEXT ·calcBlockSizeSmall(SB), $24-40
 | |
| 	MOVQ tmp+24(FP), AX // AX = tmp, used below as a table of 32-bit positions
 | |
| 	XORQ CX, CX // CX = running byte count (stored to ret on success)
 | |
| 	MOVQ $0x00000010, DX // 16 iterations x 128 bytes = the 2048 bytes of tmp
 | |
| 	MOVQ AX, BX
 | |
| 	PXOR X0, X0
 | |
| // Clear tmp, 128 bytes per iteration.
 | |
| zero_loop_calcBlockSizeSmall:
 | |
| 	MOVOU X0, (BX)
 | |
| 	MOVOU X0, 16(BX)
 | |
| 	MOVOU X0, 32(BX)
 | |
| 	MOVOU X0, 48(BX)
 | |
| 	MOVOU X0, 64(BX)
 | |
| 	MOVOU X0, 80(BX)
 | |
| 	MOVOU X0, 96(BX)
 | |
| 	MOVOU X0, 112(BX)
 | |
| 	ADDQ  $0x80, BX
 | |
| 	DECQ  DX
 | |
| 	JNZ   zero_loop_calcBlockSizeSmall
 | |
| 	MOVL  $0x00000000, 12(SP) // nothing counted yet: literal run starts at 0
 | |
| 	MOVQ  src_len+8(FP), DX
 | |
| 	LEAQ  -9(DX), BX
 | |
| 	LEAQ  -8(DX), SI
 | |
| 	MOVL  SI, 8(SP) // search limit = len(src) - 8
 | |
| 	SHRQ  $0x05, DX
 | |
| 	SUBL  DX, BX // BX = len(src) - 9 - len(src)/32 (32-bit arithmetic)
 | |
| 	LEAQ  (CX)(BX*1), BX // CX is still 0 here
 | |
| 	MOVQ  BX, (SP) // size limit checked before every count update
 | |
| 	MOVL  $0x00000001, DX // DX = current position, starts at 1
 | |
| 	MOVL  DX, 16(SP) // initial repeat offset = 1
 | |
| 	MOVQ  src_base+0(FP), BX // BX = src base for the rest of the function
 | |
| // Main loop. DX = current position, AX = table, BX = src, CX = count so far.
 | |
| search_loop_calcBlockSizeSmall:
 | |
| 	MOVL  DX, SI
 | |
| 	SUBL  12(SP), SI
 | |
| 	SHRL  $0x04, SI
 | |
| 	LEAL  4(DX)(SI*1), SI // next probe = DX + 4 + (DX - literal start)/16
 | |
| 	CMPL  SI, 8(SP)
 | |
| 	JAE   emit_remainder_calcBlockSizeSmall
 | |
| 	MOVQ  (BX)(DX*1), DI // DI = 8 bytes of src at DX
 | |
| 	MOVL  SI, 20(SP)
 | |
| 	MOVQ  $0x9e3779b1, R9 // hash multiplier
 | |
| 	MOVQ  DI, R10
 | |
| 	MOVQ  DI, R11
 | |
| 	SHRQ  $0x08, R11
 | |
| 	SHLQ  $0x20, R10 // keep only the low 4 bytes, then multiply and take the top 9 bits
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x37, R10 // R10 = table index for the 4 bytes at DX
 | |
| 	SHLQ  $0x20, R11
 | |
| 	IMULQ R9, R11
 | |
| 	SHRQ  $0x37, R11 // R11 = table index for the 4 bytes at DX+1
 | |
| 	MOVL  (AX)(R10*4), SI // SI = previous position stored for the DX hash
 | |
| 	MOVL  (AX)(R11*4), R8 // R8 = previous position stored for the DX+1 hash
 | |
| 	MOVL  DX, (AX)(R10*4)
 | |
| 	LEAL  1(DX), R10
 | |
| 	MOVL  R10, (AX)(R11*4)
 | |
| 	MOVQ  DI, R10
 | |
| 	SHRQ  $0x10, R10
 | |
| 	SHLQ  $0x20, R10
 | |
| 	IMULQ R9, R10
 | |
| 	SHRQ  $0x37, R10 // R10 = table index for the 4 bytes at DX+2
 | |
| 	MOVL  DX, R9
 | |
| 	SUBL  16(SP), R9
 | |
| 	MOVL  1(BX)(R9*1), R11 // 4 bytes at DX+1 - repeat offset
 | |
| 	MOVQ  DI, R9
 | |
| 	SHRQ  $0x08, R9
 | |
| 	CMPL  R9, R11 // compare with the 4 bytes at DX+1
 | |
| 	JNE   no_repeat_found_calcBlockSizeSmall
 | |
| 	LEAL  1(DX), DI // DI = start of the repeat match
 | |
| 	MOVL  12(SP), SI
 | |
| 	MOVL  DI, R8
 | |
| 	SUBL  16(SP), R8 // R8 = matching earlier position
 | |
| 	JZ    repeat_extend_back_end_calcBlockSizeSmall
 | |
| // Extend the repeat backwards while bytes match, DI stays above the literal start and R8 stays above 0.
 | |
| repeat_extend_back_loop_calcBlockSizeSmall:
 | |
| 	CMPL DI, SI
 | |
| 	JBE  repeat_extend_back_end_calcBlockSizeSmall
 | |
| 	MOVB -1(BX)(R8*1), R9
 | |
| 	MOVB -1(BX)(DI*1), R10
 | |
| 	CMPB R9, R10
 | |
| 	JNE  repeat_extend_back_end_calcBlockSizeSmall
 | |
| 	LEAL -1(DI), DI
 | |
| 	DECL R8
 | |
| 	JNZ  repeat_extend_back_loop_calcBlockSizeSmall
 | |
| // Bail out with 0 if count + pending literal length + 3 reaches the size limit.
 | |
| repeat_extend_back_end_calcBlockSizeSmall:
 | |
| 	MOVL DI, SI
 | |
| 	SUBL 12(SP), SI
 | |
| 	LEAQ 3(CX)(SI*1), SI
 | |
| 	CMPQ SI, (SP)
 | |
| 	JB   repeat_dst_size_check_calcBlockSizeSmall
 | |
| 	MOVQ $0x00000000, ret+32(FP)
 | |
| 	RET
 | |
| // Count the literal run [12(SP), DI): header bytes chosen from length-1, then the run itself.
 | |
| repeat_dst_size_check_calcBlockSizeSmall:
 | |
| 	MOVL 12(SP), SI
 | |
| 	CMPL SI, DI
 | |
| 	JEQ  emit_literal_done_repeat_emit_calcBlockSizeSmall
 | |
| 	MOVL DI, R8
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(SI*1), R9 // address of the run; R9 is overwritten before it is read
 | |
| 	SUBL SI, R8 // R8 = literal length
 | |
| 	LEAL -1(R8), SI
 | |
| 	CMPL SI, $0x3c
 | |
| 	JB   one_byte_repeat_emit_calcBlockSizeSmall
 | |
| 	CMPL SI, $0x00000100
 | |
| 	JB   two_bytes_repeat_emit_calcBlockSizeSmall
 | |
| 	JB   three_bytes_repeat_emit_calcBlockSizeSmall // same flags as the JB above, so never taken; control falls through to the label anyway
 | |
| // length-1 >= 256: 3 header bytes.
 | |
| three_bytes_repeat_emit_calcBlockSizeSmall:
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_repeat_emit_calcBlockSizeSmall
 | |
| // length-1 in [60, 256): 2 header bytes.
 | |
| two_bytes_repeat_emit_calcBlockSizeSmall:
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_repeat_emit_calcBlockSizeSmall
 | |
| 	JMP  memmove_long_repeat_emit_calcBlockSizeSmall
 | |
| // length-1 < 60: 1 header byte.
 | |
| one_byte_repeat_emit_calcBlockSizeSmall:
 | |
| 	ADDQ $0x01, CX
 | |
| // Both "memmove" labels only add the literal length to the count here.
 | |
| memmove_repeat_emit_calcBlockSizeSmall:
 | |
| 	LEAQ (CX)(R8*1), CX
 | |
| 	JMP  emit_literal_done_repeat_emit_calcBlockSizeSmall
 | |
| 
 | |
| memmove_long_repeat_emit_calcBlockSizeSmall:
 | |
| 	LEAQ (CX)(R8*1), CX
 | |
| // Skip the 1+4 bytes already compared and measure how far the repeat continues forwards.
 | |
| emit_literal_done_repeat_emit_calcBlockSizeSmall:
 | |
| 	ADDL $0x05, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL 16(SP), SI
 | |
| 	MOVQ src_len+8(FP), R8
 | |
| 	SUBL DX, R8 // R8 = bytes left in src from DX
 | |
| 	LEAQ (BX)(DX*1), R9 // R9 = current data
 | |
| 	LEAQ (BX)(SI*1), SI // SI = data one repeat offset earlier
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R11, R11 // R11 = number of matching bytes found so far
 | |
| // 16 bytes per iteration while at least 16 remain; XOR is non-zero at the first differing word.
 | |
| matchlen_loopback_16_repeat_extend_calcBlockSizeSmall:
 | |
| 	CMPL R8, $0x10
 | |
| 	JB   matchlen_match8_repeat_extend_calcBlockSizeSmall
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	MOVQ 8(R9)(R11*1), R12
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
 | |
| 	XORQ 8(SI)(R11*1), R12
 | |
| 	JNZ  matchlen_bsf_16repeat_extend_calcBlockSizeSmall
 | |
| 	LEAL -16(R8), R8
 | |
| 	LEAL 16(R11), R11
 | |
| 	JMP  matchlen_loopback_16_repeat_extend_calcBlockSizeSmall
 | |
| // Mismatch in the second 8-byte word: lowest set bit index / 8 = equal bytes in that word, plus the 8 before it.
 | |
| matchlen_bsf_16repeat_extend_calcBlockSizeSmall:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R12, R12
 | |
| 
 | |
| #else
 | |
| 	BSFQ R12, R12
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R12
 | |
| 	LEAL 8(R11)(R12*1), R11
 | |
| 	JMP  repeat_extend_forward_end_calcBlockSizeSmall
 | |
| // Fewer than 16 bytes left: try one 8-byte word.
 | |
| matchlen_match8_repeat_extend_calcBlockSizeSmall:
 | |
| 	CMPL R8, $0x08
 | |
| 	JB   matchlen_match4_repeat_extend_calcBlockSizeSmall
 | |
| 	MOVQ (R9)(R11*1), R10
 | |
| 	XORQ (SI)(R11*1), R10
 | |
| 	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
 | |
| 	LEAL -8(R8), R8
 | |
| 	LEAL 8(R11), R11
 | |
| 	JMP  matchlen_match4_repeat_extend_calcBlockSizeSmall
 | |
| // Mismatch inside an 8-byte word: lowest set bit index / 8 = equal bytes in that word.
 | |
| matchlen_bsf_8_repeat_extend_calcBlockSizeSmall:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R10, R10
 | |
| 
 | |
| #else
 | |
| 	BSFQ R10, R10
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R10
 | |
| 	LEAL (R11)(R10*1), R11
 | |
| 	JMP  repeat_extend_forward_end_calcBlockSizeSmall
 | |
| // Tail handling: 4, then 2, then 1 byte compares.
 | |
| matchlen_match4_repeat_extend_calcBlockSizeSmall:
 | |
| 	CMPL R8, $0x04
 | |
| 	JB   matchlen_match2_repeat_extend_calcBlockSizeSmall
 | |
| 	MOVL (R9)(R11*1), R10
 | |
| 	CMPL (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match2_repeat_extend_calcBlockSizeSmall
 | |
| 	LEAL -4(R8), R8
 | |
| 	LEAL 4(R11), R11
 | |
| 
 | |
| matchlen_match2_repeat_extend_calcBlockSizeSmall:
 | |
| 	CMPL R8, $0x01
 | |
| 	JE   matchlen_match1_repeat_extend_calcBlockSizeSmall
 | |
| 	JB   repeat_extend_forward_end_calcBlockSizeSmall
 | |
| 	MOVW (R9)(R11*1), R10
 | |
| 	CMPW (SI)(R11*1), R10
 | |
| 	JNE  matchlen_match1_repeat_extend_calcBlockSizeSmall
 | |
| 	LEAL 2(R11), R11
 | |
| 	SUBL $0x02, R8
 | |
| 	JZ   repeat_extend_forward_end_calcBlockSizeSmall
 | |
| 
 | |
| matchlen_match1_repeat_extend_calcBlockSizeSmall:
 | |
| 	MOVB (R9)(R11*1), R10
 | |
| 	CMPB (SI)(R11*1), R10
 | |
| 	JNE  repeat_extend_forward_end_calcBlockSizeSmall
 | |
| 	LEAL 1(R11), R11
 | |
| // DX moves past the match; SI = DX - DI is the total length of the repeat.
 | |
| repeat_extend_forward_end_calcBlockSizeSmall:
 | |
| 	ADDL R11, DX
 | |
| 	MOVL DX, SI
 | |
| 	SUBL DI, SI
 | |
| 	MOVL 16(SP), DI
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_repeat_as_copy_calcBlockSizeSmall:
 | |
| 	CMPL SI, $0x40
 | |
| 	JBE  two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall
 | |
| 	LEAL -60(SI), SI // while more than 64 bytes remain: count 3 bytes per 60 bytes of length
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_repeat_as_copy_calcBlockSizeSmall
 | |
| // Remaining length <= 64: 2 bytes if it is below 12, otherwise 3.
 | |
| two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall:
 | |
| 	MOVL SI, DI
 | |
| 	SHLL $0x02, DI
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  emit_copy_three_repeat_as_copy_calcBlockSizeSmall
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  repeat_end_emit_calcBlockSizeSmall
 | |
| 
 | |
| emit_copy_three_repeat_as_copy_calcBlockSizeSmall:
 | |
| 	ADDQ $0x03, CX
 | |
| // Everything up to DX is now counted; resume searching.
 | |
| repeat_end_emit_calcBlockSizeSmall:
 | |
| 	MOVL DX, 12(SP)
 | |
| 	JMP  search_loop_calcBlockSizeSmall
 | |
| // No repeat: test the table candidates for DX (SI), DX+1 (R8) and DX+2 in turn, 4 bytes each.
 | |
| no_repeat_found_calcBlockSizeSmall:
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate_match_calcBlockSizeSmall
 | |
| 	SHRQ $0x08, DI
 | |
| 	MOVL (AX)(R10*4), SI
 | |
| 	LEAL 2(DX), R9
 | |
| 	CMPL (BX)(R8*1), DI
 | |
| 	JEQ  candidate2_match_calcBlockSizeSmall
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	SHRQ $0x08, DI
 | |
| 	CMPL (BX)(SI*1), DI
 | |
| 	JEQ  candidate3_match_calcBlockSizeSmall
 | |
| 	MOVL 20(SP), DX // nothing matched: jump ahead to the saved probe position
 | |
| 	JMP  search_loop_calcBlockSizeSmall
 | |
| // Match at DX+2 with candidate SI.
 | |
| candidate3_match_calcBlockSizeSmall:
 | |
| 	ADDL $0x02, DX
 | |
| 	JMP  candidate_match_calcBlockSizeSmall
 | |
| // Match at DX+1 with candidate R8; the DX+2 table entry is still recorded.
 | |
| candidate2_match_calcBlockSizeSmall:
 | |
| 	MOVL R9, (AX)(R10*4)
 | |
| 	INCL DX
 | |
| 	MOVL R8, SI
 | |
| // Here src[DX:DX+4] == src[SI:SI+4]. Extend backwards unless the candidate is at position 0.
 | |
| candidate_match_calcBlockSizeSmall:
 | |
| 	MOVL  12(SP), DI
 | |
| 	TESTL SI, SI
 | |
| 	JZ    match_extend_back_end_calcBlockSizeSmall
 | |
| // Step both positions back while the preceding bytes match and DX stays above the literal start.
 | |
| match_extend_back_loop_calcBlockSizeSmall:
 | |
| 	CMPL DX, DI
 | |
| 	JBE  match_extend_back_end_calcBlockSizeSmall
 | |
| 	MOVB -1(BX)(SI*1), R8
 | |
| 	MOVB -1(BX)(DX*1), R9
 | |
| 	CMPB R8, R9
 | |
| 	JNE  match_extend_back_end_calcBlockSizeSmall
 | |
| 	LEAL -1(DX), DX
 | |
| 	DECL SI
 | |
| 	JZ   match_extend_back_end_calcBlockSizeSmall
 | |
| 	JMP  match_extend_back_loop_calcBlockSizeSmall
 | |
| // Bail out with 0 if count + pending literal length + 3 reaches the size limit.
 | |
| match_extend_back_end_calcBlockSizeSmall:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL 12(SP), DI
 | |
| 	LEAQ 3(CX)(DI*1), DI
 | |
| 	CMPQ DI, (SP)
 | |
| 	JB   match_dst_size_check_calcBlockSizeSmall
 | |
| 	MOVQ $0x00000000, ret+32(FP)
 | |
| 	RET
 | |
| // Count the literal run [12(SP), DX): header bytes chosen from length-1, then the run itself.
 | |
| match_dst_size_check_calcBlockSizeSmall:
 | |
| 	MOVL DX, DI
 | |
| 	MOVL 12(SP), R8
 | |
| 	CMPL R8, DI
 | |
| 	JEQ  emit_literal_done_match_emit_calcBlockSizeSmall
 | |
| 	MOVL DI, R9
 | |
| 	MOVL DI, 12(SP)
 | |
| 	LEAQ (BX)(R8*1), DI // overwritten by the next LEAL before being read
 | |
| 	SUBL R8, R9 // R9 = literal length
 | |
| 	LEAL -1(R9), DI
 | |
| 	CMPL DI, $0x3c
 | |
| 	JB   one_byte_match_emit_calcBlockSizeSmall
 | |
| 	CMPL DI, $0x00000100
 | |
| 	JB   two_bytes_match_emit_calcBlockSizeSmall
 | |
| 	JB   three_bytes_match_emit_calcBlockSizeSmall // same flags as the JB above, so never taken; control falls through to the label anyway
 | |
| // length-1 >= 256: 3 header bytes.
 | |
| three_bytes_match_emit_calcBlockSizeSmall:
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_match_emit_calcBlockSizeSmall
 | |
| // length-1 in [60, 256): 2 header bytes.
 | |
| two_bytes_match_emit_calcBlockSizeSmall:
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL DI, $0x40
 | |
| 	JB   memmove_match_emit_calcBlockSizeSmall
 | |
| 	JMP  memmove_long_match_emit_calcBlockSizeSmall
 | |
| // length-1 < 60: 1 header byte.
 | |
| one_byte_match_emit_calcBlockSizeSmall:
 | |
| 	ADDQ $0x01, CX
 | |
| // Both "memmove" labels only add the literal length to the count here.
 | |
| memmove_match_emit_calcBlockSizeSmall:
 | |
| 	LEAQ (CX)(R9*1), CX
 | |
| 	JMP  emit_literal_done_match_emit_calcBlockSizeSmall
 | |
| 
 | |
| memmove_long_match_emit_calcBlockSizeSmall:
 | |
| 	LEAQ (CX)(R9*1), CX
 | |
| // Record offset DX - SI as the new repeat offset, skip the 4 known-equal bytes and measure the rest of the match.
 | |
| emit_literal_done_match_emit_calcBlockSizeSmall:
 | |
| match_nolit_loop_calcBlockSizeSmall:
 | |
| 	MOVL DX, DI
 | |
| 	SUBL SI, DI
 | |
| 	MOVL DI, 16(SP)
 | |
| 	ADDL $0x04, DX
 | |
| 	ADDL $0x04, SI
 | |
| 	MOVQ src_len+8(FP), DI
 | |
| 	SUBL DX, DI // DI = bytes left in src from DX
 | |
| 	LEAQ (BX)(DX*1), R8 // R8 = current data
 | |
| 	LEAQ (BX)(SI*1), SI // SI = candidate data
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL R10, R10 // R10 = number of matching bytes found so far
 | |
| // 16 bytes per iteration while at least 16 remain; XOR is non-zero at the first differing word.
 | |
| matchlen_loopback_16_match_nolit_calcBlockSizeSmall:
 | |
| 	CMPL DI, $0x10
 | |
| 	JB   matchlen_match8_match_nolit_calcBlockSizeSmall
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	MOVQ 8(R8)(R10*1), R11
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_calcBlockSizeSmall
 | |
| 	XORQ 8(SI)(R10*1), R11
 | |
| 	JNZ  matchlen_bsf_16match_nolit_calcBlockSizeSmall
 | |
| 	LEAL -16(DI), DI
 | |
| 	LEAL 16(R10), R10
 | |
| 	JMP  matchlen_loopback_16_match_nolit_calcBlockSizeSmall
 | |
| // Mismatch in the second 8-byte word: lowest set bit index / 8 = equal bytes in that word, plus the 8 before it.
 | |
| matchlen_bsf_16match_nolit_calcBlockSizeSmall:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R11, R11
 | |
| 
 | |
| #else
 | |
| 	BSFQ R11, R11
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R11
 | |
| 	LEAL 8(R10)(R11*1), R10
 | |
| 	JMP  match_nolit_end_calcBlockSizeSmall
 | |
| // Fewer than 16 bytes left: try one 8-byte word.
 | |
| matchlen_match8_match_nolit_calcBlockSizeSmall:
 | |
| 	CMPL DI, $0x08
 | |
| 	JB   matchlen_match4_match_nolit_calcBlockSizeSmall
 | |
| 	MOVQ (R8)(R10*1), R9
 | |
| 	XORQ (SI)(R10*1), R9
 | |
| 	JNZ  matchlen_bsf_8_match_nolit_calcBlockSizeSmall
 | |
| 	LEAL -8(DI), DI
 | |
| 	LEAL 8(R10), R10
 | |
| 	JMP  matchlen_match4_match_nolit_calcBlockSizeSmall
 | |
| // Mismatch inside an 8-byte word: lowest set bit index / 8 = equal bytes in that word.
 | |
| matchlen_bsf_8_match_nolit_calcBlockSizeSmall:
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ R9, R9
 | |
| 
 | |
| #else
 | |
| 	BSFQ R9, R9
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, R9
 | |
| 	LEAL (R10)(R9*1), R10
 | |
| 	JMP  match_nolit_end_calcBlockSizeSmall
 | |
| // Tail handling: 4, then 2, then 1 byte compares.
 | |
| matchlen_match4_match_nolit_calcBlockSizeSmall:
 | |
| 	CMPL DI, $0x04
 | |
| 	JB   matchlen_match2_match_nolit_calcBlockSizeSmall
 | |
| 	MOVL (R8)(R10*1), R9
 | |
| 	CMPL (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match2_match_nolit_calcBlockSizeSmall
 | |
| 	LEAL -4(DI), DI
 | |
| 	LEAL 4(R10), R10
 | |
| 
 | |
| matchlen_match2_match_nolit_calcBlockSizeSmall:
 | |
| 	CMPL DI, $0x01
 | |
| 	JE   matchlen_match1_match_nolit_calcBlockSizeSmall
 | |
| 	JB   match_nolit_end_calcBlockSizeSmall
 | |
| 	MOVW (R8)(R10*1), R9
 | |
| 	CMPW (SI)(R10*1), R9
 | |
| 	JNE  matchlen_match1_match_nolit_calcBlockSizeSmall
 | |
| 	LEAL 2(R10), R10
 | |
| 	SUBL $0x02, DI
 | |
| 	JZ   match_nolit_end_calcBlockSizeSmall
 | |
| 
 | |
| matchlen_match1_match_nolit_calcBlockSizeSmall:
 | |
| 	MOVB (R8)(R10*1), R9
 | |
| 	CMPB (SI)(R10*1), R9
 | |
| 	JNE  match_nolit_end_calcBlockSizeSmall
 | |
| 	LEAL 1(R10), R10
 | |
| // DX moves past the match; R10 + 4 is the full match length; everything up to DX is now counted.
 | |
| match_nolit_end_calcBlockSizeSmall:
 | |
| 	ADDL R10, DX
 | |
| 	MOVL 16(SP), SI
 | |
| 	ADDL $0x04, R10
 | |
| 	MOVL DX, 12(SP)
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_match_nolit_calcBlockSizeSmall:
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_match_nolit_calcBlockSizeSmall
 | |
| 	LEAL -60(R10), R10 // while more than 64 bytes remain: count 3 bytes per 60 bytes of length
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  two_byte_offset_match_nolit_calcBlockSizeSmall
 | |
| // Remaining length <= 64: 2 bytes if it is below 12, otherwise 3.
 | |
| two_byte_offset_short_match_nolit_calcBlockSizeSmall:
 | |
| 	MOVL R10, SI
 | |
| 	SHLL $0x02, SI
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_match_nolit_calcBlockSizeSmall
 | |
| 	ADDQ $0x02, CX
 | |
| 	JMP  match_nolit_emitcopy_end_calcBlockSizeSmall
 | |
| 
 | |
| emit_copy_three_match_nolit_calcBlockSizeSmall:
 | |
| 	ADDQ $0x03, CX
 | |
| // Stop searching at the search limit; otherwise load the 8 bytes at DX-2 and re-check the size limit.
 | |
| match_nolit_emitcopy_end_calcBlockSizeSmall:
 | |
| 	CMPL DX, 8(SP)
 | |
| 	JAE  emit_remainder_calcBlockSizeSmall
 | |
| 	MOVQ -2(BX)(DX*1), DI
 | |
| 	CMPQ CX, (SP)
 | |
| 	JB   match_nolit_dst_ok_calcBlockSizeSmall
 | |
| 	MOVQ $0x00000000, ret+32(FP)
 | |
| 	RET
 | |
| // Store table entries for DX-2 and DX, then test the old DX entry for an immediate new match.
 | |
| match_nolit_dst_ok_calcBlockSizeSmall:
 | |
| 	MOVQ  $0x9e3779b1, R9
 | |
| 	MOVQ  DI, R8
 | |
| 	SHRQ  $0x10, DI // DI = bytes starting at DX
 | |
| 	MOVQ  DI, SI
 | |
| 	SHLQ  $0x20, R8
 | |
| 	IMULQ R9, R8
 | |
| 	SHRQ  $0x37, R8 // R8 = table index for the 4 bytes at DX-2
 | |
| 	SHLQ  $0x20, SI
 | |
| 	IMULQ R9, SI
 | |
| 	SHRQ  $0x37, SI // SI = table index for the 4 bytes at DX
 | |
| 	LEAL  -2(DX), R9
 | |
| 	LEAQ  (AX)(SI*4), R10
 | |
| 	MOVL  (R10), SI // SI = previous position stored for the DX hash
 | |
| 	MOVL  R9, (AX)(R8*4)
 | |
| 	MOVL  DX, (R10)
 | |
| 	CMPL  (BX)(SI*1), DI
 | |
| 	JEQ   match_nolit_loop_calcBlockSizeSmall
 | |
| 	INCL  DX
 | |
| 	JMP   search_loop_calcBlockSizeSmall
 | |
| // Trailing bytes: bail out with 0 if count + remaining length + 3 reaches the size limit.
 | |
| emit_remainder_calcBlockSizeSmall:
 | |
| 	MOVQ src_len+8(FP), AX
 | |
| 	SUBL 12(SP), AX
 | |
| 	LEAQ 3(CX)(AX*1), AX
 | |
| 	CMPQ AX, (SP)
 | |
| 	JB   emit_remainder_ok_calcBlockSizeSmall
 | |
| 	MOVQ $0x00000000, ret+32(FP)
 | |
| 	RET
 | |
| // Count the final literal run [12(SP), len(src)) unless it is empty.
 | |
| emit_remainder_ok_calcBlockSizeSmall:
 | |
| 	MOVQ src_len+8(FP), AX
 | |
| 	MOVL 12(SP), DX
 | |
| 	CMPL DX, AX
 | |
| 	JEQ  emit_literal_done_emit_remainder_calcBlockSizeSmall
 | |
| 	MOVL AX, SI
 | |
| 	MOVL AX, 12(SP)
 | |
| 	LEAQ (BX)(DX*1), AX // overwritten by the next LEAL before being read
 | |
| 	SUBL DX, SI // SI = literal length
 | |
| 	LEAL -1(SI), AX
 | |
| 	CMPL AX, $0x3c
 | |
| 	JB   one_byte_emit_remainder_calcBlockSizeSmall
 | |
| 	CMPL AX, $0x00000100
 | |
| 	JB   two_bytes_emit_remainder_calcBlockSizeSmall
 | |
| 	JB   three_bytes_emit_remainder_calcBlockSizeSmall // same flags as the JB above, so never taken; control falls through to the label anyway
 | |
| // length-1 >= 256: 3 header bytes.
 | |
| three_bytes_emit_remainder_calcBlockSizeSmall:
 | |
| 	ADDQ $0x03, CX
 | |
| 	JMP  memmove_long_emit_remainder_calcBlockSizeSmall
 | |
| // length-1 in [60, 256): 2 header bytes.
 | |
| two_bytes_emit_remainder_calcBlockSizeSmall:
 | |
| 	ADDQ $0x02, CX
 | |
| 	CMPL AX, $0x40
 | |
| 	JB   memmove_emit_remainder_calcBlockSizeSmall
 | |
| 	JMP  memmove_long_emit_remainder_calcBlockSizeSmall
 | |
| // length-1 < 60: 1 header byte.
 | |
| one_byte_emit_remainder_calcBlockSizeSmall:
 | |
| 	ADDQ $0x01, CX
 | |
| // Both "memmove" labels only add the literal length to the count here.
 | |
| memmove_emit_remainder_calcBlockSizeSmall:
 | |
| 	LEAQ (CX)(SI*1), AX
 | |
| 	MOVQ AX, CX
 | |
| 	JMP  emit_literal_done_emit_remainder_calcBlockSizeSmall
 | |
| 
 | |
| memmove_long_emit_remainder_calcBlockSizeSmall:
 | |
| 	LEAQ (CX)(SI*1), AX
 | |
| 	MOVQ AX, CX
 | |
| // Success: return the accumulated count.
 | |
| emit_literal_done_emit_remainder_calcBlockSizeSmall:
 | |
| 	MOVQ CX, ret+32(FP)
 | |
| 	RET
 | |
| 
 | |
| // func emitLiteral(dst []byte, lit []byte) int
 | |
| // Requires: SSE2. Writes a 1..5 byte header derived from len(lit)-1 at dst, copies lit after it, and returns header size + len(lit); returns 0 without writing when lit is empty. No bounds checks against dst are performed here.
 | |
| TEXT ·emitLiteral(SB), NOSPLIT, $0-56
 | |
| 	MOVQ  lit_len+32(FP), DX // DX = len(lit)
 | |
| 	MOVQ  dst_base+0(FP), AX // AX = write cursor in dst
 | |
| 	MOVQ  lit_base+24(FP), CX // CX = source pointer
 | |
| 	TESTQ DX, DX
 | |
| 	JZ    emit_literal_end_standalone_skip
 | |
| 	MOVL  DX, BX // BX = return value; header size is added below
 | |
| 	LEAL  -1(DX), SI // SI = len(lit) - 1, the value stored in the header
 | |
| 	CMPL  SI, $0x3c
 | |
| 	JB    one_byte_standalone
 | |
| 	CMPL  SI, $0x00000100
 | |
| 	JB    two_bytes_standalone
 | |
| 	CMPL  SI, $0x00010000
 | |
| 	JB    three_bytes_standalone
 | |
| 	CMPL  SI, $0x01000000
 | |
| 	JB    four_bytes_standalone
 | |
| 	MOVB  $0xfc, (AX) // 5-byte header: tag 0xfc + 32-bit length-1
 | |
| 	MOVL  SI, 1(AX)
 | |
| 	ADDQ  $0x05, BX
 | |
| 	ADDQ  $0x05, AX
 | |
| 	JMP   memmove_long_standalone
 | |
| // 4-byte header: tag 0xf8 + low 16 bits + bits 16..23 of length-1.
 | |
| four_bytes_standalone:
 | |
| 	MOVL SI, DI
 | |
| 	SHRL $0x10, DI
 | |
| 	MOVB $0xf8, (AX)
 | |
| 	MOVW SI, 1(AX)
 | |
| 	MOVB DI, 3(AX)
 | |
| 	ADDQ $0x04, BX
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  memmove_long_standalone
 | |
| // 3-byte header: tag 0xf4 + 16-bit length-1.
 | |
| three_bytes_standalone:
 | |
| 	MOVB $0xf4, (AX)
 | |
| 	MOVW SI, 1(AX)
 | |
| 	ADDQ $0x03, BX
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  memmove_long_standalone
 | |
| // 2-byte header: tag 0xf0 + 8-bit length-1; the short copy is used only when length-1 < 64.
 | |
| two_bytes_standalone:
 | |
| 	MOVB $0xf0, (AX)
 | |
| 	MOVB SI, 1(AX)
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| 	CMPL SI, $0x40
 | |
| 	JB   memmove_standalone
 | |
| 	JMP  memmove_long_standalone
 | |
| // 1-byte header: (length-1) << 2.
 | |
| one_byte_standalone:
 | |
| 	SHLB $0x02, SI
 | |
| 	MOVB SI, (AX)
 | |
| 	ADDQ $0x01, BX
 | |
| 	ADDQ $0x01, AX
 | |
| // Short copy (reached with 1 <= DX <= 64): dispatch on DX to a fixed pair of possibly overlapping head/tail moves.
 | |
| memmove_standalone:
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ DX, $0x03
 | |
| 	JB   emit_lit_memmove_standalone_memmove_move_1or2
 | |
| 	JE   emit_lit_memmove_standalone_memmove_move_3
 | |
| 	CMPQ DX, $0x08
 | |
| 	JB   emit_lit_memmove_standalone_memmove_move_4through7
 | |
| 	CMPQ DX, $0x10
 | |
| 	JBE  emit_lit_memmove_standalone_memmove_move_8through16
 | |
| 	CMPQ DX, $0x20
 | |
| 	JBE  emit_lit_memmove_standalone_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_standalone_memmove_move_33through64
 | |
| // 1 or 2 bytes: first byte and last byte (the same byte when DX == 1).
 | |
| emit_lit_memmove_standalone_memmove_move_1or2:
 | |
| 	MOVB (CX), SI
 | |
| 	MOVB -1(CX)(DX*1), CL
 | |
| 	MOVB SI, (AX)
 | |
| 	MOVB CL, -1(AX)(DX*1)
 | |
| 	JMP  emit_literal_end_standalone
 | |
| // Exactly 3 bytes: one 2-byte move plus one byte.
 | |
| emit_lit_memmove_standalone_memmove_move_3:
 | |
| 	MOVW (CX), SI
 | |
| 	MOVB 2(CX), CL
 | |
| 	MOVW SI, (AX)
 | |
| 	MOVB CL, 2(AX)
 | |
| 	JMP  emit_literal_end_standalone
 | |
| // 4..7 bytes: first 4 and last 4 bytes.
 | |
| emit_lit_memmove_standalone_memmove_move_4through7:
 | |
| 	MOVL (CX), SI
 | |
| 	MOVL -4(CX)(DX*1), CX
 | |
| 	MOVL SI, (AX)
 | |
| 	MOVL CX, -4(AX)(DX*1)
 | |
| 	JMP  emit_literal_end_standalone
 | |
| // 8..16 bytes: first 8 and last 8 bytes.
 | |
| emit_lit_memmove_standalone_memmove_move_8through16:
 | |
| 	MOVQ (CX), SI
 | |
| 	MOVQ -8(CX)(DX*1), CX
 | |
| 	MOVQ SI, (AX)
 | |
| 	MOVQ CX, -8(AX)(DX*1)
 | |
| 	JMP  emit_literal_end_standalone
 | |
| // 17..32 bytes: first 16 and last 16 bytes.
 | |
| emit_lit_memmove_standalone_memmove_move_17through32:
 | |
| 	MOVOU (CX), X0
 | |
| 	MOVOU -16(CX)(DX*1), X1
 | |
| 	MOVOU X0, (AX)
 | |
| 	MOVOU X1, -16(AX)(DX*1)
 | |
| 	JMP   emit_literal_end_standalone
 | |
| // 33..64 bytes: first 32 and last 32 bytes.
 | |
| emit_lit_memmove_standalone_memmove_move_33through64:
 | |
| 	MOVOU (CX), X0
 | |
| 	MOVOU 16(CX), X1
 | |
| 	MOVOU -32(CX)(DX*1), X2
 | |
| 	MOVOU -16(CX)(DX*1), X3
 | |
| 	MOVOU X0, (AX)
 | |
| 	MOVOU X1, 16(AX)
 | |
| 	MOVOU X2, -32(AX)(DX*1)
 | |
| 	MOVOU X3, -16(AX)(DX*1)
 | |
| 	JMP   emit_literal_end_standalone
 | |
| 	JMP emit_literal_end_standalone // unreachable: directly follows an unconditional JMP
 | |
| // Long copy (reached with DX >= 65): save the first and last 32 bytes, copy the middle in 32-byte steps, then store the saved ends.
 | |
| memmove_long_standalone:
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (CX), X0
 | |
| 	MOVOU 16(CX), X1
 | |
| 	MOVOU -32(CX)(DX*1), X2
 | |
| 	MOVOU -16(CX)(DX*1), X3
 | |
| 	MOVQ  DX, DI
 | |
| 	SHRQ  $0x05, DI // DI = DX / 32
 | |
| 	MOVQ  AX, SI
 | |
| 	ANDL  $0x0000001f, SI // SI = destination address mod 32
 | |
| 	MOVQ  $0x00000040, R8
 | |
| 	SUBQ  SI, R8 // R8 = 64 - (AX mod 32), so AX + R8 - 32 is 32-byte aligned
 | |
| 	DECQ  DI // NOTE(review): DEC leaves CF untouched, so the JA below sees CF from the SUBQ above plus ZF from this DEC; confirm intended
 | |
| 	JA    emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(CX)(R8*1), SI
 | |
| 	LEAQ  -32(AX)(R8*1), R9
 | |
| // 32 bytes per iteration via SI (source) and R9 (destination, aligned stores); R8 tracks the offset. NOTE(review): JNA likewise combines CF from ADDQ with ZF from DECQ.
 | |
| emit_lit_memmove_long_standalonelarge_big_loop_back:
 | |
| 	MOVOU (SI), X4
 | |
| 	MOVOU 16(SI), X5
 | |
| 	MOVOA X4, (R9)
 | |
| 	MOVOA X5, 16(R9)
 | |
| 	ADDQ  $0x20, R9
 | |
| 	ADDQ  $0x20, SI
 | |
| 	ADDQ  $0x20, R8
 | |
| 	DECQ  DI
 | |
| 	JNA   emit_lit_memmove_long_standalonelarge_big_loop_back
 | |
| // Copy 32 bytes ending at offset R8 with aligned stores, advancing R8 by 32 while DX >= R8.
 | |
| emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(CX)(R8*1), X4
 | |
| 	MOVOU -16(CX)(R8*1), X5
 | |
| 	MOVOA X4, -32(AX)(R8*1)
 | |
| 	MOVOA X5, -16(AX)(R8*1)
 | |
| 	ADDQ  $0x20, R8
 | |
| 	CMPQ  DX, R8
 | |
| 	JAE   emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
 | |
| 	MOVOU X0, (AX) // finally store the saved first 32 and last 32 bytes (unaligned)
 | |
| 	MOVOU X1, 16(AX)
 | |
| 	MOVOU X2, -32(AX)(DX*1)
 | |
| 	MOVOU X3, -16(AX)(DX*1)
 | |
| 	JMP   emit_literal_end_standalone
 | |
| 	JMP emit_literal_end_standalone // unreachable: directly follows an unconditional JMP
 | |
| // Empty literal: nothing written, return 0.
 | |
| emit_literal_end_standalone_skip:
 | |
| 	XORQ BX, BX
 | |
| // Return BX = header bytes + literal bytes.
 | |
| emit_literal_end_standalone:
 | |
| 	MOVQ BX, ret+48(FP)
 | |
| 	RET
 | |
| 
 | |
| // func emitRepeat(dst []byte, offset int, length int) int
 | |
| TEXT ·emitRepeat(SB), NOSPLIT, $0-48 // Writes one or more 2..5 byte repeat codes for length (and, in one case, offset) at dst and returns the number of bytes written. No bounds checks against dst are performed here.
 | |
| 	XORQ BX, BX // BX = bytes written so far (the return value)
 | |
| 	MOVQ dst_base+0(FP), AX // AX = write cursor in dst
 | |
| 	MOVQ offset+24(FP), CX
 | |
| 	MOVQ length+32(FP), DX
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_standalone:
 | |
| 	MOVL DX, SI // SI = length as given, DX = length - 4
 | |
| 	LEAL -4(DX), DX
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_standalone
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_standalone
 | |
| 	CMPL CX, $0x00000800
 | |
| 	JB   repeat_two_offset_standalone
 | |
| // Pick the code size from DX = length - 4; above the 5-byte range a maximal 5-byte code is written and the loop repeats.
 | |
| cant_repeat_two_offset_standalone:
 | |
| 	CMPL DX, $0x00000104
 | |
| 	JB   repeat_three_standalone
 | |
| 	CMPL DX, $0x00010100
 | |
| 	JB   repeat_four_standalone
 | |
| 	CMPL DX, $0x0100ffff
 | |
| 	JB   repeat_five_standalone
 | |
| 	LEAL -16842747(DX), DX // DX -= 0x0100fffb; the result is the length for the next pass
 | |
| 	MOVL $0xfffb001d, (AX) // bytes 1d 00 fb ff, followed by ff below
 | |
| 	MOVB $0xff, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	ADDQ $0x05, BX
 | |
| 	JMP  emit_repeat_again_standalone
 | |
| // 5 bytes: word 0x001d, then the low 24 bits of (length - 4 - 65536).
 | |
| repeat_five_standalone:
 | |
| 	LEAL -65536(DX), DX
 | |
| 	MOVL DX, CX
 | |
| 	MOVW $0x001d, (AX)
 | |
| 	MOVW DX, 2(AX)
 | |
| 	SARL $0x10, CX
 | |
| 	MOVB CL, 4(AX)
 | |
| 	ADDQ $0x05, BX
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  gen_emit_repeat_end
 | |
| // 4 bytes: word 0x0019, then 16 bits of (length - 4 - 256).
 | |
| repeat_four_standalone:
 | |
| 	LEAL -256(DX), DX
 | |
| 	MOVW $0x0019, (AX)
 | |
| 	MOVW DX, 2(AX)
 | |
| 	ADDQ $0x04, BX
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  gen_emit_repeat_end
 | |
| // 3 bytes: word 0x0015, then 8 bits of (length - 4 - 4).
 | |
| repeat_three_standalone:
 | |
| 	LEAL -4(DX), DX
 | |
| 	MOVW $0x0015, (AX)
 | |
| 	MOVB DL, 2(AX)
 | |
| 	ADDQ $0x03, BX
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  gen_emit_repeat_end
 | |
| // 2 bytes (length <= 8): 16-bit value ((length - 4) << 2) | 1.
 | |
| repeat_two_standalone:
 | |
| 	SHLL $0x02, DX
 | |
| 	ORL  $0x01, DX
 | |
| 	MOVW DX, (AX)
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  gen_emit_repeat_end
 | |
| // 2 bytes (length 9..11, offset < 2048): byte 0 = 1 + (length-4)*4 | (offset>>8)<<5, byte 1 = low 8 bits of offset.
 | |
| repeat_two_offset_standalone:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(DX*4), DX
 | |
| 	MOVB CL, 1(AX)
 | |
| 	SARL $0x08, CX
 | |
| 	SHLL $0x05, CX
 | |
| 	ORL  CX, DX
 | |
| 	MOVB DL, (AX)
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| // Return the byte count; the advanced cursor in AX is not returned.
 | |
| gen_emit_repeat_end:
 | |
| 	MOVQ BX, ret+40(FP)
 | |
| 	RET
 | |
| 
 | |
| // func emitCopy(dst []byte, offset int, length int) int
 | |
| TEXT ·emitCopy(SB), NOSPLIT, $0-48
 | |
| 	XORQ BX, BX
 | |
| 	MOVQ dst_base+0(FP), AX
 | |
| 	MOVQ offset+24(FP), CX
 | |
| 	MOVQ length+32(FP), DX
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL CX, $0x00010000
 | |
| 	JB   two_byte_offset_standalone
 | |
| 	CMPL DX, $0x40
 | |
| 	JBE  four_bytes_remain_standalone
 | |
| 	MOVB $0xff, (AX)
 | |
| 	MOVL CX, 1(AX)
 | |
| 	LEAL -64(DX), DX
 | |
| 	ADDQ $0x05, BX
 | |
| 	ADDQ $0x05, AX
 | |
| 	CMPL DX, $0x04
 | |
| 	JB   four_bytes_remain_standalone
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_standalone_emit_copy:
 | |
| 	MOVL DX, SI
 | |
| 	LEAL -4(DX), DX
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_standalone_emit_copy
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_standalone_emit_copy
 | |
| 	CMPL CX, $0x00000800
 | |
| 	JB   repeat_two_offset_standalone_emit_copy
 | |
| 
 | |
| cant_repeat_two_offset_standalone_emit_copy:
 | |
| 	CMPL DX, $0x00000104
 | |
| 	JB   repeat_three_standalone_emit_copy
 | |
| 	CMPL DX, $0x00010100
 | |
| 	JB   repeat_four_standalone_emit_copy
 | |
| 	CMPL DX, $0x0100ffff
 | |
| 	JB   repeat_five_standalone_emit_copy
 | |
| 	LEAL -16842747(DX), DX
 | |
| 	MOVL $0xfffb001d, (AX)
 | |
| 	MOVB $0xff, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	ADDQ $0x05, BX
 | |
| 	JMP  emit_repeat_again_standalone_emit_copy
 | |
| 
 | |
| repeat_five_standalone_emit_copy:
 | |
| 	LEAL -65536(DX), DX
 | |
| 	MOVL DX, CX
 | |
| 	MOVW $0x001d, (AX)
 | |
| 	MOVW DX, 2(AX)
 | |
| 	SARL $0x10, CX
 | |
| 	MOVB CL, 4(AX)
 | |
| 	ADDQ $0x05, BX
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_four_standalone_emit_copy:
 | |
| 	LEAL -256(DX), DX
 | |
| 	MOVW $0x0019, (AX)
 | |
| 	MOVW DX, 2(AX)
 | |
| 	ADDQ $0x04, BX
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_three_standalone_emit_copy:
 | |
| 	LEAL -4(DX), DX
 | |
| 	MOVW $0x0015, (AX)
 | |
| 	MOVB DL, 2(AX)
 | |
| 	ADDQ $0x03, BX
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_two_standalone_emit_copy:
 | |
| 	SHLL $0x02, DX
 | |
| 	ORL  $0x01, DX
 | |
| 	MOVW DX, (AX)
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_two_offset_standalone_emit_copy:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(DX*4), DX
 | |
| 	MOVB CL, 1(AX)
 | |
| 	SARL $0x08, CX
 | |
| 	SHLL $0x05, CX
 | |
| 	ORL  CX, DX
 | |
| 	MOVB DL, (AX)
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| four_bytes_remain_standalone:
 | |
| 	TESTL DX, DX
 | |
| 	JZ    gen_emit_copy_end
 | |
| 	XORL  SI, SI
 | |
| 	LEAL  -1(SI)(DX*4), DX
 | |
| 	MOVB  DL, (AX)
 | |
| 	MOVL  CX, 1(AX)
 | |
| 	ADDQ  $0x05, BX
 | |
| 	ADDQ  $0x05, AX
 | |
| 	JMP   gen_emit_copy_end
 | |
| 
 | |
| two_byte_offset_standalone:
 | |
| 	CMPL DX, $0x40
 | |
| 	JBE  two_byte_offset_short_standalone
 | |
| 	CMPL CX, $0x00000800
 | |
| 	JAE  long_offset_short_standalone
 | |
| 	MOVL $0x00000001, SI
 | |
| 	LEAL 16(SI), SI
 | |
| 	MOVB CL, 1(AX)
 | |
| 	MOVL CX, DI
 | |
| 	SHRL $0x08, DI
 | |
| 	SHLL $0x05, DI
 | |
| 	ORL  DI, SI
 | |
| 	MOVB SI, (AX)
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| 	SUBL $0x08, DX
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(DX), DX
 | |
| 	JMP  cant_repeat_two_offset_standalone_emit_copy_short_2b
 | |
| 
 | |
| emit_repeat_again_standalone_emit_copy_short_2b:
 | |
| 	MOVL DX, SI
 | |
| 	LEAL -4(DX), DX
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_standalone_emit_copy_short_2b
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_standalone_emit_copy_short_2b
 | |
| 	CMPL CX, $0x00000800
 | |
| 	JB   repeat_two_offset_standalone_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_standalone_emit_copy_short_2b:
 | |
| 	CMPL DX, $0x00000104
 | |
| 	JB   repeat_three_standalone_emit_copy_short_2b
 | |
| 	CMPL DX, $0x00010100
 | |
| 	JB   repeat_four_standalone_emit_copy_short_2b
 | |
| 	CMPL DX, $0x0100ffff
 | |
| 	JB   repeat_five_standalone_emit_copy_short_2b
 | |
| 	LEAL -16842747(DX), DX
 | |
| 	MOVL $0xfffb001d, (AX)
 | |
| 	MOVB $0xff, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	ADDQ $0x05, BX
 | |
| 	JMP  emit_repeat_again_standalone_emit_copy_short_2b
 | |
| 
 | |
| repeat_five_standalone_emit_copy_short_2b:
 | |
| 	LEAL -65536(DX), DX
 | |
| 	MOVL DX, CX
 | |
| 	MOVW $0x001d, (AX)
 | |
| 	MOVW DX, 2(AX)
 | |
| 	SARL $0x10, CX
 | |
| 	MOVB CL, 4(AX)
 | |
| 	ADDQ $0x05, BX
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_four_standalone_emit_copy_short_2b:
 | |
| 	LEAL -256(DX), DX
 | |
| 	MOVW $0x0019, (AX)
 | |
| 	MOVW DX, 2(AX)
 | |
| 	ADDQ $0x04, BX
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_three_standalone_emit_copy_short_2b:
 | |
| 	LEAL -4(DX), DX
 | |
| 	MOVW $0x0015, (AX)
 | |
| 	MOVB DL, 2(AX)
 | |
| 	ADDQ $0x03, BX
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_two_standalone_emit_copy_short_2b:
 | |
| 	SHLL $0x02, DX
 | |
| 	ORL  $0x01, DX
 | |
| 	MOVW DX, (AX)
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_two_offset_standalone_emit_copy_short_2b:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(DX*4), DX
 | |
| 	MOVB CL, 1(AX)
 | |
| 	SARL $0x08, CX
 | |
| 	SHLL $0x05, CX
 | |
| 	ORL  CX, DX
 | |
| 	MOVB DL, (AX)
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| long_offset_short_standalone:
 | |
| 	MOVB $0xee, (AX)
 | |
| 	MOVW CX, 1(AX)
 | |
| 	LEAL -60(DX), DX
 | |
| 	ADDQ $0x03, AX
 | |
| 	ADDQ $0x03, BX
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_standalone_emit_copy_short:
 | |
| 	MOVL DX, SI
 | |
| 	LEAL -4(DX), DX
 | |
| 	CMPL SI, $0x08
 | |
| 	JBE  repeat_two_standalone_emit_copy_short
 | |
| 	CMPL SI, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_standalone_emit_copy_short
 | |
| 	CMPL CX, $0x00000800
 | |
| 	JB   repeat_two_offset_standalone_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_standalone_emit_copy_short:
 | |
| 	CMPL DX, $0x00000104
 | |
| 	JB   repeat_three_standalone_emit_copy_short
 | |
| 	CMPL DX, $0x00010100
 | |
| 	JB   repeat_four_standalone_emit_copy_short
 | |
| 	CMPL DX, $0x0100ffff
 | |
| 	JB   repeat_five_standalone_emit_copy_short
 | |
| 	LEAL -16842747(DX), DX
 | |
| 	MOVL $0xfffb001d, (AX)
 | |
| 	MOVB $0xff, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	ADDQ $0x05, BX
 | |
| 	JMP  emit_repeat_again_standalone_emit_copy_short
 | |
| 
 | |
| repeat_five_standalone_emit_copy_short:
 | |
| 	LEAL -65536(DX), DX
 | |
| 	MOVL DX, CX
 | |
| 	MOVW $0x001d, (AX)
 | |
| 	MOVW DX, 2(AX)
 | |
| 	SARL $0x10, CX
 | |
| 	MOVB CL, 4(AX)
 | |
| 	ADDQ $0x05, BX
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_four_standalone_emit_copy_short:
 | |
| 	LEAL -256(DX), DX
 | |
| 	MOVW $0x0019, (AX)
 | |
| 	MOVW DX, 2(AX)
 | |
| 	ADDQ $0x04, BX
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_three_standalone_emit_copy_short:
 | |
| 	LEAL -4(DX), DX
 | |
| 	MOVW $0x0015, (AX)
 | |
| 	MOVB DL, 2(AX)
 | |
| 	ADDQ $0x03, BX
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_two_standalone_emit_copy_short:
 | |
| 	SHLL $0x02, DX
 | |
| 	ORL  $0x01, DX
 | |
| 	MOVW DX, (AX)
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| repeat_two_offset_standalone_emit_copy_short:
 | |
| 	XORQ SI, SI
 | |
| 	LEAL 1(SI)(DX*4), DX
 | |
| 	MOVB CL, 1(AX)
 | |
| 	SARL $0x08, CX
 | |
| 	SHLL $0x05, CX
 | |
| 	ORL  CX, DX
 | |
| 	MOVB DL, (AX)
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| two_byte_offset_short_standalone:
 | |
| 	MOVL DX, SI
 | |
| 	SHLL $0x02, SI
 | |
| 	CMPL DX, $0x0c
 | |
| 	JAE  emit_copy_three_standalone
 | |
| 	CMPL CX, $0x00000800
 | |
| 	JAE  emit_copy_three_standalone
 | |
| 	LEAL -15(SI), SI
 | |
| 	MOVB CL, 1(AX)
 | |
| 	SHRL $0x08, CX
 | |
| 	SHLL $0x05, CX
 | |
| 	ORL  CX, SI
 | |
| 	MOVB SI, (AX)
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  gen_emit_copy_end
 | |
| 
 | |
| emit_copy_three_standalone:
 | |
| 	LEAL -2(SI), SI
 | |
| 	MOVB SI, (AX)
 | |
| 	MOVW CX, 1(AX)
 | |
| 	ADDQ $0x03, BX
 | |
| 	ADDQ $0x03, AX
 | |
| 
 | |
| gen_emit_copy_end:
 | |
| 	MOVQ BX, ret+40(FP)
 | |
| 	RET
 | |
| 
 | |
| // func emitCopyNoRepeat(dst []byte, offset int, length int) int
 | |
| TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48 // encodes a copy of `length` bytes at `offset` into dst; returns the number of bytes written
 | |
| 	XORQ BX, BX // BX = bytes written so far (becomes the return value)
 | |
| 	MOVQ dst_base+0(FP), AX // AX = output write cursor
 | |
| 	MOVQ offset+24(FP), CX // CX = copy offset
 | |
| 	MOVQ length+32(FP), DX // DX = copy length still to encode
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL CX, $0x00010000
 | |
| 	JB   two_byte_offset_standalone_snappy // offset < 65536: use the 16-bit offset encodings
 | |
| 
 | |
| four_bytes_loop_back_standalone_snappy: // 32-bit offset path: 5 bytes per emitted op
 | |
| 	CMPL DX, $0x40
 | |
| 	JBE  four_bytes_remain_standalone_snappy // length <= 64 fits in one final op
 | |
| 	MOVB $0xff, (AX) // tag 0xff = ((64-1)<<2)|3, i.e. the formula below with length 64
 | |
| 	MOVL CX, 1(AX) // 4-byte offset follows the tag
 | |
| 	LEAL -64(DX), DX // 64 bytes of the copy are now encoded
 | |
| 	ADDQ $0x05, BX
 | |
| 	ADDQ $0x05, AX
 | |
| 	CMPL DX, $0x04
 | |
| 	JB   four_bytes_remain_standalone_snappy // fewer than 4 bytes left: finish with the remainder op
 | |
| 	JMP  four_bytes_loop_back_standalone_snappy
 | |
| 
 | |
| four_bytes_remain_standalone_snappy:
 | |
| 	TESTL DX, DX
 | |
| 	JZ    gen_emit_copy_end_snappy // nothing left to encode
 | |
| 	XORL  SI, SI // SI = 0, used only as the LEAL base below
 | |
| 	LEAL  -1(SI)(DX*4), DX // DX = 4*length - 1 = ((length-1)<<2)|3
 | |
| 	MOVB  DL, (AX) // tag byte
 | |
| 	MOVL  CX, 1(AX) // 4-byte offset
 | |
| 	ADDQ  $0x05, BX
 | |
| 	ADDQ  $0x05, AX
 | |
| 	JMP   gen_emit_copy_end_snappy
 | |
| 
 | |
| two_byte_offset_standalone_snappy: // 16-bit offset path
 | |
| 	CMPL DX, $0x40
 | |
| 	JBE  two_byte_offset_short_standalone_snappy // length <= 64 fits in one final op
 | |
| 	MOVB $0xee, (AX) // tag 0xee = ((60-1)<<2)|2, i.e. a 60-byte chunk
 | |
| 	MOVW CX, 1(AX) // 2-byte offset follows the tag
 | |
| 	LEAL -60(DX), DX // 60 bytes of the copy are now encoded
 | |
| 	ADDQ $0x03, AX
 | |
| 	ADDQ $0x03, BX
 | |
| 	JMP  two_byte_offset_standalone_snappy
 | |
| 
 | |
| two_byte_offset_short_standalone_snappy:
 | |
| 	MOVL DX, SI
 | |
| 	SHLL $0x02, SI // SI = 4*length, base for both tag formulas below
 | |
| 	CMPL DX, $0x0c
 | |
| 	JAE  emit_copy_three_standalone_snappy // length >= 12 needs the 3-byte form
 | |
| 	CMPL CX, $0x00000800
 | |
| 	JAE  emit_copy_three_standalone_snappy // offset >= 2048 needs the 3-byte form
 | |
| 	LEAL -15(SI), SI // SI = 4*length - 15 = ((length-4)<<2)|1
 | |
| 	MOVB CL, 1(AX) // low 8 bits of the offset go in the second byte
 | |
| 	SHRL $0x08, CX
 | |
| 	SHLL $0x05, CX // remaining offset bits move to bit 5 and up of the tag
 | |
| 	ORL  CX, SI
 | |
| 	MOVB SI, (AX) // tag byte
 | |
| 	ADDQ $0x02, BX
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  gen_emit_copy_end_snappy
 | |
| 
 | |
| emit_copy_three_standalone_snappy:
 | |
| 	LEAL -2(SI), SI // SI = 4*length - 2 = ((length-1)<<2)|2
 | |
| 	MOVB SI, (AX) // tag byte
 | |
| 	MOVW CX, 1(AX) // 2-byte offset
 | |
| 	ADDQ $0x03, BX
 | |
| 	ADDQ $0x03, AX
 | |
| 
 | |
| gen_emit_copy_end_snappy:
 | |
| 	MOVQ BX, ret+40(FP) // return total bytes written
 | |
| 	RET
 | |
| 
 | |
| // func matchLen(a []byte, b []byte) int
 | |
| // Requires: BMI
 | |
| TEXT ·matchLen(SB), NOSPLIT, $0-56 // returns the count of leading bytes that are equal in a and b, scanning at most len(a) bytes
 | |
| 	MOVQ a_base+0(FP), AX // AX = a
 | |
| 	MOVQ b_base+24(FP), CX // CX = b
 | |
| 	MOVQ a_len+8(FP), DX // DX = bytes left to compare; only a's length is loaded, b's length is never read here
 | |
| 
 | |
| 	// matchLen
 | |
| 	XORL SI, SI // SI = number of matching bytes found so far (also the index into a and b)
 | |
| 
 | |
| matchlen_loopback_16_standalone: // main loop: compare 16 bytes per iteration as two 8-byte words
 | |
| 	CMPL DX, $0x10
 | |
| 	JB   matchlen_match8_standalone // fewer than 16 bytes left
 | |
| 	MOVQ (AX)(SI*1), BX
 | |
| 	MOVQ 8(AX)(SI*1), DI
 | |
| 	XORQ (CX)(SI*1), BX // non-zero bits mark differences in the first word
 | |
| 	JNZ  matchlen_bsf_8_standalone
 | |
| 	XORQ 8(CX)(SI*1), DI // non-zero bits mark differences in the second word
 | |
| 	JNZ  matchlen_bsf_16standalone
 | |
| 	LEAL -16(DX), DX
 | |
| 	LEAL 16(SI), SI
 | |
| 	JMP  matchlen_loopback_16_standalone
 | |
| 
 | |
| matchlen_bsf_16standalone: // mismatch inside the second 8-byte word
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ DI, DI
 | |
| 
 | |
| #else
 | |
| 	BSFQ DI, DI
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, DI // lowest differing bit index / 8 = index of first differing byte
 | |
| 	LEAL 8(SI)(DI*1), SI // add the 8 bytes of the first word, which matched
 | |
| 	JMP  gen_match_len_end
 | |
| 
 | |
| matchlen_match8_standalone: // at most one 8-byte step; falls to the 4-byte step afterwards, not back here
 | |
| 	CMPL DX, $0x08
 | |
| 	JB   matchlen_match4_standalone
 | |
| 	MOVQ (AX)(SI*1), BX
 | |
| 	XORQ (CX)(SI*1), BX
 | |
| 	JNZ  matchlen_bsf_8_standalone
 | |
| 	LEAL -8(DX), DX
 | |
| 	LEAL 8(SI), SI
 | |
| 	JMP  matchlen_match4_standalone
 | |
| 
 | |
| matchlen_bsf_8_standalone: // mismatch inside the 8-byte word held (XORed) in BX
 | |
| #ifdef GOAMD64_v3
 | |
| 	TZCNTQ BX, BX
 | |
| 
 | |
| #else
 | |
| 	BSFQ BX, BX
 | |
| 
 | |
| #endif
 | |
| 	SARQ $0x03, BX // lowest differing bit index / 8 = index of first differing byte
 | |
| 	LEAL (SI)(BX*1), SI
 | |
| 	JMP  gen_match_len_end
 | |
| 
 | |
| matchlen_match4_standalone:
 | |
| 	CMPL DX, $0x04
 | |
| 	JB   matchlen_match2_standalone
 | |
| 	MOVL (AX)(SI*1), BX
 | |
| 	CMPL (CX)(SI*1), BX
 | |
| 	JNE  matchlen_match2_standalone // 4-byte mismatch: narrow it down with the 2- and 1-byte steps
 | |
| 	LEAL -4(DX), DX
 | |
| 	LEAL 4(SI), SI
 | |
| 
 | |
| matchlen_match2_standalone:
 | |
| 	CMPL DX, $0x01
 | |
| 	JE   matchlen_match1_standalone // exactly one byte left
 | |
| 	JB   gen_match_len_end // zero bytes left
 | |
| 	MOVW (AX)(SI*1), BX
 | |
| 	CMPW (CX)(SI*1), BX
 | |
| 	JNE  matchlen_match1_standalone // 2-byte mismatch: the first of the two may still match
 | |
| 	LEAL 2(SI), SI
 | |
| 	SUBL $0x02, DX
 | |
| 	JZ   gen_match_len_end // those were the last two bytes
 | |
| 
 | |
| matchlen_match1_standalone:
 | |
| 	MOVB (AX)(SI*1), BL
 | |
| 	CMPB (CX)(SI*1), BL
 | |
| 	JNE  gen_match_len_end
 | |
| 	LEAL 1(SI), SI
 | |
| 
 | |
| gen_match_len_end:
 | |
| 	MOVQ SI, ret+48(FP) // return the match length
 | |
| 	RET
 | |
| 
 | |
| // func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
 | |
| // Requires: SSE2
 | |
| TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64 // re-encodes the token stream in src into dst (presumably LZ4 block -> S2, going by the names); uncompressed is -1 on corrupt input, -2 when dst is too small
 | |
| 	XORQ SI, SI // SI = running uncompressed size (literal + match lengths)
 | |
| 	MOVQ dst_base+0(FP), AX // AX = dst write cursor
 | |
| 	MOVQ dst_len+8(FP), CX
 | |
| 	MOVQ src_base+24(FP), DX // DX = src read cursor
 | |
| 	MOVQ src_len+32(FP), BX
 | |
| 	LEAQ (DX)(BX*1), BX // BX = end of src
 | |
| 	LEAQ -8(AX)(CX*1), CX // CX = dst end minus 8, used as the write limit
 | |
| 	XORQ DI, DI // DI = offset of the previous match (0 = none yet)
 | |
| 
 | |
| lz4_s2_loop: // one iteration per input sequence: token, literals, then match
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4_s2_corrupt // ran out of input where a token was expected
 | |
| 	CMPQ    AX, CX
 | |
| 	JAE     lz4_s2_dstfull
 | |
| 	MOVBQZX (DX), R8 // R8 = token byte
 | |
| 	MOVQ    R8, R9
 | |
| 	MOVQ    R8, R10
 | |
| 	SHRQ    $0x04, R9 // R9 = high nibble = literal length
 | |
| 	ANDQ    $0x0f, R10 // R10 = low nibble = match length field
 | |
| 	CMPQ    R8, $0xf0
 | |
| 	JB      lz4_s2_ll_end // literal nibble < 15: no extension bytes
 | |
| 
 | |
| lz4_s2_ll_loop: // literal length extension: add bytes while they equal 0xff
 | |
| 	INCQ    DX
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4_s2_corrupt
 | |
| 	MOVBQZX (DX), R8
 | |
| 	ADDQ    R8, R9
 | |
| 	CMPQ    R8, $0xff
 | |
| 	JEQ     lz4_s2_ll_loop
 | |
| 
 | |
| lz4_s2_ll_end:
 | |
| 	LEAQ  (DX)(R9*1), R8 // R8 = address of the last literal byte (DX still points at the token / last length byte)
 | |
| 	ADDQ  $0x04, R10 // match length = nibble + 4
 | |
| 	CMPQ  R8, BX
 | |
| 	JAE   lz4_s2_corrupt // literals would run past the end of src
 | |
| 	INCQ  DX // DX = first literal byte
 | |
| 	INCQ  R8 // R8 = first byte after the literals
 | |
| 	TESTQ R9, R9
 | |
| 	JZ    lz4_s2_lits_done // no literals in this sequence
 | |
| 	LEAQ  (AX)(R9*1), R11
 | |
| 	CMPQ  R11, CX
 | |
| 	JAE   lz4_s2_dstfull
 | |
| 	ADDQ  R9, SI // count the literals as uncompressed output
 | |
| 	LEAL  -1(R9), R11 // R11 = literal length - 1, the value stored in the header
 | |
| 	CMPL  R11, $0x3c
 | |
| 	JB    one_byte_lz4_s2
 | |
| 	CMPL  R11, $0x00000100
 | |
| 	JB    two_bytes_lz4_s2
 | |
| 	CMPL  R11, $0x00010000
 | |
| 	JB    three_bytes_lz4_s2
 | |
| 	CMPL  R11, $0x01000000
 | |
| 	JB    four_bytes_lz4_s2
 | |
| 	MOVB  $0xfc, (AX) // 5-byte header: tag 0xfc (63<<2) + 32-bit length-1
 | |
| 	MOVL  R11, 1(AX)
 | |
| 	ADDQ  $0x05, AX
 | |
| 	JMP   memmove_long_lz4_s2
 | |
| 
 | |
| four_bytes_lz4_s2: // 4-byte header: tag 0xf8 (62<<2) + 24-bit length-1
 | |
| 	MOVL R11, R12
 | |
| 	SHRL $0x10, R12
 | |
| 	MOVB $0xf8, (AX)
 | |
| 	MOVW R11, 1(AX)
 | |
| 	MOVB R12, 3(AX)
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  memmove_long_lz4_s2
 | |
| 
 | |
| three_bytes_lz4_s2: // 3-byte header: tag 0xf4 (61<<2) + 16-bit length-1
 | |
| 	MOVB $0xf4, (AX)
 | |
| 	MOVW R11, 1(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  memmove_long_lz4_s2
 | |
| 
 | |
| two_bytes_lz4_s2: // 2-byte header: tag 0xf0 (60<<2) + 8-bit length-1
 | |
| 	MOVB $0xf0, (AX)
 | |
| 	MOVB R11, 1(AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	CMPL R11, $0x40
 | |
| 	JB   memmove_lz4_s2 // length <= 64: the short copy handles it
 | |
| 	JMP  memmove_long_lz4_s2
 | |
| 
 | |
| one_byte_lz4_s2: // 1-byte header: (length-1)<<2
 | |
| 	SHLB $0x02, R11
 | |
| 	MOVB R11, (AX)
 | |
| 	ADDQ $0x01, AX
 | |
| 
 | |
| memmove_lz4_s2:
 | |
| 	LEAQ (AX)(R9*1), R11 // R11 = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_lz4_s2_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_lz4_s2_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_lz4_s2_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_lz4_s2_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_lz4_s2_memmove_move_8: // always moves a full 8 bytes, even when R9 < 8; presumably why CX is dst end minus 8 - TODO confirm
 | |
| 	MOVQ (DX), R12
 | |
| 	MOVQ R12, (AX)
 | |
| 	JMP  memmove_end_copy_lz4_s2
 | |
| 
 | |
| emit_lit_memmove_lz4_s2_memmove_move_8through16: // two overlapping 8-byte moves (head and tail)
 | |
| 	MOVQ (DX), R12
 | |
| 	MOVQ -8(DX)(R9*1), DX // DX is reused as scratch; it is reloaded from R8 at lz4_s2_lits_emit_done
 | |
| 	MOVQ R12, (AX)
 | |
| 	MOVQ DX, -8(AX)(R9*1)
 | |
| 	JMP  memmove_end_copy_lz4_s2
 | |
| 
 | |
| emit_lit_memmove_lz4_s2_memmove_move_17through32: // two overlapping 16-byte moves (head and tail)
 | |
| 	MOVOU (DX), X0
 | |
| 	MOVOU -16(DX)(R9*1), X1
 | |
| 	MOVOU X0, (AX)
 | |
| 	MOVOU X1, -16(AX)(R9*1)
 | |
| 	JMP   memmove_end_copy_lz4_s2
 | |
| 
 | |
| emit_lit_memmove_lz4_s2_memmove_move_33through64: // first 32 and last 32 bytes, overlapping in the middle
 | |
| 	MOVOU (DX), X0
 | |
| 	MOVOU 16(DX), X1
 | |
| 	MOVOU -32(DX)(R9*1), X2
 | |
| 	MOVOU -16(DX)(R9*1), X3
 | |
| 	MOVOU X0, (AX)
 | |
| 	MOVOU X1, 16(AX)
 | |
| 	MOVOU X2, -32(AX)(R9*1)
 | |
| 	MOVOU X3, -16(AX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_lz4_s2:
 | |
| 	MOVQ R11, AX // advance dst past the literals
 | |
| 	JMP  lz4_s2_lits_emit_done
 | |
| 
 | |
| memmove_long_lz4_s2:
 | |
| 	LEAQ (AX)(R9*1), R11 // R11 = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DX), X0 // X0..X3 = first 32 and last 32 source bytes, stored unaligned after the loop
 | |
| 	MOVOU 16(DX), X1
 | |
| 	MOVOU -32(DX)(R9*1), X2
 | |
| 	MOVOU -16(DX)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13 // R13 = length / 32
 | |
| 	MOVQ  AX, R12
 | |
| 	ANDL  $0x0000001f, R12 // R12 = dst misalignment within 32 bytes
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R12, R14 // R14 = 64 - misalignment, so AX+R14-32 is 32-byte aligned for the MOVOA stores
 | |
| 	DECQ  R13 // NOTE(review): DECQ leaves CF unchanged, so the JA below also depends on CF from the SUBQ above
 | |
| 	JA    emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32
 | |
| 	LEAQ  -32(DX)(R14*1), R12
 | |
| 	LEAQ  -32(AX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_lz4_s2large_big_loop_back: // 32 bytes per pass: unaligned loads, aligned stores
 | |
| 	MOVOU (R12), X4
 | |
| 	MOVOU 16(R12), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R12
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_lz4_s2large_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32: // 32 bytes per pass, indexed by R14, while R9 >= R14
 | |
| 	MOVOU -32(DX)(R14*1), X4
 | |
| 	MOVOU -16(DX)(R14*1), X5
 | |
| 	MOVOA X4, -32(AX)(R14*1)
 | |
| 	MOVOA X5, -16(AX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32
 | |
| 	MOVOU X0, (AX) // finally write the saved head and tail
 | |
| 	MOVOU X1, 16(AX)
 | |
| 	MOVOU X2, -32(AX)(R9*1)
 | |
| 	MOVOU X3, -16(AX)(R9*1)
 | |
| 	MOVQ  R11, AX // advance dst past the literals
 | |
| 
 | |
| lz4_s2_lits_emit_done:
 | |
| 	MOVQ R8, DX // src cursor = first byte after the literals
 | |
| 
 | |
| lz4_s2_lits_done:
 | |
| 	CMPQ DX, BX
 | |
| 	JNE  lz4_s2_match
 | |
| 	CMPQ R10, $0x04 // input ends here: only valid when the match nibble was 0 (R10 == 0 + 4)
 | |
| 	JEQ  lz4_s2_done
 | |
| 	JMP  lz4_s2_corrupt
 | |
| 
 | |
| lz4_s2_match:
 | |
| 	LEAQ    2(DX), R8
 | |
| 	CMPQ    R8, BX
 | |
| 	JAE     lz4_s2_corrupt // need the 2 offset bytes plus at least one more input byte
 | |
| 	MOVWQZX (DX), R9 // R9 = 16-bit match offset
 | |
| 	MOVQ    R8, DX
 | |
| 	TESTQ   R9, R9
 | |
| 	JZ      lz4_s2_corrupt // offset 0 is rejected
 | |
| 	CMPQ    R9, SI
 | |
| 	JA      lz4_s2_corrupt // offset reaches before the start of the output produced so far
 | |
| 	CMPQ    R10, $0x13
 | |
| 	JNE     lz4_s2_ml_done // match nibble < 15 (15 + 4 == 0x13): no extension bytes
 | |
| 
 | |
| lz4_s2_ml_loop: // match length extension: add bytes while they equal 0xff
 | |
| 	MOVBQZX (DX), R8
 | |
| 	INCQ    DX
 | |
| 	ADDQ    R8, R10
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4_s2_corrupt
 | |
| 	CMPQ    R8, $0xff
 | |
| 	JEQ     lz4_s2_ml_loop
 | |
| 
 | |
| lz4_s2_ml_done:
 | |
| 	ADDQ R10, SI // count the match as uncompressed output
 | |
| 	CMPQ R9, DI
 | |
| 	JNE  lz4_s2_docopy // offset differs from the previous match: emit a copy; otherwise fall through to a repeat
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_lz4_s2: // R10 = length on entry; afterwards R8 = length, R10 = length - 4
 | |
| 	MOVL R10, R8
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_lz4_s2
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_lz4_s2
 | |
| 	CMPL R9, $0x00000800
 | |
| 	JB   repeat_two_offset_lz4_s2 // length < 12 and offset < 2048
 | |
| 
 | |
| cant_repeat_two_offset_lz4_s2: // choose the encoding size from length - 4
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_lz4_s2
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_lz4_s2
 | |
| 	CMPL R10, $0x0100ffff
 | |
| 	JB   repeat_five_lz4_s2
 | |
| 	LEAL -16842747(R10), R10 // too long for one op: emit a 5-byte op with an all-ones length field and loop on the rest
 | |
| 	MOVL $0xfffb001d, (AX)
 | |
| 	MOVB $0xff, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  emit_repeat_again_lz4_s2
 | |
| 
 | |
| repeat_five_lz4_s2: // 5 bytes: 0x1d, 0x00, then 24 bits of (length - 4 - 65536)
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, R9 // R9 (the offset) is free to clobber: the next sequence reloads it
 | |
| 	MOVW $0x001d, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	SARL $0x10, R9
 | |
| 	MOVB R9, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_four_lz4_s2: // 4 bytes: 0x19, 0x00, then 16 bits of (length - 4 - 256)
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_three_lz4_s2: // 3 bytes: 0x15, 0x00, then 8 bits of (length - 4 - 4)
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (AX)
 | |
| 	MOVB R10, 2(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_two_lz4_s2: // 2 bytes: ((length-4)<<2)|1 stored as a 16-bit word
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_two_offset_lz4_s2: // 2 bytes: tag = ((length-4)<<2)|1 | (offset>>8)<<5, then the low offset byte
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(R10*4), R10
 | |
| 	MOVB R9, 1(AX)
 | |
| 	SARL $0x08, R9
 | |
| 	SHLL $0x05, R9
 | |
| 	ORL  R9, R10
 | |
| 	MOVB R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| lz4_s2_docopy:
 | |
| 	MOVQ R9, DI // remember this offset for the next repeat check
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_lz4_s2 // length <= 64: a single copy op is enough
 | |
| 	CMPL R9, $0x00000800
 | |
| 	JAE  long_offset_short_lz4_s2
 | |
| 	MOVL $0x00000001, R8 // offset < 2048: start with a 2-byte copy of 8 bytes, tag base 17 = ((8-4)<<2)|1
 | |
| 	LEAL 16(R8), R8
 | |
| 	MOVB R9, 1(AX) // low offset byte
 | |
| 	MOVL R9, R11
 | |
| 	SHRL $0x08, R11
 | |
| 	SHLL $0x05, R11 // remaining offset bits go to bit 5 and up of the tag
 | |
| 	ORL  R11, R8
 | |
| 	MOVB R8, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	SUBL $0x08, R10 // 8 bytes covered; the rest is emitted as a repeat
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(R10), R10
 | |
| 	JMP  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
 | |
| 
 | |
| emit_repeat_again_lz4_s2_emit_copy_short_2b: // same repeat encoder as emit_repeat_again_lz4_s2 above
 | |
| 	MOVL R10, R8
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_lz4_s2_emit_copy_short_2b
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
 | |
| 	CMPL R9, $0x00000800
 | |
| 	JB   repeat_two_offset_lz4_s2_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_lz4_s2_emit_copy_short_2b:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_lz4_s2_emit_copy_short_2b
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_lz4_s2_emit_copy_short_2b
 | |
| 	CMPL R10, $0x0100ffff
 | |
| 	JB   repeat_five_lz4_s2_emit_copy_short_2b
 | |
| 	LEAL -16842747(R10), R10
 | |
| 	MOVL $0xfffb001d, (AX)
 | |
| 	MOVB $0xff, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  emit_repeat_again_lz4_s2_emit_copy_short_2b
 | |
| 
 | |
| repeat_five_lz4_s2_emit_copy_short_2b:
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, R9
 | |
| 	MOVW $0x001d, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	SARL $0x10, R9
 | |
| 	MOVB R9, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_four_lz4_s2_emit_copy_short_2b:
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_three_lz4_s2_emit_copy_short_2b:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (AX)
 | |
| 	MOVB R10, 2(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_two_lz4_s2_emit_copy_short_2b:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_two_offset_lz4_s2_emit_copy_short_2b:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(R10*4), R10
 | |
| 	MOVB R9, 1(AX)
 | |
| 	SARL $0x08, R9
 | |
| 	SHLL $0x05, R9
 | |
| 	ORL  R9, R10
 | |
| 	MOVB R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| long_offset_short_lz4_s2: // offset >= 2048 and length > 64: 3-byte copy of 60 bytes (tag 0xee = ((60-1)<<2)|2), rest as a repeat
 | |
| 	MOVB $0xee, (AX)
 | |
| 	MOVW R9, 1(AX)
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, AX
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_lz4_s2_emit_copy_short: // same repeat encoder as emit_repeat_again_lz4_s2 above
 | |
| 	MOVL R10, R8
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_lz4_s2_emit_copy_short
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short
 | |
| 	CMPL R9, $0x00000800
 | |
| 	JB   repeat_two_offset_lz4_s2_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_lz4_s2_emit_copy_short:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_lz4_s2_emit_copy_short
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_lz4_s2_emit_copy_short
 | |
| 	CMPL R10, $0x0100ffff
 | |
| 	JB   repeat_five_lz4_s2_emit_copy_short
 | |
| 	LEAL -16842747(R10), R10
 | |
| 	MOVL $0xfffb001d, (AX)
 | |
| 	MOVB $0xff, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  emit_repeat_again_lz4_s2_emit_copy_short
 | |
| 
 | |
| repeat_five_lz4_s2_emit_copy_short:
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, R9
 | |
| 	MOVW $0x001d, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	SARL $0x10, R9
 | |
| 	MOVB R9, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_four_lz4_s2_emit_copy_short:
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_three_lz4_s2_emit_copy_short:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (AX)
 | |
| 	MOVB R10, 2(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_two_lz4_s2_emit_copy_short:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| repeat_two_offset_lz4_s2_emit_copy_short:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(R10*4), R10
 | |
| 	MOVB R9, 1(AX)
 | |
| 	SARL $0x08, R9
 | |
| 	SHLL $0x05, R9
 | |
| 	ORL  R9, R10
 | |
| 	MOVB R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| two_byte_offset_short_lz4_s2: // length <= 64: one copy op, 2 or 3 bytes
 | |
| 	MOVL R10, R8
 | |
| 	SHLL $0x02, R8 // R8 = 4*length, base for both tag formulas below
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_lz4_s2 // length >= 12 needs the 3-byte form
 | |
| 	CMPL R9, $0x00000800
 | |
| 	JAE  emit_copy_three_lz4_s2 // offset >= 2048 needs the 3-byte form
 | |
| 	LEAL -15(R8), R8 // R8 = 4*length - 15 = ((length-4)<<2)|1
 | |
| 	MOVB R9, 1(AX)
 | |
| 	SHRL $0x08, R9
 | |
| 	SHLL $0x05, R9
 | |
| 	ORL  R9, R8
 | |
| 	MOVB R8, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| emit_copy_three_lz4_s2:
 | |
| 	LEAL -2(R8), R8 // R8 = 4*length - 2 = ((length-1)<<2)|2
 | |
| 	MOVB R8, (AX)
 | |
| 	MOVW R9, 1(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  lz4_s2_loop
 | |
| 
 | |
| lz4_s2_done:
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	SUBQ CX, AX // AX = bytes written to dst
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	MOVQ AX, dstUsed+56(FP)
 | |
| 	RET
 | |
| 
 | |
| lz4_s2_corrupt: // returns uncompressed = -1; dstUsed is not written on this path
 | |
| 	XORQ AX, AX
 | |
| 	LEAQ -1(AX), SI
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	RET
 | |
| 
 | |
| lz4_s2_dstfull: // returns uncompressed = -2; dstUsed is not written on this path
 | |
| 	XORQ AX, AX
 | |
| 	LEAQ -2(AX), SI
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	RET
 | |
| 
 | |
| // func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
 | |
| // Requires: SSE2
 | |
| TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64 // like cvtLZ4BlockAsm but with a minimum match length of 3 and support for sequences without a match; uncompressed is -1 on corrupt input, -2 when dst is too small
 | |
| 	XORQ SI, SI // SI = running uncompressed size (literal + match lengths)
 | |
| 	MOVQ dst_base+0(FP), AX // AX = dst write cursor
 | |
| 	MOVQ dst_len+8(FP), CX
 | |
| 	MOVQ src_base+24(FP), DX // DX = src read cursor
 | |
| 	MOVQ src_len+32(FP), BX
 | |
| 	LEAQ (DX)(BX*1), BX // BX = end of src
 | |
| 	LEAQ -8(AX)(CX*1), CX // CX = dst end minus 8, used as the write limit
 | |
| 	XORQ DI, DI // DI = offset of the previous match (0 = none yet)
 | |
| 
 | |
| lz4s_s2_loop: // one iteration per input sequence: token, literals, then optional match
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4s_s2_corrupt // ran out of input where a token was expected
 | |
| 	CMPQ    AX, CX
 | |
| 	JAE     lz4s_s2_dstfull
 | |
| 	MOVBQZX (DX), R8 // R8 = token byte
 | |
| 	MOVQ    R8, R9
 | |
| 	MOVQ    R8, R10
 | |
| 	SHRQ    $0x04, R9 // R9 = high nibble = literal length
 | |
| 	ANDQ    $0x0f, R10 // R10 = low nibble = match length field
 | |
| 	CMPQ    R8, $0xf0
 | |
| 	JB      lz4s_s2_ll_end // literal nibble < 15: no extension bytes
 | |
| 
 | |
| lz4s_s2_ll_loop: // literal length extension: add bytes while they equal 0xff
 | |
| 	INCQ    DX
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4s_s2_corrupt
 | |
| 	MOVBQZX (DX), R8
 | |
| 	ADDQ    R8, R9
 | |
| 	CMPQ    R8, $0xff
 | |
| 	JEQ     lz4s_s2_ll_loop
 | |
| 
 | |
| lz4s_s2_ll_end:
 | |
| 	LEAQ  (DX)(R9*1), R8 // R8 = address of the last literal byte (DX still points at the token / last length byte)
 | |
| 	ADDQ  $0x03, R10 // match length = nibble + 3
 | |
| 	CMPQ  R8, BX
 | |
| 	JAE   lz4s_s2_corrupt // literals would run past the end of src
 | |
| 	INCQ  DX // DX = first literal byte
 | |
| 	INCQ  R8 // R8 = first byte after the literals
 | |
| 	TESTQ R9, R9
 | |
| 	JZ    lz4s_s2_lits_done // no literals in this sequence
 | |
| 	LEAQ  (AX)(R9*1), R11
 | |
| 	CMPQ  R11, CX
 | |
| 	JAE   lz4s_s2_dstfull
 | |
| 	ADDQ  R9, SI // count the literals as uncompressed output
 | |
| 	LEAL  -1(R9), R11 // R11 = literal length - 1, the value stored in the header
 | |
| 	CMPL  R11, $0x3c
 | |
| 	JB    one_byte_lz4s_s2
 | |
| 	CMPL  R11, $0x00000100
 | |
| 	JB    two_bytes_lz4s_s2
 | |
| 	CMPL  R11, $0x00010000
 | |
| 	JB    three_bytes_lz4s_s2
 | |
| 	CMPL  R11, $0x01000000
 | |
| 	JB    four_bytes_lz4s_s2
 | |
| 	MOVB  $0xfc, (AX) // 5-byte header: tag 0xfc (63<<2) + 32-bit length-1
 | |
| 	MOVL  R11, 1(AX)
 | |
| 	ADDQ  $0x05, AX
 | |
| 	JMP   memmove_long_lz4s_s2
 | |
| 
 | |
| four_bytes_lz4s_s2: // 4-byte header: tag 0xf8 (62<<2) + 24-bit length-1
 | |
| 	MOVL R11, R12
 | |
| 	SHRL $0x10, R12
 | |
| 	MOVB $0xf8, (AX)
 | |
| 	MOVW R11, 1(AX)
 | |
| 	MOVB R12, 3(AX)
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  memmove_long_lz4s_s2
 | |
| 
 | |
| three_bytes_lz4s_s2: // 3-byte header: tag 0xf4 (61<<2) + 16-bit length-1
 | |
| 	MOVB $0xf4, (AX)
 | |
| 	MOVW R11, 1(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  memmove_long_lz4s_s2
 | |
| 
 | |
| two_bytes_lz4s_s2: // 2-byte header: tag 0xf0 (60<<2) + 8-bit length-1
 | |
| 	MOVB $0xf0, (AX)
 | |
| 	MOVB R11, 1(AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	CMPL R11, $0x40
 | |
| 	JB   memmove_lz4s_s2 // length <= 64: the short copy handles it
 | |
| 	JMP  memmove_long_lz4s_s2
 | |
| 
 | |
| one_byte_lz4s_s2: // 1-byte header: (length-1)<<2
 | |
| 	SHLB $0x02, R11
 | |
| 	MOVB R11, (AX)
 | |
| 	ADDQ $0x01, AX
 | |
| 
 | |
| memmove_lz4s_s2:
 | |
| 	LEAQ (AX)(R9*1), R11 // R11 = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R9, $0x08
 | |
| 	JBE  emit_lit_memmove_lz4s_s2_memmove_move_8
 | |
| 	CMPQ R9, $0x10
 | |
| 	JBE  emit_lit_memmove_lz4s_s2_memmove_move_8through16
 | |
| 	CMPQ R9, $0x20
 | |
| 	JBE  emit_lit_memmove_lz4s_s2_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_lz4s_s2_memmove_move_33through64
 | |
| 
 | |
| emit_lit_memmove_lz4s_s2_memmove_move_8: // always moves a full 8 bytes, even when R9 < 8; presumably why CX is dst end minus 8 - TODO confirm
 | |
| 	MOVQ (DX), R12
 | |
| 	MOVQ R12, (AX)
 | |
| 	JMP  memmove_end_copy_lz4s_s2
 | |
| 
 | |
| emit_lit_memmove_lz4s_s2_memmove_move_8through16: // two overlapping 8-byte moves (head and tail)
 | |
| 	MOVQ (DX), R12
 | |
| 	MOVQ -8(DX)(R9*1), DX // DX is reused as scratch; it is reloaded from R8 at lz4s_s2_lits_emit_done
 | |
| 	MOVQ R12, (AX)
 | |
| 	MOVQ DX, -8(AX)(R9*1)
 | |
| 	JMP  memmove_end_copy_lz4s_s2
 | |
| 
 | |
| emit_lit_memmove_lz4s_s2_memmove_move_17through32: // two overlapping 16-byte moves (head and tail)
 | |
| 	MOVOU (DX), X0
 | |
| 	MOVOU -16(DX)(R9*1), X1
 | |
| 	MOVOU X0, (AX)
 | |
| 	MOVOU X1, -16(AX)(R9*1)
 | |
| 	JMP   memmove_end_copy_lz4s_s2
 | |
| 
 | |
| emit_lit_memmove_lz4s_s2_memmove_move_33through64: // first 32 and last 32 bytes, overlapping in the middle
 | |
| 	MOVOU (DX), X0
 | |
| 	MOVOU 16(DX), X1
 | |
| 	MOVOU -32(DX)(R9*1), X2
 | |
| 	MOVOU -16(DX)(R9*1), X3
 | |
| 	MOVOU X0, (AX)
 | |
| 	MOVOU X1, 16(AX)
 | |
| 	MOVOU X2, -32(AX)(R9*1)
 | |
| 	MOVOU X3, -16(AX)(R9*1)
 | |
| 
 | |
| memmove_end_copy_lz4s_s2:
 | |
| 	MOVQ R11, AX // advance dst past the literals
 | |
| 	JMP  lz4s_s2_lits_emit_done
 | |
| 
 | |
| memmove_long_lz4s_s2:
 | |
| 	LEAQ (AX)(R9*1), R11 // R11 = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DX), X0 // X0..X3 = first 32 and last 32 source bytes, stored unaligned after the loop
 | |
| 	MOVOU 16(DX), X1
 | |
| 	MOVOU -32(DX)(R9*1), X2
 | |
| 	MOVOU -16(DX)(R9*1), X3
 | |
| 	MOVQ  R9, R13
 | |
| 	SHRQ  $0x05, R13 // R13 = length / 32
 | |
| 	MOVQ  AX, R12
 | |
| 	ANDL  $0x0000001f, R12 // R12 = dst misalignment within 32 bytes
 | |
| 	MOVQ  $0x00000040, R14
 | |
| 	SUBQ  R12, R14 // R14 = 64 - misalignment, so AX+R14-32 is 32-byte aligned for the MOVOA stores
 | |
| 	DECQ  R13 // NOTE(review): DECQ leaves CF unchanged, so the JA below also depends on CF from the SUBQ above
 | |
| 	JA    emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32
 | |
| 	LEAQ  -32(DX)(R14*1), R12
 | |
| 	LEAQ  -32(AX)(R14*1), R15
 | |
| 
 | |
| emit_lit_memmove_long_lz4s_s2large_big_loop_back: // 32 bytes per pass: unaligned loads, aligned stores
 | |
| 	MOVOU (R12), X4
 | |
| 	MOVOU 16(R12), X5
 | |
| 	MOVOA X4, (R15)
 | |
| 	MOVOA X5, 16(R15)
 | |
| 	ADDQ  $0x20, R15
 | |
| 	ADDQ  $0x20, R12
 | |
| 	ADDQ  $0x20, R14
 | |
| 	DECQ  R13
 | |
| 	JNA   emit_lit_memmove_long_lz4s_s2large_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32: // 32 bytes per pass, indexed by R14, while R9 >= R14
 | |
| 	MOVOU -32(DX)(R14*1), X4
 | |
| 	MOVOU -16(DX)(R14*1), X5
 | |
| 	MOVOA X4, -32(AX)(R14*1)
 | |
| 	MOVOA X5, -16(AX)(R14*1)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	CMPQ  R9, R14
 | |
| 	JAE   emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32
 | |
| 	MOVOU X0, (AX) // finally write the saved head and tail
 | |
| 	MOVOU X1, 16(AX)
 | |
| 	MOVOU X2, -32(AX)(R9*1)
 | |
| 	MOVOU X3, -16(AX)(R9*1)
 | |
| 	MOVQ  R11, AX // advance dst past the literals
 | |
| 
 | |
| lz4s_s2_lits_emit_done:
 | |
| 	MOVQ R8, DX // src cursor = first byte after the literals
 | |
| 
 | |
| lz4s_s2_lits_done:
 | |
| 	CMPQ DX, BX
 | |
| 	JNE  lz4s_s2_match
 | |
| 	CMPQ R10, $0x03 // input ends here: only valid when the match nibble was 0 (R10 == 0 + 3)
 | |
| 	JEQ  lz4s_s2_done
 | |
| 	JMP  lz4s_s2_corrupt
 | |
| 
 | |
| lz4s_s2_match:
 | |
| 	CMPQ    R10, $0x03
 | |
| 	JEQ     lz4s_s2_loop // match nibble 0: this sequence has no match, go straight to the next token
 | |
| 	LEAQ    2(DX), R8
 | |
| 	CMPQ    R8, BX
 | |
| 	JAE     lz4s_s2_corrupt // need the 2 offset bytes plus at least one more input byte
 | |
| 	MOVWQZX (DX), R9 // R9 = 16-bit match offset
 | |
| 	MOVQ    R8, DX
 | |
| 	TESTQ   R9, R9
 | |
| 	JZ      lz4s_s2_corrupt // offset 0 is rejected
 | |
| 	CMPQ    R9, SI
 | |
| 	JA      lz4s_s2_corrupt // offset reaches before the start of the output produced so far
 | |
| 	CMPQ    R10, $0x12
 | |
| 	JNE     lz4s_s2_ml_done // match nibble < 15 (15 + 3 == 0x12): no extension bytes
 | |
| 
 | |
| lz4s_s2_ml_loop: // match length extension: add bytes while they equal 0xff
 | |
| 	MOVBQZX (DX), R8
 | |
| 	INCQ    DX
 | |
| 	ADDQ    R8, R10
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4s_s2_corrupt
 | |
| 	CMPQ    R8, $0xff
 | |
| 	JEQ     lz4s_s2_ml_loop
 | |
| 
 | |
| lz4s_s2_ml_done:
 | |
| 	ADDQ R10, SI // count the match as uncompressed output
 | |
| 	CMPQ R9, DI
 | |
| 	JNE  lz4s_s2_docopy // offset differs from the previous match: emit a copy; otherwise fall through to a repeat
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_lz4_s2: // R10 = length on entry; afterwards R8 = length, R10 = length - 4
 | |
| 	MOVL R10, R8
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_lz4_s2
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_lz4_s2
 | |
| 	CMPL R9, $0x00000800
 | |
| 	JB   repeat_two_offset_lz4_s2 // length < 12 and offset < 2048
 | |
| 
 | |
| cant_repeat_two_offset_lz4_s2: // choose the encoding size from length - 4
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_lz4_s2
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_lz4_s2
 | |
| 	CMPL R10, $0x0100ffff
 | |
| 	JB   repeat_five_lz4_s2
 | |
| 	LEAL -16842747(R10), R10 // too long for one op: emit a 5-byte op with an all-ones length field and loop on the rest
 | |
| 	MOVL $0xfffb001d, (AX)
 | |
| 	MOVB $0xff, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  emit_repeat_again_lz4_s2
 | |
| 
 | |
| repeat_five_lz4_s2: // 5 bytes: 0x1d, 0x00, then 24 bits of (length - 4 - 65536)
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, R9 // R9 (the offset) is free to clobber: the next sequence reloads it
 | |
| 	MOVW $0x001d, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	SARL $0x10, R9
 | |
| 	MOVB R9, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_four_lz4_s2: // 4 bytes: 0x19, 0x00, then 16 bits of (length - 4 - 256)
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_three_lz4_s2: // 3 bytes: 0x15, 0x00, then 8 bits of (length - 4 - 4)
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (AX)
 | |
| 	MOVB R10, 2(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_two_lz4_s2: // 2 bytes: ((length-4)<<2)|1 stored as a 16-bit word
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_two_offset_lz4_s2: // 2 bytes: tag = ((length-4)<<2)|1 | (offset>>8)<<5, then the low offset byte
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(R10*4), R10
 | |
| 	MOVB R9, 1(AX)
 | |
| 	SARL $0x08, R9
 | |
| 	SHLL $0x05, R9
 | |
| 	ORL  R9, R10
 | |
| 	MOVB R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| lz4s_s2_docopy:
 | |
| 	MOVQ R9, DI // remember this offset for the next repeat check
 | |
| 
 | |
| 	// emitCopy
 | |
| 	CMPL R10, $0x40
 | |
| 	JBE  two_byte_offset_short_lz4_s2 // length <= 64: a single copy op is enough
 | |
| 	CMPL R9, $0x00000800
 | |
| 	JAE  long_offset_short_lz4_s2
 | |
| 	MOVL $0x00000001, R8 // offset < 2048: start with a 2-byte copy of 8 bytes, tag base 17 = ((8-4)<<2)|1
 | |
| 	LEAL 16(R8), R8
 | |
| 	MOVB R9, 1(AX) // low offset byte
 | |
| 	MOVL R9, R11
 | |
| 	SHRL $0x08, R11
 | |
| 	SHLL $0x05, R11 // remaining offset bits go to bit 5 and up of the tag
 | |
| 	ORL  R11, R8
 | |
| 	MOVB R8, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	SUBL $0x08, R10 // 8 bytes covered; the rest is emitted as a repeat
 | |
| 
 | |
| 	// emitRepeat
 | |
| 	LEAL -4(R10), R10
 | |
| 	JMP  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
 | |
| 
 | |
| emit_repeat_again_lz4_s2_emit_copy_short_2b: // same repeat encoder as emit_repeat_again_lz4_s2 above
 | |
| 	MOVL R10, R8
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_lz4_s2_emit_copy_short_2b
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
 | |
| 	CMPL R9, $0x00000800
 | |
| 	JB   repeat_two_offset_lz4_s2_emit_copy_short_2b
 | |
| 
 | |
| cant_repeat_two_offset_lz4_s2_emit_copy_short_2b:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_lz4_s2_emit_copy_short_2b
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_lz4_s2_emit_copy_short_2b
 | |
| 	CMPL R10, $0x0100ffff
 | |
| 	JB   repeat_five_lz4_s2_emit_copy_short_2b
 | |
| 	LEAL -16842747(R10), R10
 | |
| 	MOVL $0xfffb001d, (AX)
 | |
| 	MOVB $0xff, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  emit_repeat_again_lz4_s2_emit_copy_short_2b
 | |
| 
 | |
| repeat_five_lz4_s2_emit_copy_short_2b:
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, R9
 | |
| 	MOVW $0x001d, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	SARL $0x10, R9
 | |
| 	MOVB R9, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_four_lz4_s2_emit_copy_short_2b:
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_three_lz4_s2_emit_copy_short_2b:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (AX)
 | |
| 	MOVB R10, 2(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_two_lz4_s2_emit_copy_short_2b:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_two_offset_lz4_s2_emit_copy_short_2b:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(R10*4), R10
 | |
| 	MOVB R9, 1(AX)
 | |
| 	SARL $0x08, R9
 | |
| 	SHLL $0x05, R9
 | |
| 	ORL  R9, R10
 | |
| 	MOVB R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| long_offset_short_lz4_s2: // offset >= 2048 and length > 64: 3-byte copy of 60 bytes (tag 0xee = ((60-1)<<2)|2), rest as a repeat
 | |
| 	MOVB $0xee, (AX)
 | |
| 	MOVW R9, 1(AX)
 | |
| 	LEAL -60(R10), R10
 | |
| 	ADDQ $0x03, AX
 | |
| 
 | |
| 	// emitRepeat
 | |
| emit_repeat_again_lz4_s2_emit_copy_short: // same repeat encoder as emit_repeat_again_lz4_s2 above
 | |
| 	MOVL R10, R8
 | |
| 	LEAL -4(R10), R10
 | |
| 	CMPL R8, $0x08
 | |
| 	JBE  repeat_two_lz4_s2_emit_copy_short
 | |
| 	CMPL R8, $0x0c
 | |
| 	JAE  cant_repeat_two_offset_lz4_s2_emit_copy_short
 | |
| 	CMPL R9, $0x00000800
 | |
| 	JB   repeat_two_offset_lz4_s2_emit_copy_short
 | |
| 
 | |
| cant_repeat_two_offset_lz4_s2_emit_copy_short:
 | |
| 	CMPL R10, $0x00000104
 | |
| 	JB   repeat_three_lz4_s2_emit_copy_short
 | |
| 	CMPL R10, $0x00010100
 | |
| 	JB   repeat_four_lz4_s2_emit_copy_short
 | |
| 	CMPL R10, $0x0100ffff
 | |
| 	JB   repeat_five_lz4_s2_emit_copy_short
 | |
| 	LEAL -16842747(R10), R10
 | |
| 	MOVL $0xfffb001d, (AX)
 | |
| 	MOVB $0xff, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  emit_repeat_again_lz4_s2_emit_copy_short
 | |
| 
 | |
| repeat_five_lz4_s2_emit_copy_short:
 | |
| 	LEAL -65536(R10), R10
 | |
| 	MOVL R10, R9
 | |
| 	MOVW $0x001d, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	SARL $0x10, R9
 | |
| 	MOVB R9, 4(AX)
 | |
| 	ADDQ $0x05, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_four_lz4_s2_emit_copy_short:
 | |
| 	LEAL -256(R10), R10
 | |
| 	MOVW $0x0019, (AX)
 | |
| 	MOVW R10, 2(AX)
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_three_lz4_s2_emit_copy_short:
 | |
| 	LEAL -4(R10), R10
 | |
| 	MOVW $0x0015, (AX)
 | |
| 	MOVB R10, 2(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_two_lz4_s2_emit_copy_short:
 | |
| 	SHLL $0x02, R10
 | |
| 	ORL  $0x01, R10
 | |
| 	MOVW R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| repeat_two_offset_lz4_s2_emit_copy_short:
 | |
| 	XORQ R8, R8
 | |
| 	LEAL 1(R8)(R10*4), R10
 | |
| 	MOVB R9, 1(AX)
 | |
| 	SARL $0x08, R9
 | |
| 	SHLL $0x05, R9
 | |
| 	ORL  R9, R10
 | |
| 	MOVB R10, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| two_byte_offset_short_lz4_s2: // length <= 64: one copy op, 2 or 3 bytes
 | |
| 	MOVL R10, R8
 | |
| 	SHLL $0x02, R8 // R8 = 4*length, base for both tag formulas below
 | |
| 	CMPL R10, $0x0c
 | |
| 	JAE  emit_copy_three_lz4_s2 // length >= 12 needs the 3-byte form
 | |
| 	CMPL R9, $0x00000800
 | |
| 	JAE  emit_copy_three_lz4_s2 // offset >= 2048 needs the 3-byte form
 | |
| 	LEAL -15(R8), R8 // R8 = 4*length - 15 = ((length-4)<<2)|1
 | |
| 	MOVB R9, 1(AX)
 | |
| 	SHRL $0x08, R9
 | |
| 	SHLL $0x05, R9
 | |
| 	ORL  R9, R8
 | |
| 	MOVB R8, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| emit_copy_three_lz4_s2:
 | |
| 	LEAL -2(R8), R8 // R8 = 4*length - 2 = ((length-1)<<2)|2
 | |
| 	MOVB R8, (AX)
 | |
| 	MOVW R9, 1(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  lz4s_s2_loop
 | |
| 
 | |
| lz4s_s2_done:
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	SUBQ CX, AX // AX = bytes written to dst
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	MOVQ AX, dstUsed+56(FP)
 | |
| 	RET
 | |
| 
 | |
| lz4s_s2_corrupt: // returns uncompressed = -1; dstUsed is not written on this path
 | |
| 	XORQ AX, AX
 | |
| 	LEAQ -1(AX), SI
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	RET
 | |
| 
 | |
| lz4s_s2_dstfull: // returns uncompressed = -2; dstUsed is not written on this path
 | |
| 	XORQ AX, AX
 | |
| 	LEAQ -2(AX), SI
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	RET
 | |
| 
 | |
| // func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
 | |
| // Requires: SSE2. Walks src as token / literals / 16-bit offset / match-length sequences (an LZ4 block, per the function name) and re-emits them into dst as literal and copy tags. On success returns the number of uncompressed bytes represented and the number of dst bytes written. Returns uncompressed=-1 for malformed input and uncompressed=-2 when dst is too small; dstUsed is only stored on the success path.
 | |
| TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64 // frame size 0, 64 bytes of arguments + results
 | |
| 	XORQ SI, SI // SI = running count of uncompressed bytes represented so far
 | |
| 	MOVQ dst_base+0(FP), AX // AX = dst write cursor
 | |
| 	MOVQ dst_len+8(FP), CX
 | |
| 	MOVQ src_base+24(FP), DX // DX = src read cursor
 | |
| 	MOVQ src_len+32(FP), BX
 | |
| 	LEAQ (DX)(BX*1), BX // BX = src_base + src_len (one past the last input byte)
 | |
| 	LEAQ -8(AX)(CX*1), CX // CX = dst_base + dst_len - 8: output limit with 8 bytes of slack
 | |
| // Main loop: each iteration consumes one input sequence, starting at its token byte.
 | |
| lz4_snappy_loop:
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4_snappy_corrupt // input exhausted where a token byte is required
 | |
| 	CMPQ    AX, CX
 | |
| 	JAE     lz4_snappy_dstfull // write cursor has reached the output limit
 | |
| 	MOVBQZX (DX), DI // DI = token byte
 | |
| 	MOVQ    DI, R8
 | |
| 	MOVQ    DI, R9
 | |
| 	SHRQ    $0x04, R8 // R8 = high nibble: literal length
 | |
| 	ANDQ    $0x0f, R9 // R9 = low nibble: match length field
 | |
| 	CMPQ    DI, $0xf0
 | |
| 	JB      lz4_snappy_ll_end // high nibble < 15: no literal-length extension bytes
 | |
| // Literal length extension: add following bytes to R8 while they equal 0xff.
 | |
| lz4_snappy_ll_loop:
 | |
| 	INCQ    DX
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4_snappy_corrupt
 | |
| 	MOVBQZX (DX), DI
 | |
| 	ADDQ    DI, R8
 | |
| 	CMPQ    DI, $0xff
 | |
| 	JEQ     lz4_snappy_ll_loop // 0xff: another extension byte follows
 | |
| // Literal length is final in R8; DX still points at the last length byte.
 | |
| lz4_snappy_ll_end:
 | |
| 	LEAQ  (DX)(R8*1), DI // DI = last length byte + literal length
 | |
| 	ADDQ  $0x04, R9 // match length = field + 4
 | |
| 	CMPQ  DI, BX
 | |
| 	JAE   lz4_snappy_corrupt // literals would extend past the end of src
 | |
| 	INCQ  DX // DX = first literal byte
 | |
| 	INCQ  DI // DI = first byte after the literals
 | |
| 	TESTQ R8, R8
 | |
| 	JZ    lz4_snappy_lits_done // sequence has no literals
 | |
| 	LEAQ  (AX)(R8*1), R10
 | |
| 	CMPQ  R10, CX
 | |
| 	JAE   lz4_snappy_dstfull // the literal bytes alone would reach the output limit
 | |
| 	ADDQ  R8, SI // literals count towards the uncompressed size
 | |
| 	LEAL  -1(R8), R10 // R10 = literal length - 1, the value stored in the literal header
 | |
| 	CMPL  R10, $0x3c
 | |
| 	JB    one_byte_lz4_snappy // length-1 < 60 fits in the tag byte itself
 | |
| 	CMPL  R10, $0x00000100
 | |
| 	JB    two_bytes_lz4_snappy
 | |
| 	CMPL  R10, $0x00010000
 | |
| 	JB    three_bytes_lz4_snappy
 | |
| 	CMPL  R10, $0x01000000
 | |
| 	JB    four_bytes_lz4_snappy
 | |
| 	MOVB  $0xfc, (AX) // 5-byte header: tag 0xfc (63<<2) + 32-bit length-1
 | |
| 	MOVL  R10, 1(AX)
 | |
| 	ADDQ  $0x05, AX
 | |
| 	JMP   memmove_long_lz4_snappy
 | |
| // 4-byte literal header: tag 0xf8 (62<<2) followed by 24 bits of length-1.
 | |
| four_bytes_lz4_snappy:
 | |
| 	MOVL R10, R11
 | |
| 	SHRL $0x10, R11 // R11 = bits 16..23 of length-1
 | |
| 	MOVB $0xf8, (AX)
 | |
| 	MOVW R10, 1(AX)
 | |
| 	MOVB R11, 3(AX)
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  memmove_long_lz4_snappy
 | |
| // 3-byte literal header: tag 0xf4 (61<<2) followed by 16 bits of length-1.
 | |
| three_bytes_lz4_snappy:
 | |
| 	MOVB $0xf4, (AX)
 | |
| 	MOVW R10, 1(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  memmove_long_lz4_snappy
 | |
| // 2-byte literal header: tag 0xf0 (60<<2) followed by 8 bits of length-1.
 | |
| two_bytes_lz4_snappy:
 | |
| 	MOVB $0xf0, (AX)
 | |
| 	MOVB R10, 1(AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	CMPL R10, $0x40
 | |
| 	JB   memmove_lz4_snappy // length-1 < 64: the short copy routine is used
 | |
| 	JMP  memmove_long_lz4_snappy
 | |
| // 1-byte literal header: tag = (length-1) << 2.
 | |
| one_byte_lz4_snappy:
 | |
| 	SHLB $0x02, R10
 | |
| 	MOVB R10, (AX)
 | |
| 	ADDQ $0x01, AX
 | |
| // Short literal copy: R8 bytes from (DX) to (AX), dispatched by size class.
 | |
| memmove_lz4_snappy:
 | |
| 	LEAQ (AX)(R8*1), R10 // R10 = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R8, $0x08
 | |
| 	JBE  emit_lit_memmove_lz4_snappy_memmove_move_8
 | |
| 	CMPQ R8, $0x10
 | |
| 	JBE  emit_lit_memmove_lz4_snappy_memmove_move_8through16
 | |
| 	CMPQ R8, $0x20
 | |
| 	JBE  emit_lit_memmove_lz4_snappy_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_lz4_snappy_memmove_move_33through64
 | |
| // R8 <= 8: always moves a full 8 bytes; AX is then set to R10, so only R8 of them count.
 | |
| emit_lit_memmove_lz4_snappy_memmove_move_8:
 | |
| 	MOVQ (DX), R11
 | |
| 	MOVQ R11, (AX)
 | |
| 	JMP  memmove_end_copy_lz4_snappy
 | |
| // 9..16 bytes: first 8 and last 8 bytes, overlapping in the middle.
 | |
| emit_lit_memmove_lz4_snappy_memmove_move_8through16:
 | |
| 	MOVQ (DX), R11
 | |
| 	MOVQ -8(DX)(R8*1), DX // DX reused as a temporary; it is reloaded from DI at lz4_snappy_lits_emit_done
 | |
| 	MOVQ R11, (AX)
 | |
| 	MOVQ DX, -8(AX)(R8*1)
 | |
| 	JMP  memmove_end_copy_lz4_snappy
 | |
| // 17..32 bytes: first 16 and last 16 bytes, overlapping in the middle.
 | |
| emit_lit_memmove_lz4_snappy_memmove_move_17through32:
 | |
| 	MOVOU (DX), X0
 | |
| 	MOVOU -16(DX)(R8*1), X1
 | |
| 	MOVOU X0, (AX)
 | |
| 	MOVOU X1, -16(AX)(R8*1)
 | |
| 	JMP   memmove_end_copy_lz4_snappy
 | |
| // 33..64 bytes: first 32 and last 32 bytes, overlapping in the middle.
 | |
| emit_lit_memmove_lz4_snappy_memmove_move_33through64:
 | |
| 	MOVOU (DX), X0
 | |
| 	MOVOU 16(DX), X1
 | |
| 	MOVOU -32(DX)(R8*1), X2
 | |
| 	MOVOU -16(DX)(R8*1), X3
 | |
| 	MOVOU X0, (AX)
 | |
| 	MOVOU X1, 16(AX)
 | |
| 	MOVOU X2, -32(AX)(R8*1)
 | |
| 	MOVOU X3, -16(AX)(R8*1)
 | |
| 
 | |
| memmove_end_copy_lz4_snappy:
 | |
| 	MOVQ R10, AX // advance the dst cursor past the literals
 | |
| 	JMP  lz4_snappy_lits_emit_done
 | |
| // Long literal copy: head and tail are loaded up front, the middle is copied in 32-byte steps.
 | |
| memmove_long_lz4_snappy:
 | |
| 	LEAQ (AX)(R8*1), R10 // R10 = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DX), X0 // X0,X1 = first 32 source bytes
 | |
| 	MOVOU 16(DX), X1
 | |
| 	MOVOU -32(DX)(R8*1), X2 // X2,X3 = last 32 source bytes
 | |
| 	MOVOU -16(DX)(R8*1), X3
 | |
| 	MOVQ  R8, R12
 | |
| 	SHRQ  $0x05, R12 // R12 = length / 32
 | |
| 	MOVQ  AX, R11
 | |
| 	ANDL  $0x0000001f, R11 // R11 = dst misalignment within 32 bytes
 | |
| 	MOVQ  $0x00000040, R13
 | |
| 	SUBQ  R11, R13 // R13 = 64 - (AX & 31), so AX+R13-32 is a multiple of 32
 | |
| 	DECQ  R12
 | |
| 	JA    emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DX)(R13*1), R11
 | |
| 	LEAQ  -32(AX)(R13*1), R14
 | |
| 
 | |
| emit_lit_memmove_long_lz4_snappylarge_big_loop_back:
 | |
| 	MOVOU (R11), X4 // unaligned loads from src, aligned (MOVOA) stores to dst
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R14)
 | |
| 	MOVOA X5, 16(R14)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R13
 | |
| 	DECQ  R12
 | |
| 	JNA   emit_lit_memmove_long_lz4_snappylarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DX)(R13*1), X4
 | |
| 	MOVOU -16(DX)(R13*1), X5
 | |
| 	MOVOA X4, -32(AX)(R13*1)
 | |
| 	MOVOA X5, -16(AX)(R13*1)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	CMPQ  R8, R13
 | |
| 	JAE   emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32 // continue while R13 <= length
 | |
| 	MOVOU X0, (AX) // finally store the saved head and tail with unaligned stores
 | |
| 	MOVOU X1, 16(AX)
 | |
| 	MOVOU X2, -32(AX)(R8*1)
 | |
| 	MOVOU X3, -16(AX)(R8*1)
 | |
| 	MOVQ  R10, AX // advance the dst cursor past the literals
 | |
| 
 | |
| lz4_snappy_lits_emit_done:
 | |
| 	MOVQ DI, DX // src cursor = first byte after the literals
 | |
| // End-of-input check: src may end right after the literals only if the match length field was 0.
 | |
| lz4_snappy_lits_done:
 | |
| 	CMPQ DX, BX
 | |
| 	JNE  lz4_snappy_match
 | |
| 	CMPQ R9, $0x04
 | |
| 	JEQ  lz4_snappy_done // R9 == 4 means the token's low nibble was 0
 | |
| 	JMP  lz4_snappy_corrupt
 | |
| // Match part: 16-bit offset, then optional match-length extension bytes.
 | |
| lz4_snappy_match:
 | |
| 	LEAQ    2(DX), DI
 | |
| 	CMPQ    DI, BX
 | |
| 	JAE     lz4_snappy_corrupt // need the 2 offset bytes plus at least one more input byte
 | |
| 	MOVWQZX (DX), R8 // R8 = match offset (16-bit load, zero-extended)
 | |
| 	MOVQ    DI, DX
 | |
| 	TESTQ   R8, R8
 | |
| 	JZ      lz4_snappy_corrupt // offset 0 is rejected
 | |
| 	CMPQ    R8, SI
 | |
| 	JA      lz4_snappy_corrupt // offset reaches back further than the bytes produced so far
 | |
| 	CMPQ    R9, $0x13
 | |
| 	JNE     lz4_snappy_ml_done // field != 15 (15+4 = 0x13): no extension bytes
 | |
| // Match length extension: add following bytes to R9 while they equal 0xff.
 | |
| lz4_snappy_ml_loop:
 | |
| 	MOVBQZX (DX), DI
 | |
| 	INCQ    DX
 | |
| 	ADDQ    DI, R9
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4_snappy_corrupt
 | |
| 	CMPQ    DI, $0xff
 | |
| 	JEQ     lz4_snappy_ml_loop
 | |
| 
 | |
| lz4_snappy_ml_done:
 | |
| 	ADDQ R9, SI // match bytes count towards the uncompressed size
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_lz4_s2:
 | |
| 	CMPL R9, $0x40
 | |
| 	JBE  two_byte_offset_short_lz4_s2 // 64 or fewer bytes remaining: emit the final copy
 | |
| 	MOVB $0xee, (AX) // 0xee = (59<<2)|2; R9 is reduced by 60 below
 | |
| 	MOVW R8, 1(AX) // 16-bit offset
 | |
| 	LEAL -60(R9), R9
 | |
| 	ADDQ $0x03, AX
 | |
| 	CMPQ AX, CX
 | |
| 	JAE  lz4_snappy_loop // output limit reached mid-match; the loop head performs the dst-full check
 | |
| 	JMP  two_byte_offset_lz4_s2
 | |
| // Final copy for this match: 2-byte form if length < 12 and offset < 2048, else 3-byte form.
 | |
| two_byte_offset_short_lz4_s2:
 | |
| 	MOVL R9, DI
 | |
| 	SHLL $0x02, DI // DI = length << 2
 | |
| 	CMPL R9, $0x0c
 | |
| 	JAE  emit_copy_three_lz4_s2
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  emit_copy_three_lz4_s2
 | |
| 	LEAL -15(DI), DI // DI = ((length-4) << 2) | 1
 | |
| 	MOVB R8, 1(AX) // low 8 bits of the offset
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8 // offset bits 8..10 moved to tag bits 5..7
 | |
| 	ORL  R8, DI
 | |
| 	MOVB DI, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4_snappy_loop
 | |
| // 3-byte copy: tag = ((length-1) << 2) | 2, followed by the 16-bit offset.
 | |
| emit_copy_three_lz4_s2:
 | |
| 	LEAL -2(DI), DI
 | |
| 	MOVB DI, (AX)
 | |
| 	MOVW R8, 1(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  lz4_snappy_loop
 | |
| // Success: return the uncompressed byte count and the number of dst bytes written.
 | |
| lz4_snappy_done:
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	SUBQ CX, AX // AX = bytes written to dst
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	MOVQ AX, dstUsed+56(FP)
 | |
| 	RET
 | |
| // Malformed input: uncompressed = -1 (dstUsed is not stored).
 | |
| lz4_snappy_corrupt:
 | |
| 	XORQ AX, AX
 | |
| 	LEAQ -1(AX), SI
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	RET
 | |
| // Destination too small: uncompressed = -2 (dstUsed is not stored).
 | |
| lz4_snappy_dstfull:
 | |
| 	XORQ AX, AX
 | |
| 	LEAQ -2(AX), SI
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	RET
 | |
| 
 | |
| // func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
 | |
| // Requires: SSE2. Walks src as token / literals / 16-bit offset / match-length sequences (an LZ4s block, per the function name) and re-emits them into dst as literal and copy tags. Here the match length is field + 3, and a field of 0 means the sequence has no match. On success returns the number of uncompressed bytes represented and the number of dst bytes written. Returns uncompressed=-1 for malformed input and uncompressed=-2 when dst is too small; dstUsed is only stored on the success path.
 | |
| TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64 // frame size 0, 64 bytes of arguments + results
 | |
| 	XORQ SI, SI // SI = running count of uncompressed bytes represented so far
 | |
| 	MOVQ dst_base+0(FP), AX // AX = dst write cursor
 | |
| 	MOVQ dst_len+8(FP), CX
 | |
| 	MOVQ src_base+24(FP), DX // DX = src read cursor
 | |
| 	MOVQ src_len+32(FP), BX
 | |
| 	LEAQ (DX)(BX*1), BX // BX = src_base + src_len (one past the last input byte)
 | |
| 	LEAQ -8(AX)(CX*1), CX // CX = dst_base + dst_len - 8: output limit with 8 bytes of slack
 | |
| // Main loop: each iteration consumes one input sequence, starting at its token byte.
 | |
| lz4s_snappy_loop:
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4s_snappy_corrupt // input exhausted where a token byte is required
 | |
| 	CMPQ    AX, CX
 | |
| 	JAE     lz4s_snappy_dstfull // write cursor has reached the output limit
 | |
| 	MOVBQZX (DX), DI // DI = token byte
 | |
| 	MOVQ    DI, R8
 | |
| 	MOVQ    DI, R9
 | |
| 	SHRQ    $0x04, R8 // R8 = high nibble: literal length
 | |
| 	ANDQ    $0x0f, R9 // R9 = low nibble: match length field
 | |
| 	CMPQ    DI, $0xf0
 | |
| 	JB      lz4s_snappy_ll_end // high nibble < 15: no literal-length extension bytes
 | |
| // Literal length extension: add following bytes to R8 while they equal 0xff.
 | |
| lz4s_snappy_ll_loop:
 | |
| 	INCQ    DX
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4s_snappy_corrupt
 | |
| 	MOVBQZX (DX), DI
 | |
| 	ADDQ    DI, R8
 | |
| 	CMPQ    DI, $0xff
 | |
| 	JEQ     lz4s_snappy_ll_loop // 0xff: another extension byte follows
 | |
| // Literal length is final in R8; DX still points at the last length byte.
 | |
| lz4s_snappy_ll_end:
 | |
| 	LEAQ  (DX)(R8*1), DI // DI = last length byte + literal length
 | |
| 	ADDQ  $0x03, R9 // match length = field + 3
 | |
| 	CMPQ  DI, BX
 | |
| 	JAE   lz4s_snappy_corrupt // literals would extend past the end of src
 | |
| 	INCQ  DX // DX = first literal byte
 | |
| 	INCQ  DI // DI = first byte after the literals
 | |
| 	TESTQ R8, R8
 | |
| 	JZ    lz4s_snappy_lits_done // sequence has no literals
 | |
| 	LEAQ  (AX)(R8*1), R10
 | |
| 	CMPQ  R10, CX
 | |
| 	JAE   lz4s_snappy_dstfull // the literal bytes alone would reach the output limit
 | |
| 	ADDQ  R8, SI // literals count towards the uncompressed size
 | |
| 	LEAL  -1(R8), R10 // R10 = literal length - 1, the value stored in the literal header
 | |
| 	CMPL  R10, $0x3c
 | |
| 	JB    one_byte_lz4s_snappy // length-1 < 60 fits in the tag byte itself
 | |
| 	CMPL  R10, $0x00000100
 | |
| 	JB    two_bytes_lz4s_snappy
 | |
| 	CMPL  R10, $0x00010000
 | |
| 	JB    three_bytes_lz4s_snappy
 | |
| 	CMPL  R10, $0x01000000
 | |
| 	JB    four_bytes_lz4s_snappy
 | |
| 	MOVB  $0xfc, (AX) // 5-byte header: tag 0xfc (63<<2) + 32-bit length-1
 | |
| 	MOVL  R10, 1(AX)
 | |
| 	ADDQ  $0x05, AX
 | |
| 	JMP   memmove_long_lz4s_snappy
 | |
| // 4-byte literal header: tag 0xf8 (62<<2) followed by 24 bits of length-1.
 | |
| four_bytes_lz4s_snappy:
 | |
| 	MOVL R10, R11
 | |
| 	SHRL $0x10, R11 // R11 = bits 16..23 of length-1
 | |
| 	MOVB $0xf8, (AX)
 | |
| 	MOVW R10, 1(AX)
 | |
| 	MOVB R11, 3(AX)
 | |
| 	ADDQ $0x04, AX
 | |
| 	JMP  memmove_long_lz4s_snappy
 | |
| // 3-byte literal header: tag 0xf4 (61<<2) followed by 16 bits of length-1.
 | |
| three_bytes_lz4s_snappy:
 | |
| 	MOVB $0xf4, (AX)
 | |
| 	MOVW R10, 1(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  memmove_long_lz4s_snappy
 | |
| // 2-byte literal header: tag 0xf0 (60<<2) followed by 8 bits of length-1.
 | |
| two_bytes_lz4s_snappy:
 | |
| 	MOVB $0xf0, (AX)
 | |
| 	MOVB R10, 1(AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	CMPL R10, $0x40
 | |
| 	JB   memmove_lz4s_snappy // length-1 < 64: the short copy routine is used
 | |
| 	JMP  memmove_long_lz4s_snappy
 | |
| // 1-byte literal header: tag = (length-1) << 2.
 | |
| one_byte_lz4s_snappy:
 | |
| 	SHLB $0x02, R10
 | |
| 	MOVB R10, (AX)
 | |
| 	ADDQ $0x01, AX
 | |
| // Short literal copy: R8 bytes from (DX) to (AX), dispatched by size class.
 | |
| memmove_lz4s_snappy:
 | |
| 	LEAQ (AX)(R8*1), R10 // R10 = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveShort
 | |
| 	CMPQ R8, $0x08
 | |
| 	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_8
 | |
| 	CMPQ R8, $0x10
 | |
| 	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_8through16
 | |
| 	CMPQ R8, $0x20
 | |
| 	JBE  emit_lit_memmove_lz4s_snappy_memmove_move_17through32
 | |
| 	JMP  emit_lit_memmove_lz4s_snappy_memmove_move_33through64
 | |
| // R8 <= 8: always moves a full 8 bytes; AX is then set to R10, so only R8 of them count.
 | |
| emit_lit_memmove_lz4s_snappy_memmove_move_8:
 | |
| 	MOVQ (DX), R11
 | |
| 	MOVQ R11, (AX)
 | |
| 	JMP  memmove_end_copy_lz4s_snappy
 | |
| // 9..16 bytes: first 8 and last 8 bytes, overlapping in the middle.
 | |
| emit_lit_memmove_lz4s_snappy_memmove_move_8through16:
 | |
| 	MOVQ (DX), R11
 | |
| 	MOVQ -8(DX)(R8*1), DX // DX reused as a temporary; it is reloaded from DI at lz4s_snappy_lits_emit_done
 | |
| 	MOVQ R11, (AX)
 | |
| 	MOVQ DX, -8(AX)(R8*1)
 | |
| 	JMP  memmove_end_copy_lz4s_snappy
 | |
| // 17..32 bytes: first 16 and last 16 bytes, overlapping in the middle.
 | |
| emit_lit_memmove_lz4s_snappy_memmove_move_17through32:
 | |
| 	MOVOU (DX), X0
 | |
| 	MOVOU -16(DX)(R8*1), X1
 | |
| 	MOVOU X0, (AX)
 | |
| 	MOVOU X1, -16(AX)(R8*1)
 | |
| 	JMP   memmove_end_copy_lz4s_snappy
 | |
| // 33..64 bytes: first 32 and last 32 bytes, overlapping in the middle.
 | |
| emit_lit_memmove_lz4s_snappy_memmove_move_33through64:
 | |
| 	MOVOU (DX), X0
 | |
| 	MOVOU 16(DX), X1
 | |
| 	MOVOU -32(DX)(R8*1), X2
 | |
| 	MOVOU -16(DX)(R8*1), X3
 | |
| 	MOVOU X0, (AX)
 | |
| 	MOVOU X1, 16(AX)
 | |
| 	MOVOU X2, -32(AX)(R8*1)
 | |
| 	MOVOU X3, -16(AX)(R8*1)
 | |
| 
 | |
| memmove_end_copy_lz4s_snappy:
 | |
| 	MOVQ R10, AX // advance the dst cursor past the literals
 | |
| 	JMP  lz4s_snappy_lits_emit_done
 | |
| // Long literal copy: head and tail are loaded up front, the middle is copied in 32-byte steps.
 | |
| memmove_long_lz4s_snappy:
 | |
| 	LEAQ (AX)(R8*1), R10 // R10 = dst cursor after the literals
 | |
| 
 | |
| 	// genMemMoveLong
 | |
| 	MOVOU (DX), X0 // X0,X1 = first 32 source bytes
 | |
| 	MOVOU 16(DX), X1
 | |
| 	MOVOU -32(DX)(R8*1), X2 // X2,X3 = last 32 source bytes
 | |
| 	MOVOU -16(DX)(R8*1), X3
 | |
| 	MOVQ  R8, R12
 | |
| 	SHRQ  $0x05, R12 // R12 = length / 32
 | |
| 	MOVQ  AX, R11
 | |
| 	ANDL  $0x0000001f, R11 // R11 = dst misalignment within 32 bytes
 | |
| 	MOVQ  $0x00000040, R13
 | |
| 	SUBQ  R11, R13 // R13 = 64 - (AX & 31), so AX+R13-32 is a multiple of 32
 | |
| 	DECQ  R12
 | |
| 	JA    emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32
 | |
| 	LEAQ  -32(DX)(R13*1), R11
 | |
| 	LEAQ  -32(AX)(R13*1), R14
 | |
| 
 | |
| emit_lit_memmove_long_lz4s_snappylarge_big_loop_back:
 | |
| 	MOVOU (R11), X4 // unaligned loads from src, aligned (MOVOA) stores to dst
 | |
| 	MOVOU 16(R11), X5
 | |
| 	MOVOA X4, (R14)
 | |
| 	MOVOA X5, 16(R14)
 | |
| 	ADDQ  $0x20, R14
 | |
| 	ADDQ  $0x20, R11
 | |
| 	ADDQ  $0x20, R13
 | |
| 	DECQ  R12
 | |
| 	JNA   emit_lit_memmove_long_lz4s_snappylarge_big_loop_back
 | |
| 
 | |
| emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32:
 | |
| 	MOVOU -32(DX)(R13*1), X4
 | |
| 	MOVOU -16(DX)(R13*1), X5
 | |
| 	MOVOA X4, -32(AX)(R13*1)
 | |
| 	MOVOA X5, -16(AX)(R13*1)
 | |
| 	ADDQ  $0x20, R13
 | |
| 	CMPQ  R8, R13
 | |
| 	JAE   emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32 // continue while R13 <= length
 | |
| 	MOVOU X0, (AX) // finally store the saved head and tail with unaligned stores
 | |
| 	MOVOU X1, 16(AX)
 | |
| 	MOVOU X2, -32(AX)(R8*1)
 | |
| 	MOVOU X3, -16(AX)(R8*1)
 | |
| 	MOVQ  R10, AX // advance the dst cursor past the literals
 | |
| 
 | |
| lz4s_snappy_lits_emit_done:
 | |
| 	MOVQ DI, DX // src cursor = first byte after the literals
 | |
| // End-of-input check: src may end right after the literals only if the match length field was 0.
 | |
| lz4s_snappy_lits_done:
 | |
| 	CMPQ DX, BX
 | |
| 	JNE  lz4s_snappy_match
 | |
| 	CMPQ R9, $0x03
 | |
| 	JEQ  lz4s_snappy_done // R9 == 3 means the token's low nibble was 0
 | |
| 	JMP  lz4s_snappy_corrupt
 | |
| // Match part: skipped entirely when the match length field was 0.
 | |
| lz4s_snappy_match:
 | |
| 	CMPQ    R9, $0x03
 | |
| 	JEQ     lz4s_snappy_loop // field 0: no offset bytes are read, continue with the next token
 | |
| 	LEAQ    2(DX), DI
 | |
| 	CMPQ    DI, BX
 | |
| 	JAE     lz4s_snappy_corrupt // need the 2 offset bytes plus at least one more input byte
 | |
| 	MOVWQZX (DX), R8 // R8 = match offset (16-bit load, zero-extended)
 | |
| 	MOVQ    DI, DX
 | |
| 	TESTQ   R8, R8
 | |
| 	JZ      lz4s_snappy_corrupt // offset 0 is rejected
 | |
| 	CMPQ    R8, SI
 | |
| 	JA      lz4s_snappy_corrupt // offset reaches back further than the bytes produced so far
 | |
| 	CMPQ    R9, $0x12
 | |
| 	JNE     lz4s_snappy_ml_done // field != 15 (15+3 = 0x12): no extension bytes
 | |
| // Match length extension: add following bytes to R9 while they equal 0xff.
 | |
| lz4s_snappy_ml_loop:
 | |
| 	MOVBQZX (DX), DI
 | |
| 	INCQ    DX
 | |
| 	ADDQ    DI, R9
 | |
| 	CMPQ    DX, BX
 | |
| 	JAE     lz4s_snappy_corrupt
 | |
| 	CMPQ    DI, $0xff
 | |
| 	JEQ     lz4s_snappy_ml_loop
 | |
| 
 | |
| lz4s_snappy_ml_done:
 | |
| 	ADDQ R9, SI // match bytes count towards the uncompressed size
 | |
| 
 | |
| 	// emitCopy
 | |
| two_byte_offset_lz4_s2:
 | |
| 	CMPL R9, $0x40
 | |
| 	JBE  two_byte_offset_short_lz4_s2 // 64 or fewer bytes remaining: emit the final copy
 | |
| 	MOVB $0xee, (AX) // 0xee = (59<<2)|2; R9 is reduced by 60 below
 | |
| 	MOVW R8, 1(AX) // 16-bit offset
 | |
| 	LEAL -60(R9), R9
 | |
| 	ADDQ $0x03, AX
 | |
| 	CMPQ AX, CX
 | |
| 	JAE  lz4s_snappy_loop // output limit reached mid-match; the loop head performs the dst-full check
 | |
| 	JMP  two_byte_offset_lz4_s2
 | |
| // Final copy for this match: 2-byte form if length < 12 and offset < 2048, else 3-byte form.
 | |
| two_byte_offset_short_lz4_s2:
 | |
| 	MOVL R9, DI
 | |
| 	SHLL $0x02, DI // DI = length << 2
 | |
| 	CMPL R9, $0x0c
 | |
| 	JAE  emit_copy_three_lz4_s2
 | |
| 	CMPL R8, $0x00000800
 | |
| 	JAE  emit_copy_three_lz4_s2
 | |
| 	LEAL -15(DI), DI // DI = ((length-4) << 2) | 1
 | |
| 	MOVB R8, 1(AX) // low 8 bits of the offset
 | |
| 	SHRL $0x08, R8
 | |
| 	SHLL $0x05, R8 // offset bits 8..10 moved to tag bits 5..7
 | |
| 	ORL  R8, DI
 | |
| 	MOVB DI, (AX)
 | |
| 	ADDQ $0x02, AX
 | |
| 	JMP  lz4s_snappy_loop
 | |
| // 3-byte copy: tag = ((length-1) << 2) | 2, followed by the 16-bit offset.
 | |
| emit_copy_three_lz4_s2:
 | |
| 	LEAL -2(DI), DI
 | |
| 	MOVB DI, (AX)
 | |
| 	MOVW R8, 1(AX)
 | |
| 	ADDQ $0x03, AX
 | |
| 	JMP  lz4s_snappy_loop
 | |
| // Success: return the uncompressed byte count and the number of dst bytes written.
 | |
| lz4s_snappy_done:
 | |
| 	MOVQ dst_base+0(FP), CX
 | |
| 	SUBQ CX, AX // AX = bytes written to dst
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	MOVQ AX, dstUsed+56(FP)
 | |
| 	RET
 | |
| // Malformed input: uncompressed = -1 (dstUsed is not stored).
 | |
| lz4s_snappy_corrupt:
 | |
| 	XORQ AX, AX
 | |
| 	LEAQ -1(AX), SI
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	RET
 | |
| // Destination too small: uncompressed = -2 (dstUsed is not stored).
 | |
| lz4s_snappy_dstfull:
 | |
| 	XORQ AX, AX
 | |
| 	LEAQ -2(AX), SI
 | |
| 	MOVQ SI, uncompressed+48(FP)
 | |
| 	RET
 |